From 0968df43cdd7cbb154c021cd32a7cac25a19aff7 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Sun, 20 Sep 2020 15:25:55 -0400 Subject: c++: Add test for PR90199. Fixed by r11-2998, which fixed this ICE too. gcc/testsuite/ChangeLog: PR c++/90199 * g++.dg/cpp1y/constexpr-90199.C: New test. --- gcc/testsuite/g++.dg/cpp1y/constexpr-90199.C | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-90199.C (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-90199.C b/gcc/testsuite/g++.dg/cpp1y/constexpr-90199.C new file mode 100644 index 0000000..0e3f2be --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-90199.C @@ -0,0 +1,28 @@ +// PR c++/90199 +// { dg-do compile { target c++14 } } +// { dg-additional-options "-frounding-math" } + +template +class complex; + +template constexpr complex +operator+ (complex hd, complex qc) +{ + hd += qc; + return hd; +} + +template <> +class complex { +public: + constexpr complex + operator+= (complex rm) + { + jp += rm.jp; + return *this; + } + + _Complex float jp; +}; + +constexpr complex fl{3.3}, ka{1.1}, r0 = fl + ka; -- cgit v1.1 From 363e7755f227656684c8e284307ceee451503ca4 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sun, 20 Sep 2020 19:37:17 +0000 Subject: Fix linkage with -nodefaultlibs option. 2020-09-20 John David Anglin <danglin@gcc.gnu.org> gcc/ChangeLog * config/pa/pa-hpux11.h (LINK_GCC_C_SEQUENCE_SPEC): Delete. * config/pa/pa64-hpux.h (LINK_GCC_C_SEQUENCE_SPEC): Likewise. (ENDFILE_SPEC): Link with libgcc_stub.a and milli.a. * config/pa/pa32-linux.h (ENDFILE_SPEC): Link with libgcc.a. 
--- gcc/config/pa/pa-hpux11.h | 5 ----- gcc/config/pa/pa32-linux.h | 5 +++++ gcc/config/pa/pa64-hpux.h | 12 +++++------- 3 files changed, 10 insertions(+), 12 deletions(-) (limited to 'gcc') diff --git a/gcc/config/pa/pa-hpux11.h b/gcc/config/pa/pa-hpux11.h index 794bf8e..2820720 100644 --- a/gcc/config/pa/pa-hpux11.h +++ b/gcc/config/pa/pa-hpux11.h @@ -154,11 +154,6 @@ along with GCC; see the file COPYING3. If not see %{!mt:%{!pthread:-a shared -lc -a archive}}}}\ %{shared:%{mt|pthread:-lpthread}}" -/* The libgcc_stub.a library needs to come last. */ -#undef LINK_GCC_C_SEQUENCE_SPEC -#define LINK_GCC_C_SEQUENCE_SPEC \ - "%G %{!nolibc:%L} %G %{!nostdlib:%{!nodefaultlibs:%{!shared:-lgcc_stub}}}" - #undef STARTFILE_SPEC #define STARTFILE_SPEC \ "%{!shared:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}} \ diff --git a/gcc/config/pa/pa32-linux.h b/gcc/config/pa/pa32-linux.h index f271bbf..970722a 100644 --- a/gcc/config/pa/pa32-linux.h +++ b/gcc/config/pa/pa32-linux.h @@ -57,6 +57,11 @@ call_ ## FUNC (void) \ } #endif +/* We need to link against libgcc.a for __canonicalize_funcptr_for_compare + and $$dyncall. */ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC GNU_USER_TARGET_ENDFILE_SPEC "libgcc.a%s" + #undef WCHAR_TYPE #define WCHAR_TYPE "long int" diff --git a/gcc/config/pa/pa64-hpux.h b/gcc/config/pa/pa64-hpux.h index c7d127f7..096aa4b 100644 --- a/gcc/config/pa/pa64-hpux.h +++ b/gcc/config/pa/pa64-hpux.h @@ -103,12 +103,6 @@ along with GCC; see the file COPYING3. If not see %{shared:%{mt|pthread:-lpthread}}" #endif -/* The libgcc_stub.a and milli.a libraries need to come last. */ -#undef LINK_GCC_C_SEQUENCE_SPEC -#define LINK_GCC_C_SEQUENCE_SPEC "\ - %G %{!nolibc:%L} %G %{!nostdlib:%{!nodefaultlibs:%{!shared:-lgcc_stub}\ - milli.a%s}}" - /* Under hpux11, the normal location of the `ld' and `as' programs is the /usr/ccs/bin directory. 
*/ @@ -335,8 +329,12 @@ do { \ %{static:crtbeginT%O%s} %{!static:%{!shared:crtbegin%O%s} \ %{shared:crtbeginS%O%s}}" #endif + +/* The libgcc_stub.a and milli.a libraries must come last. We need + to link with these libraries whenever start files are needed. */ #undef ENDFILE_SPEC -#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}" +#define ENDFILE_SPEC \ + "%{!shared:crtend%O%s libgcc_stub.a%s} %{shared:crtendS%O%s} milli.a%s" /* Since HP uses the .init and .fini sections for array initializers and finalizers, we need different defines for INIT_SECTION_ASM_OP -- cgit v1.1 From 11da31998af6d7b7d4d6fcd3f705c17d69baf58b Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Mon, 21 Sep 2020 00:16:24 +0000 Subject: Daily bump. --- gcc/ChangeLog | 46 ++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/testsuite/ChangeLog | 5 +++++ 3 files changed, 52 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 41b34b2..20009c2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,49 @@ +2020-09-20 John David Anglin < danglin@gcc.gnu.org> + + * config/pa/pa-hpux11.h (LINK_GCC_C_SEQUENCE_SPEC): Delete. + * config/pa/pa64-hpux.h (LINK_GCC_C_SEQUENCE_SPEC): Likewise. + (ENDFILE_SPEC): Link with libgcc_stub.a and mill.a. + * config/pa/pa32-linux.h (ENDFILE_SPEC): Link with libgcc.a. + +2020-09-20 Jan Hubicka + + * ipa-modref.c (dump_lto_records): Fix ICE. + +2020-09-20 David Cepelik + Jan Hubicka + + * Makefile.in: Add ipa-modref.c and ipa-modref-tree.c. + * alias.c: (reference_alias_ptr_type_1): Export. + * alias.h (reference_alias_ptr_type_1): Declare. + * common.opt (fipa-modref): New. + * gengtype.c (open_base_files): Add ipa-modref-tree.h and ipa-modref.h + * ipa-modref-tree.c: New file. + * ipa-modref-tree.h: New file. + * ipa-modref.c: New file. + * ipa-modref.h: New file. + * lto-section-in.c (lto_section_name): Add ipa_modref. 
+ * lto-streamer.h (enum lto_section_type): Add LTO_section_ipa_modref. + * opts.c (default_options_table): Enable ipa-modref at -O1+. + * params.opt (-param=modref-max-bases, -param=modref-max-refs, + -param=modref-max-tests): New params. + * passes.def: Schedule pass_modref and pass_ipa_modref. + * timevar.def (TV_IPA_MODREF): New timevar. + (TV_TREE_MODREF): New timevar. + * tree-pass.h (make_pass_modref): Declare. + (make_pass_ipa_modref): Declare. + * tree-ssa-alias.c (dump_alias_stats): Include ipa-modref-tree.h + and ipa-modref.h + (alias_stats): Add modref_use_may_alias, modref_use_no_alias, + modref_clobber_may_alias, modref_clobber_no_alias, modref_tests. + (dump_alias_stats): Dump new stats. + (nonoverlapping_array_refs_p): Fix formating. + (modref_may_conflict): New function. + (ref_maybe_used_by_call_p_1): Use it. + (call_may_clobber_ref_p_1): Use it. + (call_may_clobber_ref_p): Update. + (stmt_may_clobber_ref_p_1): Update. + * tree-ssa-alias.h (call_may_clobber_ref_p_1): Update. + 2020-09-19 Martin Sebor PR middle-end/82608 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 2225e46..fb22edf 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200920 +20200921 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 716f644..ee34444 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-09-20 Marek Polacek + + PR c++/90199 + * g++.dg/cpp1y/constexpr-90199.C: New test. + 2020-09-19 Martin Sebor PR middle-end/82608 -- cgit v1.1 From 432c551b17d655823a4825855b4b966441c8cfb3 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Tue, 1 Sep 2020 14:14:45 +0200 Subject: Support new mallinfo2 function. gcc/ChangeLog: * config.in: Regenerate. * configure: Likewise. * configure.ac: Detect mallinfo2. * ggc-common.c (report_heap_memory_use): Use it. * system.h: Handle also HAVE_MALLINFO2. 
--- gcc/config.in | 16 ++++++++++++++-- gcc/configure | 4 ++-- gcc/configure.ac | 4 ++-- gcc/ggc-common.c | 12 +++++++++--- gcc/system.h | 2 +- 5 files changed, 28 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/config.in b/gcc/config.in index 478e74f..1832c11 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -983,13 +983,19 @@ #endif -/* Define to 1 if we found a declaration for 'mallinfo', otherwise define to - 0. */ +/* Define to 1 if we found a declaration for 'mallinfo */ #ifndef USED_FOR_TARGET #undef HAVE_DECL_MALLINFO #endif +/* Define to 1 if we found a declaration for 'mallinfo2', otherwise define to + 0. */ +#ifndef USED_FOR_TARGET +#undef HAVE_DECL_MALLINFO2 +#endif + + /* Define to 1 if we found a declaration for 'malloc', otherwise define to 0. */ #ifndef USED_FOR_TARGET @@ -1665,6 +1671,12 @@ #endif +/* Define to 1 if you have the `mallinfo2' function. */ +#ifndef USED_FOR_TARGET +#undef HAVE_MALLINFO2 +#endif + + /* Define to 1 if you have the header file. 
*/ #ifndef USED_FOR_TARGET #undef HAVE_MALLOC_H diff --git a/gcc/configure b/gcc/configure index 0a09777..d33ee9f 100755 --- a/gcc/configure +++ b/gcc/configure @@ -10120,7 +10120,7 @@ fi for ac_func in times clock kill getrlimit setrlimit atoq \ popen sysconf strsignal getrusage nl_langinfo \ gettimeofday mbstowcs wcswidth mmap setlocale \ - clearerr_unlocked feof_unlocked ferror_unlocked fflush_unlocked fgetc_unlocked fgets_unlocked fileno_unlocked fprintf_unlocked fputc_unlocked fputs_unlocked fread_unlocked fwrite_unlocked getchar_unlocked getc_unlocked putchar_unlocked putc_unlocked madvise mallinfo + clearerr_unlocked feof_unlocked ferror_unlocked fflush_unlocked fgetc_unlocked fgets_unlocked fileno_unlocked fprintf_unlocked fputc_unlocked fputs_unlocked fread_unlocked fwrite_unlocked getchar_unlocked getc_unlocked putchar_unlocked putc_unlocked madvise mallinfo mallinfo2 do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_cxx_check_func "$LINENO" "$ac_func" "$as_ac_var" @@ -11549,7 +11549,7 @@ fi done -for ac_func in mallinfo +for ac_func in mallinfo, mallinfo2 do ac_tr_decl=`$as_echo "HAVE_DECL_$ac_func" | $as_tr_cpp` { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $ac_func is declared" >&5 diff --git a/gcc/configure.ac b/gcc/configure.ac index 6a233a3c..fedb7b2 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -1408,7 +1408,7 @@ define(gcc_UNLOCKED_FUNCS, clearerr_unlocked feof_unlocked dnl AC_CHECK_FUNCS(times clock kill getrlimit setrlimit atoq \ popen sysconf strsignal getrusage nl_langinfo \ gettimeofday mbstowcs wcswidth mmap setlocale \ - gcc_UNLOCKED_FUNCS madvise mallinfo) + gcc_UNLOCKED_FUNCS madvise mallinfo mallinfo2) if test x$ac_cv_func_mbstowcs = xyes; then AC_CACHE_CHECK(whether mbstowcs works, gcc_cv_func_mbstowcs_works, @@ -1488,7 +1488,7 @@ gcc_AC_CHECK_DECLS(getrlimit setrlimit getrusage, , ,[ #endif ]) -gcc_AC_CHECK_DECLS(mallinfo, , ,[ +gcc_AC_CHECK_DECLS([mallinfo, mallinfo2], , ,[ #include "ansidecl.h" 
#include "system.h" #ifdef HAVE_MALLOC_H diff --git a/gcc/ggc-common.c b/gcc/ggc-common.c index 94da02f..6f8da30 100644 --- a/gcc/ggc-common.c +++ b/gcc/ggc-common.c @@ -1008,13 +1008,19 @@ ggc_prune_overhead_list (void) } } -/* Return memory used by heap in kb, 0 if this info is not available. */ +/* Print memory used by heap in kb if this info is available. */ void report_heap_memory_use () { -#ifdef HAVE_MALLINFO +#if defined(HAVE_MALLINFO) || defined(HAVE_MALLINFO2) +#ifdef HAVE_MALLINFO2 + #define MALLINFO_FN mallinfo2 +#else + #define MALLINFO_FN mallinfo +#endif if (!quiet_flag) - fprintf (stderr," {heap %luk}", (unsigned long)(mallinfo().arena / 1024)); + fprintf (stderr," {heap %luk}", + (unsigned long) MALLINFO_FN ().arena / ONE_K); #endif } diff --git a/gcc/system.h b/gcc/system.h index 3c543a0..4f0482b 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -732,7 +732,7 @@ extern int vsnprintf (char *, size_t, const char *, va_list); #endif #ifdef INCLUDE_MALLOC_H -#ifdef HAVE_MALLINFO +#if defined(HAVE_MALLINFO) || defined(HAVE_MALLINFO2) #include #endif #endif -- cgit v1.1 From 79f4e20dd1280e6a44736070b0d5213f9a8f85d4 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Wed, 2 Sep 2020 14:30:16 +0200 Subject: Use SIZE_AMOUNT macro for GGC memory allocation numbers. gcc/ChangeLog: * ggc-common.c (ggc_prune_overhead_list): Use SIZE_AMOUNT. * ggc-page.c (release_pages): Likewise. (ggc_collect): Likewise. (ggc_trim): Likewise. (ggc_grow): Likewise. * timevar.c (timer::print): Likewise. gcc/testsuite/ChangeLog: * g++.dg/ext/timevar1.C: Prune more possible number values. * g++.dg/ext/timevar2.C: Likewise. 
--- gcc/ggc-common.c | 6 +++--- gcc/ggc-page.c | 15 +++++++-------- gcc/testsuite/g++.dg/ext/timevar1.C | 3 ++- gcc/testsuite/g++.dg/ext/timevar2.C | 3 ++- gcc/timevar.c | 8 ++++---- 5 files changed, 18 insertions(+), 17 deletions(-) (limited to 'gcc') diff --git a/gcc/ggc-common.c b/gcc/ggc-common.c index 6f8da30..007a028 100644 --- a/gcc/ggc-common.c +++ b/gcc/ggc-common.c @@ -1008,7 +1008,7 @@ ggc_prune_overhead_list (void) } } -/* Print memory used by heap in kb if this info is available. */ +/* Print memory used by heap if this info is available. */ void report_heap_memory_use () @@ -1020,7 +1020,7 @@ report_heap_memory_use () #define MALLINFO_FN mallinfo #endif if (!quiet_flag) - fprintf (stderr," {heap %luk}", - (unsigned long) MALLINFO_FN ().arena / ONE_K); + fprintf (stderr, " {heap " PRsa (0) "}", + SIZE_AMOUNT (MALLINFO_FN ().arena)); #endif } diff --git a/gcc/ggc-page.c b/gcc/ggc-page.c index 53b311c..9405f03 100644 --- a/gcc/ggc-page.c +++ b/gcc/ggc-page.c @@ -1164,9 +1164,9 @@ release_pages (void) { fprintf (stderr, " {GC"); if (n1) - fprintf (stderr, " released %luk", (unsigned long)(n1 / 1024)); + fprintf (stderr, " released " PRsa (0), SIZE_AMOUNT (n1)); if (n2) - fprintf (stderr, " madv_dontneed %luk", (unsigned long)(n2 / 1024)); + fprintf (stderr, " madv_dontneed " PRsa (0), SIZE_AMOUNT (n2)); fprintf (stderr, "}"); } } @@ -2208,7 +2208,7 @@ ggc_collect (void) /* Output this later so we do not interfere with release_pages. */ if (!quiet_flag) - fprintf (stderr, " {GC %luk -> ", (unsigned long) allocated / 1024); + fprintf (stderr, " {GC " PRsa (0) " -> ", SIZE_AMOUNT (allocated)); /* Indicate that we've seen collections at this context depth. 
*/ G.context_depth_collections = ((unsigned long)1 << (G.context_depth + 1)) - 1; @@ -2235,7 +2235,7 @@ ggc_collect (void) timevar_pop (TV_GC); if (!quiet_flag) - fprintf (stderr, "%luk}", (unsigned long) G.allocated / 1024); + fprintf (stderr, PRsa (0) "}", SIZE_AMOUNT (G.allocated)); if (GGC_DEBUG_LEVEL >= 2) fprintf (G.debug_file, "END COLLECTING\n"); } @@ -2250,9 +2250,8 @@ ggc_trim () sweep_pages (); release_pages (); if (!quiet_flag) - fprintf (stderr, " {GC trimmed to %luk, %luk mapped}", - (unsigned long) G.allocated / 1024, - (unsigned long) G.bytes_mapped / 1024); + fprintf (stderr, " {GC trimmed to " PRsa (0) ", " PRsa (0) " mapped}", + SIZE_AMOUNT (G.allocated), SIZE_AMOUNT (G.bytes_mapped)); timevar_pop (TV_GC); } @@ -2269,7 +2268,7 @@ ggc_grow (void) else ggc_collect (); if (!quiet_flag) - fprintf (stderr, " {GC %luk} ", (unsigned long) G.allocated / 1024); + fprintf (stderr, " {GC " PRsa (0) "} ", SIZE_AMOUNT (G.allocated)); } void diff --git a/gcc/testsuite/g++.dg/ext/timevar1.C b/gcc/testsuite/g++.dg/ext/timevar1.C index 3f891a5..988a6f8 100644 --- a/gcc/testsuite/g++.dg/ext/timevar1.C +++ b/gcc/testsuite/g++.dg/ext/timevar1.C @@ -2,7 +2,8 @@ // { dg-options "-ftime-report" } // { dg-allow-blank-lines-in-output 1 } // { dg-prune-output "Time variable" } -// { dg-prune-output " kB" } +// { dg-prune-output "k" } +// { dg-prune-output " 0 " } // { dg-prune-output "checks" } void diff --git a/gcc/testsuite/g++.dg/ext/timevar2.C b/gcc/testsuite/g++.dg/ext/timevar2.C index dd96d45..46c3e1b 100644 --- a/gcc/testsuite/g++.dg/ext/timevar2.C +++ b/gcc/testsuite/g++.dg/ext/timevar2.C @@ -1,7 +1,8 @@ // PR c++/57524 // { dg-options "-ftime-report" } // { dg-prune-output "Time variable" } -// { dg-prune-output " kB" } +// { dg-prune-output "k" } +// { dg-prune-output " 0 " } // { dg-prune-output "checks" } namespace detail { diff --git a/gcc/timevar.c b/gcc/timevar.c index a3a882d..8fbf5fa 100644 --- a/gcc/timevar.c +++ b/gcc/timevar.c @@ -661,8 +661,8 @@ 
timer::print_row (FILE *fp, #endif /* HAVE_WALL_TIME */ /* Print the amount of ggc memory allocated. */ - fprintf (fp, "%8u kB (%3.0f%%)", - (unsigned) (elapsed.ggc_mem >> 10), + fprintf (fp, PRsa (6) " (%3.0f%%)", + SIZE_AMOUNT (elapsed.ggc_mem), (total->ggc_mem == 0 ? 0 : (float) elapsed.ggc_mem / total->ggc_mem) * 100); @@ -712,7 +712,7 @@ timer::print (FILE *fp) TIMEVAR. */ m_start_time = now; - fprintf (fp, "\n%-35s%16s%14s%14s%18s\n", "Time variable", "usr", "sys", + fprintf (fp, "\n%-35s%16s%14s%14s%14s\n", "Time variable", "usr", "sys", "wall", "GGC"); if (m_jit_client_items) fputs ("GCC items:\n", fp); @@ -776,7 +776,7 @@ timer::print (FILE *fp) #ifdef HAVE_WALL_TIME fprintf (fp, "%8.2f ", total->wall); #endif - fprintf (fp, "%9u kB\n", (unsigned) (total->ggc_mem >> 10)); + fprintf (fp, PRsa (7) "\n", SIZE_AMOUNT (total->ggc_mem)); if (CHECKING_P || flag_checking) fprintf (fp, "Extra diagnostic checks enabled; compiler may run slowly.\n"); -- cgit v1.1 From da87190421877142987487b594b4ba9b78ec29af Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Wed, 2 Sep 2020 14:34:21 +0200 Subject: Use ONE_? macros. gcc/ChangeLog: * ggc-common.c (ggc_rlimit_bound): Use ONE_? macro. (ggc_min_expand_heuristic): Likewise. (ggc_min_heapsize_heuristic): Likewise. * ggc-page.c (ggc_collect): Likewise. * system.h (ONE_G): Likewise. --- gcc/ggc-common.c | 16 ++++++++-------- gcc/ggc-page.c | 2 +- gcc/system.h | 1 + 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'gcc') diff --git a/gcc/ggc-common.c b/gcc/ggc-common.c index 007a028..571c50e 100644 --- a/gcc/ggc-common.c +++ b/gcc/ggc-common.c @@ -742,7 +742,7 @@ ggc_rlimit_bound (double limit) appears to be ignored. Ignore such silliness. If a limit this small was actually effective for mmap, GCC wouldn't even start up. 
*/ - && rlim.rlim_cur >= 8 * 1024 * 1024) + && rlim.rlim_cur >= 8 * ONE_M) limit = rlim.rlim_cur; # endif /* RLIMIT_AS or RLIMIT_DATA */ #endif /* HAVE_GETRLIMIT */ @@ -761,7 +761,7 @@ ggc_min_expand_heuristic (void) /* The heuristic is a percentage equal to 30% + 70%*(RAM/1GB), yielding a lower bound of 30% and an upper bound of 100% (when RAM >= 1GB). */ - min_expand /= 1024*1024*1024; + min_expand /= ONE_G; min_expand *= 70; min_expand = MIN (min_expand, 70); min_expand += 30; @@ -776,8 +776,8 @@ ggc_min_heapsize_heuristic (void) double phys_kbytes = physmem_total (); double limit_kbytes = ggc_rlimit_bound (phys_kbytes * 2); - phys_kbytes /= 1024; /* Convert to Kbytes. */ - limit_kbytes /= 1024; + phys_kbytes /= ONE_K; /* Convert to Kbytes. */ + limit_kbytes /= ONE_K; /* The heuristic is RAM/8, with a lower bound of 4M and an upper bound of 128M (when RAM >= 1GB). */ @@ -790,7 +790,7 @@ ggc_min_heapsize_heuristic (void) struct rlimit rlim; if (getrlimit (RLIMIT_RSS, &rlim) == 0 && rlim.rlim_cur != (rlim_t) RLIM_INFINITY) - phys_kbytes = MIN (phys_kbytes, rlim.rlim_cur / 1024); + phys_kbytes = MIN (phys_kbytes, rlim.rlim_cur / ONE_K); } # endif @@ -798,12 +798,12 @@ ggc_min_heapsize_heuristic (void) *next* GC would be within 20Mb of the limit or within a quarter of the limit, whichever is larger. If GCC does hit the data limit, compilation will fail, so this tries to be conservative. 
*/ - limit_kbytes = MAX (0, limit_kbytes - MAX (limit_kbytes / 4, 20 * 1024)); + limit_kbytes = MAX (0, limit_kbytes - MAX (limit_kbytes / 4, 20 * ONE_K)); limit_kbytes = (limit_kbytes * 100) / (110 + ggc_min_expand_heuristic ()); phys_kbytes = MIN (phys_kbytes, limit_kbytes); - phys_kbytes = MAX (phys_kbytes, 4 * 1024); - phys_kbytes = MIN (phys_kbytes, 128 * 1024); + phys_kbytes = MAX (phys_kbytes, 4 * ONE_K); + phys_kbytes = MIN (phys_kbytes, 128 * ONE_K); return phys_kbytes; } diff --git a/gcc/ggc-page.c b/gcc/ggc-page.c index 9405f03..07e108f 100644 --- a/gcc/ggc-page.c +++ b/gcc/ggc-page.c @@ -2184,7 +2184,7 @@ ggc_collect (void) total allocations haven't expanded much since the last collection. */ float allocated_last_gc = - MAX (G.allocated_last_gc, (size_t)param_ggc_min_heapsize * 1024); + MAX (G.allocated_last_gc, (size_t)param_ggc_min_heapsize * ONE_K); /* It is also good time to get memory block pool into limits. */ memory_block_pool::trim (); diff --git a/gcc/system.h b/gcc/system.h index 4f0482b..b0f3f1d 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -1237,6 +1237,7 @@ void gcc_stablesort (void *, size_t, size_t, #define ONE_K 1024 #define ONE_M (ONE_K * ONE_K) +#define ONE_G (ONE_K * ONE_M) /* Display a number as an integer multiple of either: - 1024, if said integer is >= to 10 K (in base 2) -- cgit v1.1 From f5e73de00e9c853ce65333efada7409b0d00f758 Mon Sep 17 00:00:00 2001 From: Andrea Corallo Date: Mon, 14 Sep 2020 14:47:24 +0100 Subject: aarch64: Fix ICE on fpsr fpcr getters [PR96968] gcc/ChangeLog 2020-09-14 Andrea Corallo PR target/96968 * config/aarch64/aarch64-builtins.c (aarch64_expand_fpsr_fpcr_setter): Fix comment nit. (aarch64_expand_fpsr_fpcr_getter): New function, expand these getters using expand_insn machinery. (aarch64_general_expand_builtin): Make use of. gcc/testsuite/ChangeLog 2020-09-14 Andrea Corallo PR target/96968 * gcc.target/aarch64/pr96968.c: New test. 
--- gcc/config/aarch64/aarch64-builtins.c | 30 +++++++++++++++++++++--------- gcc/testsuite/gcc.target/aarch64/pr96968.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr96968.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 4f33dd9..2f4753b 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -2024,7 +2024,7 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target) return target; } -/* Expand an expression EXP as fpsr or cpsr setter (depending on +/* Expand an expression EXP as fpsr or fpcr setter (depending on UNSPEC) using MODE. */ static void aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp) @@ -2034,6 +2034,18 @@ aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp) emit_insn (gen_aarch64_set (unspec, mode, op)); } +/* Expand a fpsr or fpcr getter (depending on UNSPEC) using MODE. + Return the target. */ +static rtx +aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode, + rtx target) +{ + expand_operand op; + create_output_operand (&op, target, mode); + expand_insn (icode, 1, &op); + return op.value; +} + /* Expand an expression EXP that calls built-in function FCODE, with result going to TARGET if that's convenient. IGNORE is true if the result of the builtin is ignored. 
*/ @@ -2048,26 +2060,26 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, switch (fcode) { case AARCH64_BUILTIN_GET_FPCR: - emit_insn (gen_aarch64_get (UNSPECV_GET_FPCR, SImode, target)); - return target; + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrsi, + SImode, target); case AARCH64_BUILTIN_SET_FPCR: aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, SImode, exp); return target; case AARCH64_BUILTIN_GET_FPSR: - emit_insn (gen_aarch64_get (UNSPECV_GET_FPSR, SImode, target)); - return target; + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrsi, + SImode, target); case AARCH64_BUILTIN_SET_FPSR: aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, SImode, exp); return target; case AARCH64_BUILTIN_GET_FPCR64: - emit_insn (gen_aarch64_get (UNSPECV_GET_FPCR, DImode, target)); - return target; + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrdi, + DImode, target); case AARCH64_BUILTIN_SET_FPCR64: aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, DImode, exp); return target; case AARCH64_BUILTIN_GET_FPSR64: - emit_insn (gen_aarch64_get (UNSPECV_GET_FPSR, DImode, target)); - return target; + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrdi, + DImode, target); case AARCH64_BUILTIN_SET_FPSR64: aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, DImode, exp); return target; diff --git a/gcc/testsuite/gcc.target/aarch64/pr96968.c b/gcc/testsuite/gcc.target/aarch64/pr96968.c new file mode 100644 index 0000000..21ffd95 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr96968.c @@ -0,0 +1,28 @@ +/* { dg-options "-O1" } */ + +void +fpsr_getter (void) +{ + unsigned int fpsr = __builtin_aarch64_get_fpsr (); +} + +void +fpsr64_getter (void) +{ + unsigned long fpsr = __builtin_aarch64_get_fpsr64 (); +} + +void +fpcr_getter (void) +{ + unsigned int fpcr = __builtin_aarch64_get_fpcr (); +} + +void +fpcr64_getter (void) +{ + unsigned long fpcr = __builtin_aarch64_get_fpcr64 (); +} + 
+/* { dg-final { scan-assembler-times {\tmrs\tx0, fpsr\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tmrs\tx0, fpcr\n} 2 } } */ -- cgit v1.1 From 48b0c1250a5c7d72be6b3fbbb1117d1cce43daee Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Mon, 21 Sep 2020 12:46:00 +0200 Subject: POLY_INT_CST: remove extra space in dump gcc/ChangeLog: * print-tree.c (print_node): Remove extra space. --- gcc/print-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/print-tree.c b/gcc/print-tree.c index 2a9c98e..d1150e4 100644 --- a/gcc/print-tree.c +++ b/gcc/print-tree.c @@ -851,7 +851,7 @@ print_node (FILE *file, const char *prefix, tree node, int indent, char buf[10]; for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i) { - snprintf (buf, sizeof (buf), "elt%u: ", i); + snprintf (buf, sizeof (buf), "elt%u:", i); print_node (file, buf, POLY_INT_CST_COEFF (node, i), indent + 4); } -- cgit v1.1 From d726ecd9554a805d4a5e044cb21ca23a7f7ca49f Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Mon, 21 Sep 2020 12:50:37 +0200 Subject: Fix ICE in tree-switch-conversion. With SVE we can end up with: switch (POLY_INT_CST [2, 2]) [INV], case 2: [INV], case 4: [INV]> which is fine to expand and we can remove the assert. gcc/ChangeLog: PR tree-optimization/96915 * tree-switch-conversion.c (switch_conversion::expand): Accept also integer constants. gcc/testsuite/ChangeLog: PR tree-optimization/96915 * gcc.target/aarch64/sve/pr96915.c: New test. 
--- gcc/testsuite/gcc.target/aarch64/sve/pr96915.c | 11 +++++++++++ gcc/tree-switch-conversion.c | 3 --- 2 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr96915.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr96915.c b/gcc/testsuite/gcc.target/aarch64/sve/pr96915.c new file mode 100644 index 0000000..fae4cd4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr96915.c @@ -0,0 +1,11 @@ +/* PR tree-optimization/96915 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8.2-a+sve" } */ + +#pragma GCC aarch64 "arm_sve.h" +void b() { + switch (svcntd()) + case 2: + case 4: + b(); +} diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c index 4b43594..186411f 100644 --- a/gcc/tree-switch-conversion.c +++ b/gcc/tree-switch-conversion.c @@ -984,9 +984,6 @@ switch_conversion::expand (gswitch *swtch) during gimplification). */ gcc_checking_assert (TREE_TYPE (m_index_expr) != error_mark_node); - /* A switch on a constant should have been optimized in tree-cfg-cleanup. */ - gcc_checking_assert (!TREE_CONSTANT (m_index_expr)); - /* Prefer bit test if possible. */ if (tree_fits_uhwi_p (m_range_size) && bit_test_cluster::can_be_handled (tree_to_uhwi (m_range_size), m_uniq) -- cgit v1.1 From 0df746afc50a47d1eb53a401e017c4373cf05641 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 21 Sep 2020 14:04:25 +0200 Subject: tree-optimization/97135 - fix dependence check in store-motion The following fixes a dependence check where in the particular place we cannot ignore self-dependences. 2020-09-21 Richard Biener PR tree-optimization/97135 * tree-ssa-loop-im.c (sm_seq_push_down): Do not ignore self-dependences. * gcc.dg/torture/pr97135.c: New testcase. 
--- gcc/testsuite/gcc.dg/torture/pr97135.c | 21 +++++++++++++++++++++ gcc/tree-ssa-loop-im.c | 8 +++++--- 2 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr97135.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/torture/pr97135.c b/gcc/testsuite/gcc.dg/torture/pr97135.c new file mode 100644 index 0000000..223f4d0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr97135.c @@ -0,0 +1,21 @@ +/* { dg-do run } */ + +long long e, *d = &e; +int a, b, c; + +int +main () +{ + for (; c <= 5; c++) + for (b = 0; b <= 5; b++) + { + for (a = 1; a <= 5; a++) + ; + *d = 0; + if (c) + break; + } + if (a != 6) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index f87c287..139c7e7 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -2232,9 +2232,11 @@ sm_seq_push_down (vec &seq, unsigned ptr, unsigned *at) || (against.second == sm_other && against.from != NULL_TREE)) /* Found the tail of the sequence. */ break; - if (!refs_independent_p (memory_accesses.refs_list[new_cand.first], - memory_accesses.refs_list[against.first], - false)) + /* We may not ignore self-dependences here. */ + if (new_cand.first == against.first + || !refs_independent_p (memory_accesses.refs_list[new_cand.first], + memory_accesses.refs_list[against.first], + false)) /* ??? Prune new_cand from the list of refs to apply SM to. */ return false; std::swap (new_cand, against); -- cgit v1.1 From 2c62952f8160bdc8d4111edb34a4bc75096c1e05 Mon Sep 17 00:00:00 2001 From: Andrea Corallo Date: Thu, 17 Sep 2020 17:17:52 +0100 Subject: aarch64: Do not alter value on a force_reg returned rtx expanding __jcvt 2020-09-17 Andrea Corallo * config/aarch64/aarch64-builtins.c (aarch64_general_expand_builtin): Use expand machinery not to alter the value of an rtx returned by force_reg. 
--- gcc/config/aarch64/aarch64-builtins.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 2f4753b..1cfb5c0 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -2140,14 +2140,14 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, return target; case AARCH64_JSCVT: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = force_reg (DFmode, expand_normal (arg0)); - if (!target) - target = gen_reg_rtx (SImode); - else - target = force_reg (SImode, target); - emit_insn (GEN_FCN (CODE_FOR_aarch64_fjcvtzs) (target, op0)); - return target; + { + expand_operand ops[2]; + create_output_operand (&ops[0], target, SImode); + op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); + create_input_operand (&ops[1], op0, DFmode); + expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops); + return ops[0].value; + } case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF: case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF: -- cgit v1.1 From 5187b01a4fcbea82f52798240595a32ddf7e0fe5 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 21 Sep 2020 14:20:05 +0200 Subject: Fix fallout from Support new mallinfo2 function. 2020-09-21 Jakub Jelinek * configure.ac: Use mallinfo mallinfo2 as first operand of gcc_AC_CHECK_DECLS rather than [mallinfo, mallinfo2]. * configure: Regenerated. * config.in: Regenerated. --- gcc/config.in | 3 ++- gcc/configure | 2 +- gcc/configure.ac | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/config.in b/gcc/config.in index 1832c11..5835cea 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -983,7 +983,8 @@ #endif -/* Define to 1 if we found a declaration for 'mallinfo */ +/* Define to 1 if we found a declaration for 'mallinfo', otherwise define to + 0. 
*/ #ifndef USED_FOR_TARGET #undef HAVE_DECL_MALLINFO #endif diff --git a/gcc/configure b/gcc/configure index d33ee9f..33a3e34 100755 --- a/gcc/configure +++ b/gcc/configure @@ -11549,7 +11549,7 @@ fi done -for ac_func in mallinfo, mallinfo2 +for ac_func in mallinfo mallinfo2 do ac_tr_decl=`$as_echo "HAVE_DECL_$ac_func" | $as_tr_cpp` { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $ac_func is declared" >&5 diff --git a/gcc/configure.ac b/gcc/configure.ac index fedb7b2..975f6d9 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -1488,7 +1488,7 @@ gcc_AC_CHECK_DECLS(getrlimit setrlimit getrusage, , ,[ #endif ]) -gcc_AC_CHECK_DECLS([mallinfo, mallinfo2], , ,[ +gcc_AC_CHECK_DECLS(mallinfo mallinfo2, , ,[ #include "ansidecl.h" #include "system.h" #ifdef HAVE_MALLOC_H -- cgit v1.1 From b6ff694e592669e7865d39a884100dd677e7ceec Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Fri, 18 Sep 2020 19:37:05 -0400 Subject: c++: Detect deduction guide redeclaration [PR97099] [temp.deduct.guide]p3: Two deduction guide declarations in the same translation unit for the same class template shall not have equivalent parameter-declaration-clauses. So let's detect that. gcc/cp/ChangeLog: PR c++/97099 * decl.c (redeclaration_error_message): Detect a redeclaration of deduction guides. gcc/testsuite/ChangeLog: PR c++/97099 * g++.dg/cpp1z/class-deduction74.C: New test. 
--- gcc/cp/decl.c | 20 ++++++++++++----- gcc/testsuite/g++.dg/cpp1z/class-deduction74.C | 31 ++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction74.C (limited to 'gcc') diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 13f065d..af79649 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -3003,6 +3003,10 @@ redeclaration_error_message (tree newdecl, tree olddecl) } } + if (deduction_guide_p (olddecl) + && deduction_guide_p (newdecl)) + return G_("deduction guide %q+D redeclared"); + /* [class.compare.default]: A definition of a comparison operator as defaulted that appears in a class shall be the first declaration of that function. */ @@ -3053,24 +3057,28 @@ redeclaration_error_message (tree newdecl, tree olddecl) "%<gnu_inline%> attribute"); else return G_("%q+D redeclared inline without " - "%<gnu_inline%> attribute"); + "%<gnu_inline%> attribute"); } } - /* Core issue #226 (C++0x): - + if (deduction_guide_p (olddecl) + && deduction_guide_p (newdecl)) + return G_("deduction guide %q+D redeclared"); + + /* Core issue #226 (C++11): + If a friend function template declaration specifies a default template-argument, that declaration shall be a definition and shall be the only declaration of the function template in the translation unit. 
*/ - if ((cxx_dialect != cxx98) + if ((cxx_dialect != cxx98) && TREE_CODE (ot) == FUNCTION_DECL && DECL_FRIEND_P (ot) - && !check_default_tmpl_args (nt, DECL_TEMPLATE_PARMS (newdecl), + && !check_default_tmpl_args (nt, DECL_TEMPLATE_PARMS (newdecl), /*is_primary=*/true, /*is_partial=*/false, /*is_friend_decl=*/2)) return G_("redeclaration of friend %q#D " - "may not have default template arguments"); + "may not have default template arguments"); return NULL; } diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction74.C b/gcc/testsuite/g++.dg/cpp1z/class-deduction74.C new file mode 100644 index 0000000..fe11381 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction74.C @@ -0,0 +1,31 @@ +// PR c++/97099 +// { dg-do compile { target c++17 } } +// [temp.deduct.guide]p3: Two deduction guide declarations in the same +// translation unit for the same class template shall not have equivalent +// parameter-declaration-clauses. + +template struct S { }; +template struct X { }; + +S() -> S; // { dg-message "previously declared here|old declaration" } +S() -> S; // { dg-error "redeclared" } +X() -> X; +S() -> S; // { dg-error "ambiguating new declaration of" } + +S(bool) -> S; // { dg-message "previously declared here" } +explicit S(bool) -> S; // { dg-error "redeclared" } + +explicit S(char) -> S; // { dg-message "previously declared here" } +S(char) -> S; // { dg-error "redeclared" } + +template S(T, T) -> S; // { dg-message "previously declared here" } +template X(T, T) -> X; +template S(T, T) -> S; // { dg-error "redeclared" } + +// OK: Use SFINAE. +template S(T) -> S; +template S(T) -> S; + +// OK: Non-template wins. +S(int) -> S; +template S(int) -> S; -- cgit v1.1 From e6f58fb6196ba16ce070e3722451f040a13f963b Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 21 Sep 2020 16:51:33 +0200 Subject: tree-optimization/97139 - fix BB SLP live lane extraction This fixes SLP live lane extraction with pattern stmts. 
2020-09-21 Richard Biener PR tree-optimization/97139 * tree-vect-slp.c (vect_bb_slp_mark_live_stmts): Only mark the pattern root, track visited vectorized stmts. * gcc.dg/vect/pr97139.c: New testcase. --- gcc/testsuite/gcc.dg/vect/pr97139.c | 27 +++++++++++++++++++++++++++ gcc/tree-vect-slp.c | 10 +++++++--- 2 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr97139.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/pr97139.c b/gcc/testsuite/gcc.dg/vect/pr97139.c new file mode 100644 index 0000000..1b9f31c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97139.c @@ -0,0 +1,27 @@ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +int pix[4]; + +int __attribute__((noipa)) foo (void) +{ + pix[0] = pix[0] / 4; + pix[1] = pix[1] / 4; + pix[2] = pix[2] / 4; + pix[3] = pix[3] / 4; + return pix[0] + pix[1] + pix[2] + pix[3]; +} + +int main () +{ + check_vect (); + + pix[0] = 8; + pix[1] = 16; + pix[2] = 32; + pix[3] = 64; + if (foo () != 30) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index ef62c2d..c44fd39 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3021,10 +3021,14 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node, bool all_visited = true; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) { - stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); - if (svisited.contains (orig_stmt_info)) + if (svisited.contains (stmt_info)) continue; all_visited = false; + stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); + if (STMT_VINFO_IN_PATTERN_P (orig_stmt_info) + && STMT_VINFO_RELATED_STMT (orig_stmt_info) != stmt_info) + /* Only the pattern root stmt computes the original scalar value. 
*/ + continue; bool mark_visited = true; gimple *orig_stmt = orig_stmt_info->stmt; ssa_op_iter op_iter; @@ -3091,7 +3095,7 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node, } } if (mark_visited) - svisited.add (orig_stmt_info); + svisited.add (stmt_info); } if (all_visited) return; -- cgit v1.1 From b0c990f2661a2979c68c840781847efe27a0779b Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Mon, 21 Sep 2020 15:51:48 +0000 Subject: [arm] gcc.target/arm/cs*: Use dg-add-options arm_arch_v8_1m_main These testcases need thumb mode, which may not be the default. Using dg-add-options arm_arch_v8_1m_main ensures that -mthumb is used and makes the test pass in more configurations. 2020-09-21 Christophe Lyon gcc/testsuite/ * gcc.target/arm/csinc-1.c: Use dg-add-options arm_arch_v8_1m_main. * gcc.target/arm/csinv-1.c: Likewise. * gcc.target/arm/csneg.c: Likewise. --- gcc/testsuite/gcc.target/arm/csinc-1.c | 3 ++- gcc/testsuite/gcc.target/arm/csinv-1.c | 3 ++- gcc/testsuite/gcc.target/arm/csneg.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/arm/csinc-1.c b/gcc/testsuite/gcc.target/arm/csinc-1.c index b992849..255e6e8 100644 --- a/gcc/testsuite/gcc.target/arm/csinc-1.c +++ b/gcc/testsuite/gcc.target/arm/csinc-1.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target arm_arch_v8_1m_main_ok } */ -/* { dg-options "-O2 -march=armv8.1-m.main" } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_arch_v8_1m_main } */ int test_csinc32_condasn1(int w0, int w1, int w2, int w3) diff --git a/gcc/testsuite/gcc.target/arm/csinv-1.c b/gcc/testsuite/gcc.target/arm/csinv-1.c index 6b5383a..28450a4 100644 --- a/gcc/testsuite/gcc.target/arm/csinv-1.c +++ b/gcc/testsuite/gcc.target/arm/csinv-1.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target arm_arch_v8_1m_main_ok } */ -/* { dg-options "-O2 -march=armv8.1-m.main" } */ +/* { dg-options "-O2" } */ +/* { 
dg-add-options arm_arch_v8_1m_main } */ int test_csinv32_condasn1(int w0, int w1, int w2, int w3) diff --git a/gcc/testsuite/gcc.target/arm/csneg.c b/gcc/testsuite/gcc.target/arm/csneg.c index e486062..cf3df13 100644 --- a/gcc/testsuite/gcc.target/arm/csneg.c +++ b/gcc/testsuite/gcc.target/arm/csneg.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target arm_arch_v8_1m_main_ok } */ -/* { dg-options "-O2 -march=armv8.1-m.main" } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_arch_v8_1m_main } */ int test_csneg32_condasn1(int w0, int w1, int w2, int w3) -- cgit v1.1 From 4f5a297f7520cf5ba10a3a16f32ea0e1655b203f Mon Sep 17 00:00:00 2001 From: Iain Sandoe Date: Sat, 19 Sep 2020 14:35:58 +0100 Subject: Darwin, testsuite : Skip a test that requires ELF. The symver support is only available to ELF targets. gcc/testsuite/ChangeLog: * gcc.dg/ipa/symver1.c: Skip for Darwin. --- gcc/testsuite/gcc.dg/ipa/symver1.c | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/ipa/symver1.c b/gcc/testsuite/gcc.dg/ipa/symver1.c index 645de7e..fca5220 100644 --- a/gcc/testsuite/gcc.dg/ipa/symver1.c +++ b/gcc/testsuite/gcc.dg/ipa/symver1.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-skip-if "only works for ELF targets" { *-*-darwin* } } */ __attribute__ ((__symver__ ("foo@VER_2"))) __attribute__ ((__symver__ ("foo@VER_3"))) -- cgit v1.1 From 33605e9bef15555b19f06424ea0b777b5fdc0dfa Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Mon, 21 Sep 2020 08:55:15 -0700 Subject: c++: ts_lambda is not needed We don't need ts_lambda, as IDENTIFIER_LAMBDA_P is sufficient. Killed thusly. gcc/cp/ * decl.c (xref_tag_1): Use IDENTIFIER_LAMBDA_P to detect lambdas. * lambda.c (begin_lambda_type): Use ts_current to push the tag. * name-lookup.h (enum tag_scope): Drop ts_lambda. 
--- gcc/cp/decl.c | 33 ++++++++++++++------------------- gcc/cp/lambda.c | 2 +- gcc/cp/name-lookup.h | 1 - 3 files changed, 15 insertions(+), 21 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index af79649..bbecebe 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -14857,10 +14857,10 @@ check_elaborated_type_specifier (enum tag_types tag_code, return type; } -/* Lookup NAME in elaborate type specifier in scope according to - SCOPE and issue diagnostics if necessary. - Return *_TYPE node upon success, NULL_TREE when the NAME is not - found, and ERROR_MARK_NODE for type error. */ +/* Lookup NAME of an elaborated type specifier according to SCOPE and + issue diagnostics if necessary. Return *_TYPE node upon success, + NULL_TREE when the NAME is not found, and ERROR_MARK_NODE for type + error. */ static tree lookup_and_check_tag (enum tag_types tag_code, tree name, @@ -14997,9 +14997,9 @@ xref_tag_1 (enum tag_types tag_code, tree name, /* In case of anonymous name, xref_tag is only called to make type node and push name. Name lookup is not required. */ tree t = NULL_TREE; - if (scope != ts_lambda && !IDENTIFIER_ANON_P (name)) + if (!IDENTIFIER_ANON_P (name)) t = lookup_and_check_tag (tag_code, name, scope, template_header_p); - + if (t == error_mark_node) return error_mark_node; @@ -15052,19 +15052,14 @@ xref_tag_1 (enum tag_types tag_code, tree name, error ("use of enum %q#D without previous declaration", name); return error_mark_node; } - else - { - t = make_class_type (code); - TYPE_CONTEXT (t) = context; - if (scope == ts_lambda) - { - /* Mark it as a lambda type. */ - CLASSTYPE_LAMBDA_EXPR (t) = error_mark_node; - /* And push it into current scope. */ - scope = ts_current; - } - t = pushtag (name, t, scope); - } + + t = make_class_type (code); + TYPE_CONTEXT (t) = context; + if (IDENTIFIER_LAMBDA_P (name)) + /* Mark it as a lambda type right now. Our caller will + correct the value. 
*/ + CLASSTYPE_LAMBDA_EXPR (t) = error_mark_node; + t = pushtag (name, t, scope); } else { diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c index c94fe8e..364a3e9 100644 --- a/gcc/cp/lambda.c +++ b/gcc/cp/lambda.c @@ -135,7 +135,7 @@ begin_lambda_type (tree lambda) /* Create the new RECORD_TYPE for this lambda. */ tree type = xref_tag (/*tag_code=*/record_type, name, - /*scope=*/ts_lambda, /*template_header_p=*/false); + /*scope=*/ts_current, /*template_header_p=*/false); if (type == error_mark_node) return error_mark_node; diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h index 723fbb0..a0815e1 100644 --- a/gcc/cp/name-lookup.h +++ b/gcc/cp/name-lookup.h @@ -139,7 +139,6 @@ enum tag_scope { only, for friend class lookup according to [namespace.memdef]/3 and [class.friend]/9. */ - ts_lambda = 3 /* Declaring a lambda closure. */ }; struct GTY(()) cp_class_binding { -- cgit v1.1 From 5c5ce6099082b642294091c83461c928bd028ea1 Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Mon, 21 Sep 2020 21:50:36 +0200 Subject: PR fortran/90903 [part2] - Add runtime checking for the MVBITS intrinsic Implement inline expansion of the intrinsic elemental subroutine MVBITS with optional runtime checks for valid argument range. gcc/fortran/ChangeLog: * iresolve.c (gfc_resolve_mvbits): Remove unneeded conversion of FROMPOS, LEN and TOPOS arguments to fit a C int. * trans-intrinsic.c (gfc_conv_intrinsic_mvbits): Add inline expansion of MVBITS intrinsic elemental subroutine and add code for runtime argument checking. (gfc_conv_intrinsic_subroutine): Recognise MVBITS intrinsic, but defer handling to gfc_trans_call. * trans-stmt.c (replace_ss): (gfc_trans_call): Adjust to handle inline expansion, scalarization of intrinsic subroutine MVBITS in gfc_conv_intrinsic_mvbits. * trans.h (gfc_conv_intrinsic_mvbits): Add prototype for gfc_conv_intrinsic_mvbits. gcc/testsuite/ChangeLog: * gfortran.dg/check_bits_2.f90: New test. 
Co-authored-by: Paul Thomas --- gcc/fortran/iresolve.c | 14 --- gcc/fortran/trans-intrinsic.c | 167 +++++++++++++++++++++++++++++ gcc/fortran/trans-stmt.c | 48 +++++++-- gcc/fortran/trans.h | 4 + gcc/testsuite/gfortran.dg/check_bits_2.f90 | 38 +++++++ 5 files changed, 247 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/check_bits_2.f90 (limited to 'gcc') diff --git a/gcc/fortran/iresolve.c b/gcc/fortran/iresolve.c index 7376961..c2a4865 100644 --- a/gcc/fortran/iresolve.c +++ b/gcc/fortran/iresolve.c @@ -3311,21 +3311,7 @@ gfc_resolve_mvbits (gfc_code *c) { static const sym_intent INTENTS[] = {INTENT_IN, INTENT_IN, INTENT_IN, INTENT_INOUT, INTENT_IN}; - const char *name; - gfc_typespec ts; - gfc_clear_ts (&ts); - - /* FROMPOS, LEN and TOPOS are restricted to small values. As such, - they will be converted so that they fit into a C int. */ - ts.type = BT_INTEGER; - ts.kind = gfc_c_int_kind; - if (c->ext.actual->next->expr->ts.kind != gfc_c_int_kind) - gfc_convert_type (c->ext.actual->next->expr, &ts, 2); - if (c->ext.actual->next->next->expr->ts.kind != gfc_c_int_kind) - gfc_convert_type (c->ext.actual->next->next->expr, &ts, 2); - if (c->ext.actual->next->next->next->next->expr->ts.kind != gfc_c_int_kind) - gfc_convert_type (c->ext.actual->next->next->next->next->expr, &ts, 2); /* TO and FROM are guaranteed to have the same kind parameter. */ name = gfc_get_string (PREFIX ("mvbits_i%d"), diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c index 32fe988..3b3bd86 100644 --- a/gcc/fortran/trans-intrinsic.c +++ b/gcc/fortran/trans-intrinsic.c @@ -11790,6 +11790,169 @@ conv_intrinsic_event_query (gfc_code *code) return gfc_finish_block (&se.pre); } + +/* This is a peculiar case because of the need to do dependency checking. + It is called via trans-stmt.c(gfc_trans_call), where it is picked out as + a special case and this function called instead of + gfc_conv_procedure_call. 
*/ +void +gfc_conv_intrinsic_mvbits (gfc_se *se, gfc_actual_arglist *actual_args, + gfc_loopinfo *loop) +{ + gfc_actual_arglist *actual; + gfc_se argse[5]; + gfc_expr *arg[5]; + gfc_ss *lss; + int n; + + tree from, frompos, len, to, topos; + tree lenmask, oldbits, newbits, bitsize; + tree type, utype, above, mask1, mask2; + + if (loop) + lss = loop->ss; + else + lss = gfc_ss_terminator; + + actual = actual_args; + for (n = 0; n < 5; n++, actual = actual->next) + { + arg[n] = actual->expr; + gfc_init_se (&argse[n], NULL); + + if (lss != gfc_ss_terminator) + { + gfc_copy_loopinfo_to_se (&argse[n], loop); + /* Find the ss for the expression if it is there. */ + argse[n].ss = lss; + gfc_mark_ss_chain_used (lss, 1); + } + + gfc_conv_expr (&argse[n], arg[n]); + + if (loop) + lss = argse[n].ss; + } + + from = argse[0].expr; + frompos = argse[1].expr; + len = argse[2].expr; + to = argse[3].expr; + topos = argse[4].expr; + + /* The type of the result (TO). */ + type = TREE_TYPE (to); + bitsize = build_int_cst (integer_type_node, TYPE_PRECISION (type)); + + /* Optionally generate code for runtime argument check. 
*/ + if (gfc_option.rtcheck & GFC_RTCHECK_BITS) + { + tree nbits, below, ccond; + tree fp = fold_convert (long_integer_type_node, frompos); + tree ln = fold_convert (long_integer_type_node, len); + tree tp = fold_convert (long_integer_type_node, topos); + below = fold_build2_loc (input_location, LT_EXPR, + logical_type_node, frompos, + build_int_cst (TREE_TYPE (frompos), 0)); + above = fold_build2_loc (input_location, GT_EXPR, + logical_type_node, frompos, + fold_convert (TREE_TYPE (frompos), bitsize)); + ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR, + logical_type_node, below, above); + gfc_trans_runtime_check (true, false, ccond, &argse[1].pre, + &arg[1]->where, + "FROMPOS argument (%ld) out of range 0:%d " + "in intrinsic MVBITS", fp, bitsize); + below = fold_build2_loc (input_location, LT_EXPR, + logical_type_node, len, + build_int_cst (TREE_TYPE (len), 0)); + above = fold_build2_loc (input_location, GT_EXPR, + logical_type_node, len, + fold_convert (TREE_TYPE (len), bitsize)); + ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR, + logical_type_node, below, above); + gfc_trans_runtime_check (true, false, ccond, &argse[2].pre, + &arg[2]->where, + "LEN argument (%ld) out of range 0:%d " + "in intrinsic MVBITS", ln, bitsize); + below = fold_build2_loc (input_location, LT_EXPR, + logical_type_node, topos, + build_int_cst (TREE_TYPE (topos), 0)); + above = fold_build2_loc (input_location, GT_EXPR, + logical_type_node, topos, + fold_convert (TREE_TYPE (topos), bitsize)); + ccond = fold_build2_loc (input_location, TRUTH_ORIF_EXPR, + logical_type_node, below, above); + gfc_trans_runtime_check (true, false, ccond, &argse[4].pre, + &arg[4]->where, + "TOPOS argument (%ld) out of range 0:%d " + "in intrinsic MVBITS", tp, bitsize); + + /* The tests above ensure that FROMPOS, LEN and TOPOS fit into short + integers. Additions below cannot overflow. 
*/ + nbits = fold_convert (long_integer_type_node, bitsize); + above = fold_build2_loc (input_location, PLUS_EXPR, + long_integer_type_node, fp, ln); + ccond = fold_build2_loc (input_location, GT_EXPR, + logical_type_node, above, nbits); + gfc_trans_runtime_check (true, false, ccond, &argse[1].pre, + &arg[1]->where, + "FROMPOS(%ld)+LEN(%ld)>BIT_SIZE(%d) " + "in intrinsic MVBITS", fp, ln, bitsize); + above = fold_build2_loc (input_location, PLUS_EXPR, + long_integer_type_node, tp, ln); + ccond = fold_build2_loc (input_location, GT_EXPR, + logical_type_node, above, nbits); + gfc_trans_runtime_check (true, false, ccond, &argse[4].pre, + &arg[4]->where, + "TOPOS(%ld)+LEN(%ld)>BIT_SIZE(%d) " + "in intrinsic MVBITS", tp, ln, bitsize); + } + + for (n = 0; n < 5; n++) + { + gfc_add_block_to_block (&se->pre, &argse[n].pre); + gfc_add_block_to_block (&se->post, &argse[n].post); + } + + /* lenmask = (LEN >= bit_size (TYPE)) ? ~(TYPE)0 : ((TYPE)1 << LEN) - 1 */ + above = fold_build2_loc (input_location, GE_EXPR, logical_type_node, + len, fold_convert (TREE_TYPE (len), bitsize)); + mask1 = build_int_cst (type, -1); + mask2 = fold_build2_loc (input_location, LSHIFT_EXPR, type, + build_int_cst (type, 1), len); + mask2 = fold_build2_loc (input_location, MINUS_EXPR, type, + mask2, build_int_cst (type, 1)); + lenmask = fold_build3_loc (input_location, COND_EXPR, type, + above, mask1, mask2); + + /* newbits = (((UTYPE)(FROM) >> FROMPOS) & lenmask) << TOPOS. + * For valid frompos+len <= bit_size(FROM) the conversion to unsigned is + * not strictly necessary; artificial bits from rshift will be masked. */ + utype = unsigned_type_for (type); + newbits = fold_build2_loc (input_location, RSHIFT_EXPR, utype, + fold_convert (utype, from), frompos); + newbits = fold_build2_loc (input_location, BIT_AND_EXPR, type, + fold_convert (type, newbits), lenmask); + newbits = fold_build2_loc (input_location, LSHIFT_EXPR, type, + newbits, topos); + + /* oldbits = TO & (~(lenmask << TOPOS)). 
*/ + oldbits = fold_build2_loc (input_location, LSHIFT_EXPR, type, + lenmask, topos); + oldbits = fold_build1_loc (input_location, BIT_NOT_EXPR, type, oldbits); + oldbits = fold_build2_loc (input_location, BIT_AND_EXPR, type, oldbits, to); + + /* TO = newbits | oldbits. */ + se->expr = fold_build2_loc (input_location, BIT_IOR_EXPR, type, + oldbits, newbits); + + /* Return the assignment. */ + se->expr = fold_build2_loc (input_location, MODIFY_EXPR, + void_type_node, to, se->expr); +} + + static tree conv_intrinsic_move_alloc (gfc_code *code) { @@ -12119,6 +12282,10 @@ gfc_conv_intrinsic_subroutine (gfc_code *code) res = conv_intrinsic_kill_sub (code); break; + case GFC_ISYM_MVBITS: + res = NULL_TREE; + break; + case GFC_ISYM_SYSTEM_CLOCK: res = conv_intrinsic_system_clock (code); break; diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index 1f183b9..389fec7 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -198,6 +198,13 @@ replace_ss (gfc_se *se, gfc_ss *old_ss, gfc_ss *new_ss) *sess = new_ss; new_ss->next = old_ss->next; + /* Make sure that trailing references are not lost. */ + if (old_ss->info + && old_ss->info->data.array.ref + && old_ss->info->data.array.ref->next + && !(new_ss->info->data.array.ref + && new_ss->info->data.array.ref->next)) + new_ss->info->data.array.ref = old_ss->info->data.array.ref; for (loopss = &(se->loop->ss); *loopss != gfc_ss_terminator; loopss = &((*loopss)->loop_chain)) @@ -383,6 +390,7 @@ gfc_trans_call (gfc_code * code, bool dependency_check, tree index = NULL_TREE; tree maskexpr = NULL_TREE; tree tmp; + bool is_intrinsic_mvbits; /* A CALL starts a new block because the actual arguments may have to be evaluated first. */ @@ -397,17 +405,29 @@ gfc_trans_call (gfc_code * code, bool dependency_check, get_proc_ifc_for_call (code), GFC_SS_REFERENCE); + /* MVBITS is inlined but needs the dependency checking found here. 
*/ + is_intrinsic_mvbits = code->resolved_isym + && code->resolved_isym->id == GFC_ISYM_MVBITS; + /* Is not an elemental subroutine call with array valued arguments. */ if (ss == gfc_ss_terminator) { - /* Translate the call. */ - has_alternate_specifier - = gfc_conv_procedure_call (&se, code->resolved_sym, code->ext.actual, - code->expr1, NULL); + if (is_intrinsic_mvbits) + { + has_alternate_specifier = 0; + gfc_conv_intrinsic_mvbits (&se, code->ext.actual, NULL); + } + else + { + /* Translate the call. */ + has_alternate_specifier = + gfc_conv_procedure_call (&se, code->resolved_sym, + code->ext.actual, code->expr1, NULL); - /* A subroutine without side-effect, by definition, does nothing! */ - TREE_SIDE_EFFECTS (se.expr) = 1; + /* A subroutine without side-effect, by definition, does nothing! */ + TREE_SIDE_EFFECTS (se.expr) = 1; + } /* Chain the pieces together and return the block. */ if (has_alternate_specifier) @@ -490,10 +510,18 @@ gfc_trans_call (gfc_code * code, bool dependency_check, TREE_TYPE (maskexpr), maskexpr); } - /* Add the subroutine call to the block. */ - gfc_conv_procedure_call (&loopse, code->resolved_sym, - code->ext.actual, code->expr1, - NULL); + if (is_intrinsic_mvbits) + { + has_alternate_specifier = 0; + gfc_conv_intrinsic_mvbits (&loopse, code->ext.actual, &loop); + } + else + { + /* Add the subroutine call to the block. */ + gfc_conv_procedure_call (&loopse, code->resolved_sym, + code->ext.actual, code->expr1, + NULL); + } if (mask && count1) { diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index d257963..16b4215 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -818,6 +818,10 @@ bool gfc_omp_private_outer_ref (tree); struct gimplify_omp_ctx; void gfc_omp_firstprivatize_type_sizes (struct gimplify_omp_ctx *, tree); +/* In trans-intrinsic.c. */ +void gfc_conv_intrinsic_mvbits (gfc_se *, gfc_actual_arglist *, + gfc_loopinfo *); + /* Runtime library function decls. 
*/ extern GTY(()) tree gfor_fndecl_pause_numeric; extern GTY(()) tree gfor_fndecl_pause_string; diff --git a/gcc/testsuite/gfortran.dg/check_bits_2.f90 b/gcc/testsuite/gfortran.dg/check_bits_2.f90 new file mode 100644 index 0000000..25357a0 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/check_bits_2.f90 @@ -0,0 +1,38 @@ +! { dg-do run } +! { dg-options "-fcheck=bits -fdump-tree-original" } +! { dg-shouldfail "Fortran runtime error: FROMPOS(64)+LEN(1)>BIT_SIZE(64) in intrinsic MVBITS" } +! { dg-output "At line 33 .*" } +! +! Verify that the runtime checks for the MVBITS intrinsic functions +! do not generate false-positives +program check + implicit none + integer, parameter :: bs4 = bit_size (1_4) + integer, parameter :: bs8 = bit_size (1_8) + integer(4), dimension(0:bs4) :: from4, frompos4, len4, to4, topos4 + integer(8), dimension(0:bs8) :: from8, frompos8, len8, to8, topos8 + integer :: i + from4 = -1 + to4 = -1 + len4 = [ (i, i=0,bs4) ] + frompos4 = bs4 - len4 + topos4 = frompos4 + call mvbits (from4, frompos4, len4, to4, topos4) + if (any (to4 /= -1)) stop 1 + from8 = -1 + to8 = -1 + len8 = [ (i, i=0,bs8) ] + frompos8 = bs8 - len8 + topos8 = frompos8 + call mvbits (from8, frompos8, len8, to8, topos8) + if (any (to8 /= -1)) stop 2 + from8 = -1 + to8 = -1 + len8(0) = 1 + ! The following line should fail with a runtime error: + call mvbits (from8, frompos8, len8, to8, topos8) + ! Should never get here with -fcheck=bits + stop 3 +end + +! { dg-final { scan-tree-dump-times "_gfortran_runtime_error_at" 15 "original" } } -- cgit v1.1 From 05193687dde2e5a6337164182a1946b584acfada Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Mon, 21 Sep 2020 14:33:29 -0600 Subject: Avoid incidental failures due to implicit attribute access. gcc/testsuite/ChangeLog: PR c/50584 * gcc.dg/ipa/ipa-sra-1.c: Use a plain pointer for argv instead of array. * gcc.dg/ipa/ipa-sra-12.c: Same. * gcc.dg/ipa/ipa-sra-13.c: Same. * gcc.dg/ipa/ipa-sra-14.c: Same. * gcc.dg/ipa/ipa-sra-15.c: Same. 
--- gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c | 2 +- gcc/testsuite/gcc.dg/ipa/ipa-sra-12.c | 2 +- gcc/testsuite/gcc.dg/ipa/ipa-sra-13.c | 2 +- gcc/testsuite/gcc.dg/ipa/ipa-sra-14.c | 2 +- gcc/testsuite/gcc.dg/ipa/ipa-sra-15.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c b/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c index 4a22e39..df7e356 100644 --- a/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c +++ b/gcc/testsuite/gcc.dg/ipa/ipa-sra-1.c @@ -24,7 +24,7 @@ ox (struct bovid cow) } int -main (int argc, char *argv[]) +main (int argc, char **argv) { struct bovid cow; diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-sra-12.c b/gcc/testsuite/gcc.dg/ipa/ipa-sra-12.c index 4d9057e..0cc76bd 100644 --- a/gcc/testsuite/gcc.dg/ipa/ipa-sra-12.c +++ b/gcc/testsuite/gcc.dg/ipa/ipa-sra-12.c @@ -34,7 +34,7 @@ bar (struct S s) } int -main (int argc, char *argv[]) +main (int argc, char **argv) { struct S s; diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-sra-13.c b/gcc/testsuite/gcc.dg/ipa/ipa-sra-13.c index 4d4ed74..e8751da 100644 --- a/gcc/testsuite/gcc.dg/ipa/ipa-sra-13.c +++ b/gcc/testsuite/gcc.dg/ipa/ipa-sra-13.c @@ -33,7 +33,7 @@ bar (struct S *s) } int -main (int argc, char *argv[]) +main (int argc, char **argv) { struct S s; diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-sra-14.c b/gcc/testsuite/gcc.dg/ipa/ipa-sra-14.c index 3ca302c..75619c6 100644 --- a/gcc/testsuite/gcc.dg/ipa/ipa-sra-14.c +++ b/gcc/testsuite/gcc.dg/ipa/ipa-sra-14.c @@ -43,7 +43,7 @@ bar (struct S s) } int -main (int argc, char *argv[]) +main (int argc, char **argv) { struct S s; diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-sra-15.c b/gcc/testsuite/gcc.dg/ipa/ipa-sra-15.c index 6c57c7b..aa13a94 100644 --- a/gcc/testsuite/gcc.dg/ipa/ipa-sra-15.c +++ b/gcc/testsuite/gcc.dg/ipa/ipa-sra-15.c @@ -45,7 +45,7 @@ bar (struct S *s, int rec) volatile int g; int -main (int argc, char *argv[]) +main (int argc, char **argv) { struct S s; -- cgit v1.1 From 
68402af1c68301c6bc852ddba6c63966ed706178 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Mon, 21 Sep 2020 12:45:43 -0700 Subject: libgo: don't put golang.org packages in zstdpkglist.go This ensures that internal/goroot.IsStandardPackage does not treat golang.org packages as being in the standard library. For golang/go#41368 Fixes golang/go#41499 Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/256319 --- gcc/go/gofrontend/MERGE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 6b590f6..f79a1f0 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -6fd6418efb983827717f648a11bb5ca6fe93af30 +f2706d92d9560657333682a3de548f1f98e9f9b0 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. -- cgit v1.1 From 7029dfa38b663d20e0de40395fcd45a2845e2f71 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Sat, 19 Sep 2020 16:17:42 -0400 Subject: c++: Implement -Wctad-maybe-unsupported. I noticed that clang++ has this CTAD warning and thought that it might be useful to have it. From clang++: "Some style guides want to allow using CTAD only on types that "opt-in"; i.e. on types that are designed to support it and not just types that *happen* to work with it." So this warning warns when CTAD deduced a type, but the type does not define any deduction guides. In that case CTAD worked only because the compiler synthesized the implicit deduction guides. That might not be intended. It can be suppressed by adding a deduction guide that will never be considered: struct allow_ctad_t; template <typename T> struct S { S(T) {} }; S(allow_ctad_t) -> S<void>; This warning is off by default. It doesn't warn when the type comes from a system header unless -Wsystem-headers. gcc/c-family/ChangeLog: * c.opt (Wctad-maybe-unsupported): New option. gcc/cp/ChangeLog: * pt.c (deduction_guides_for): Add a bool parameter. Set it. 
(do_class_deduction): Warn when CTAD succeeds but the type doesn't have any explicit deduction guides. gcc/ChangeLog: * doc/invoke.texi: Document -Wctad-maybe-unsupported. gcc/testsuite/ChangeLog: * g++.dg/warn/Wctad-maybe-unsupported.C: New test. * g++.dg/warn/Wctad-maybe-unsupported2.C: New test. * g++.dg/warn/Wctad-maybe-unsupported3.C: New test. * g++.dg/warn/Wctad-maybe-unsupported.h: New file. --- gcc/c-family/c.opt | 5 ++ gcc/cp/pt.c | 28 ++++++- gcc/doc/invoke.texi | 22 +++++- .../g++.dg/warn/Wctad-maybe-unsupported.C | 88 ++++++++++++++++++++++ .../g++.dg/warn/Wctad-maybe-unsupported.h | 4 + .../g++.dg/warn/Wctad-maybe-unsupported2.C | 6 ++ .../g++.dg/warn/Wctad-maybe-unsupported3.C | 6 ++ 7 files changed, 154 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported.C create mode 100644 gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported.h create mode 100644 gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported2.C create mode 100644 gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported3.C (limited to 'gcc') diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 7a61351..da6c3e1 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -475,6 +475,11 @@ Wcpp C ObjC C++ ObjC++ CppReason(CPP_W_WARNING_DIRECTIVE) ; Documented in common.opt +Wctad-maybe-unsupported +C++ ObjC++ Var(warn_ctad_maybe_unsupported) Warning +Warn when performing class template argument deduction on a type with no +deduction guides. + Wctor-dtor-privacy C++ ObjC++ Var(warn_ctor_dtor_privacy) Warning Warn when all constructors and destructors are private. diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index fe45de8..97d0c24 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -28830,17 +28830,19 @@ static GTY((deletable)) hash_map *dguide_cache; /* Return the non-aggregate deduction guides for deducible template TMPL. The aggregate candidate is added separately because it depends on the - initializer. */ + initializer. 
Set ANY_DGUIDES_P if we find a non-implicit deduction + guide. */ static tree -deduction_guides_for (tree tmpl, tsubst_flags_t complain) +deduction_guides_for (tree tmpl, bool &any_dguides_p, tsubst_flags_t complain) { tree guides = NULL_TREE; if (DECL_ALIAS_TEMPLATE_P (tmpl)) { tree under = DECL_ORIGINAL_TYPE (DECL_TEMPLATE_RESULT (tmpl)); tree tinfo = get_template_info (under); - guides = deduction_guides_for (TI_TEMPLATE (tinfo), complain); + guides = deduction_guides_for (TI_TEMPLATE (tinfo), any_dguides_p, + complain); } else { @@ -28849,6 +28851,8 @@ deduction_guides_for (tree tmpl, tsubst_flags_t complain) LOOK_want::NORMAL, /*complain*/false); if (guides == error_mark_node) guides = NULL_TREE; + else + any_dguides_p = true; } /* Cache the deduction guides for a template. We also remember the result of @@ -28974,7 +28978,8 @@ do_class_deduction (tree ptype, tree tmpl, tree init, if (args == NULL) return error_mark_node; - tree cands = deduction_guides_for (tmpl, complain); + bool any_dguides_p = false; + tree cands = deduction_guides_for (tmpl, any_dguides_p, complain); if (cands == error_mark_node) return error_mark_node; @@ -29063,6 +29068,21 @@ do_class_deduction (tree ptype, tree tmpl, tree init, "for copy-initialization"); } + /* If CTAD succeeded but the type doesn't have any explicit deduction + guides, this deduction might not be what the user intended. 
*/ + if (call != error_mark_node && !any_dguides_p) + { + tree fndecl = cp_get_callee_fndecl_nofold (call); + if (fndecl != NULL_TREE + && (!DECL_IN_SYSTEM_HEADER (fndecl) + || global_dc->dc_warn_system_headers) + && warning (OPT_Wctad_maybe_unsupported, + "%qT may not intend to support class template argument " + "deduction", type)) + inform (input_location, "add a deduction guide to suppress this " + "warning"); + } + return cp_build_qualified_type (TREE_TYPE (call), cp_type_quals (ptype)); } diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 8be2b4f..665c0ff 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -236,7 +236,8 @@ in the following sections. -Wabi-tag -Wcatch-value -Wcatch-value=@var{n} @gol -Wno-class-conversion -Wclass-memaccess @gol -Wcomma-subscript -Wconditionally-supported @gol --Wno-conversion-null -Wctor-dtor-privacy -Wno-delete-incomplete @gol +-Wno-conversion-null -Wctad-maybe-unsupported @gol +-Wctor-dtor-privacy -Wno-delete-incomplete @gol -Wdelete-non-virtual-dtor -Wdeprecated-copy -Wdeprecated-copy-dtor @gol -Weffc++ -Wextra-semi -Wno-inaccessible-base @gol -Wno-inherited-variadic-ctor -Wno-init-list-lifetime @gol @@ -3304,6 +3305,25 @@ void f(int *a, int b, int c) @{ Enabled by default with @option{-std=c++20}. +@item -Wctad-maybe-unsupported @r{(C++ and Objective-C++ only)} +@opindex Wctad-maybe-unsupported +@opindex Wno-ctad-maybe-unsupported +Warn when performing class template argument deduction (CTAD) on a type with +no explicitly written deduction guides. This warning will point out cases +where CTAD succeeded only because the compiler synthesized the implicit +deduction guides, which might not be what the programmer intended. Certain +style guides allow CTAD only on types that specifically "opt-in"; i.e., on +types that are designed to support CTAD. 
This warning can be suppressed with +the following pattern: + +@smallexample +struct allow_ctad_t; // any name works +template struct S @{ + S(T) @{ @} +@}; +S(allow_ctad_t) -> S; // guide with incomplete parameter type will never be considered +@end smallexample + @item -Wctor-dtor-privacy @r{(C++ and Objective-C++ only)} @opindex Wctor-dtor-privacy @opindex Wno-ctor-dtor-privacy diff --git a/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported.C b/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported.C new file mode 100644 index 0000000..903e6f1 --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported.C @@ -0,0 +1,88 @@ +// Test -Wctad-maybe-unsupported. +// { dg-do compile { target c++17 } } +// { dg-options "-Wctad-maybe-unsupported" } + +template struct Empty { }; + +template +struct A { + A(T); // generates 'template A(T)-> A' + A(T, int); // generates 'template A(T, int)-> A' +}; + +// These only succeed because of the implicit guide. That may be +// undesired. +A a1(42); // { dg-warning "may not intend to support class template argument deduction" } +A a2{42}; // { dg-warning "may not intend to support class template argument deduction" } +A a3 = {42}; // { dg-warning "may not intend to support class template argument deduction" } + +template +struct B { + B(T); + B(T, int); +}; +template B(T, int) -> B>; + +B b1(42); +B b2{42}; +B b3 = {42}; + +// Motivating examples from Stephan Lavavej's 2018 CppCon talk. 
+template +struct Pair { + T first; + U second; + explicit Pair(const T &t, const U &u) {} +}; +// deduces to Pair +Pair p1(42, "hello world"); // { dg-warning "may not intend to support class template argument deduction" } +Pair p1b{42, "hello world"}; // { dg-warning "may not intend to support class template argument deduction" } + +template +struct Pair2 { + T first; + U second; + explicit Pair2(T t, U u) {} +}; +// deduces to Pair2 +Pair2 p2(42, "hello world"); // { dg-warning "may not intend to support class template argument deduction" } +Pair2 p2b{42, "hello world"}; // { dg-warning "may not intend to support class template argument deduction" } + +template +struct Pair3 { + T first; + U second; + explicit Pair3(T const& t, U const& u) {} +}; +template +Pair3(T1, T2) -> Pair3; + // deduces to Pair3 +Pair3 p3(42, "hello world"); +static_assert(__is_same(decltype(p3), Pair3)); + +// Test that explicit guides suppress the warning even if they +// aren't used as candidates. +template +struct C { + C(T) { } +}; +template +explicit C(C const&) -> C; +C c{42}; +C c2 = c; +static_assert(__is_same(decltype(c2), C)); + +// Clang's suppression test. 
+struct allow_ctad_t { + allow_ctad_t() = delete; +}; + +template +struct S { + S(T) {} +}; +S(allow_ctad_t) -> S; +S s("abc"); +S s2{"abc"}; +static_assert(__is_same(decltype(s), S)); +static_assert(__is_same(decltype(s2), S)); diff --git a/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported.h b/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported.h new file mode 100644 index 0000000..9bb3154 --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported.h @@ -0,0 +1,4 @@ +#pragma GCC system_header + +template +struct A { A(T); }; diff --git a/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported2.C b/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported2.C new file mode 100644 index 0000000..ce664bd --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported2.C @@ -0,0 +1,6 @@ +// { dg-do compile { target c++17 } } +// { dg-options "-Wctad-maybe-unsupported" } + +#include "Wctad-maybe-unsupported.h" + +A a{42}; // { dg-bogus "may not intend to support class template argument deduction" } diff --git a/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported3.C b/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported3.C new file mode 100644 index 0000000..c0ae633 --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/Wctad-maybe-unsupported3.C @@ -0,0 +1,6 @@ +// { dg-do compile { target c++17 } } +// { dg-options "-Wctad-maybe-unsupported -Wsystem-headers" } + +#include "Wctad-maybe-unsupported.h" + +A a{42}; // { dg-warning "may not intend to support class template argument deduction" } -- cgit v1.1 From defceb206be0f803b8d94e746180e51adad20b87 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Mon, 21 Sep 2020 13:24:25 -0400 Subject: c++: DR 1722: Make lambda to function pointer conv noexcept [PR90583] DR 1722 clarifies that the conversion function from lambda to pointer to function should be noexcept(true). gcc/cp/ChangeLog: PR c++/90583 DR 1722 * lambda.c (maybe_add_lambda_conv_op): Mark the conversion function as noexcept. 
gcc/testsuite/ChangeLog: PR c++/90583 DR 1722 * g++.dg/cpp0x/lambda/lambda-conv14.C: New test. --- gcc/cp/lambda.c | 2 ++ gcc/testsuite/g++.dg/cpp0x/lambda/lambda-conv14.C | 10 ++++++++++ 2 files changed, 12 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp0x/lambda/lambda-conv14.C (limited to 'gcc') diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c index 364a3e9..7fccccc 100644 --- a/gcc/cp/lambda.c +++ b/gcc/cp/lambda.c @@ -1189,6 +1189,8 @@ maybe_add_lambda_conv_op (tree type) tree name = make_conv_op_name (rettype); tree thistype = cp_build_qualified_type (type, TYPE_QUAL_CONST); tree fntype = build_method_type_directly (thistype, rettype, void_list_node); + /* DR 1722: The conversion function should be noexcept. */ + fntype = build_exception_variant (fntype, noexcept_true_spec); tree convfn = build_lang_decl (FUNCTION_DECL, name, fntype); SET_DECL_LANGUAGE (convfn, lang_cplusplus); tree fn = convfn; diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-conv14.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-conv14.C new file mode 100644 index 0000000..869e0d5 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-conv14.C @@ -0,0 +1,10 @@ +// PR c++/90583 +// DR 1722: Lambda to function pointer conversion should be noexcept. 
+// { dg-do compile { target c++11 } } + +void +foo () +{ + auto l = [](int){ return 42; }; + static_assert(noexcept((int (*)(int))(l)), ""); +} -- cgit v1.1 From be3027e9c8366d92f68e6b3021c1cbe815648480 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Chigot?= Date: Wed, 16 Sep 2020 13:58:17 +0200 Subject: go/internal/gccgoimporter: recognize aixbigafMagic archives Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/255201 --- gcc/go/gofrontend/MERGE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index f79a1f0..d8db888 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -f2706d92d9560657333682a3de548f1f98e9f9b0 +6f309797e4f7eed635950687e902a294126e6fc6 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. -- cgit v1.1 From 15e7b93ba4256884c90198c678ed7eded4e73464 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Fri, 18 Sep 2020 17:34:50 -0400 Subject: analyzer: decls are not on the heap Whilst debugging the remaining state explosion in PR analyzer/93355 I noticed that half of the states at an exploding program point had: 'malloc': {'&buf': 'non-heap'} whereas the other half didn't, presumably depending on whether the path to each enode had used this local buffer: char buf[400]; This patch tweaks malloc_state_machine::get_default_state to be smarter about this, so that we can implicitly treat pointers to decls as non-heap, preventing pointless differences between sm_state_map instances. With that, all of the states in question have equal (empty) malloc sm-state - though the state explosion continues for other reasons. gcc/analyzer/ChangeLog: PR analyzer/93355 * sm-malloc.cc (malloc_state_machine::get_default_state): Look at the base region when considering pointers. Treat pointers to decls as being non-heap. 
--- gcc/analyzer/sm-malloc.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/analyzer/sm-malloc.cc b/gcc/analyzer/sm-malloc.cc index 90d1da1..12b2383 100644 --- a/gcc/analyzer/sm-malloc.cc +++ b/gcc/analyzer/sm-malloc.cc @@ -183,7 +183,9 @@ public: if (const region_svalue *ptr = sval->dyn_cast_region_svalue ()) { const region *reg = ptr->get_pointee (); - if (reg->get_kind () == RK_STRING) + const region *base_reg = reg->get_base_region (); + if (base_reg->get_kind () == RK_DECL + || base_reg->get_kind () == RK_STRING) return m_non_heap; } return m_start; -- cgit v1.1 From 1e19ecd79b45af6df87a6869d1936b857c9f71fc Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Mon, 21 Sep 2020 11:59:26 -0400 Subject: analyzer: fix ICE on bogus decl of memset [PR97130] Verify that arguments are pointers before calling handling code that calls deref_rvalue on them. gcc/analyzer/ChangeLog: PR analyzer/97130 * region-model-impl-calls.cc (call_details::get_arg_type): New. * region-model.cc (region_model::on_call_pre): Check that the initial arg is a pointer before calling impl_call_memset and impl_call_strlen. * region-model.h (call_details::get_arg_type): New decl. gcc/testsuite/ChangeLog: PR analyzer/97130 * gcc.dg/analyzer/pr97130.c: New test. --- gcc/analyzer/region-model-impl-calls.cc | 8 ++++++++ gcc/analyzer/region-model.cc | 6 ++++-- gcc/analyzer/region-model.h | 1 + gcc/testsuite/gcc.dg/analyzer/pr97130.c | 10 ++++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr97130.c (limited to 'gcc') diff --git a/gcc/analyzer/region-model-impl-calls.cc b/gcc/analyzer/region-model-impl-calls.cc index 6582ffb..423f74a 100644 --- a/gcc/analyzer/region-model-impl-calls.cc +++ b/gcc/analyzer/region-model-impl-calls.cc @@ -103,6 +103,14 @@ call_details::get_arg_tree (unsigned idx) const return gimple_call_arg (m_call, idx); } +/* Get the type of argument IDX. 
*/ + +tree +call_details::get_arg_type (unsigned idx) const +{ + return TREE_TYPE (gimple_call_arg (m_call, idx)); +} + /* Get argument IDX at the callsite as an svalue. */ const svalue * diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 1312391..6f04904 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -737,12 +737,14 @@ region_model::on_call_pre (const gcall *call, region_model_context *ctxt) /* No side-effects (tracking stream state is out-of-scope for the analyzer). */ } - else if (is_named_call_p (callee_fndecl, "memset", call, 3)) + else if (is_named_call_p (callee_fndecl, "memset", call, 3) + && POINTER_TYPE_P (cd.get_arg_type (0))) { impl_call_memset (cd); return false; } - else if (is_named_call_p (callee_fndecl, "strlen", call, 1)) + else if (is_named_call_p (callee_fndecl, "strlen", call, 1) + && POINTER_TYPE_P (cd.get_arg_type (0))) { if (impl_call_strlen (cd)) return false; diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index 1bb9798..4859df3 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -2482,6 +2482,7 @@ public: bool maybe_set_lhs (const svalue *result) const; tree get_arg_tree (unsigned idx) const; + tree get_arg_type (unsigned idx) const; const svalue *get_arg_svalue (unsigned idx) const; void dump_to_pp (pretty_printer *pp, bool simple) const; diff --git a/gcc/testsuite/gcc.dg/analyzer/pr97130.c b/gcc/testsuite/gcc.dg/analyzer/pr97130.c new file mode 100644 index 0000000..f437b76 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr97130.c @@ -0,0 +1,10 @@ +/* { dg-additional-options "-Wno-builtin-declaration-mismatch" } */ + +void * +memset (int, int, __SIZE_TYPE__); + +void +mp (int xl) +{ + memset (xl, 0, sizeof xl); +} -- cgit v1.1 From 27edc6c3e296a3eea15be291b1f605a647e94107 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 27 Aug 2020 22:18:45 -0700 Subject: compiler: finalize methods for type aliases of struct types 
Previously we would finalize the methods of the alias type itself, but since it's a type alias we really need to finalize the methods of the aliased type. Also, handle method expressions of unnamed struct types. Test case is https://golang.org/cl/251168. Fixes golang/go#38125 Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/251279 --- gcc/go/gofrontend/MERGE | 2 +- gcc/go/gofrontend/expressions.cc | 50 ++++++++++++++++++++++++---------------- gcc/go/gofrontend/gogo.cc | 4 ++++ 3 files changed, 35 insertions(+), 21 deletions(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index d8db888..e4f8fac 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -6f309797e4f7eed635950687e902a294126e6fc6 +a59167c29d6ad2ddf533b3a12b365f72df0e1476 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc index 8bbc557..0350e51 100644 --- a/gcc/go/gofrontend/expressions.cc +++ b/gcc/go/gofrontend/expressions.cc @@ -14529,21 +14529,19 @@ Selector_expression::lower_method_expression(Gogo* gogo) is_pointer = true; type = type->points_to(); } - Named_type* nt = type->named_type(); - if (nt == NULL) - { - go_error_at(location, - ("method expression requires named type or " - "pointer to named type")); - return Expression::make_error(location); - } + Named_type* nt = type->named_type(); + Struct_type* st = type->struct_type(); bool is_ambiguous; - Method* method = nt->method_function(name, &is_ambiguous); + Method* method = NULL; + if (nt != NULL) + method = nt->method_function(name, &is_ambiguous); + else if (st != NULL) + method = st->method_function(name, &is_ambiguous); const Typed_identifier* imethod = NULL; if (method == NULL && !is_pointer) { - Interface_type* it = nt->interface_type(); + Interface_type* it = type->interface_type(); if (it != NULL) imethod = 
it->find_method(name); } @@ -14551,16 +14549,28 @@ Selector_expression::lower_method_expression(Gogo* gogo) if ((method == NULL && imethod == NULL) || (left_type->named_type() != NULL && left_type->points_to() != NULL)) { - if (!is_ambiguous) - go_error_at(location, "type %<%s%s%> has no method %<%s%>", - is_pointer ? "*" : "", - nt->message_name().c_str(), - Gogo::message_name(name).c_str()); + if (nt != NULL) + { + if (!is_ambiguous) + go_error_at(location, "type %<%s%s%> has no method %<%s%>", + is_pointer ? "*" : "", + nt->message_name().c_str(), + Gogo::message_name(name).c_str()); + else + go_error_at(location, "method %<%s%s%> is ambiguous in type %<%s%>", + Gogo::message_name(name).c_str(), + is_pointer ? "*" : "", + nt->message_name().c_str()); + } else - go_error_at(location, "method %<%s%s%> is ambiguous in type %<%s%>", - Gogo::message_name(name).c_str(), - is_pointer ? "*" : "", - nt->message_name().c_str()); + { + if (!is_ambiguous) + go_error_at(location, "type has no method %<%s%>", + Gogo::message_name(name).c_str()); + else + go_error_at(location, "method %<%s%> is ambiguous", + Gogo::message_name(name).c_str()); + } return Expression::make_error(location); } @@ -14657,7 +14667,7 @@ Selector_expression::lower_method_expression(Gogo* gogo) Expression* ve = Expression::make_var_reference(vno, location); Expression* bm; if (method != NULL) - bm = Type::bind_field_or_method(gogo, nt, ve, name, location); + bm = Type::bind_field_or_method(gogo, type, ve, name, location); else bm = Expression::make_interface_field_reference(ve, name, location); diff --git a/gcc/go/gofrontend/gogo.cc b/gcc/go/gofrontend/gogo.cc index 82d4c1f..aef1c47 100644 --- a/gcc/go/gofrontend/gogo.cc +++ b/gcc/go/gofrontend/gogo.cc @@ -3508,6 +3508,10 @@ Finalize_methods::type(Type* t) case Type::TYPE_NAMED: { Named_type* nt = t->named_type(); + + if (nt->is_alias()) + return TRAVERSE_CONTINUE; + Type* rt = nt->real_type(); if (rt->classification() != Type::TYPE_STRUCT) { -- cgit 
v1.1 From 44135373fcdbe4019c5524ec3dff8e93d9ef113c Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 22 Sep 2020 00:16:31 +0000 Subject: Daily bump. --- gcc/ChangeLog | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 16 +++++++++++ gcc/c-family/ChangeLog | 4 +++ gcc/cp/ChangeLog | 25 +++++++++++++++++ gcc/fortran/ChangeLog | 16 +++++++++++ gcc/testsuite/ChangeLog | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 208 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 20009c2..c2f4752 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,76 @@ +2020-09-21 Marek Polacek + + * doc/invoke.texi: Document -Wctad-maybe-unsupported. + +2020-09-21 Richard Biener + + PR tree-optimization/97139 + * tree-vect-slp.c (vect_bb_slp_mark_live_stmts): Only mark the + pattern root, track visited vectorized stmts. + +2020-09-21 Jakub Jelinek + + * configure.ac: Use mallinfo mallinfo2 as first operand of + gcc_AC_CHECK_DECLS rather than [mallinfo, mallinfo2]. + * configure: Regenerated. + * config.in: Regenerated. + +2020-09-21 Andrea Corallo + + * config/aarch64/aarch64-builtins.c + (aarch64_general_expand_builtin): Use expand machinery not to + alter the value of an rtx returned by force_reg. + +2020-09-21 Richard Biener + + PR tree-optimization/97135 + * tree-ssa-loop-im.c (sm_seq_push_down): Do not ignore + self-dependences. + +2020-09-21 Martin Liska + + PR tree-optimization/96915 + * tree-switch-conversion.c (switch_conversion::expand): Accept + also integer constants. + +2020-09-21 Martin Liska + + * print-tree.c (print_node): Remove extra space. + +2020-09-21 Andrea Corallo + + PR target/96968 + * config/aarch64/aarch64-builtins.c + (aarch64_expand_fpsr_fpcr_setter): Fix comment nit. + (aarch64_expand_fpsr_fpcr_getter): New function, expand these + getters using expand_insn machinery. + (aarch64_general_expand_builtin): Make use of. 
+ +2020-09-21 Martin Liska + + * ggc-common.c (ggc_rlimit_bound): Use ONE_? macro. + (ggc_min_expand_heuristic): Likewise. + (ggc_min_heapsize_heuristic): Likewise. + * ggc-page.c (ggc_collect): Likewise. + * system.h (ONE_G): Likewise. + +2020-09-21 Martin Liska + + * ggc-common.c (ggc_prune_overhead_list): Use SIZE_AMOUNT. + * ggc-page.c (release_pages): Likewise. + (ggc_collect): Likewise. + (ggc_trim): Likewise. + (ggc_grow): Likewise. + * timevar.c (timer::print): Likewise. + +2020-09-21 Martin Liska + + * config.in: Regenerate. + * configure: Likewise. + * configure.ac: Detect for mallinfo2. + * ggc-common.c (defined): Use it. + * system.h: Handle also HAVE_MALLINFO2. + 2020-09-20 John David Anglin < danglin@gcc.gnu.org> * config/pa/pa-hpux11.h (LINK_GCC_C_SEQUENCE_SPEC): Delete. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index fb22edf..ca3ee10 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200921 +20200922 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 7ec4c14..8241610 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,19 @@ +2020-09-21 David Malcolm + + PR analyzer/97130 + * region-model-impl-calls.cc (call_details::get_arg_type): New. + * region-model.cc (region_model::on_call_pre): Check that the + initial arg is a pointer before calling impl_call_memset and + impl_call_strlen. + * region-model.h (call_details::get_arg_type): New decl. + +2020-09-21 David Malcolm + + PR analyzer/93355 + * sm-malloc.cc (malloc_state_machine::get_default_state): Look at + the base region when considering pointers. Treat pointers to + decls as being non-heap. + 2020-09-18 David Malcolm * checker-path.cc (warning_event::get_desc): Handle global state diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index bc002cf..69523ff 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,7 @@ +2020-09-21 Marek Polacek + + * c.opt (Wctad-maybe-unsupported): New option. 
+ 2020-09-19 Martin Sebor PR c/50584 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 8e6fd0d..df4b24d 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,28 @@ +2020-09-21 Marek Polacek + + PR c++/90583 + DR 1722 + * lambda.c (maybe_add_lambda_conv_op): Mark the conversion function + as noexcept. + +2020-09-21 Marek Polacek + + * pt.c (deduction_guides_for): Add a bool parameter. Set it. + (do_class_deduction): Warn when CTAD succeeds but the type doesn't + have any explicit deduction guides. + +2020-09-21 Nathan Sidwell + + * decl.c (xref_tag_1): Use IDENTIFIER_LAMBDA_P to detect lambdas. + * lambda.c (begin_lambda_type): Use ts_current to push the tag. + * name-lookup.h (enum tag_scope): Drop ts_lambda. + +2020-09-21 Marek Polacek + + PR c++/97099 + * decl.c (redeclaration_error_message): Detect a redeclaration of + deduction guides. + 2020-09-19 Sandra Loosemore * cp-gimplify.c (enum bc_t, bc_label): Move to c-family. diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 3c8701e..e826128 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,19 @@ +2020-09-21 Harald Anlauf + Paul Thomas + + * iresolve.c (gfc_resolve_mvbits): Remove unneeded conversion of + FROMPOS, LEN and TOPOS arguments to fit a C int. + * trans-intrinsic.c (gfc_conv_intrinsic_mvbits): Add inline + expansion of MVBITS intrinsic elemental subroutine and add code + for runtime argument checking. + (gfc_conv_intrinsic_subroutine): Recognise MVBITS intrinsic, but + defer handling to gfc_trans_call. + * trans-stmt.c (replace_ss): + (gfc_trans_call): Adjust to handle inline expansion, scalarization + of intrinsic subroutine MVBITS in gfc_conv_intrinsic_mvbits. + * trans.h (gfc_conv_intrinsic_mvbits): Add prototype for + gfc_conv_intrinsic_mvbits. 
+ 2020-09-19 Sandra Loosemore * interface.c (gfc_compare_actual_formal): Add assertion after diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ee34444..a9a9fc1 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,76 @@ +2020-09-21 David Malcolm + + PR analyzer/97130 + * gcc.dg/analyzer/pr97130.c: New test. + +2020-09-21 Marek Polacek + + PR c++/90583 + DR 1722 + * g++.dg/cpp0x/lambda/lambda-conv14.C: New test. + +2020-09-21 Marek Polacek + + * g++.dg/warn/Wctad-maybe-unsupported.C: New test. + * g++.dg/warn/Wctad-maybe-unsupported2.C: New test. + * g++.dg/warn/Wctad-maybe-unsupported3.C: New test. + * g++.dg/warn/Wctad-maybe-unsupported.h: New file. + +2020-09-21 Martin Sebor + + PR c/50584 + * gcc.dg/ipa/ipa-sra-1.c: Use a plain pointer for argv instead of array. + * gcc.dg/ipa/ipa-sra-12.c: Same. + * gcc.dg/ipa/ipa-sra-13.c: Same. + * gcc.dg/ipa/ipa-sra-14.c: Same. + * gcc.dg/ipa/ipa-sra-15.c: Same. + +2020-09-21 Harald Anlauf + Paul Thomas + + * gfortran.dg/check_bits_2.f90: New test. + +2020-09-21 Iain Sandoe + + * gcc.dg/ipa/symver1.c: Skip for Darwin. + +2020-09-21 Christophe Lyon + + * gcc.target/arm/csinc-1.c: Use dg-add-options + arm_arch_v8_1m_main. + * gcc.target/arm/csinv-1.c: Likewise. + * gcc.target/arm/csneg.c: Likewise. + +2020-09-21 Richard Biener + + PR tree-optimization/97139 + * gcc.dg/vect/pr97139.c: New testcase. + +2020-09-21 Marek Polacek + + PR c++/97099 + * g++.dg/cpp1z/class-deduction74.C: New test. + +2020-09-21 Richard Biener + + PR tree-optimization/97135 + * gcc.dg/torture/pr97135.c: New testcase. + +2020-09-21 Martin Liska + + PR tree-optimization/96915 + * gcc.target/aarch64/sve/pr96915.c: New test. + +2020-09-21 Andrea Corallo + + PR target/96968 + * gcc.target/aarch64/pr96968.c: New test. + +2020-09-21 Martin Liska + + * g++.dg/ext/timevar1.C: Prune more possible number values. + * g++.dg/ext/timevar2.C: Likewise. 
+ 2020-09-20 Marek Polacek PR c++/90199 -- cgit v1.1 From d149962d6ac3c940fee95707cb6bafdb055e9bea Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Tue, 22 Sep 2020 09:24:40 +0200 Subject: Add no strict aliasing to m128-check.h * gcc.target/i386/m128-check.h: Add no-strict-aliasing --- gcc/testsuite/gcc.target/i386/m128-check.h | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/i386/m128-check.h b/gcc/testsuite/gcc.target/i386/m128-check.h index 48b2332..6f414b0 100644 --- a/gcc/testsuite/gcc.target/i386/m128-check.h +++ b/gcc/testsuite/gcc.target/i386/m128-check.h @@ -78,6 +78,7 @@ typedef union #define CHECK_EXP(UINON_TYPE, VALUE_TYPE, FMT) \ static int \ +__attribute__((optimize ("no-strict-aliasing"))) \ __attribute__((noinline, unused)) \ check_##UINON_TYPE (UINON_TYPE u, const VALUE_TYPE *v) \ { \ -- cgit v1.1 From 466b907fad91075c1ac80acedd3314b190d52cc6 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 22 Sep 2020 10:02:47 +0200 Subject: ipa: Fix up ipa modref option help texts This fixes FAIL: compiler driver --help=common option(s): "^ +-.*[^:.]$" absent from output: " --param=modref-max-tests= Maximum number of tests perofmed by modref query" FAIL: compiler driver --help=optimizers option(s): "^ +-.*[^:.]$" absent from output: " -fipa-modref Perform interprocedural modref analysis" 2020-09-22 Jakub Jelinek * common.opt (-fipa-modref): Add dot at the end of option help. * params.opt (--param=modref-max-tests=): Likewise. --- gcc/common.opt | 2 +- gcc/params.opt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/common.opt b/gcc/common.opt index b833b98..05fedc4 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1827,7 +1827,7 @@ Perform interprocedural bitwise constant propagation. fipa-modref Common Report Var(flag_ipa_modref) Optimization -Perform interprocedural modref analysis +Perform interprocedural modref analysis. 
fipa-profile Common Report Var(flag_ipa_profile) Init(0) Optimization diff --git a/gcc/params.opt b/gcc/params.opt index 1d86404..3c64daf 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -882,7 +882,7 @@ Maximum number of refs stored in each modref tree. -param=modref-max-tests= Common Joined UInteger Var(param_modref_max_tests) Init(64) -Maximum number of tests perofmed by modref query +Maximum number of tests perofmed by modref query. -param=tm-max-aggregate-size= Common Joined UInteger Var(param_tm_max_aggregate_size) Init(9) Param Optimization -- cgit v1.1 From 46a274154786678e037fdce5dacd0305cddba0f9 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 22 Sep 2020 10:45:06 +0200 Subject: Fix some typos 2020-09-22 Jakub Jelinek gcc/ * params.opt (--param=modref-max-tests=): Fix typo in help text: perofmed -> performed. * common.opt: Fix typo: incrmeental -> incremental. * ipa-modref.c: Fix typos: recroding -> recording, becaue -> because, analsis -> analysis. (class modref_summaries): Fix typo: betweehn -> between. (analyze_call): Fix typo: calle -> callee. (read_modref_records): Fix typo: expcted -> expected. (pass_ipa_modref::execute): Fix typo: calle -> callee. gcc/c-family/ * c.opt (Wbuiltin-declaration-mismatch): Fix typo in variable name: warn_builtin_declaraion_mismatch -> warn_builtin_declaration_mismatch. --- gcc/c-family/c.opt | 2 +- gcc/common.opt | 2 +- gcc/ipa-modref.c | 14 +++++++------- gcc/params.opt | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index da6c3e1..7761eef 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -371,7 +371,7 @@ C ObjC C++ ObjC++ Var(warn_frame_address) Warning LangEnabledBy(C ObjC C++ ObjC+ Warn when __builtin_frame_address or __builtin_return_address is used unsafely. 
Wbuiltin-declaration-mismatch -C ObjC C++ ObjC++ Var(warn_builtin_declaraion_mismatch) Init(1) Warning +C ObjC C++ ObjC++ Var(warn_builtin_declaration_mismatch) Init(1) Warning Warn when a built-in function is declared with the wrong signature. Wbuiltin-macro-redefined diff --git a/gcc/common.opt b/gcc/common.opt index 05fedc4..292c2de 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -47,7 +47,7 @@ Variable bool in_lto_p = false ; This variable is set to non-0 only by LTO front-end. 1 indicates that -; the output produced will be used for incrmeental linking (thus weak symbols +; the output produced will be used for incremental linking (thus weak symbols ; can still be bound) and 2 indicates that the IL is going to be linked and ; and output to LTO object file. Variable diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index af0b710..0411f66 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -35,10 +35,10 @@ along with GCC; see the file COPYING3. If not see propagates across the callgraph and is able to handle recursion and works on whole program during link-time analysis. - LTO mode differs from the local mode by not recroding alias sets but types + LTO mode differs from the local mode by not recording alias sets but types that are translated to alias sets later. This is necessary in order stream - the information becaue the alias sets are rebuild at stream-in time and may - not correspond to ones seen during analsis. For this reason part of analysis + the information because the alias sets are rebuild at stream-in time and may + not correspond to ones seen during analysis. For this reason part of analysis is duplicated. */ #include "config.h" @@ -77,7 +77,7 @@ public: modref_summary *src_data, modref_summary *dst_data); /* This flag controls whether newly inserted functions should be analyzed - in IPA or normal mode. Functions inserted betweehn IPA analysis and + in IPA or normal mode. 
Functions inserted between IPA analysis and ipa-modref pass execution needs to be analyzed in IPA mode while all other insertions leads to normal analysis. */ bool ipa; @@ -413,7 +413,7 @@ analyze_call (modref_summary *cur_summary, struct cgraph_node *callee_node = cgraph_node::get_create (callee); - /* We can not safely optimize based on summary of calle if it does + /* We can not safely optimize based on summary of callee if it does not always bind to current def: it is possible that memory load was optimized out earlier which may not happen in the interposed variant. */ @@ -815,7 +815,7 @@ write_modref_records (modref_records_lto *tt, struct output_block *ob) /* Read a modref_tree from the input block IB using the data from DATA_IN. This assumes that the tree was encoded using write_modref_tree. Either nolto_ret or lto_ret is initialized by the tree depending whether - LTO streaming is expcted or not. */ + LTO streaming is expected or not. */ void read_modref_records (lto_input_block *ib, struct data_in *data_in, @@ -1238,7 +1238,7 @@ unsigned int pass_ipa_modref::execute (function *) fprintf (dump_file, " Call to %s\n", cur->dump_name ()); - /* We can not safely optimize based on summary of calle if it + /* We can not safely optimize based on summary of callee if it does not always bind to current def: it is possible that memory load was optimized out earlier which may not happen in the interposed variant. */ diff --git a/gcc/params.opt b/gcc/params.opt index 3c64daf..dcf5e02 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -882,7 +882,7 @@ Maximum number of refs stored in each modref tree. -param=modref-max-tests= Common Joined UInteger Var(param_modref_max_tests) Init(64) -Maximum number of tests perofmed by modref query. +Maximum number of tests performed by modref query. 
-param=tm-max-aggregate-size= Common Joined UInteger Var(param_tm_max_aggregate_size) Init(9) Param Optimization -- cgit v1.1 From fa9ad35dae03dcb20c4ccb50ba1b351a8ab77970 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Tue, 22 Sep 2020 11:58:36 +0100 Subject: AArch64: Implement poly-type vadd intrinsics This implements the vadd[p]_p* intrinsics. In terms of functionality they are aliases of veor operations on the relevant unsigned types. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ PR target/71233 * config/aarch64/arm_neon.h (vadd_p8, vadd_p16, vadd_p64, vaddq_p8, vaddq_p16, vaddq_p64, vaddq_p128): Define. gcc/testsuite/ PR target/71233 * gcc.target/aarch64/simd/vadd_poly_1.c: New test. --- gcc/config/aarch64/arm_neon.h | 49 +++++++++++++++++++++ .../gcc.target/aarch64/simd/vadd_poly_1.c | 50 ++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vadd_poly_1.c (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 50f8b23..81cabb2 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -35659,6 +35659,55 @@ vusmmlaq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) #pragma GCC pop_options +__extension__ extern __inline poly8x8_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vadd_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return __a ^ __b; +} + +__extension__ extern __inline poly16x4_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vadd_p16 (poly16x4_t __a, poly16x4_t __b) +{ + return __a ^ __b; +} + +__extension__ extern __inline poly64x1_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vadd_p64 (poly64x1_t __a, poly64x1_t __b) +{ + return __a ^ __b; +} + +__extension__ extern __inline poly8x16_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return __a ^ __b; +} + +__extension__ extern __inline 
poly16x8_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p16 (poly16x8_t __a, poly16x8_t __b) +{ + return __a ^__b; +} + +__extension__ extern __inline poly64x2_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p64 (poly64x2_t __a, poly64x2_t __b) +{ + return __a ^ __b; +} + +__extension__ extern __inline poly128_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p128 (poly128_t __a, poly128_t __b) +{ + return __a ^ __b; +} + #undef __aarch64_vget_lane_any #undef __aarch64_vdup_lane_any diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vadd_poly_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vadd_poly_1.c new file mode 100644 index 0000000..a5cdf29 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vadd_poly_1.c @@ -0,0 +1,50 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +#include + +poly8x8_t +foo (poly8x8_t a, poly8x8_t b) +{ + return vadd_p8 (a, b); +} + +poly16x4_t +foo16 (poly16x4_t a, poly16x4_t b) +{ + return vadd_p16 (a, b); +} + +poly64x1_t +foo64 (poly64x1_t a, poly64x1_t b) +{ + return vadd_p64 (a, b); +} + +poly8x16_t +fooq (poly8x16_t a, poly8x16_t b) +{ + return vaddq_p8 (a, b); +} + +poly16x8_t +fooq16 (poly16x8_t a, poly16x8_t b) +{ + return vaddq_p16 (a, b); +} + +poly64x2_t +fooq64 (poly64x2_t a, poly64x2_t b) +{ + return vaddq_p64 (a, b); +} + +poly128_t +fooq128 (poly128_t a, poly128_t b) +{ + return vaddq_p128 (a, b); +} + +/* { dg-final { scan-assembler-times "eor\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b, v\[0-9\]+\.8b" 3 } } */ +/* { dg-final { scan-assembler-times "eor\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 3 } } */ +/* { dg-final { scan-assembler-times "eor\\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" 2 } } */ -- cgit v1.1 From d4703be185b422f637deebd3bb9222a41c8023d6 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Tue, 22 Sep 2020 12:00:38 +0100 Subject: AArch64: Implement missing vceq*_p* intrinsics This patch implements some missing vceq* intrinsics 
on poly types. The behaviour is to produce the appropriate CMEQ instruction as for the unsigned types. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ PR target/71233 * config/aarch64/arm_neon.h (vceqq_p64, vceqz_p64, vceqzq_p64): Define. gcc/testsuite/ PR target/71233 * gcc.target/aarch64/simd/vceq_poly_1.c: New test. --- gcc/config/aarch64/arm_neon.h | 21 ++++++++++++++++ .../gcc.target/aarch64/simd/vceq_poly_1.c | 29 ++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vceq_poly_1.c (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 81cabb2..caeba10 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -12670,6 +12670,13 @@ vceqq_u64 (uint64x2_t __a, uint64x2_t __b) return (__a == __b); } +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqq_p64 (poly64x2_t __a, poly64x2_t __b) +{ + return (__a == __b); +} + /* vceq - scalar. */ __extension__ extern __inline uint32_t @@ -12779,6 +12786,13 @@ vceqz_u64 (uint64x1_t __a) return (__a == __AARCH64_UINT64_C (0)); } +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqz_p64 (poly64x1_t __a) +{ + return (__a == __AARCH64_UINT64_C (0)); +} + __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_f32 (float32x4_t __a) @@ -12856,6 +12870,13 @@ vceqzq_u64 (uint64x2_t __a) return (__a == __AARCH64_UINT64_C (0)); } +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqzq_p64 (poly64x2_t __a) +{ + return (__a == __AARCH64_UINT64_C (0)); +} + /* vceqz - scalar. 
*/ __extension__ extern __inline uint32_t diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vceq_poly_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vceq_poly_1.c new file mode 100644 index 0000000..7d43352 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vceq_poly_1.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +#include + +uint64x2_t +foo (poly64x2_t a, poly64x2_t b) +{ + return vceqq_p64 (a, b); +} + +/* { dg-final { scan-assembler-times "cmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */ + +uint64x1_t +fooz (poly64x1_t a) +{ + return vceqz_p64 (a); +} + +/* { dg-final { scan-assembler-times "cmeq\\td\[0-9\]+, d\[0-9\]+, #0" 1 } } */ + +uint64x2_t +fooqz (poly64x2_t a) +{ + return vceqzq_p64 (a); +} + +/* { dg-final { scan-assembler-times "cmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, #0" 1 } } */ + -- cgit v1.1 From 30957092db46d8798e632feefb5df634488dbb33 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Tue, 22 Sep 2020 12:03:49 +0100 Subject: AArch64: Implement missing vcls intrinsics on unsigned types This patch implements some missing intrinsics that perform a CLS on unsigned SIMD types. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ PR target/71233 * config/aarch64/arm_neon.h (vcls_u8, vcls_u16, vcls_u32, vclsq_u8, vclsq_u16, vclsq_u32): Define. gcc/testsuite/ PR target/71233 * gcc.target/aarch64/simd/vcls_unsigned_1.c: New test. 
--- gcc/config/aarch64/arm_neon.h | 42 +++++++++++++++++ .../gcc.target/aarch64/simd/vcls_unsigned_1.c | 54 ++++++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vcls_unsigned_1.c (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index caeba10..341019b 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -14075,6 +14075,48 @@ vclsq_s32 (int32x4_t __a) return __builtin_aarch64_clrsbv4si (__a); } +__extension__ extern __inline int8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcls_u8 (uint8x8_t __a) +{ + return __builtin_aarch64_clrsbv8qi ((int8x8_t) __a); +} + +__extension__ extern __inline int16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcls_u16 (uint16x4_t __a) +{ + return __builtin_aarch64_clrsbv4hi ((int16x4_t) __a); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcls_u32 (uint32x2_t __a) +{ + return __builtin_aarch64_clrsbv2si ((int32x2_t) __a); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vclsq_u8 (uint8x16_t __a) +{ + return __builtin_aarch64_clrsbv16qi ((int8x16_t) __a); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vclsq_u16 (uint16x8_t __a) +{ + return __builtin_aarch64_clrsbv8hi ((int16x8_t) __a); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vclsq_u32 (uint32x4_t __a) +{ + return __builtin_aarch64_clrsbv4si ((int32x4_t) __a); +} + /* vclz. 
*/ __extension__ extern __inline int8x8_t diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vcls_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vcls_unsigned_1.c new file mode 100644 index 0000000..f7078d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vcls_unsigned_1.c @@ -0,0 +1,54 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +#include + +int16x8_t +test_16x8 (uint16x8_t a) +{ + return vclsq_u16 (a); +} + +/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.8h, v\[0-9\]+\.8h" 1 } } */ + + +int8x16_t +test_8x16 (uint8x16_t a) +{ + return vclsq_u8 (a); +} + +/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */ + +int32x4_t +test_32x4 (uint32x4_t a) +{ + return vclsq_u32 (a); +} + +/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.4s, v\[0-9\]+\.4s" 1 } } */ + +int16x4_t +test_16x4 (uint16x4_t a) +{ + return vcls_u16 (a); +} + +/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.4h, v\[0-9\]+\.4h" 1 } } */ + +int8x8_t +test_8x8 (uint8x8_t a) +{ + return vcls_u8 (a); +} + +/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.8b, v\[0-9\]+\.8b" 1 } } */ + +int32x2_t +test32x2 (uint32x2_t a) +{ + return vcls_u32 (a); +} + +/* { dg-final { scan-assembler-times "cls\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s" 1 } } */ + -- cgit v1.1 From 1a84651d164e9ebf080e0b64f0ad300eaae46297 Mon Sep 17 00:00:00 2001 From: Stefan Schulze Frielinghaus Date: Fri, 18 Sep 2020 09:10:19 +0200 Subject: IBM Z: Try to make use of load-and-test instructions This patch enables a peephole2 optimization which transforms a load of constant zero into a temporary register which is then finally used to compare against a floating-point register of interest into a single load and test instruction. However, the optimization is only applied if both registers are dead afterwards and if we test for (in)equality only. This is relaxed in case of fast math. This is a follow up to PR88856. 
gcc/ChangeLog: * config/s390/s390.md ("*cmp_ccs_0", "*cmp_ccz_0", "*cmp_ccs_0_fastmath"): Basically change "*cmp_ccs_0" into "*cmp_ccz_0" and for fast math add "*cmp_ccs_0_fastmath". gcc/testsuite/ChangeLog: * gcc.target/s390/load-and-test-fp-1.c: Change test to include all possible combinations of dead/live registers and comparisons (equality, relational). * gcc.target/s390/load-and-test-fp-2.c: Same as load-and-test-fp-1.c but for fast math. * gcc.target/s390/load-and-test-fp.h: New test included by load-and-test-fp-{1,2}.c. --- gcc/config/s390/s390.md | 54 +++++++++++++++++----- gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c | 21 ++++----- gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c | 17 ++----- gcc/testsuite/gcc.target/s390/load-and-test-fp.h | 12 +++++ 4 files changed, 68 insertions(+), 36 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/load-and-test-fp.h (limited to 'gcc') diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 4c3e540..18edea1 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -1391,23 +1391,55 @@ ; (TF|DF|SF|TD|DD|SD) instructions -; FIXME: load and test instructions turn SNaN into QNaN what is not -; acceptable if the target will be used afterwards. On the other hand -; they are quite convenient for implementing comparisons with 0.0. So -; try to enable them via splitter/peephole if the value isn't needed anymore. -; See testcases: load-and-test-fp-1.c and load-and-test-fp-2.c +; load and test instructions turn a signaling NaN into a quiet NaN. Thus they +; may only be used if the target register is dead afterwards or if fast math +; is enabled. The former is done via a peephole optimization. Note, load and +; test instructions may only be used for (in)equality comparisons because +; relational comparisons must treat a quiet NaN like a signaling NaN which is +; not the case for load and test instructions. For fast math insn +; "cmp_ccs_0_fastmath" applies. 
+; See testcases load-and-test-fp-{1,2}.c + +(define_peephole2 + [(set (match_operand:FP 0 "register_operand") + (match_operand:FP 1 "const0_operand")) + (set (reg:CCZ CC_REGNUM) + (compare:CCZ (match_operand:FP 2 "register_operand") + (match_operand:FP 3 "register_operand")))] + "TARGET_HARD_FLOAT + && FP_REG_P (operands[2]) + && REGNO (operands[0]) == REGNO (operands[3]) + && peep2_reg_dead_p (2, operands[0]) + && peep2_reg_dead_p (2, operands[2])" + [(parallel + [(set (reg:CCZ CC_REGNUM) + (compare:CCZ (match_dup 2) (match_dup 1))) + (clobber (match_dup 2))])] + "") ; ltxbr, ltdbr, ltebr, ltxtr, ltdtr -(define_insn "*cmp_ccs_0" - [(set (reg CC_REGNUM) - (compare (match_operand:FP 0 "register_operand" "f") - (match_operand:FP 1 "const0_operand" ""))) - (clobber (match_operand:FP 2 "register_operand" "=0"))] - "s390_match_ccmode(insn, CCSmode) && TARGET_HARD_FLOAT" +(define_insn "*cmp_ccz_0" + [(set (reg:CCZ CC_REGNUM) + (compare:CCZ (match_operand:FP 0 "register_operand" "f") + (match_operand:FP 1 "const0_operand"))) + (clobber (match_operand:FP 2 "register_operand" "=0"))] + "TARGET_HARD_FLOAT" "ltr\t%0,%0" [(set_attr "op_type" "RRE") (set_attr "type" "fsimp")]) +(define_insn "*cmp_ccs_0_fastmath" + [(set (reg CC_REGNUM) + (compare (match_operand:FP 0 "register_operand" "f") + (match_operand:FP 1 "const0_operand")))] + "s390_match_ccmode (insn, CCSmode) + && TARGET_HARD_FLOAT + && !flag_trapping_math + && !flag_signaling_nans" + "ltr\t%0,%0" + [(set_attr "op_type" "RRE") + (set_attr "type" "fsimp")]) + ; VX: TFmode in FPR pairs: use cxbr instead of wfcxb ; cxtr, cdtr, cxbr, cdbr, cebr, cdb, ceb, wfcsb, wfcdb (define_insn "*cmp_ccs" diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c b/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c index 2a7e88c..f89d2d3 100644 --- a/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c +++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp-1.c @@ -1,17 +1,12 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -mzarch" } 
*/ +/* { dg-options "-O3 -mzarch -march=z196" } */ -/* a is used after the comparison. We cannot use load and test here - since it would turn SNaNs into QNaNs. */ +/* Use load-and-test instructions if compared for (in)equality and if variable + `a` is dead after the comparison. For all other cases use + compare-and-signal instructions. */ -double gl; +#include "load-and-test-fp.h" -double -foo (double dummy, double a) -{ - if (a == 0.0) - gl = 1; - return a; -} - -/* { dg-final { scan-assembler {\tcdbr?\t} } } */ +/* { dg-final { scan-assembler-times "ltdbr\t" 2 } } */ +/* { dg-final { scan-assembler-times "cdbr\t" 2 } } */ +/* { dg-final { scan-assembler-times "kdbr\t" 8 } } */ diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c b/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c index 7646fdd..53dab3c 100644 --- a/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c +++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp-2.c @@ -1,16 +1,9 @@ /* { dg-do compile } */ -/* { dg-options "-O3" } */ +/* { dg-options "-O3 -mzarch -ffast-math" } */ -/* a is not used after the comparison. So we should use load and test - here. */ +/* Fast-math implies -fno-trapping-math -fno-signaling-nans which imply + that no user visible trap will happen. 
*/ -double gl; +#include "load-and-test-fp.h" -void -bar (double a) -{ - if (a == 0.0) - gl = 1; -} - -/* { dg-final { scan-assembler "ltdbr\t" } } */ +/* { dg-final { scan-assembler-times "ltdbr\t" 12 } } */ diff --git a/gcc/testsuite/gcc.target/s390/load-and-test-fp.h b/gcc/testsuite/gcc.target/s390/load-and-test-fp.h new file mode 100644 index 0000000..f153d96 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/load-and-test-fp.h @@ -0,0 +1,12 @@ +double gl; + +#define test(N, CMP) \ + void N ## _dead(double a) { if (a CMP 0.0) gl = 1; } \ + double N ## _live(double a) { if (a CMP 0.0) gl = 1; return a; } + +test(eq, ==) +test(ne, !=) +test(ge, >=) +test(gt, >) +test(le, <=) +test(lt, <) -- cgit v1.1 From c6df6039e9180c580945266302ec14047d358364 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Tue, 22 Sep 2020 12:23:35 +0200 Subject: switch lowering: limit number of cluster attempts gcc/ChangeLog: PR tree-optimization/96979 * doc/invoke.texi: Document new param max-switch-clustering-attempts. * params.opt: Add new parameter. * tree-switch-conversion.c (jump_table_cluster::find_jump_tables): Limit number of attempts. (bit_test_cluster::find_bit_tests): Likewise. gcc/testsuite/ChangeLog: PR tree-optimization/96979 * g++.dg/tree-ssa/pr96979.C: New test. --- gcc/doc/invoke.texi | 4 +++ gcc/params.opt | 4 +++ gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 50 +++++++++++++++++++++++++++++++++ gcc/tree-switch-conversion.c | 17 +++++++++++ 4 files changed, 75 insertions(+) create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr96979.C (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 665c0ff..6a7833b1 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -13452,6 +13452,10 @@ The smallest number of different values for which it is best to use a jump-table instead of a tree of conditional branches. If the value is 0, use the default for the machine.
+@item max-switch-clustering-attempts +The maximum number of clustering attempts used +in bit-test and jump-table switch expansion. + @item jump-table-max-growth-ratio-for-size The maximum code size growth ratio when expanding into a jump table (in percent). The parameter is used when diff --git a/gcc/params.opt b/gcc/params.opt index dcf5e02..5f2e11d 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -82,6 +82,10 @@ The maximum length of a constant string for a builtin string cmp call eligible f Common Joined UInteger Var(param_case_values_threshold) Param Optimization The smallest number of different values for which it is best to use a jump-table instead of a tree of conditional branches, if 0, use the default for the machine. +-param=max-switch-clustering-attempts= +Common Joined UInteger Var(param_max_switch_clustering_attempts) Param Optimization Init(10000) +The maximum number of clustering attempts used in bit-test and jump-table switch expansion. + -param=comdat-sharing-probability= Common Joined UInteger Var(param_comdat_sharing_probability) Init(20) Param Optimization Probability that COMDAT function will be shared with different compilation unit. 
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C new file mode 100644 index 0000000..85c703a --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C @@ -0,0 +1,50 @@ +/* PR tree-optimization/96979 */ +/* { dg-do compile } */ +/* { dg-options "-std=c++17 -O2 -fdump-tree-switchlower1" } */ + +using u64 = unsigned long long; + +constexpr inline u64 +foo (const char *str) noexcept +{ + u64 value = 0xcbf29ce484222325ULL; + for (u64 i = 0; str[i]; i++) + value = (value ^ u64(str[i])) * 0x100000001b3ULL; + return value; +} + +struct V +{ + enum W + { +#define A(n) n, +#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9) +#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9) +#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9) +#define E D(foo1) D(foo2) D(foo3) + E + last + }; + + constexpr static W + bar (const u64 h) noexcept + { + switch (h) + { +#undef A +#define F(n) #n +#define A(n) case foo (F(n)): return n; + E + } + return last; + } +}; + +int +baz (const char *s) +{ + const u64 h = foo (s); + return V::bar (h); +} + +/* { dg-final { scan-tree-dump-times ";; Bail out: --param=max-switch-clustering-attempts reached" 2 "switchlower1" } } */ diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c index 186411f..e6a2c7a 100644 --- a/gcc/tree-switch-conversion.c +++ b/gcc/tree-switch-conversion.c @@ -1183,6 +1183,7 @@ jump_table_cluster::find_jump_tables (vec &clusters) min.quick_push (min_cluster_item (0, 0, 0)); + HOST_WIDE_INT attempts = 0; for (unsigned i = 1; i <= l; i++) { /* Set minimal # of clusters with i-th item to infinite. 
*/ @@ -1194,6 +1195,14 @@ jump_table_cluster::find_jump_tables (vec &clusters) if (i - j < case_values_threshold ()) s += i - j; + if (attempts++ == param_max_switch_clustering_attempts) + { + if (dump_file) + fprintf (dump_file, ";; Bail out: " + "--param=max-switch-clustering-attempts reached\n"); + return clusters.copy (); + } + /* Prefer clusters with smaller number of numbers covered. */ if ((min[j].m_count + 1 < min[i].m_count || (min[j].m_count + 1 == min[i].m_count @@ -1308,6 +1317,7 @@ bit_test_cluster::find_bit_tests (vec &clusters) min.quick_push (min_cluster_item (0, 0, 0)); + HOST_WIDE_INT attempts = 0; for (unsigned i = 1; i <= l; i++) { /* Set minimal # of clusters with i-th item to infinite. */ @@ -1315,6 +1325,13 @@ bit_test_cluster::find_bit_tests (vec &clusters) for (unsigned j = 0; j < i; j++) { + if (attempts++ == param_max_switch_clustering_attempts) + { + if (dump_file) + fprintf (dump_file, ";; Bail out: " + "--param=max-switch-clustering-attempts reached\n"); + return clusters.copy (); + } if (min[j].m_count + 1 < min[i].m_count && can_be_handled (clusters, j, i - 1)) min[i] = min_cluster_item (min[j].m_count + 1, j, INT_MAX); -- cgit v1.1 From c8042d5f6c1cd5258ff8299612e4d950ee58ce3e Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Mon, 21 Sep 2020 15:39:53 -0400 Subject: testsuite: Prune more output in timevar2.C. gcc/testsuite/ChangeLog: * g++.dg/ext/timevar2.C: Also prune N%. 
--- gcc/testsuite/g++.dg/ext/timevar2.C | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/ext/timevar2.C b/gcc/testsuite/g++.dg/ext/timevar2.C index 46c3e1b..7d3f121 100644 --- a/gcc/testsuite/g++.dg/ext/timevar2.C +++ b/gcc/testsuite/g++.dg/ext/timevar2.C @@ -4,6 +4,7 @@ // { dg-prune-output "k" } // { dg-prune-output " 0 " } // { dg-prune-output "checks" } +// { dg-prune-output "\[0-9\]+%" } namespace detail { namespace indirect_traits {} -- cgit v1.1 From 7df1534c136e2556ca10d3a60d2b2cc77544dbc8 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Tue, 22 Sep 2020 09:25:35 -0400 Subject: testsuite: Prune more output in timevar1.C. gcc/testsuite/ChangeLog: * g++.dg/ext/timevar1.C: Also prune N%. --- gcc/testsuite/g++.dg/ext/timevar1.C | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/ext/timevar1.C b/gcc/testsuite/g++.dg/ext/timevar1.C index 988a6f8..d68a8b7 100644 --- a/gcc/testsuite/g++.dg/ext/timevar1.C +++ b/gcc/testsuite/g++.dg/ext/timevar1.C @@ -5,6 +5,7 @@ // { dg-prune-output "k" } // { dg-prune-output " 0 " } // { dg-prune-output "checks" } +// { dg-prune-output "\[0-9\]+%" } void foo () -- cgit v1.1 From 7dfffe32419a963660f5c04b40b8f0becd509b69 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Tue, 22 Sep 2020 09:00:20 -0700 Subject: c++: fix injected friend of template class In working on fixing hiddenness, I discovered some suspicious code in template instantiation. I suspect it dates from when we didn't do the hidden friend injection thing at all. The xreftag finds the same class, but makes it visible to name lookup. Which is wrong. hurrah, fixing a bug by deleting code! gcc/cp/ * pt.c (instantiate_class_template_1): Do not repush and unhide injected friend. gcc/testsuite/ * g++.old-deja/g++.pt/friend34.C: Check injected friend is still invisible. 
--- gcc/cp/pt.c | 19 ------------------- gcc/testsuite/g++.old-deja/g++.pt/friend34.C | 5 ++++- 2 files changed, 4 insertions(+), 20 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 97d0c24..44ca14a 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -12030,25 +12030,6 @@ instantiate_class_template_1 (tree type) adjust_processing_template_decl = true; --processing_template_decl; } - else if (TREE_CODE (friend_type) != BOUND_TEMPLATE_TEMPLATE_PARM - && !CLASSTYPE_USE_TEMPLATE (friend_type) - && TYPE_HIDDEN_P (friend_type)) - { - /* friend class C; - - where C hasn't been declared yet. Let's lookup name - from namespace scope directly, bypassing any name that - come from dependent base class. */ - tree ns = decl_namespace_context (TYPE_MAIN_DECL (friend_type)); - - /* The call to xref_tag_from_type does injection for friend - classes. */ - push_nested_namespace (ns); - friend_type = - xref_tag_from_type (friend_type, NULL_TREE, - /*tag_scope=*/ts_current); - pop_nested_namespace (ns); - } else if (uses_template_parms (friend_type)) /* friend class C; */ friend_type = tsubst (friend_type, args, diff --git a/gcc/testsuite/g++.old-deja/g++.pt/friend34.C b/gcc/testsuite/g++.old-deja/g++.pt/friend34.C index 5e80ab9..dcd6df0 100644 --- a/gcc/testsuite/g++.old-deja/g++.pt/friend34.C +++ b/gcc/testsuite/g++.old-deja/g++.pt/friend34.C @@ -6,9 +6,12 @@ template class bar { public: - friend class foo; // this is not bar::foo, it forward-declares ::foo + friend class foo; // this is not bar::foo, it injects hidden ::foo class foo {}; bar() { foo(); } // but this should refer to bar::foo }; bar<> baz; + +// We still have not made foo visible. 
+foo *b; // { dg-error "does not name a type" } -- cgit v1.1 From 30bf947649e7381402637d1434d6b1ef22d031f1 Mon Sep 17 00:00:00 2001 From: Clément Chigot Date: Wed, 16 Sep 2020 14:03:10 +0200 Subject: compiler: call runtime.eqtype for non-interface type switch on aix All type switch clauses must call runtime.eqtype if the linker isn't able to merge type descriptor pointers. Previously, only interface-type clauses were doing it. Updates golang/go#39276 Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/255202 --- gcc/go/gofrontend/MERGE | 2 +- gcc/go/gofrontend/statements.cc | 27 ++++++++++++++++++--------- gcc/go/gofrontend/statements.h | 4 ++-- 3 files changed, 21 insertions(+), 12 deletions(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index e4f8fac..a8ba5a3 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -a59167c29d6ad2ddf533b3a12b365f72df0e1476 +b24062f0b2e8f6173731d5654afe0addf857270e The first line of this file holds the git revision number of the last merge done from the gofrontend repository. diff --git a/gcc/go/gofrontend/statements.cc b/gcc/go/gofrontend/statements.cc index a059ee4..ad89807 100644 --- a/gcc/go/gofrontend/statements.cc +++ b/gcc/go/gofrontend/statements.cc @@ -4627,7 +4627,8 @@ Type_case_clauses::Type_case_clause::traverse(Traverse* traverse) // statements.
void -Type_case_clauses::Type_case_clause::lower(Type* switch_val_type, +Type_case_clauses::Type_case_clause::lower(Gogo* gogo, + Type* switch_val_type, Block* b, Temporary_statement* descriptor_temp, Unnamed_label* break_label, @@ -4666,9 +4667,16 @@ Type_case_clauses::Type_case_clause::lower(Type* switch_val_type, Expression::make_nil(loc), loc); else if (type->interface_type() == NULL) - cond = Expression::make_binary(OPERATOR_EQEQ, ref, - Expression::make_type_descriptor(type, loc), - loc); + { + if (!gogo->need_eqtype()) + cond = Expression::make_binary(OPERATOR_EQEQ, ref, + Expression::make_type_descriptor(type, loc), + loc); + else + cond = Runtime::make_call(Runtime::EQTYPE, loc, 2, + Expression::make_type_descriptor(type, loc), + ref); + } else cond = Runtime::make_call(Runtime::IFACET2IP, loc, 2, Expression::make_type_descriptor(type, loc), @@ -4826,7 +4834,8 @@ Type_case_clauses::check_duplicates() const // BREAK_LABEL is the label at the end of the type switch. void -Type_case_clauses::lower(Type* switch_val_type, Block* b, +Type_case_clauses::lower(Gogo* gogo, Type* switch_val_type, + Block* b, Temporary_statement* descriptor_temp, Unnamed_label* break_label) const { @@ -4838,7 +4847,7 @@ Type_case_clauses::lower(Type* switch_val_type, Block* b, ++p) { if (!p->is_default()) - p->lower(switch_val_type, b, descriptor_temp, break_label, + p->lower(gogo, switch_val_type, b, descriptor_temp, break_label, &stmts_label); else { @@ -4850,7 +4859,7 @@ Type_case_clauses::lower(Type* switch_val_type, Block* b, go_assert(stmts_label == NULL); if (default_case != NULL) - default_case->lower(switch_val_type, b, descriptor_temp, break_label, + default_case->lower(gogo, switch_val_type, b, descriptor_temp, break_label, NULL); } @@ -4905,7 +4914,7 @@ Type_switch_statement::do_traverse(Traverse* traverse) // equality testing. 
Statement* -Type_switch_statement::do_lower(Gogo*, Named_object*, Block* enclosing, +Type_switch_statement::do_lower(Gogo* gogo, Named_object*, Block* enclosing, Statement_inserter*) { const Location loc = this->location(); @@ -4943,7 +4952,7 @@ Type_switch_statement::do_lower(Gogo*, Named_object*, Block* enclosing, b->add_statement(s); if (this->clauses_ != NULL) - this->clauses_->lower(val_type, b, descriptor_temp, this->break_label()); + this->clauses_->lower(gogo, val_type, b, descriptor_temp, this->break_label()); s = Statement::make_unnamed_label_statement(this->break_label_); b->add_statement(s); diff --git a/gcc/go/gofrontend/statements.h b/gcc/go/gofrontend/statements.h index f1c6be9..47092b4 100644 --- a/gcc/go/gofrontend/statements.h +++ b/gcc/go/gofrontend/statements.h @@ -2089,7 +2089,7 @@ class Type_case_clauses // Lower to if and goto statements. void - lower(Type*, Block*, Temporary_statement* descriptor_temp, + lower(Gogo*, Type*, Block*, Temporary_statement* descriptor_temp, Unnamed_label* break_label) const; // Return true if these clauses may fall through to the statements @@ -2138,7 +2138,7 @@ class Type_case_clauses // Lower to if and goto statements. void - lower(Type*, Block*, Temporary_statement* descriptor_temp, + lower(Gogo*, Type*, Block*, Temporary_statement* descriptor_temp, Unnamed_label* break_label, Unnamed_label** stmts_label) const; // Return true if this clause may fall through to execute the -- cgit v1.1 From 7c8ba5da80d5d95a8521010d6731d0d83036145d Mon Sep 17 00:00:00 2001 From: David Faust Date: Tue, 22 Sep 2020 20:31:35 +0200 Subject: bpf: use xBPF signed div, mod insns when available The 'mod' and 'div' operators in eBPF are unsigned, with no signed counterpart. xBPF adds two new ALU operations, sdiv and smod, for signed division and modulus, respectively. Update bpf.md with 'define_insn' blocks for signed div and mod to use them when targeting xBPF, and add new tests to ensure they are used appropriately.
2020-09-17 David Faust gcc/ * config/bpf/bpf.md: Add defines for signed div and mod operators. gcc/testsuite/ * gcc.target/bpf/diag-sdiv.c: New test. * gcc.target/bpf/diag-smod.c: New test. * gcc.target/bpf/xbpf-sdiv-1.c: New test. * gcc.target/bpf/xbpf-smod-1.c: New test. --- gcc/config/bpf/bpf.md | 20 ++++++++++++++++++++ gcc/testsuite/gcc.target/bpf/diag-sdiv.c | 12 ++++++++++++ gcc/testsuite/gcc.target/bpf/diag-smod.c | 12 ++++++++++++ gcc/testsuite/gcc.target/bpf/xbpf-sdiv-1.c | 14 ++++++++++++++ gcc/testsuite/gcc.target/bpf/xbpf-smod-1.c | 14 ++++++++++++++ 5 files changed, 72 insertions(+) create mode 100644 gcc/testsuite/gcc.target/bpf/diag-sdiv.c create mode 100644 gcc/testsuite/gcc.target/bpf/diag-smod.c create mode 100644 gcc/testsuite/gcc.target/bpf/xbpf-sdiv-1.c create mode 100644 gcc/testsuite/gcc.target/bpf/xbpf-smod-1.c (limited to 'gcc') diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md index 769d8ea..8e7cf50 100644 --- a/gcc/config/bpf/bpf.md +++ b/gcc/config/bpf/bpf.md @@ -165,6 +165,16 @@ "div\t%0,%2" [(set_attr "type" "")]) +;; However, xBPF does provide a signed division operator, sdiv. + +(define_insn "div3" + [(set (match_operand:AM 0 "register_operand" "=r,r") + (div:AM (match_operand:AM 1 "register_operand" " 0,0") + (match_operand:AM 2 "reg_or_imm_operand" "r,I")))] + "TARGET_XBPF" + "sdiv\t%0,%2" + [(set_attr "type" "")]) + ;;; Modulus ;; Note that eBPF doesn't provide instructions for signed integer @@ -178,6 +188,16 @@ "mod\t%0,%2" [(set_attr "type" "")]) +;; Again, xBPF provides a signed version, smod. 
+ +(define_insn "mod3" + [(set (match_operand:AM 0 "register_operand" "=r,r") + (mod:AM (match_operand:AM 1 "register_operand" " 0,0") + (match_operand:AM 2 "reg_or_imm_operand" "r,I")))] + "TARGET_XBPF" + "smod\t%0,%2" + [(set_attr "type" "")]) + ;;; Logical AND (define_insn "and3" [(set (match_operand:AM 0 "register_operand" "=r,r") diff --git a/gcc/testsuite/gcc.target/bpf/diag-sdiv.c b/gcc/testsuite/gcc.target/bpf/diag-sdiv.c new file mode 100644 index 0000000..db0c494 --- /dev/null +++ b/gcc/testsuite/gcc.target/bpf/diag-sdiv.c @@ -0,0 +1,12 @@ +/* Verify signed division does not produce 'sdiv' insn in eBPF. */ +/* { dg-do compile } */ +/* { dg-options "-O0" } */ + +void +foo () +{ + signed int x = 5; + signed int y = 2; + signed int z = x / y; +} +/* { dg-final { scan-assembler-not "sdiv(32)?\t%r" } } */ diff --git a/gcc/testsuite/gcc.target/bpf/diag-smod.c b/gcc/testsuite/gcc.target/bpf/diag-smod.c new file mode 100644 index 0000000..20234ee --- /dev/null +++ b/gcc/testsuite/gcc.target/bpf/diag-smod.c @@ -0,0 +1,12 @@ +/* Verify signed modulo does not produce 'smod' insn in eBPF. */ +/* { dg-do compile } */ +/* { dg-options "-O0" } */ + +void +foo () +{ + signed int x = 5; + signed int y = 2; + signed int z = x % y; +} +/* { dg-final { scan-assembler-not "smod(32)?\t%r" } } */ diff --git a/gcc/testsuite/gcc.target/bpf/xbpf-sdiv-1.c b/gcc/testsuite/gcc.target/bpf/xbpf-sdiv-1.c new file mode 100644 index 0000000..f6c5c9e --- /dev/null +++ b/gcc/testsuite/gcc.target/bpf/xbpf-sdiv-1.c @@ -0,0 +1,14 @@ +/* Verify that sdiv instruction is used for xBPF. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O0 -mxbpf" } */ + +void +foo () +{ + signed int x = 5; + signed int y = 2; + signed int z = x / y; + signed int w = x / 3; +} + +/* { dg-final { scan-assembler "sdiv(32)?\t%r" } } */ diff --git a/gcc/testsuite/gcc.target/bpf/xbpf-smod-1.c b/gcc/testsuite/gcc.target/bpf/xbpf-smod-1.c new file mode 100644 index 0000000..b3e5816 --- /dev/null +++ b/gcc/testsuite/gcc.target/bpf/xbpf-smod-1.c @@ -0,0 +1,14 @@ +/* Verify that smod instruction is used for xBPF. */ +/* { dg-do compile } */ +/* { dg-options "-O0 -mxbpf" } */ + +void +foo () +{ + signed int x = 5; + signed int y = 2; + signed int z = x % y; + signed int w = x % 3; +} + +/* { dg-final { scan-assembler "smod(32)?\t%r" } } */ -- cgit v1.1 From 809192e77e6e112a0fe32dee7fada7a49fbf25cd Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Fri, 18 Sep 2020 13:59:21 -0400 Subject: analyzer: add -fdump-analyzer-json I've found this useful for debugging state explosions in the analyzer. gcc/analyzer/ChangeLog: * analysis-plan.cc: Include "json.h". * analyzer.opt (fdump-analyzer-json): New. * call-string.cc: Include "json.h". (call_string::to_json): New. * call-string.h (call_string::to_json): New decl. * checker-path.cc: Include "json.h". * constraint-manager.cc: Include "json.h". (equiv_class::to_json): New. (constraint::to_json): New. (constraint_manager::to_json): New. * constraint-manager.h (equiv_class::to_json): New decl. (constraint::to_json): New decl. (constraint_manager::to_json): New decl. * diagnostic-manager.cc: Include "json.h". (saved_diagnostic::to_json): New. (diagnostic_manager::to_json): New. * diagnostic-manager.h (saved_diagnostic::to_json): New decl. (diagnostic_manager::to_json): New decl. * engine.cc: Include "json.h", <zlib.h>. (exploded_node::status_to_str): New. (exploded_node::to_json): New. (exploded_edge::to_json): New. (exploded_graph::to_json): New. (dump_analyzer_json): New. (impl_run_checkers): Call it.
* exploded-graph.h (exploded_node::status_to_str): New decl. (exploded_node::to_json): New. (exploded_edge::to_json): New. (exploded_graph::to_json): New. * pending-diagnostic.cc: Include "json.h". * program-point.cc: Include "json.h". (program_point::to_json): New. * program-point.h (program_point::to_json): New decl. * program-state.cc: Include "json.h". (extrinsic_state::to_json): New. (sm_state_map::to_json): New. (program_state::to_json): New. * program-state.h (extrinsic_state::to_json): New decl. (sm_state_map::to_json): New decl. (program_state::to_json): New decl. * region-model-impl-calls.cc: Include "json.h". * region-model-manager.cc: Include "json.h". * region-model-reachability.cc: Include "json.h". * region-model.cc: Include "json.h". * region-model.h (svalue::to_json): New decl. (region::to_json): New decl. * region.cc: Include "json.h". (region::to_json): New. * sm-file.cc: Include "json.h". * sm-malloc.cc: Include "json.h". * sm-pattern-test.cc: Include "json.h". * sm-sensitive.cc: Include "json.h". * sm-signal.cc: Include "json.h". (signal_delivery_edge_info_t::to_json): New. * sm-taint.cc: Include "json.h". * sm.cc: Include "diagnostic.h", "tree-diagnostic.h", and "json.h". (state_machine::state::to_json): New. (state_machine::to_json): New. * sm.h (state_machine::state::to_json): New. (state_machine::to_json): New. * state-purge.cc: Include "json.h". * store.cc: Include "json.h". (binding_key::get_desc): New. (binding_map::to_json): New. (binding_cluster::to_json): New. (store::to_json): New. * store.h (binding_key::get_desc): New decl. (binding_map::to_json): New decl. (binding_cluster::to_json): New decl. (store::to_json): New decl. * supergraph.cc: Include "json.h". (supergraph::to_json): New. (supernode::to_json): New. (superedge::to_json): New. * supergraph.h (supergraph::to_json): New decl. (supernode::to_json): New decl. (superedge::to_json): New decl. * svalue.cc: Include "json.h". (svalue::to_json): New.
gcc/ChangeLog: * doc/analyzer.texi (Other Debugging Techniques): Mention -fdump-analyzer-json. * doc/invoke.texi (Static Analyzer Options): Add -fdump-analyzer-json. --- gcc/analyzer/analysis-plan.cc | 1 + gcc/analyzer/analyzer.opt | 4 + gcc/analyzer/call-string.cc | 29 ++++++ gcc/analyzer/call-string.h | 2 + gcc/analyzer/checker-path.cc | 1 + gcc/analyzer/constraint-manager.cc | 77 ++++++++++++++++ gcc/analyzer/constraint-manager.h | 6 ++ gcc/analyzer/diagnostic-manager.cc | 58 ++++++++++++ gcc/analyzer/diagnostic-manager.h | 4 + gcc/analyzer/engine.cc | 146 ++++++++++++++++++++++++++++++ gcc/analyzer/exploded-graph.h | 7 ++ gcc/analyzer/pending-diagnostic.cc | 1 + gcc/analyzer/program-point.cc | 38 ++++++++ gcc/analyzer/program-point.h | 2 + gcc/analyzer/program-state.cc | 85 +++++++++++++++++ gcc/analyzer/program-state.h | 6 ++ gcc/analyzer/region-model-impl-calls.cc | 1 + gcc/analyzer/region-model-manager.cc | 1 + gcc/analyzer/region-model-reachability.cc | 1 + gcc/analyzer/region-model.cc | 1 + gcc/analyzer/region-model.h | 4 + gcc/analyzer/region.cc | 12 +++ gcc/analyzer/sm-file.cc | 1 + gcc/analyzer/sm-malloc.cc | 1 + gcc/analyzer/sm-pattern-test.cc | 1 + gcc/analyzer/sm-sensitive.cc | 1 + gcc/analyzer/sm-signal.cc | 7 ++ gcc/analyzer/sm-taint.cc | 1 + gcc/analyzer/sm.cc | 36 ++++++++ gcc/analyzer/sm.h | 3 + gcc/analyzer/state-purge.cc | 1 + gcc/analyzer/store.cc | 118 ++++++++++++++++++++++++ gcc/analyzer/store.h | 7 ++ gcc/analyzer/supergraph.cc | 112 +++++++++++++++++++++++ gcc/analyzer/supergraph.h | 6 ++ gcc/analyzer/svalue.cc | 12 +++ gcc/doc/analyzer.texi | 3 + gcc/doc/invoke.texi | 7 ++ 38 files changed, 804 insertions(+) (limited to 'gcc') diff --git a/gcc/analyzer/analysis-plan.cc b/gcc/analyzer/analysis-plan.cc index 3c8b10b..7e48f52 100644 --- a/gcc/analyzer/analysis-plan.cc +++ b/gcc/analyzer/analysis-plan.cc @@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. 
If not see #include "timevar.h" #include "ipa-utils.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "diagnostic-core.h" #include "analyzer/analyzer-logging.h" diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt index 94a686d..872fb31 100644 --- a/gcc/analyzer/analyzer.opt +++ b/gcc/analyzer/analyzer.opt @@ -186,6 +186,10 @@ fdump-analyzer-exploded-nodes-3 Common RejectNegative Var(flag_dump_analyzer_exploded_nodes_3) Dump a textual representation of the exploded graph to SRCFILE.eg-ID.txt. +fdump-analyzer-json +Common RejectNegative Var(flag_dump_analyzer_json) +Dump analyzer-specific data to a SRCFILE.analyzer.json.gz file. + fdump-analyzer-state-purge Common RejectNegative Var(flag_dump_analyzer_state_purge) Dump state-purging information to a SRCFILE.state-purge.dot file. diff --git a/gcc/analyzer/call-string.cc b/gcc/analyzer/call-string.cc index d363031..72568c6 100644 --- a/gcc/analyzer/call-string.cc +++ b/gcc/analyzer/call-string.cc @@ -24,6 +24,7 @@ along with GCC; see the file COPYING3. If not see #include "pretty-print.h" #include "tree.h" #include "options.h" +#include "json.h" #include "analyzer/call-string.h" #include "ordered-hash-map.h" #include "options.h" @@ -104,6 +105,34 @@ call_string::print (pretty_printer *pp) const pp_string (pp, "]"); } +/* Return a new json::array of the form + [{"src_snode_idx" : int, + "dst_snode_idx" : int, + "funcname" : str}, + ...for each return_superedge in the callstring]. 
*/ + +json::value * +call_string::to_json () const +{ + json::array *arr = new json::array (); + + const return_superedge *e; + int i; + FOR_EACH_VEC_ELT (m_return_edges, i, e) + { + json::object *e_obj = new json::object (); + e_obj->set ("src_snode_idx", + new json::integer_number (e->m_src->m_index)); + e_obj->set ("dst_snode_idx", + new json::integer_number (e->m_dest->m_index)); + e_obj->set ("funcname", + new json::string (function_name (e->m_dest->m_fun))); + arr->append (e_obj); + } + + return arr; +} + /* Generate a hash value for this call_string. */ hashval_t diff --git a/gcc/analyzer/call-string.h b/gcc/analyzer/call-string.h index 1b5db0a..5a03c59 100644 --- a/gcc/analyzer/call-string.h +++ b/gcc/analyzer/call-string.h @@ -47,6 +47,8 @@ public: void print (pretty_printer *pp) const; + json::value *to_json () const; + hashval_t hash () const; bool empty_p () const { return m_return_edges.is_empty (); } diff --git a/gcc/analyzer/checker-path.cc b/gcc/analyzer/checker-path.cc index c281316..1f6d6a8 100644 --- a/gcc/analyzer/checker-path.cc +++ b/gcc/analyzer/checker-path.cc @@ -38,6 +38,7 @@ along with GCC; see the file COPYING3. If not see #include "fibonacci_heap.h" #include "diagnostic-event-id.h" #include "shortest-paths.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "analyzer/sm.h" diff --git a/gcc/analyzer/constraint-manager.cc b/gcc/analyzer/constraint-manager.cc index 521501f..5cd2c9e 100644 --- a/gcc/analyzer/constraint-manager.cc +++ b/gcc/analyzer/constraint-manager.cc @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-core.h" #include "graphviz.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "ordered-hash-map.h" #include "options.h" @@ -299,6 +300,33 @@ equiv_class::print (pretty_printer *pp) const pp_character (pp, '}'); } +/* Return a new json::object of the form + {"svals" : [str], + "constant" : optional str}. 
*/ + +json::object * +equiv_class::to_json () const +{ + json::object *ec_obj = new json::object (); + + json::array *sval_arr = new json::array (); + int i; + const svalue *sval; + FOR_EACH_VEC_ELT (m_vars, i, sval) + sval_arr->append (sval->to_json ()); + ec_obj->set ("svals", sval_arr); + + if (m_constant) + { + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp_printf (&pp, "%qE", m_constant); + ec_obj->set ("constant", new json::string (pp_formatted_text (&pp))); + } + + return ec_obj; +} + /* Generate a hash value for this equiv_class. This relies on the ordering of m_vars, and so this object needs to have been canonicalized for this to be meaningful. */ @@ -499,6 +527,23 @@ constraint::print (pretty_printer *pp, const constraint_manager &cm) const m_rhs.get_obj (cm).print (pp); } +/* Return a new json::object of the form + {"lhs" : int, the EC index + "op" : str, + "rhs" : int, the EC index}. */ + +json::object * +constraint::to_json () const +{ + json::object *con_obj = new json::object (); + + con_obj->set ("lhs", new json::integer_number (m_lhs.as_int ())); + con_obj->set ("op", new json::string (constraint_op_code (m_op))); + con_obj->set ("rhs", new json::integer_number (m_rhs.as_int ())); + + return con_obj; +} + /* Generate a hash value for this constraint. */ hashval_t @@ -768,6 +813,38 @@ debug (const constraint_manager &cm) cm.dump (); } +/* Return a new json::object of the form + {"ecs" : array of objects, one per equiv_class + "constraints" : array of objects, one per constraint}. */ + +json::object * +constraint_manager::to_json () const +{ + json::object *cm_obj = new json::object (); + + /* Equivalence classes. */ + { + json::array *ec_arr = new json::array (); + int i; + equiv_class *ec; + FOR_EACH_VEC_ELT (m_equiv_classes, i, ec) + ec_arr->append (ec->to_json ()); + cm_obj->set ("ecs", ec_arr); + } + + /* Constraints. 
*/ + { + json::array *con_arr = new json::array (); + int i; + constraint *c; + FOR_EACH_VEC_ELT (m_constraints, i, c) + con_arr->append (c->to_json ()); + cm_obj->set ("constraints", con_arr); + } + + return cm_obj; +} + /* Attempt to add the constraint LHS OP RHS to this constraint_manager. Return true if the constraint could be added (or is already true). Return false if the constraint contradicts existing knowledge. */ diff --git a/gcc/analyzer/constraint-manager.h b/gcc/analyzer/constraint-manager.h index 3c31a89..98960ff 100644 --- a/gcc/analyzer/constraint-manager.h +++ b/gcc/analyzer/constraint-manager.h @@ -88,6 +88,8 @@ public: void print (pretty_printer *pp) const; + json::object *to_json () const; + /* An equivalence class can contain multiple constants (e.g. multiple different zeroes, for different types); these are just for the last constant added. */ @@ -160,6 +162,8 @@ class constraint void print (pretty_printer *pp, const constraint_manager &cm) const; + json::object *to_json () const; + hashval_t hash () const; bool operator== (const constraint &other) const; @@ -215,6 +219,8 @@ public: void dump (FILE *fp) const; void dump () const; + json::object *to_json () const; + const equiv_class &get_equiv_class_by_index (unsigned idx) const { return *m_equiv_classes[idx]; diff --git a/gcc/analyzer/diagnostic-manager.cc b/gcc/analyzer/diagnostic-manager.cc index 4a95d4c..8d7e508 100644 --- a/gcc/analyzer/diagnostic-manager.cc +++ b/gcc/analyzer/diagnostic-manager.cc @@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. 
If not see #include "tristate.h" #include "selftest.h" #include "ordered-hash-map.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "analyzer/sm.h" @@ -114,6 +115,43 @@ saved_diagnostic::operator== (const saved_diagnostic &other) const && m_trailing_eedge == other.m_trailing_eedge); } +/* Return a new json::object of the form + {"sm": optional str, + "enode": int, + "snode": int, + "sval": optional str, + "state": optional str, + "path_length": int, + "pending_diagnostic": str}. */ + +json::object * +saved_diagnostic::to_json () const +{ + json::object *sd_obj = new json::object (); + + if (m_sm) + sd_obj->set ("sm", new json::string (m_sm->get_name ())); + sd_obj->set ("enode", new json::integer_number (m_enode->m_index)); + sd_obj->set ("snode", new json::integer_number (m_snode->m_index)); + if (m_sval) + sd_obj->set ("sval", m_sval->to_json ()); + if (m_state) + sd_obj->set ("state", m_state->to_json ()); + sd_obj->set ("path_length", new json::integer_number (m_epath_length)); + sd_obj->set ("pending_diagnostic", new json::string (m_d->get_kind ())); + + /* We're not yet JSONifying the following fields: + const gimple *m_stmt; + stmt_finder *m_stmt_finder; + tree m_var; + exploded_edge *m_trailing_eedge; + enum status m_status; + feasibility_problem *m_problem; + */ + + return sd_obj; +} + /* State for building a checker_path from a particular exploded_path. In particular, this precomputes reachability information: the set of source enodes for which a path be found to the diagnostic enode. */ @@ -199,6 +237,26 @@ diagnostic_manager::add_diagnostic (const exploded_node *enode, add_diagnostic (NULL, enode, snode, stmt, finder, NULL_TREE, NULL, 0, d); } +/* Return a new json::object of the form + {"diagnostics" : [obj for saved_diagnostic]}. 
*/ + +json::object * +diagnostic_manager::to_json () const +{ + json::object *dm_obj = new json::object (); + + { + json::array *sd_arr = new json::array (); + int i; + saved_diagnostic *sd; + FOR_EACH_VEC_ELT (m_saved_diagnostics, i, sd) + sd_arr->append (sd->to_json ()); + dm_obj->set ("diagnostics", sd_arr); + } + + return dm_obj; +} + /* A class for identifying sets of duplicated pending_diagnostic. We want to find the simplest dedupe_candidate amongst those that share a diff --git a/gcc/analyzer/diagnostic-manager.h b/gcc/analyzer/diagnostic-manager.h index 1e310f7..c32f0c4 100644 --- a/gcc/analyzer/diagnostic-manager.h +++ b/gcc/analyzer/diagnostic-manager.h @@ -46,6 +46,8 @@ public: bool operator== (const saved_diagnostic &other) const; + json::object *to_json () const; + void set_feasible () { gcc_assert (m_status == STATUS_NEW); @@ -105,6 +107,8 @@ public: engine *get_engine () const { return m_eng; } + json::object *to_json () const; + void add_diagnostic (const state_machine *sm, const exploded_node *enode, const supernode *snode, const gimple *stmt, diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index d03e23a..df7e335 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see #include "tristate.h" #include "ordered-hash-map.h" #include "selftest.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "analyzer/call-string.h" @@ -61,6 +62,7 @@ along with GCC; see the file COPYING3. If not see #include "analyzer/checker-path.h" #include "analyzer/state-purge.h" #include "analyzer/bar-chart.h" +#include <zlib.h> /* For an overview, see gcc/doc/analyzer.texi. */ @@ -764,6 +766,19 @@ eg_traits::dump_args_t::show_enode_details_p (const exploded_node &enode) const /* class exploded_node : public dnode.
*/ +const char * +exploded_node::status_to_str (enum status s) +{ + switch (s) + { + default: gcc_unreachable (); + case STATUS_WORKLIST: return "WORKLIST"; + case STATUS_PROCESSED: return "PROCESSED"; + case STATUS_MERGER: return "MERGER"; + case STATUS_BULK_MERGED: return "BULK_MERGED"; + } +} + /* exploded_node's ctor. */ exploded_node::exploded_node (const point_and_state &ps, @@ -952,6 +967,28 @@ exploded_node::dump (const extrinsic_state &ext_state) const dump (stderr, ext_state); } +/* Return a new json::object of the form + {"point" : object for program_point, + "state" : object for program_state, + "status" : str, + "idx" : int, + "processed_stmts" : int}. */ + +json::object * +exploded_node::to_json (const extrinsic_state &ext_state) const +{ + json::object *enode_obj = new json::object (); + + enode_obj->set ("point", get_point ().to_json ()); + enode_obj->set ("state", get_state ().to_json (ext_state)); + enode_obj->set ("status", new json::string (status_to_str (m_status))); + enode_obj->set ("idx", new json::integer_number (m_index)); + enode_obj->set ("processed_stmts", + new json::integer_number (m_num_processed_stmts)); + + return enode_obj; +} + } // namespace ana /* Return true if FNDECL has a gimple body. */ @@ -1502,6 +1539,30 @@ exploded_edge::dump_dot (graphviz_out *gv, const dump_args_t &) const pp_printf (pp, "\"];\n"); } +/* Return a new json::object of the form + {"src_idx": int, the index of the source exploded node, + "dst_idx": int, the index of the destination exploded node, + "sedge": (optional) object for the superedge, if any, + "custom": (optional) str, a description, if this is a custom edge}.
*/ + +json::object * +exploded_edge::to_json () const +{ + json::object *eedge_obj = new json::object (); + eedge_obj->set ("src_idx", new json::integer_number (m_src->m_index)); + eedge_obj->set ("dst_idx", new json::integer_number (m_dest->m_index)); + if (m_sedge) + eedge_obj->set ("sedge", m_sedge->to_json ()); + if (m_custom_info) + { + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + m_custom_info->print (&pp); + eedge_obj->set ("custom", new json::string (pp_formatted_text (&pp))); + } + return eedge_obj; +} + /* struct stats. */ /* stats' ctor. */ @@ -3057,6 +3118,55 @@ exploded_graph::dump_states_for_supernode (FILE *out, snode->m_index, state_idx); } +/* Return a new json::object of the form + {"nodes" : [objs for enodes], + "edges" : [objs for eedges], + "ext_state": object for extrinsic_state, + "diagnostic_manager": object for diagnostic_manager}. */ + +json::object * +exploded_graph::to_json () const +{ + json::object *egraph_obj = new json::object (); + + /* Nodes. */ + { + json::array *nodes_arr = new json::array (); + unsigned i; + exploded_node *n; + FOR_EACH_VEC_ELT (m_nodes, i, n) + nodes_arr->append (n->to_json (m_ext_state)); + egraph_obj->set ("nodes", nodes_arr); + } + + /* Edges. */ + { + json::array *edges_arr = new json::array (); + unsigned i; + exploded_edge *n; + FOR_EACH_VEC_ELT (m_edges, i, n) + edges_arr->append (n->to_json ()); + egraph_obj->set ("edges", edges_arr); + } + + /* m_sg is JSONified at the top-level. 
*/ + + egraph_obj->set ("ext_state", m_ext_state.to_json ()); + egraph_obj->set ("diagnostic_manager", m_diagnostic_manager.to_json ()); + + /* The following fields aren't yet being JSONified: + worklist m_worklist; + const state_purge_map *const m_purge_map; + const analysis_plan &m_plan; + stats m_global_stats; + function_stat_map_t m_per_function_stats; + stats m_functionless_stats; + call_string_data_map_t m_per_call_string_data; + auto_vec m_PK_AFTER_SUPERNODE_per_snode; */ + + return egraph_obj; +} + /* Look for the last use of SEARCH_STMT within this path. If found write the edge's index to *OUT_IDX and return true, otherwise return false. */ @@ -4241,6 +4351,39 @@ private: auto_delete_vec > m_enodes_per_snodes; }; +/* Implement -fdump-analyzer-json. */ + +static void +dump_analyzer_json (const supergraph &sg, + const exploded_graph &eg) +{ + auto_timevar tv (TV_ANALYZER_DUMP); + char *filename = concat (dump_base_name, ".analyzer.json.gz", NULL); + gzFile output = gzopen (filename, "w"); + if (!output) + { + error_at (UNKNOWN_LOCATION, "unable to open %qs for writing", filename); + free (filename); + return; + } + + json::object *toplev_obj = new json::object (); + toplev_obj->set ("sgraph", sg.to_json ()); + toplev_obj->set ("egraph", eg.to_json ()); + + pretty_printer pp; + toplev_obj->print (&pp); + pp_formatted_text (&pp); + + delete toplev_obj; + + if (gzputs (output, pp_formatted_text (&pp)) == EOF + || gzclose (output)) + error_at (UNKNOWN_LOCATION, "error writing %qs", filename); + + free (filename); +} + /* Run the analysis "engine". 
*/ void @@ -4341,6 +4484,9 @@ impl_run_checkers (logger *logger) free (filename); } + if (flag_dump_analyzer_json) + dump_analyzer_json (sg, eg); + delete purge_map; } diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h index 04e878f..f723d52b 100644 --- a/gcc/analyzer/exploded-graph.h +++ b/gcc/analyzer/exploded-graph.h @@ -165,6 +165,7 @@ class exploded_node : public dnode /* Node was processed by maybe_process_run_of_before_supernode_enodes. */ STATUS_BULK_MERGED }; + static const char * status_to_str (enum status s); exploded_node (const point_and_state &ps, int index); @@ -179,6 +180,8 @@ class exploded_node : public dnode void dump (FILE *fp, const extrinsic_state &ext_state) const; void dump (const extrinsic_state &ext_state) const; + json::object *to_json (const extrinsic_state &ext_state) const; + /* The result of on_stmt. */ struct on_stmt_flags { @@ -307,6 +310,8 @@ class exploded_edge : public dedge void dump_dot (graphviz_out *gv, const dump_args_t &args) const FINAL OVERRIDE; + json::object *to_json () const; + //private: const superedge *const m_sedge; @@ -782,6 +787,8 @@ public: void dump_states_for_supernode (FILE *, const supernode *snode) const; void dump_exploded_nodes () const; + json::object *to_json () const; + exploded_node *get_node_by_index (int idx) const; const call_string_data_map_t *get_per_call_string_data () const diff --git a/gcc/analyzer/pending-diagnostic.cc b/gcc/analyzer/pending-diagnostic.cc index c196903..502d177 100644 --- a/gcc/analyzer/pending-diagnostic.cc +++ b/gcc/analyzer/pending-diagnostic.cc @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. 
If not see #include "intl.h" #include "diagnostic.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "diagnostic-event-id.h" #include "analyzer/analyzer-logging.h" diff --git a/gcc/analyzer/program-point.cc b/gcc/analyzer/program-point.cc index ef19e6e..429d6ec 100644 --- a/gcc/analyzer/program-point.cc +++ b/gcc/analyzer/program-point.cc @@ -24,6 +24,7 @@ along with GCC; see the file COPYING3. If not see #include "tree.h" #include "gimple-pretty-print.h" #include "gcc-rich-location.h" +#include "json.h" #include "analyzer/call-string.h" #include "ordered-hash-map.h" #include "options.h" @@ -281,6 +282,43 @@ program_point::dump () const pp_flush (&pp); } +/* Return a new json::object of the form + {"kind" : str, + "snode_idx" : int (optional), the index of the supernode, + "from_edge_snode_idx" : int (only for kind=='PK_BEFORE_SUPERNODE'), + "stmt_idx": int (only for kind=='PK_BEFORE_STMT'), + "call_string": object for the call_string}. */ + +json::object * +program_point::to_json () const +{ + json::object *point_obj = new json::object (); + + point_obj->set ("kind", + new json::string (point_kind_to_string (get_kind ()))); + + if (get_supernode ()) + point_obj->set ("snode_idx", + new json::integer_number (get_supernode ()->m_index)); + + switch (get_kind ()) + { + default: break; + case PK_BEFORE_SUPERNODE: + if (const superedge *sedge = get_from_edge ()) + point_obj->set ("from_edge_snode_idx", + new json::integer_number (sedge->m_src->m_index)); + break; + case PK_BEFORE_STMT: + point_obj->set ("stmt_idx", new json::integer_number (get_stmt_idx ())); + break; + } + + point_obj->set ("call_string", m_call_string.to_json ()); + + return point_obj; +} + /* Generate a hash value for this program_point.
*/ hashval_t diff --git a/gcc/analyzer/program-point.h b/gcc/analyzer/program-point.h index 97fd0a5..d804621 100644 --- a/gcc/analyzer/program-point.h +++ b/gcc/analyzer/program-point.h @@ -175,6 +175,8 @@ public: void print_source_line (pretty_printer *pp) const; void dump () const; + json::object *to_json () const; + hashval_t hash () const; bool operator== (const program_point &other) const { diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc index 71bb286..83a6e5b 100644 --- a/gcc/analyzer/program-state.cc +++ b/gcc/analyzer/program-state.cc @@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-core.h" #include "diagnostic.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "analyzer/sm.h" @@ -99,6 +100,26 @@ extrinsic_state::dump () const dump_to_file (stderr); } +/* Return a new json::object of the form + {"checkers" : array of objects, one for each state_machine}. */ + +json::object * +extrinsic_state::to_json () const +{ + json::object *ext_state_obj = new json::object (); + + { + json::array *checkers_arr = new json::array (); + unsigned i; + state_machine *sm; + FOR_EACH_VEC_ELT (m_checkers, i, sm) + checkers_arr->append (sm->to_json ()); + ext_state_obj->set ("checkers", checkers_arr); + } + + return ext_state_obj; +} + /* Get the region_model_manager for this extrinsic_state. */ region_model_manager * @@ -208,6 +229,33 @@ sm_state_map::dump (bool simple) const pp_flush (&pp); } +/* Return a new json::object of the form + {"global" : (optional) value for global state, + SVAL_DESC : value for state}. 
*/ + +json::object * +sm_state_map::to_json () const +{ + json::object *map_obj = new json::object (); + + if (m_global_state != m_sm.get_start_state ()) + map_obj->set ("global", m_global_state->to_json ()); + for (map_t::iterator iter = m_map.begin (); + iter != m_map.end (); + ++iter) + { + const svalue *sval = (*iter).first; + entry_t e = (*iter).second; + + label_text sval_desc = sval->get_desc (); + map_obj->set (sval_desc.m_buffer, e.m_state->to_json ()); + sval_desc.maybe_free (); + + /* This doesn't yet JSONify e.m_origin. */ + } + return map_obj; +} + /* Return true if no states have been set within this map (all expressions are for the start state). */ @@ -733,6 +781,43 @@ program_state::dump (const extrinsic_state &ext_state, dump_to_file (ext_state, summarize, true, stderr); } +/* Return a new json::object of the form + {"store" : object for store, + "constraints" : object for constraint_manager, + "curr_frame" : (optional) str for current frame, + "checkers" : { STATE_NAME : object per sm_state_map }, + "valid" : true/false}. */ + +json::object * +program_state::to_json (const extrinsic_state &ext_state) const +{ + json::object *state_obj = new json::object (); + + state_obj->set ("store", m_region_model->get_store ()->to_json ()); + state_obj->set ("constraints", + m_region_model->get_constraints ()->to_json ()); + if (m_region_model->get_current_frame ()) + state_obj->set ("curr_frame", + m_region_model->get_current_frame ()->to_json ()); + + /* Provide m_checker_states as an object, using names as keys. */ + { + json::object *checkers_obj = new json::object (); + + int i; + sm_state_map *smap; + FOR_EACH_VEC_ELT (m_checker_states, i, smap) + if (!smap->is_empty_p ()) + checkers_obj->set (ext_state.get_name (i), smap->to_json ()); + + state_obj->set ("checkers", checkers_obj); + } + + state_obj->set ("valid", new json::literal (m_valid)); + + return state_obj; +} + /* Update this program_state to reflect a top-level call to FUN. 
The params will have initial_svalues. */ diff --git a/gcc/analyzer/program-state.h b/gcc/analyzer/program-state.h index cb0df8c..a52fbeb 100644 --- a/gcc/analyzer/program-state.h +++ b/gcc/analyzer/program-state.h @@ -53,6 +53,8 @@ public: void dump_to_file (FILE *outf) const; void dump () const; + json::object *to_json () const; + engine *get_engine () const { return m_engine; } region_model_manager *get_model_manager () const; @@ -109,6 +111,8 @@ public: pretty_printer *pp) const; void dump (bool simple) const; + json::object *to_json () const; + bool is_empty_p () const; hashval_t hash () const; @@ -204,6 +208,8 @@ public: bool multiline, FILE *outf) const; void dump (const extrinsic_state &ext_state, bool simple) const; + json::object *to_json (const extrinsic_state &ext_state) const; + void push_frame (const extrinsic_state &ext_state, function *fun); function * get_current_function () const; diff --git a/gcc/analyzer/region-model-impl-calls.cc b/gcc/analyzer/region-model-impl-calls.cc index 423f74a..009b8c3 100644 --- a/gcc/analyzer/region-model-impl-calls.cc +++ b/gcc/analyzer/region-model-impl-calls.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "bitmap.h" #include "selftest.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "ordered-hash-map.h" diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc index da8fa01..8dd3ad0 100644 --- a/gcc/analyzer/region-model-manager.cc +++ b/gcc/analyzer/region-model-manager.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. 
If not see #include "bitmap.h" #include "selftest.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "ordered-hash-map.h" diff --git a/gcc/analyzer/region-model-reachability.cc b/gcc/analyzer/region-model-reachability.cc index 681b8f7..c1b3b2d 100644 --- a/gcc/analyzer/region-model-reachability.cc +++ b/gcc/analyzer/region-model-reachability.cc @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see #include "cgraph.h" #include "cfg.h" #include "digraph.h" +#include "json.h" #include "analyzer/call-string.h" #include "analyzer/program-point.h" #include "analyzer/store.h" diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 6f04904..74a96b0 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "bitmap.h" #include "selftest.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "ordered-hash-map.h" diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index 4859df3..1e8a517 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -256,6 +256,8 @@ public: void dump (bool simple=true) const; label_text get_desc (bool simple=true) const; + json::value *to_json () const; + virtual const region_svalue * dyn_cast_region_svalue () const { return NULL; } virtual const constant_svalue * @@ -1400,6 +1402,8 @@ public: virtual void dump_to_pp (pretty_printer *pp, bool simple) const = 0; void dump (bool simple) const; + json::value *to_json () const; + bool non_null_p () const; static int cmp_ptrs (const void *, const void *); diff --git a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc index 53f32dc..0820893 100644 --- a/gcc/analyzer/region.cc +++ b/gcc/analyzer/region.cc @@ -44,6 +44,7 @@ along with GCC; see the file COPYING3. 
If not see #include "bitmap.h" #include "selftest.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "ordered-hash-map.h" @@ -460,6 +461,17 @@ region::dump (bool simple) const pp_flush (&pp); } +/* Return a new json::string describing the region. */ + +json::value * +region::to_json () const +{ + label_text desc = get_desc (true); + json::value *reg_js = new json::string (desc.m_buffer); + desc.maybe_free (); + return reg_js; +} + /* Generate a description of this region. */ DEBUG_FUNCTION label_text diff --git a/gcc/analyzer/sm-file.cc b/gcc/analyzer/sm-file.cc index 58a0fd4..d201071 100644 --- a/gcc/analyzer/sm-file.cc +++ b/gcc/analyzer/sm-file.cc @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-path.h" #include "diagnostic-metadata.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "diagnostic-event-id.h" #include "analyzer/analyzer-logging.h" diff --git a/gcc/analyzer/sm-malloc.cc b/gcc/analyzer/sm-malloc.cc index 12b2383..6293d78 100644 --- a/gcc/analyzer/sm-malloc.cc +++ b/gcc/analyzer/sm-malloc.cc @@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-path.h" #include "diagnostic-metadata.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "diagnostic-event-id.h" #include "analyzer/analyzer-logging.h" diff --git a/gcc/analyzer/sm-pattern-test.cc b/gcc/analyzer/sm-pattern-test.cc index bb6d3b1..c430476 100644 --- a/gcc/analyzer/sm-pattern-test.cc +++ b/gcc/analyzer/sm-pattern-test.cc @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. 
If not see #include "diagnostic-path.h" #include "diagnostic-metadata.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "diagnostic-event-id.h" #include "analyzer/analyzer-logging.h" diff --git a/gcc/analyzer/sm-sensitive.cc b/gcc/analyzer/sm-sensitive.cc index 49f9eb3..aec0a6a 100644 --- a/gcc/analyzer/sm-sensitive.cc +++ b/gcc/analyzer/sm-sensitive.cc @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-path.h" #include "diagnostic-metadata.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "diagnostic-event-id.h" #include "analyzer/analyzer-logging.h" diff --git a/gcc/analyzer/sm-signal.cc b/gcc/analyzer/sm-signal.cc index bf6ea48..2e05de8 100644 --- a/gcc/analyzer/sm-signal.cc +++ b/gcc/analyzer/sm-signal.cc @@ -32,6 +32,7 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-path.h" #include "diagnostic-metadata.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "diagnostic-event-id.h" #include "analyzer/analyzer-logging.h" @@ -220,6 +221,12 @@ public: pp_string (pp, "signal delivered"); } + json::object *to_json () const + { + json::object *custom_obj = new json::object (); + return custom_obj; + } + void update_model (region_model *model, const exploded_edge &eedge) FINAL OVERRIDE { diff --git a/gcc/analyzer/sm-taint.cc b/gcc/analyzer/sm-taint.cc index 49bbd6d..37491d8 100644 --- a/gcc/analyzer/sm-taint.cc +++ b/gcc/analyzer/sm-taint.cc @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-path.h" #include "diagnostic-metadata.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "diagnostic-event-id.h" #include "analyzer/analyzer-logging.h" diff --git a/gcc/analyzer/sm.cc b/gcc/analyzer/sm.cc index a333063..3fe75ef 100644 --- a/gcc/analyzer/sm.cc +++ b/gcc/analyzer/sm.cc @@ -29,6 +29,9 @@ along with GCC; see the file COPYING3. 
If not see #include "function.h" #include "diagnostic-core.h" #include "pretty-print.h" +#include "diagnostic.h" +#include "tree-diagnostic.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "analyzer/sm.h" @@ -56,6 +59,17 @@ state_machine::state::dump_to_pp (pretty_printer *pp) const pp_string (pp, m_name); } +/* Return a new json::string describing the state. */ + +json::value * +state_machine::state::to_json () const +{ + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + dump_to_pp (&pp); + return new json::string (pp_formatted_text (&pp)); +} + /* class state_machine. */ /* state_machine's ctor. */ @@ -109,6 +123,28 @@ state_machine::dump_to_pp (pretty_printer *pp) const } } +/* Return a new json::object of the form + {"name" : str, + "states" : [str]}. */ + +json::object * +state_machine::to_json () const +{ + json::object *sm_obj = new json::object (); + + sm_obj->set ("name", new json::string (m_name)); + { + json::array *states_arr = new json::array (); + unsigned i; + state *s; + FOR_EACH_VEC_ELT (m_states, i, s) + states_arr->append (s->to_json ()); + sm_obj->set ("states", states_arr); + } + + return sm_obj; +} + /* Create instances of the various state machines, each using LOGGER, and populate OUT with them. 
*/ diff --git a/gcc/analyzer/sm.h b/gcc/analyzer/sm.h index f44ad92..46b93ff 100644 --- a/gcc/analyzer/sm.h +++ b/gcc/analyzer/sm.h @@ -48,6 +48,7 @@ public: const char *get_name () const { return m_name; } virtual void dump_to_pp (pretty_printer *pp) const; + virtual json::value *to_json () const; unsigned get_id () const { return m_id; } @@ -121,6 +122,8 @@ public: void dump_to_pp (pretty_printer *pp) const; + json::object *to_json () const; + state_t get_start_state () const { return m_start; } protected: diff --git a/gcc/analyzer/state-purge.cc b/gcc/analyzer/state-purge.cc index d5a24b4..e4942a6 100644 --- a/gcc/analyzer/state-purge.cc +++ b/gcc/analyzer/state-purge.cc @@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-core.h" #include "gimple-pretty-print.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/call-string.h" #include "digraph.h" diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc index 1348895..1158512 100644 --- a/gcc/analyzer/store.cc +++ b/gcc/analyzer/store.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "bitmap.h" #include "selftest.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "ordered-hash-map.h" @@ -122,6 +123,17 @@ binding_key::dump (bool simple) const pp_flush (&pp); } +/* Get a description of this binding_key. */ + +label_text +binding_key::get_desc (bool simple) const +{ + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + dump_to_pp (&pp, simple); + return label_text::take (xstrdup (pp_formatted_text (&pp))); +} + /* qsort callback. */ int @@ -366,6 +378,37 @@ binding_map::dump (bool simple) const pp_flush (&pp); } +/* Return a new json::object of the form + {KEY_DESC : SVALUE_DESC, + ...for the various key/value pairs in this binding_map}. 
*/ + +json::object * +binding_map::to_json () const +{ + json::object *map_obj = new json::object (); + + auto_vec binding_keys; + for (map_t::iterator iter = m_map.begin (); + iter != m_map.end (); ++iter) + { + const binding_key *key = (*iter).first; + binding_keys.safe_push (key); + } + binding_keys.qsort (binding_key::cmp_ptrs); + + const binding_key *key; + unsigned i; + FOR_EACH_VEC_ELT (binding_keys, i, key) + { + const svalue *value = *const_cast (m_map).get (key); + label_text key_desc = key->get_desc (); + map_obj->set (key_desc.m_buffer, value->to_json ()); + key_desc.maybe_free (); + } + + return map_obj; +} + /* Get the child region of PARENT_REG based upon INDEX within a CONSTRUCTOR. */ @@ -657,6 +700,23 @@ binding_cluster::dump (bool simple) const pp_flush (&pp); } +/* Return a new json::object of the form + {"escaped": true/false, + "touched": true/false, + "map" : object for the binding_map}. */ + +json::object * +binding_cluster::to_json () const +{ + json::object *cluster_obj = new json::object (); + + cluster_obj->set ("escaped", new json::literal (m_escaped)); + cluster_obj->set ("touched", new json::literal (m_touched)); + cluster_obj->set ("map", m_map.to_json ()); + + return cluster_obj; +} + /* Add a binding of SVAL of kind KIND to REG, unpacking SVAL if it is a compound_sval. */ @@ -1575,6 +1635,64 @@ store::dump (bool simple) const pp_flush (&pp); } +/* Return a new json::object of the form + {PARENT_REGION_DESC: {BASE_REGION_DESC: object for binding_map, + ... for each cluster within parent region}, + ...for each parent region, + "called_unknown_fn": true/false}. */ + +json::object * +store::to_json () const +{ + json::object *store_obj = new json::object (); + + /* Sort into some deterministic order. 
*/ + auto_vec base_regions; + for (cluster_map_t::iterator iter = m_cluster_map.begin (); + iter != m_cluster_map.end (); ++iter) + { + const region *base_reg = (*iter).first; + base_regions.safe_push (base_reg); + } + base_regions.qsort (region::cmp_ptrs); + + /* Gather clusters, organize by parent region, so that we can group + together locals, globals, etc. */ + auto_vec parent_regions; + get_sorted_parent_regions (&parent_regions, base_regions); + + const region *parent_reg; + unsigned i; + FOR_EACH_VEC_ELT (parent_regions, i, parent_reg) + { + gcc_assert (parent_reg); + + json::object *clusters_in_parent_reg_obj = new json::object (); + + const region *base_reg; + unsigned j; + FOR_EACH_VEC_ELT (base_regions, j, base_reg) + { + /* This is O(N * M), but N ought to be small. */ + if (base_reg->get_parent_region () != parent_reg) + continue; + binding_cluster *cluster + = *const_cast (m_cluster_map).get (base_reg); + label_text base_reg_desc = base_reg->get_desc (); + clusters_in_parent_reg_obj->set (base_reg_desc.m_buffer, + cluster->to_json ()); + base_reg_desc.maybe_free (); + } + label_text parent_reg_desc = parent_reg->get_desc (); + store_obj->set (parent_reg_desc.m_buffer, clusters_in_parent_reg_obj); + parent_reg_desc.maybe_free (); + } + + store_obj->set ("called_unknown_fn", new json::literal (m_called_unknown_fn)); + + return store_obj; +} + /* Get any svalue bound to REG, or NULL. 
*/ const svalue * diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h index 83a4310..0f4e7ab 100644 --- a/gcc/analyzer/store.h +++ b/gcc/analyzer/store.h @@ -159,6 +159,7 @@ public: virtual void dump_to_pp (pretty_printer *pp, bool simple) const; void dump (bool simple) const; + label_text get_desc (bool simple=true) const; static int cmp_ptrs (const void *, const void *); static int cmp (const binding_key *, const binding_key *); @@ -340,6 +341,8 @@ public: void dump_to_pp (pretty_printer *pp, bool simple, bool multiline) const; void dump (bool simple) const; + json::object *to_json () const; + bool apply_ctor_to_region (const region *parent_reg, tree ctor, region_model_manager *mgr); @@ -392,6 +395,8 @@ public: void dump_to_pp (pretty_printer *pp, bool simple, bool multiline) const; void dump (bool simple) const; + json::object *to_json () const; + void bind (store_manager *mgr, const region *, const svalue *, binding_kind kind); @@ -517,6 +522,8 @@ public: void dump (bool simple) const; void summarize_to_pp (pretty_printer *pp, bool simple) const; + json::object *to_json () const; + const svalue *get_direct_binding (store_manager *mgr, const region *reg); const svalue *get_default_binding (store_manager *mgr, const region *reg); const svalue *get_any_binding (store_manager *mgr, const region *reg) const; diff --git a/gcc/analyzer/supergraph.cc b/gcc/analyzer/supergraph.cc index 7c6fed3..735c4a3 100644 --- a/gcc/analyzer/supergraph.cc +++ b/gcc/analyzer/supergraph.cc @@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-dfa.h" #include "cfganal.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "ordered-hash-map.h" #include "options.h" @@ -374,6 +375,38 @@ supergraph::dump_dot (const char *path, const dump_args_t &dump_args) const fclose (fp); } +/* Return a new json::object of the form + {"nodes" : [objs for snodes], + "edges" : [objs for sedges]}. 
*/ + +json::object * +supergraph::to_json () const +{ + json::object *sgraph_obj = new json::object (); + + /* Nodes. */ + { + json::array *nodes_arr = new json::array (); + unsigned i; + supernode *n; + FOR_EACH_VEC_ELT (m_nodes, i, n) + nodes_arr->append (n->to_json ()); + sgraph_obj->set ("nodes", nodes_arr); + } + + /* Edges. */ + { + json::array *edges_arr = new json::array (); + unsigned i; + superedge *n; + FOR_EACH_VEC_ELT (m_edges, i, n) + edges_arr->append (n->to_json ()); + sgraph_obj->set ("edges", edges_arr); + } + + return sgraph_obj; +} + /* Create a supernode for BB within FUN and add it to this supergraph. If RETURNING_CALL is non-NULL, the supernode represents the resumption @@ -594,6 +627,63 @@ supernode::dump_dot_id (pretty_printer *pp) const pp_printf (pp, "node_%i", m_index); } +/* Return a new json::object of the form + {"idx": int, + "bb_idx": int, + "returning_call": optional str, + "phis": [str], + "stmts" : [str]}. */ + +json::object * +supernode::to_json () const +{ + json::object *snode_obj = new json::object (); + + snode_obj->set ("idx", new json::integer_number (m_index)); + snode_obj->set ("bb_idx", new json::integer_number (m_bb->index)); + + if (m_returning_call) + { + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp_gimple_stmt_1 (&pp, m_returning_call, 0, (dump_flags_t)0); + snode_obj->set ("returning_call", + new json::string (pp_formatted_text (&pp))); + } + + /* Phi nodes. */ + { + json::array *phi_arr = new json::array (); + for (gphi_iterator gpi = const_cast (this)->start_phis (); + !gsi_end_p (gpi); gsi_next (&gpi)) + { + const gimple *stmt = gsi_stmt (gpi); + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp_gimple_stmt_1 (&pp, stmt, 0, (dump_flags_t)0); + phi_arr->append (new json::string (pp_formatted_text (&pp))); + } + snode_obj->set ("phis", phi_arr); + } + + /* Statements. 
*/ + { + json::array *stmt_arr = new json::array (); + int i; + gimple *stmt; + FOR_EACH_VEC_ELT (m_stmts, i, stmt) + { + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp_gimple_stmt_1 (&pp, stmt, 0, (dump_flags_t)0); + stmt_arr->append (new json::string (pp_formatted_text (&pp))); + } + snode_obj->set ("stmts", stmt_arr); + } + + return snode_obj; +} + /* Get a location_t for the start of this supernode. */ location_t @@ -759,6 +849,28 @@ superedge::dump_dot (graphviz_out *gv, const dump_args_t &) const pp_printf (pp, "\"];\n"); } +/* Return a new json::object of the form + {"src_idx": int, the index of the source supernode, + "dst_idx": int, the index of the destination supernode, + "desc" : str}. */ + +json::object * +superedge::to_json () const +{ + json::object *sedge_obj = new json::object (); + sedge_obj->set ("src_idx", new json::integer_number (m_src->m_index)); + sedge_obj->set ("dst_idx", new json::integer_number (m_dest->m_index)); + + { + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + dump_label_to_pp (&pp, false); + sedge_obj->set ("desc", new json::string (pp_formatted_text (&pp))); + } + + return sedge_obj; +} + /* If this is an intraprocedural superedge, return the associated CFG edge. Otherwise, return NULL. 
*/ diff --git a/gcc/analyzer/supergraph.h b/gcc/analyzer/supergraph.h index c25043d..40ae9ff 100644 --- a/gcc/analyzer/supergraph.h +++ b/gcc/analyzer/supergraph.h @@ -148,6 +148,8 @@ public: void dump_dot_to_file (FILE *fp, const dump_args_t &) const; void dump_dot (const char *path, const dump_args_t &) const; + json::object *to_json () const; + int num_nodes () const { return m_nodes.length (); } int num_edges () const { return m_edges.length (); } @@ -231,6 +233,8 @@ class supernode : public dnode void dump_dot (graphviz_out *gv, const dump_args_t &args) const OVERRIDE; void dump_dot_id (pretty_printer *pp) const; + json::object *to_json () const; + location_t get_start_location () const; location_t get_end_location () const; @@ -289,6 +293,8 @@ class superedge : public dedge virtual void dump_label_to_pp (pretty_printer *pp, bool user_facing) const = 0; + json::object *to_json () const; + enum edge_kind get_kind () const { return m_kind; } virtual cfg_superedge *dyn_cast_cfg_superedge () { return NULL; } diff --git a/gcc/analyzer/svalue.cc b/gcc/analyzer/svalue.cc index fcab578..ae3b678 100644 --- a/gcc/analyzer/svalue.cc +++ b/gcc/analyzer/svalue.cc @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see #include "bitmap.h" #include "selftest.h" #include "function.h" +#include "json.h" #include "analyzer/analyzer.h" #include "analyzer/analyzer-logging.h" #include "options.h" @@ -116,6 +117,17 @@ svalue::get_desc (bool simple) const return label_text::take (xstrdup (pp_formatted_text (&pp))); } +/* Return a new json::string describing the svalue. */ + +json::value * +svalue::to_json () const +{ + label_text desc = get_desc (true); + json::value *sval_js = new json::string (desc.m_buffer); + desc.maybe_free (); + return sval_js; +} + /* If this svalue is a constant_svalue, return the underlying tree constant. Otherwise return NULL_TREE. 
*/ diff --git a/gcc/doc/analyzer.texi b/gcc/doc/analyzer.texi index 92c12e1..6b7d70c 100644 --- a/gcc/doc/analyzer.texi +++ b/gcc/doc/analyzer.texi @@ -488,6 +488,9 @@ truthfulness of the argument. This is useful for writing DejaGnu tests. @subsection Other Debugging Techniques +The option @option{-fdump-analyzer-json} will dump both the supergraph +and the exploded graph in compressed JSON form. + One approach when tracking down where a particular bogus state is introduced into the @code{exploded_graph} is to add custom code to @code{program_state::validate}. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 6a7833b1..f726ff4 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -407,6 +407,7 @@ Objective-C and Objective-C++ Dialects}. -fdump-analyzer-exploded-nodes @gol -fdump-analyzer-exploded-nodes-2 @gol -fdump-analyzer-exploded-nodes-3 @gol +-fdump-analyzer-json @gol -fdump-analyzer-state-purge @gol -fdump-analyzer-supergraph @gol -Wno-analyzer-double-fclose @gol @@ -9123,6 +9124,12 @@ Dump a textual representation of the ``exploded graph'' to one dump file per node, to @file{@var{file}.eg-@var{id}.txt}. This is typically a large number of dump files. +@item -fdump-analyzer-json +@opindex fdump-analyzer-json +Dump a compressed JSON representation of analyzer internals to +@file{@var{file}.analyzer.json.gz}. The precise format is subject +to change. 
+ +@item -fdump-analyzer-state-purge @opindex fdump-analyzer-state-purge As per @option{-fdump-analyzer-supergraph}, dump a representation of the -- cgit v1.1 From bc13106e0414b86af8f6878e7681e6a959921b9e Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 22 Sep 2020 21:06:32 +0200 Subject: c++: Ignore __sanitizer_ptr_{sub,cmp} builtin calls during constant expression evaluation [PR97145] These two builtin calls are added already during parsing before pointer subtractions or comparisons, normally they perform runtime verification of whether the pointers point to the same object or different objects, but during constant expression evaluation we don't really need those builtins for anything. 2020-09-22 Jakub Jelinek PR c++/97145 * constexpr.c (cxx_eval_builtin_function_call): Return void_node for calls to __sanitizer_ptr_{sub,cmp} builtins. * g++.dg/asan/pr97145.C: New test. --- gcc/cp/constexpr.c | 6 ++++++ gcc/testsuite/g++.dg/asan/pr97145.C | 7 +++++++ 2 files changed, 13 insertions(+) create mode 100644 gcc/testsuite/g++.dg/asan/pr97145.C (limited to 'gcc') diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c index 72fbdab..dacce58 100644 --- a/gcc/cp/constexpr.c +++ b/gcc/cp/constexpr.c @@ -1355,6 +1355,12 @@ cxx_eval_builtin_function_call (const constexpr_ctx *ctx, tree t, tree fun, case BUILT_IN_STRSTR: strops = 2; strret = 1; + break; + case BUILT_IN_ASAN_POINTER_COMPARE: + case BUILT_IN_ASAN_POINTER_SUBTRACT: + /* These builtins shall be ignored during constant expression + evaluation. 
*/ + return void_node; default: break; } diff --git a/gcc/testsuite/g++.dg/asan/pr97145.C b/gcc/testsuite/g++.dg/asan/pr97145.C new file mode 100644 index 0000000..993c8a5 --- /dev/null +++ b/gcc/testsuite/g++.dg/asan/pr97145.C @@ -0,0 +1,7 @@ +// PR c++/97145 +// { dg-do compile { target c++11 } } +// { dg-options "-fsanitize=address,pointer-subtract,pointer-compare" } + +constexpr char *a = nullptr; +constexpr auto b = a - a; +constexpr auto c = a < a; -- cgit v1.1 From 7d8177b027b87cf3211e2d6cf144ec71616425ea Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Tue, 22 Sep 2020 12:21:13 -0700 Subject: c++: Remove a broken error-recovery path The remaining use of xref_tag_from_type was also suspicious. It turns out to be an error path. At parse time we diagnose that a class definition cannot appear, but we swallow the definition. This code was attempting to push it into the global scope (or find a conflict). This seems needless, just return error_mark_node. This was the simpler fix than going through the parser and figuring out how to get it to put in error_mark_node at the right point. gcc/cp/ * cp-tree.h (xref_tag_from_type): Don't declare. * decl.c (xref_tag_from_type): Delete. * pt.c (lookup_template_class_1): Erroneously located class definitions just give error_mark, don't try and inject it into the namespace. 
--- gcc/cp/cp-tree.h | 1 - gcc/cp/decl.c | 17 ----------------- gcc/cp/pt.c | 11 +++++------ 3 files changed, 5 insertions(+), 24 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 7135381..029a165 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6502,7 +6502,6 @@ extern void grok_special_member_properties (tree); extern bool grok_ctor_properties (const_tree, const_tree); extern bool grok_op_properties (tree, bool); extern tree xref_tag (enum tag_types, tree, tag_scope, bool); -extern tree xref_tag_from_type (tree, tree, tag_scope); extern void xref_basetypes (tree, tree); extern tree start_enum (tree, tree, tree, tree, bool, bool *); extern void finish_enum_value_list (tree); diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index bbecebe..f3fdfe3 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -15120,23 +15120,6 @@ xref_tag (enum tag_types tag_code, tree name, return ret; } - -tree -xref_tag_from_type (tree old, tree id, tag_scope scope) -{ - enum tag_types tag_kind; - - if (TREE_CODE (old) == RECORD_TYPE) - tag_kind = (CLASSTYPE_DECLARED_CLASS (old) ? class_type : record_type); - else - tag_kind = union_type; - - if (id == NULL_TREE) - id = TYPE_IDENTIFIER (old); - - return xref_tag (tag_kind, id, scope, false); -} - /* Create the binfo hierarchy for REF with (possibly NULL) base list BASE_LIST. For each element on BASE_LIST the TREE_PURPOSE is an access_* node, and the TREE_VALUE is the type of the base-class. 
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 44ca14a..69946da 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -9856,12 +9856,11 @@ lookup_template_class_1 (tree d1, tree arglist, tree in_decl, tree context, && !PRIMARY_TEMPLATE_P (gen_tmpl) && !LAMBDA_TYPE_P (TREE_TYPE (gen_tmpl)) && TREE_CODE (CP_DECL_CONTEXT (gen_tmpl)) == NAMESPACE_DECL) - { - found = xref_tag_from_type (TREE_TYPE (gen_tmpl), - DECL_NAME (gen_tmpl), - /*tag_scope=*/ts_global); - return found; - } + /* This occurs when the user has tried to define a tagged type + in a scope that forbids it. We emitted an error during the + parse. We didn't complete the bail out then, so here we + are. */ + return error_mark_node; context = DECL_CONTEXT (gen_tmpl); if (context && TYPE_P (context)) -- cgit v1.1 From 39b3b1bdd9ea3d36ce41276b8452506b79cc4368 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Tue, 22 Sep 2020 22:16:00 +0200 Subject: Fix ipa-modref selftest and destructor * ipa-modref-tree.c: Add namespace selftest. (modref_tree_c_tests): Rename to ... (ipa_modref_tree_c_tests): ... this. * ipa-modref.c (pass_modref): Remove destructor. (ipa_modref_c_finalize): New function. * ipa-modref.h (ipa_modref_c_finalize): Declare. * selftest-run-tests.c (selftest::run_tests): Call ipa_modref_c_finalize. * selftest.h (ipa_modref_tree_c_tests): Declare. * toplev.c: Include ipa-modref-tree.h and ipa-modref.h (toplev::finalize): Call ipa_modref_c_finalize. --- gcc/ipa-modref-tree.c | 5 ++++- gcc/ipa-modref.c | 16 ++++++++++------ gcc/ipa-modref.h | 1 + gcc/selftest-run-tests.c | 1 + gcc/selftest.h | 1 + gcc/toplev.c | 3 +++ 6 files changed, 20 insertions(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-modref-tree.c b/gcc/ipa-modref-tree.c index e37dee6..a84508a 100644 --- a/gcc/ipa-modref-tree.c +++ b/gcc/ipa-modref-tree.c @@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. 
If not see #if CHECKING_P +namespace selftest { static void test_insert_search_collapse () @@ -156,12 +157,14 @@ test_merge () void -modref_tree_c_tests () +ipa_modref_tree_c_tests () { test_insert_search_collapse (); test_merge (); } +} // namespace selftest + #endif void diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 0411f66..d195354 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -767,12 +767,6 @@ class pass_modref : public gimple_opt_pass pass_modref (gcc::context *ctxt) : gimple_opt_pass (pass_data_modref, ctxt) {} - ~pass_modref () - { - ggc_delete (summaries); - summaries = NULL; - } - /* opt_pass methods: */ opt_pass *clone () { @@ -1373,4 +1367,14 @@ unsigned int pass_ipa_modref::execute (function *) return 0; } +/* Summaries must stay alive until end of compilation. */ + +void +ipa_modref_c_finalize () +{ + if (summaries) + ggc_delete (summaries); + summaries = NULL; +} + #include "gt-ipa-modref.h" diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h index 6f97920..6cccdfe 100644 --- a/gcc/ipa-modref.h +++ b/gcc/ipa-modref.h @@ -44,5 +44,6 @@ struct GTY(()) modref_summary }; modref_summary *get_modref_function_summary (cgraph_node *func); +void ipa_modref_c_finalize (); #endif diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c index f0a81d4..7a89b2d 100644 --- a/gcc/selftest-run-tests.c +++ b/gcc/selftest-run-tests.c @@ -90,6 +90,7 @@ selftest::run_tests () read_rtl_function_c_tests (); digraph_cc_tests (); tristate_cc_tests (); + ipa_modref_tree_c_tests (); /* Higher-level tests, or for components that other selftests don't rely on. 
*/ diff --git a/gcc/selftest.h b/gcc/selftest.h index 5cffa13..6c6c7f2 100644 --- a/gcc/selftest.h +++ b/gcc/selftest.h @@ -268,6 +268,7 @@ extern void vec_perm_indices_c_tests (); extern void wide_int_cc_tests (); extern void opt_proposer_c_tests (); extern void dbgcnt_c_tests (); +extern void ipa_modref_tree_c_tests (); extern int num_passes; diff --git a/gcc/toplev.c b/gcc/toplev.c index cdd4b5b..a4cb8bb 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -84,6 +84,8 @@ along with GCC; see the file COPYING3. If not see #include "dump-context.h" #include "print-tree.h" #include "optinfo-emit-json.h" +#include "ipa-modref-tree.h" +#include "ipa-modref.h" #if defined(DBX_DEBUGGING_INFO) || defined(XCOFF_DEBUGGING_INFO) #include "dbxout.h" @@ -2497,6 +2499,7 @@ toplev::finalize (void) /* Needs to be called before cgraph_c_finalize since it uses symtab. */ ipa_reference_c_finalize (); ipa_fnsummary_c_finalize (); + ipa_modref_c_finalize (); cgraph_c_finalize (); cgraphunit_c_finalize (); -- cgit v1.1 From c4e4e163c79ca3fca265b85f44b869cb54e802b3 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Tue, 22 Sep 2020 16:26:49 -0400 Subject: c++: Add test for PR96652 Fixed by r11-3361. gcc/testsuite/ChangeLog: PR c++/96652 * g++.dg/cpp0x/decltype-96652.C: New test. 
--- gcc/testsuite/g++.dg/cpp0x/decltype-96652.C | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp0x/decltype-96652.C (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/cpp0x/decltype-96652.C b/gcc/testsuite/g++.dg/cpp0x/decltype-96652.C new file mode 100644 index 0000000..249cce2 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/decltype-96652.C @@ -0,0 +1,14 @@ +// PR c++/96652 +// { dg-do compile { target c++11 } } + +struct A {}; + +template +struct B +{ + A m; + friend decltype(m); +}; + +A a; +B b; -- cgit v1.1 From d6587211c02c4e2566c4e545c09757f3fbb7adab Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Tue, 22 Sep 2020 16:26:52 -0400 Subject: c++: Return only in-scope tparms in keep_template_parm [PR95310] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the testcase below, the dependent specializations iter_reference_t<F> and iter_reference_t<Out> share the same tree due to specialization caching. So when find_template_parameters walks through the requires-expression (as part of normalization), it sees and includes the out-of-scope template parameter F in the list of template parameters it found within the requires-expression (along with Out and N). From a correctness perspective this is harmless since the parameter mapping routines only care about the level and index of each parameter, so F is no different from Out in that sense. And it's also harmless that two parameters in the parameter mapping have the same level and index.
But having both Out and F in the parameter mapping means extra work for hash_atomic_constraint, tsubst_parameter_mapping and get_mapped_args; and it also means we print this irrelevant template parameter in the testcase's diagnostics (via pp_cxx_parameter_mapping): in requirements with ‘Out o’ [with N = (const int&)&a; F = const int*; Out = const int*] This patch makes keep_template_parm return only in-scope template parameters by looking into ctx_parms for the corresponding in-scope one, through a new helper function corresponding_template_parameter. (That we sometimes print irrelevant template parameters in diagnostics is also the subject of PR99 and PR66968, so the above diagnostic issue could likely be fixed in a more general way, but this targeted fix to keep_template_parm is perhaps worthwhile on its own.) gcc/cp/ChangeLog: PR c++/95310 * pt.c (corresponding_template_parameter): Define. (keep_template_parm): Use it to adjust the given template parameter to the corresponding in-scope one from ctx_parms. gcc/testsuite/ChangeLog: PR c++/95310 * g++.dg/concepts/diagnostic15.C: New test. --- gcc/cp/pt.c | 44 ++++++++++++++++++++++++++++ gcc/testsuite/g++.dg/concepts/diagnostic15.C | 16 ++++++++++ 2 files changed, 60 insertions(+) create mode 100644 gcc/testsuite/g++.dg/concepts/diagnostic15.C (limited to 'gcc') diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 69946da..314bd03 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -10243,6 +10243,42 @@ lookup_and_finish_template_variable (tree templ, tree targs, return convert_from_reference (templ); } +/* If the set of template parameters PARMS contains a template parameter + at the given LEVEL and INDEX, then return this parameter. Otherwise + return NULL_TREE.
*/ + +static tree +corresponding_template_parameter (tree parms, int level, int index) +{ + while (TMPL_PARMS_DEPTH (parms) > level) + parms = TREE_CHAIN (parms); + + if (TMPL_PARMS_DEPTH (parms) != level + || TREE_VEC_LENGTH (TREE_VALUE (parms)) <= index) + return NULL_TREE; + + tree t = TREE_VALUE (TREE_VEC_ELT (TREE_VALUE (parms), index)); + /* As in template_parm_to_arg. */ + if (TREE_CODE (t) == TYPE_DECL || TREE_CODE (t) == TEMPLATE_DECL) + t = TREE_TYPE (t); + else + t = DECL_INITIAL (t); + + gcc_assert (TEMPLATE_PARM_P (t)); + return t; +} + +/* Return the template parameter from PARMS that positionally corresponds + to the template parameter PARM, or else return NULL_TREE. */ + +static tree +corresponding_template_parameter (tree parms, tree parm) +{ + int level, index; + template_parm_level_and_index (parm, &level, &index); + return corresponding_template_parameter (parms, level, index); +} + struct pair_fn_data { @@ -10549,6 +10585,14 @@ keep_template_parm (tree t, void* data) BOUND_TEMPLATE_TEMPLATE_PARM itself. */ t = TREE_TYPE (TEMPLATE_TEMPLATE_PARM_TEMPLATE_DECL (t)); + /* This template parameter might be an argument to a cached dependent + specalization that was formed earlier inside some other template, in + which case the parameter is not among the ones that are in-scope. + Look in CTX_PARMS to find the corresponding in-scope template + parameter, and use it instead. */ + if (tree in_scope = corresponding_template_parameter (ftpi->ctx_parms, t)) + t = in_scope; + /* Arguments like const T yield parameters like const T. This means that a template-id like X would yield two distinct parameters: T and const T. Adjust types to their unqualified versions. 
*/ diff --git a/gcc/testsuite/g++.dg/concepts/diagnostic15.C b/gcc/testsuite/g++.dg/concepts/diagnostic15.C new file mode 100644 index 0000000..3acd9f6 --- /dev/null +++ b/gcc/testsuite/g++.dg/concepts/diagnostic15.C @@ -0,0 +1,16 @@ +// PR c++/95310 +// { dg-do compile { target concepts } } + +template +using iter_reference_t = decltype(*T{}); + +template +struct result { using type = iter_reference_t; }; + +template +concept indirectly_writable = requires(Out o) { // { dg-bogus "F =" } + iter_reference_t(*o) = N; +}; + +const int a = 0; +static_assert(indirectly_writable); // { dg-error "assert" } -- cgit v1.1 From 5cfb06b61bc559d94f57914826bac2f34abbd357 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Tue, 22 Sep 2020 22:36:01 +0200 Subject: Ignore clobbers in modref * ipa-modref.c (analyze_stmt): Ignore gimple clobber. --- gcc/ipa-modref.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'gcc') diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index d195354..fe277d8 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -552,6 +552,9 @@ analyze_store (gimple *, tree, tree op, void *data) static bool analyze_stmt (modref_summary *summary, gimple *stmt, bool ipa) { + /* There is no need to record clobbers. */ + if (gimple_clobber_p (stmt)) + return false; /* Analyze all loads and stores in STMT. 
*/ walk_stmt_load_store_ops (stmt, summary, analyze_load, analyze_store); -- cgit v1.1 From 5d2d79c8d9de06dee6c7fe5782c4d4ac66463773 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 22 Sep 2020 16:36:02 -0700 Subject: go.test: update issue4458.go for recent change --- gcc/testsuite/go.test/test/fixedbugs/issue4458.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/go.test/test/fixedbugs/issue4458.go b/gcc/testsuite/go.test/test/fixedbugs/issue4458.go index 820f18c..82b104a 100644 --- a/gcc/testsuite/go.test/test/fixedbugs/issue4458.go +++ b/gcc/testsuite/go.test/test/fixedbugs/issue4458.go @@ -16,5 +16,5 @@ func (T) foo() {} func main() { av := T{} pav := &av - (**T).foo(&pav) // ERROR "no method foo|requires named type or pointer to named" + (**T).foo(&pav) // ERROR "no method|requires named type or pointer to named" } -- cgit v1.1 From 521d2711409b1af89d2d802879c2bbd716200cb6 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 23 Sep 2020 00:16:27 +0000 Subject: Daily bump. --- gcc/ChangeLog | 78 ++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 83 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/c-family/ChangeLog | 5 +++ gcc/cp/ChangeLog | 26 ++++++++++++++++ gcc/testsuite/ChangeLog | 69 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 262 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c2f4752..c8ceb4b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,81 @@ +2020-09-22 Jan Hubicka + + * ipa-modref.c (analyze_stmt): Ignore gimple clobber. + +2020-09-22 Jan Hubicka + + * ipa-modref-tree.c: Add namespace selftest. + (modref_tree_c_tests): Rename to ... + (ipa_modref_tree_c_tests): ... this. + * ipa-modref.c (pass_modref): Remove destructor. + (ipa_modref_c_finalize): New function. + * ipa-modref.h (ipa_modref_c_finalize): Declare. 
+ * selftest-run-tests.c (selftest::run_tests): Call + ipa_modref_c_finalize. + * selftest.h (ipa_modref_tree_c_tests): Declare. + * toplev.c: Include ipa-modref-tree.h and ipa-modref.h + (toplev::finalize): Call ipa_modref_c_finalize. + +2020-09-22 David Malcolm + + * doc/analyzer.texi (Other Debugging Techniques): Mention + -fdump-analyzer-json. + * doc/invoke.texi (Static Analyzer Options): Add + -fdump-analyzer-json. + +2020-09-22 David Faust + + * config/bpf/bpf.md: Add defines for signed div and mod operators. + +2020-09-22 Martin Liska + + PR tree-optimization/96979 + * doc/invoke.texi: Document new param max-switch-clustering-attempts. + * params.opt: Add new parameter. + * tree-switch-conversion.c (jump_table_cluster::find_jump_tables): + Limit number of attempts. + (bit_test_cluster::find_bit_tests): Likewise. + +2020-09-22 Stefan Schulze Frielinghaus + + * config/s390/s390.md ("*cmp_ccs_0", "*cmp_ccz_0", + "*cmp_ccs_0_fastmath"): Basically change "*cmp_ccs_0" into + "*cmp_ccz_0" and for fast math add "*cmp_ccs_0_fastmath". + +2020-09-22 Kyrylo Tkachov + + PR target/71233 + * config/aarch64/arm_neon.h (vcls_u8, vcls_u16, vcls_u32, + vclsq_u8, vclsq_u16, vclsq_u32): Define. + +2020-09-22 Kyrylo Tkachov + + PR target/71233 + * config/aarch64/arm_neon.h (vceqq_p64, vceqz_p64, vceqzq_p64): Define. + +2020-09-22 Kyrylo Tkachov + + PR target/71233 + * config/aarch64/arm_neon.h (vadd_p8, vadd_p16, vadd_p64, vaddq_p8, + vaddq_p16, vaddq_p64, vaddq_p128): Define. + +2020-09-22 Jakub Jelinek + + * params.opt (--param=modref-max-tests=): Fix typo in help text: + perofmed -> performed. + * common.opt: Fix typo: incrmeental -> incremental. + * ipa-modref.c: Fix typos: recroding -> recording, becaue -> because, + analsis -> analysis. + (class modref_summaries): Fix typo: betweehn -> between. + (analyze_call): Fix typo: calle -> callee. + (read_modref_records): Fix typo: expcted -> expected. + (pass_ipa_modref::execute): Fix typo: calle -> callee. 
+ +2020-09-22 Jakub Jelinek + + * common.opt (-fipa-modref): Add dot at the end of option help. + * params.opt (--param=modref-max-tests=): Likewise. + 2020-09-21 Marek Polacek * doc/invoke.texi: Document -Wctad-maybe-unsupported. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index ca3ee10..1a983d8 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200922 +20200923 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 8241610..cd869c2 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,86 @@ +2020-09-22 David Malcolm + + * analysis-plan.cc: Include "json.h". + * analyzer.opt (fdump-analyzer-json): New. + * call-string.cc: Include "json.h". + (call_string::to_json): New. + * call-string.h (call_string::to_json): New decl. + * checker-path.cc: Include "json.h". + * constraint-manager.cc: Include "json.h". + (equiv_class::to_json): New. + (constraint::to_json): New. + (constraint_manager::to_json): New. + * constraint-manager.h (equiv_class::to_json): New decl. + (constraint::to_json): New decl. + (constraint_manager::to_json): New decl. + * diagnostic-manager.cc: Include "json.h". + (saved_diagnostic::to_json): New. + (diagnostic_manager::to_json): New. + * diagnostic-manager.h (saved_diagnostic::to_json): New decl. + (diagnostic_manager::to_json): New decl. + * engine.cc: Include "json.h", . + (exploded_node::status_to_str): New. + (exploded_node::to_json): New. + (exploded_edge::to_json): New. + (exploded_graph::to_json): New. + (dump_analyzer_json): New. + (impl_run_checkers): Call it. + * exploded-graph.h (exploded_node::status_to_str): New decl. + (exploded_node::to_json): New. + (exploded_edge::to_json): New. + (exploded_graph::to_json): New. + * pending-diagnostic.cc: Include "json.h". + * program-point.cc: Include "json.h". + (program_point::to_json): New. + * program-point.h (program_point::to_json): New decl. + * program-state.cc: Include "json.h". + (extrinsic_state::to_json): New. 
+ (sm_state_map::to_json): New. + (program_state::to_json): New. + * program-state.h (extrinsic_state::to_json): New decl. + (sm_state_map::to_json): New decl. + (program_state::to_json): New decl. + * region-model-impl-calls.cc: Include "json.h". + * region-model-manager.cc: Include "json.h". + * region-model-reachability.cc: Include "json.h". + * region-model.cc: Include "json.h". + * region-model.h (svalue::to_json): New decl. + (region::to_json): New decl. + * region.cc: Include "json.h". + (region::to_json: New. + * sm-file.cc: Include "json.h". + * sm-malloc.cc: Include "json.h". + * sm-pattern-test.cc: Include "json.h". + * sm-sensitive.cc: Include "json.h". + * sm-signal.cc: Include "json.h". + (signal_delivery_edge_info_t::to_json): New. + * sm-taint.cc: Include "json.h". + * sm.cc: Include "diagnostic.h", "tree-diagnostic.h", and + "json.h". + (state_machine::state::to_json): New. + (state_machine::to_json): New. + * sm.h (state_machine::state::to_json): New. + (state_machine::to_json): New. + * state-purge.cc: Include "json.h". + * store.cc: Include "json.h". + (binding_key::get_desc): New. + (binding_map::to_json): New. + (binding_cluster::to_json): New. + (store::to_json): New. + * store.h (binding_key::get_desc): New decl. + (binding_map::to_json): New decl. + (binding_cluster::to_json): New decl. + (store::to_json): New decl. + * supergraph.cc: Include "json.h". + (supergraph::to_json): New. + (supernode::to_json): New. + (superedge::to_json): New. + * supergraph.h (supergraph::to_json): New decl. + (supernode::to_json): New decl. + (superedge::to_json): New decl. + * svalue.cc: Include "json.h". + (svalue::to_json): New. 
+ 2020-09-21 David Malcolm PR analyzer/97130 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 69523ff..8ee29d8 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,8 @@ +2020-09-22 Jakub Jelinek + + * c.opt (Wbuiltin-declaration-mismatch): Fix typo in variable name: + warn_builtin_declaraion_mismatch -> warn_builtin_declaration_mismatch. + 2020-09-21 Marek Polacek * c.opt (Wctad-maybe-unsupported): New option. diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index df4b24d..dc1b0b0 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,29 @@ +2020-09-22 Patrick Palka + + PR c++/95310 + * pt.c (corresponding_template_parameter): Define. + (keep_template_parm): Use it to adjust the given template + parameter to the corresponding in-scope one from ctx_parms. + +2020-09-22 Nathan Sidwell + + * cp-tree.h (xref_tag_from_type): Don't declare. + * decl.c (xref_tag_from_type): Delete. + * pt.c (lookup_template_class_1): Erroneously located class + definitions just give error_mark, don't try and inject it into the + namespace. + +2020-09-22 Jakub Jelinek + + PR c++/97145 + * constexpr.c (cxx_eval_builtin_function_call): Return void_node for + calls to __sanitize_ptr_{sub,cmp} builtins. + +2020-09-22 Nathan Sidwell + + * pt.c (instantiate_class_template_1): Do not repush and unhide + injected friend. + 2020-09-21 Marek Polacek PR c++/90583 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a9a9fc1..68cdc31 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,72 @@ +2020-09-22 Patrick Palka + + PR c++/95310 + * g++.dg/concepts/diagnostic15.C: New test. + +2020-09-22 Patrick Palka + + PR c++/96652 + * g++.dg/cpp0x/decltype-96652.C: New test. + +2020-09-22 Jakub Jelinek + + PR c++/97145 + * g++.dg/asan/pr97145.C: New test. + +2020-09-22 David Faust + + * gcc.target/bpf/diag-sdiv.c: New test. + * gcc.target/bpf/diag-smod.c: New test. + * gcc.target/bpf/xbpf-sdiv-1.c: New test. 
+ * gcc.target/bpf/xbpf-smod-1.c: New test. + +2020-09-22 Nathan Sidwell + + * g++.old-deja/g++.pt/friend34.C: Check injected friend is still + invisible. + +2020-09-22 Marek Polacek + + * g++.dg/ext/timevar1.C: Also prune N%. + +2020-09-22 Marek Polacek + + * g++.dg/ext/timevar2.C: Also prune N%. + +2020-09-22 Martin Liska + + PR tree-optimization/96979 + * g++.dg/tree-ssa/pr96979.C: New test. + +2020-09-22 Stefan Schulze Frielinghaus + + * gcc.target/s390/load-and-test-fp-1.c: Change test to include all + possible combinations of dead/live registers and comparisons (equality, + relational). + * gcc.target/s390/load-and-test-fp-2.c: Same as load-and-test-fp-1.c + but for fast math. + * gcc.target/s390/load-and-test-fp.h: New test included by + load-and-test-fp-{1,2}.c. + +2020-09-22 Kyrylo Tkachov + + PR target/71233 + * gcc.target/aarch64/simd/vcls_unsigned_1.c: New test. + +2020-09-22 Kyrylo Tkachov + + PR target/71233 + * gcc.target/aarch64/simd/vceq_poly_1.c: New test. + +2020-09-22 Kyrylo Tkachov + + PR target/71233 + * gcc.target/aarch64/simd/vadd_poly_1.c: New test. + +2020-09-22 Jan Hubicka + + * gcc.target/i386/m128-check.h: Add no-strict-aliasing + 2020-09-21 David Malcolm PR analyzer/97130 -- cgit v1.1 From 63cd53d2f5da07856340bbea11ee09ab1125e8c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Chigot?= Date: Tue, 26 May 2020 11:31:37 +0200 Subject: runtime, net: fix build errors on AIX Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/235158 --- gcc/go/gofrontend/MERGE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index a8ba5a3..d17d397 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -b24062f0b2e8f6173731d5654afe0addf857270e +5605a0727d3395becba1fbd4447807073984ec13 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. 
-- cgit v1.1 From fefc209299236593fcc3004c874b2602a3735056 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 22 Sep 2020 11:29:02 -0400 Subject: analyzer: use switch in exploded_node::on_stmt This patch replaces a sequence of dyn_cast to different gimple stmt types in exploded_node::on_stmt with a switch on the gimple_code. This makes clearer which kinds of stmt are currently treated as no-ops, as a precursor to handling them properly. No functional change intended. gcc/analyzer/ChangeLog: * engine.cc (exploded_node::on_stmt): Replace sequence of dyn_cast with switch. --- gcc/analyzer/engine.cc | 134 +++++++++++++++++++++++++++++-------------------- 1 file changed, 80 insertions(+), 54 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index df7e335..4374297 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -1063,62 +1063,88 @@ exploded_node::on_stmt (exploded_graph &eg, &old_state, state, stmt); - if (const gassign *assign = dyn_cast (stmt)) - state->m_region_model->on_assignment (assign, &ctxt); - - if (const greturn *return_ = dyn_cast (stmt)) - state->m_region_model->on_return (return_, &ctxt); - - /* Track whether we have a gcall to a function that's not recognized by - anything, for which we don't have a function body, or for which we - don't know the fndecl. */ bool unknown_side_effects = false; - if (const gcall *call = dyn_cast (stmt)) + + switch (gimple_code (stmt)) { - /* Debugging/test support. */ - if (is_special_named_call_p (call, "__analyzer_describe", 2)) - state->m_region_model->impl_call_analyzer_describe (call, &ctxt); - else if (is_special_named_call_p (call, "__analyzer_dump", 0)) - { - /* Handle the builtin "__analyzer_dump" by dumping state - to stderr. */ - state->dump (eg.get_ext_state (), true); - } - else if (is_special_named_call_p (call, "__analyzer_dump_path", 0)) - { - /* Handle the builtin "__analyzer_dump_path" by queuing a - diagnostic at this exploded_node. 
*/ - ctxt.warn (new dump_path_diagnostic ()); - } - else if (is_special_named_call_p (call, "__analyzer_dump_region_model", 0)) - { - /* Handle the builtin "__analyzer_dump_region_model" by dumping - the region model's state to stderr. */ - state->m_region_model->dump (false); - } - else if (is_special_named_call_p (call, "__analyzer_eval", 1)) - state->m_region_model->impl_call_analyzer_eval (call, &ctxt); - else if (is_special_named_call_p (call, "__analyzer_break", 0)) - { - /* Handle the builtin "__analyzer_break" by triggering a - breakpoint. */ - /* TODO: is there a good cross-platform way to do this? */ - raise (SIGINT); - } - else if (is_special_named_call_p (call, "__analyzer_dump_exploded_nodes", - 1)) - { - /* This is handled elsewhere. */ - } - else if (is_setjmp_call_p (call)) - state->m_region_model->on_setjmp (call, this, &ctxt); - else if (is_longjmp_call_p (call)) - { - on_longjmp (eg, call, state, &ctxt); - return on_stmt_flags::terminate_path (); - } - else - unknown_side_effects = state->m_region_model->on_call_pre (call, &ctxt); + default: + /* No-op for now. */ + break; + + case GIMPLE_ASSIGN: + { + const gassign *assign = as_a (stmt); + state->m_region_model->on_assignment (assign, &ctxt); + } + break; + + case GIMPLE_ASM: + /* No-op for now. */ + break; + + case GIMPLE_CALL: + { + /* Track whether we have a gcall to a function that's not recognized by + anything, for which we don't have a function body, or for which we + don't know the fndecl. */ + const gcall *call = as_a (stmt); + + /* Debugging/test support. */ + if (is_special_named_call_p (call, "__analyzer_describe", 2)) + state->m_region_model->impl_call_analyzer_describe (call, &ctxt); + else if (is_special_named_call_p (call, "__analyzer_dump", 0)) + { + /* Handle the builtin "__analyzer_dump" by dumping state + to stderr. 
*/ + state->dump (eg.get_ext_state (), true); + } + else if (is_special_named_call_p (call, "__analyzer_dump_path", 0)) + { + /* Handle the builtin "__analyzer_dump_path" by queuing a + diagnostic at this exploded_node. */ + ctxt.warn (new dump_path_diagnostic ()); + } + else if (is_special_named_call_p (call, "__analyzer_dump_region_model", + 0)) + { + /* Handle the builtin "__analyzer_dump_region_model" by dumping + the region model's state to stderr. */ + state->m_region_model->dump (false); + } + else if (is_special_named_call_p (call, "__analyzer_eval", 1)) + state->m_region_model->impl_call_analyzer_eval (call, &ctxt); + else if (is_special_named_call_p (call, "__analyzer_break", 0)) + { + /* Handle the builtin "__analyzer_break" by triggering a + breakpoint. */ + /* TODO: is there a good cross-platform way to do this? */ + raise (SIGINT); + } + else if (is_special_named_call_p (call, + "__analyzer_dump_exploded_nodes", + 1)) + { + /* This is handled elsewhere. */ + } + else if (is_setjmp_call_p (call)) + state->m_region_model->on_setjmp (call, this, &ctxt); + else if (is_longjmp_call_p (call)) + { + on_longjmp (eg, call, state, &ctxt); + return on_stmt_flags::terminate_path (); + } + else + unknown_side_effects + = state->m_region_model->on_call_pre (call, &ctxt); + } + break; + + case GIMPLE_RETURN: + { + const greturn *return_ = as_a (stmt); + state->m_region_model->on_return (return_, &ctxt); + } + break; } bool any_sm_changes = false; -- cgit v1.1 From c1c2ccc74cb6f547118431d8142bc894991b104a Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 22 Sep 2020 17:42:27 -0400 Subject: Add $(ZLIBINC) to CFLAGS-analyzer/engine.o gcc/ChangeLog: * Makefile.in: Add $(ZLIBINC) to CFLAGS-analyzer/engine.o. 
--- gcc/Makefile.in | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/Makefile.in b/gcc/Makefile.in index c710bad..9c6c1c9 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2296,6 +2296,7 @@ s-bversion: BASE-VER CFLAGS-toplev.o += -DTARGET_NAME=\"$(target_noncanonical)\" CFLAGS-optinfo-emit-json.o += -DTARGET_NAME=\"$(target_noncanonical)\" $(ZLIBINC) +CFLAGS-analyzer/engine.o += $(ZLIBINC) pass-instances.def: $(srcdir)/passes.def $(PASSES_EXTRA) \ $(srcdir)/gen-pass-instances.awk -- cgit v1.1 From 1260f69aeb96f98ebefdbb852a4e868b8926eb0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Chigot?= Date: Tue, 7 May 2019 13:57:40 +0200 Subject: syscall: remove ptrace syscall on ppc64 ptrace is available only for 32 bits programs. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/252558 --- gcc/go/gofrontend/MERGE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index d17d397..59b580f 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -5605a0727d3395becba1fbd4447807073984ec13 +99ab98d2ed8fa8a33947c52925f89b344d7cb8ae The first line of this file holds the git revision number of the last merge done from the gofrontend repository. -- cgit v1.1 From e884ced05231a04b1a3c70ece7237d0b1eeebf19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Chigot?= Date: Fri, 3 May 2019 16:53:13 +0200 Subject: syscall: fix TestForeground for AIX Syscall function can't be used on AIX. Therefore, Ioctl in TestForeground must call raw_ioctl. 
Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/175080 --- gcc/go/gofrontend/MERGE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 59b580f..5d26b7e 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -99ab98d2ed8fa8a33947c52925f89b344d7cb8ae +cfee06e20a172753552b1515dd3a4fde5d5cad7b The first line of this file holds the git revision number of the last merge done from the gofrontend repository. -- cgit v1.1 From 28d3b78dff512e18fc45c45e10c705e2a5ae3b48 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Wed, 23 Sep 2020 09:16:17 +0200 Subject: [testsuite] Add missing require-effective-target alloca Add missing require-effective-target alloca directives. Tested on nvptx. gcc/testsuite/ChangeLog: * gcc.dg/Warray-bounds-63.c: Add require-effective-target alloca. * gcc.dg/Warray-bounds-66.c: Same. * gcc.dg/atomic/stdatomic-vm.c: Same. --- gcc/testsuite/gcc.dg/Warray-bounds-63.c | 3 ++- gcc/testsuite/gcc.dg/Warray-bounds-66.c | 3 ++- gcc/testsuite/gcc.dg/atomic/stdatomic-vm.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-63.c b/gcc/testsuite/gcc.dg/Warray-bounds-63.c index 0583d23..a3fc918 100644 --- a/gcc/testsuite/gcc.dg/Warray-bounds-63.c +++ b/gcc/testsuite/gcc.dg/Warray-bounds-63.c @@ -1,7 +1,8 @@ /* PR middle-end/94195 - missing warning reading a smaller object via an lvalue of a larger type { dg-do compile } - { dg-options "-O2 -Wall" } */ + { dg-options "-O2 -Wall" } + { dg-require-effective-target alloca } */ typedef __INT16_TYPE__ int16_t; typedef __SIZE_TYPE__ size_t; diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-66.c b/gcc/testsuite/gcc.dg/Warray-bounds-66.c index d9bb2a2..c61891f 100644 --- a/gcc/testsuite/gcc.dg/Warray-bounds-66.c +++ b/gcc/testsuite/gcc.dg/Warray-bounds-66.c @@ -1,6 +1,7 @@ /* PR middle-end/82608 - missing -Warray-bounds on an
out-of-bounds VLA index { dg-do compile } - { dg-options "-O2 -Wall -Wno-uninitialized -ftrack-macro-expansion=0" } */ + { dg-options "-O2 -Wall -Wno-uninitialized -ftrack-macro-expansion=0" } + { dg-require-effective-target alloca } */ #include "range.h" diff --git a/gcc/testsuite/gcc.dg/atomic/stdatomic-vm.c b/gcc/testsuite/gcc.dg/atomic/stdatomic-vm.c index f43fa49..cdfb701 100644 --- a/gcc/testsuite/gcc.dg/atomic/stdatomic-vm.c +++ b/gcc/testsuite/gcc.dg/atomic/stdatomic-vm.c @@ -2,6 +2,7 @@ with side effects. */ /* { dg-do run } */ /* { dg-options "-std=c11 -pedantic-errors" } */ +/* { dg-require-effective-target alloca } */ #include -- cgit v1.1 From ca52f937fff52c494ef2951490c1654750ef8157 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Tue, 22 Sep 2020 13:16:39 +0200 Subject: [nvptx] Handle move from DF subreg to DF reg in nvptx_output_mov_insn When compiling test-case gcc.dg/atomic/c11-atomic-exec-1.c, we run into these ptxas errors: ... line 100; error: Rounding modifier required for instruction 'cvt' line 105; error: Rounding modifier required for instruction 'cvt' ... The problem is that this move: ... //(insn 13 11 14 2 // (set (reg:DF 28 [ _9 ]) // (subreg:DF (reg:TI 22 [ _1 ]) 0)) 9 {*movdf_insn} // (nil)) cvt.f64.u64 %r28, %r22$0; ... is emitted as cvt.f64.u64, while it should be a mov.b64 instead. Fix this by handling this case in nvptx_output_mov_insn. Tested on nvptx. gcc/ChangeLog: PR target/97158 * config/nvptx/nvptx.c (nvptx_output_mov_insn): Handle move from DF subreg to DF reg. 
--- gcc/config/nvptx/nvptx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 0c590d8..54b1fdf 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -2349,6 +2349,7 @@ const char * nvptx_output_mov_insn (rtx dst, rtx src) { machine_mode dst_mode = GET_MODE (dst); + machine_mode src_mode = GET_MODE (src); machine_mode dst_inner = (GET_CODE (dst) == SUBREG ? GET_MODE (XEXP (dst, 0)) : dst_mode); machine_mode src_inner = (GET_CODE (src) == SUBREG @@ -2375,7 +2376,7 @@ nvptx_output_mov_insn (rtx dst, rtx src) if (GET_MODE_SIZE (dst_inner) == GET_MODE_SIZE (src_inner)) { if (GET_MODE_BITSIZE (dst_mode) == 128 - && GET_MODE_BITSIZE (GET_MODE (src)) == 128) + && GET_MODE_BITSIZE (src_mode) == 128) { /* mov.b128 is not supported. */ if (dst_inner == V2DImode && src_inner == TImode) @@ -2388,6 +2389,10 @@ nvptx_output_mov_insn (rtx dst, rtx src) return "%.\tmov.b%T0\t%0, %1;"; } + if (GET_MODE_BITSIZE (src_inner) == 128 + && GET_MODE_BITSIZE (src_mode) == 64) + return "%.\tmov.b%T0\t%0, %1;"; + return "%.\tcvt%t0%t1\t%0, %1;"; } -- cgit v1.1 From 6b4e8bf88f1172ce8561f57b12fb81063b21a78f Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Mon, 21 Sep 2020 16:26:10 +0200 Subject: gcov: fix streaming corruption gcc/ChangeLog: PR gcov-profile/97069 * profile.c (branch_prob): Line number must be at least 1. gcc/testsuite/ChangeLog: PR gcov-profile/97069 * g++.dg/gcov/pr97069.C: New test. 
--- gcc/profile.c | 6 +++--- gcc/testsuite/g++.dg/gcov/pr97069.C | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/g++.dg/gcov/pr97069.C (limited to 'gcc') diff --git a/gcc/profile.c b/gcc/profile.c index fe8963c..4540959 100644 --- a/gcc/profile.c +++ b/gcc/profile.c @@ -1375,7 +1375,7 @@ branch_prob (bool thunk) seen_locations.add (loc); expanded_location curr_location = expand_location (loc); output_location (&streamed_locations, curr_location.file, - curr_location.line, &offset, bb); + MAX (1, curr_location.line), &offset, bb); } for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) @@ -1386,7 +1386,7 @@ branch_prob (bool thunk) { seen_locations.add (loc); output_location (&streamed_locations, gimple_filename (stmt), - gimple_lineno (stmt), &offset, bb); + MAX (1, gimple_lineno (stmt)), &offset, bb); } } @@ -1401,7 +1401,7 @@ branch_prob (bool thunk) { expanded_location curr_location = expand_location (loc); output_location (&streamed_locations, curr_location.file, - curr_location.line, &offset, bb); + MAX (1, curr_location.line), &offset, bb); } if (offset) diff --git a/gcc/testsuite/g++.dg/gcov/pr97069.C b/gcc/testsuite/g++.dg/gcov/pr97069.C new file mode 100644 index 0000000..040e336 --- /dev/null +++ b/gcc/testsuite/g++.dg/gcov/pr97069.C @@ -0,0 +1,20 @@ +// PR gcov-profile/97069 +// { dg-options "--coverage" } +// { dg-do run { target native } } + +# 0 "pr97069.C" +# 0 "" +# 0 "" +# 1 "/usr/include/stdc-predef.h" 1 3 4 +# 0 "" 2 +# 1 "pr97069.C" +int main() +{ + return 0; +} +# 0 "pr97069.C" +void zero_line_directive() +{ +} + +// { dg-final { run-gcov pr97069.C } } -- cgit v1.1 From 371f0b990f2bbf638b44da27cf6fc5f86e0d6d4e Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 23 Sep 2020 10:07:37 +0200 Subject: middle-end/97162 - fix ICE when building gamess This appropriately guards the check for a hard register in compare_base_decls which otherwise ICEs when passed a CONST_DECL. 
2020-09-23 Richard Biener PR middle-end/97162 * alias.c (compare_base_decls): Use DECL_HARD_REGISTER and guard with VAR_P. --- gcc/alias.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/alias.c b/gcc/alias.c index 1cb702b..f6d7a17 100644 --- a/gcc/alias.c +++ b/gcc/alias.c @@ -2142,10 +2142,10 @@ compare_base_decls (tree base1, tree base2) /* If we have two register decls with register specification we cannot decide unless their assembler names are the same. */ - if (DECL_REGISTER (base1) - && DECL_REGISTER (base2) - && HAS_DECL_ASSEMBLER_NAME_P (base1) - && HAS_DECL_ASSEMBLER_NAME_P (base2) + if (VAR_P (base1) + && VAR_P (base2) + && DECL_HARD_REGISTER (base1) + && DECL_HARD_REGISTER (base2) && DECL_ASSEMBLER_NAME_SET_P (base1) && DECL_ASSEMBLER_NAME_SET_P (base2)) { -- cgit v1.1 From 9e64f17d044767248175fece80a2759d94c45fc4 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 23 Sep 2020 10:11:03 +0200 Subject: tree-optimization/97151 - improve PTA for C++ operator delete C++ operator delete, when DECL_IS_REPLACEABLE_OPERATOR_DELETE_P, does not cause the deleted object to be escaped. It also has no other interesting side-effects for PTA so skip it like we do for BUILT_IN_FREE. 2020-09-23 Richard Biener PR tree-optimization/97151 * tree-ssa-structalias.c (find_func_aliases_for_call): DECL_IS_REPLACEABLE_OPERATOR_DELETE_P has no effect on arguments. * g++.dg/cpp1y/new1.C: Adjust for two more handled transforms. 
--- gcc/testsuite/g++.dg/cpp1y/new1.C | 4 ++-- gcc/tree-ssa-structalias.c | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/cpp1y/new1.C b/gcc/testsuite/g++.dg/cpp1y/new1.C index aa5f647..fec0088 100644 --- a/gcc/testsuite/g++.dg/cpp1y/new1.C +++ b/gcc/testsuite/g++.dg/cpp1y/new1.C @@ -69,5 +69,5 @@ test_unused() { delete p; } -/* { dg-final { scan-tree-dump-times "Deleting : operator delete" 5 "cddce1"} } */ -/* { dg-final { scan-tree-dump-times "Deleting : _\\d+ = operator new" 7 "cddce1"} } */ +/* { dg-final { scan-tree-dump-times "Deleting : operator delete" 6 "cddce1"} } */ +/* { dg-final { scan-tree-dump-times "Deleting : _\\d+ = operator new" 8 "cddce1"} } */ diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index 44fe52e..f676bf9 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -4857,6 +4857,8 @@ find_func_aliases_for_call (struct function *fn, gcall *t) point for reachable memory of their arguments. */ else if (flags & (ECF_PURE|ECF_LOOPING_CONST_OR_PURE)) handle_pure_call (t, &rhsc); + else if (fndecl && DECL_IS_REPLACEABLE_OPERATOR_DELETE_P (fndecl)) + ; else handle_rhs_call (t, &rhsc); if (gimple_call_lhs (t)) -- cgit v1.1 From 5253b3e6deda1caa9432f5ac56e78bd999fb9f85 Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Wed, 23 Sep 2020 11:07:40 +0200 Subject: gcc/analyzer: Silence -Wpragma warns with GCC < 10 gcc/analyzer/ChangeLog: * analyzer-logging.cc: Guard '#pragma ... ignored "-Wformat-diag"' by '#if __GNUC__ >= 10' * analyzer.h: Likewise. * call-string.cc: Likewise. 
--- gcc/analyzer/analyzer-logging.cc | 2 ++ gcc/analyzer/analyzer.h | 3 ++- gcc/analyzer/call-string.cc | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/analyzer/analyzer-logging.cc b/gcc/analyzer/analyzer-logging.cc index d2bbd05..583840a 100644 --- a/gcc/analyzer/analyzer-logging.cc +++ b/gcc/analyzer/analyzer-logging.cc @@ -30,7 +30,9 @@ along with GCC; see the file COPYING3. If not see #if ENABLE_ANALYZER +#if __GNUC__ >= 10 #pragma GCC diagnostic ignored "-Wformat-diag" +#endif namespace ana { diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h index d234fcf..b85edb1 100644 --- a/gcc/analyzer/analyzer.h +++ b/gcc/analyzer/analyzer.h @@ -301,7 +301,8 @@ private: /* Disable -Wformat-diag; we want to be able to use pp_printf for logging/dumping without complying with the rules for diagnostics. */ - +#if __GNUC__ >= 10 #pragma GCC diagnostic ignored "-Wformat-diag" +#endif #endif /* GCC_ANALYZER_ANALYZER_H */ diff --git a/gcc/analyzer/call-string.cc b/gcc/analyzer/call-string.cc index 72568c6..8dfd157 100644 --- a/gcc/analyzer/call-string.cc +++ b/gcc/analyzer/call-string.cc @@ -39,7 +39,9 @@ along with GCC; see the file COPYING3. If not see #if ENABLE_ANALYZER +#if __GNUC__ >= 10 #pragma GCC diagnostic ignored "-Wformat-diag" +#endif /* class call_string. */ -- cgit v1.1 From d23ea1e865301cd45f14ccbdb0bca49251fde9e1 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Wed, 23 Sep 2020 10:29:17 +0100 Subject: AArch64: Implement vstrq_p128 intrinsic This patch implements the missing vstrq_p128 intrinsic. It just performs a store of the poly128_t argument to a memory location. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ PR target/71233 * config/aarch64/arm_neon.h (vstrq_p128): Define. gcc/testsuite/ PR target/71233 * gcc.target/aarch64/simd/vstrq_p128_1.c: New test. 
--- gcc/config/aarch64/arm_neon.h | 7 +++++++ gcc/testsuite/gcc.target/aarch64/simd/vstrq_p128_1.c | 12 ++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vstrq_p128_1.c (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 341019b..fe1ab0d 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -30167,6 +30167,13 @@ vst4q_p64 (poly64_t * __a, poly64x2x4_t __val) __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstrq_p128 (poly128_t * __ptr, poly128_t __val) +{ + *__ptr = __val; +} + /* vsub */ __extension__ extern __inline int64_t diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vstrq_p128_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vstrq_p128_1.c new file mode 100644 index 0000000..8d036fd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vstrq_p128_1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +#include <arm_neon.h> + +void +test (poly128_t *ptr, poly128_t a) +{ + vstrq_p128 (ptr, a); +} + +/* { dg-final { scan-assembler-times {stp.*x2,.*x3,.*[x0]} 1 } } */ -- cgit v1.1 From f2868e4bcff2c7b882d01231f039459c00e59d7b Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Wed, 23 Sep 2020 10:32:42 +0100 Subject: AArch64: Implement vldrq_p128 intrinsic This patch implements the missing vldrq_p128 intrinsic that just loads from the appropriate pointer. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ PR target/71233 * config/aarch64/arm_neon.h (vldrq_p128): Define. gcc/testsuite/ PR target/71233 * gcc.target/aarch64/simd/vldrq_p128_1.c: New test.
--- gcc/config/aarch64/arm_neon.h | 7 +++++++ gcc/testsuite/gcc.target/aarch64/simd/vldrq_p128_1.c | 13 +++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vldrq_p128_1.c (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index fe1ab0d..32b0877 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -19676,6 +19676,13 @@ vld4q_p64 (const poly64_t * __a) return ret; } +__extension__ extern __inline poly128_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldrq_p128 (const poly128_t * __ptr) +{ + return *__ptr; +} + /* vldn_dup */ __extension__ extern __inline int8x8x2_t diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vldrq_p128_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vldrq_p128_1.c new file mode 100644 index 0000000..9c7e01b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vldrq_p128_1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +#include <arm_neon.h> + +poly128_t +test (poly128_t * p) +{ + return vldrq_p128 (p); +} + +/* { dg-final { scan-assembler-times {ldp.*x0,.*x1,.*[x0]} 1 } } */ + -- cgit v1.1 From e8e818399d70c5a5a3d30a54d305c6e2b92e2c66 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Wed, 23 Sep 2020 11:07:50 +0100 Subject: AArch64: Implement missing _p64 intrinsics for vector permutes This patch implements some missing vector permute intrinsics operating on poly64x2_t types. They are implemented identically to their uint64x2_t brethren. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ PR target/71233 * config/aarch64/arm_neon.h (vtrn1q_p64, vtrn2q_p64, vuzp1q_p64, vuzp2q_p64, vzip1q_p64, vzip2q_p64): Define. gcc/testsuite/ PR target/71233 * gcc.target/aarch64/simd/trn_zip_p64_1.c: New test.
--- gcc/config/aarch64/arm_neon.h | 67 ++++++++++++++++++++++ .../gcc.target/aarch64/simd/trn_zip_p64_1.c | 44 ++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/trn_zip_p64_1.c (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 32b0877..e8c130f 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -30568,6 +30568,17 @@ vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b) #endif } +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vtrn1q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2}); +#endif +} + __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b) @@ -30838,6 +30849,18 @@ vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b) #endif } + +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vtrn2q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3}); +#endif +} + __extension__ extern __inline float16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtrn_f16 (float16x4_t __a, float16x4_t __b) @@ -31484,6 +31507,17 @@ vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b) #endif } +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vuzp1q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2}); +#endif +} + __extension__ extern __inline float16x4_t __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) vuzp2_f16 (float16x4_t __a, float16x4_t __b) @@ -31743,6 +31777,17 @@ vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) #endif } +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vuzp2q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3}); +#endif +} + __INTERLEAVE_LIST (uzp) /* vzip */ @@ -32011,6 +32056,17 @@ vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) #endif } +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vzip1q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2}); +#endif +} + __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vzip2_f16 (float16x4_t __a, float16x4_t __b) @@ -32275,6 +32331,17 @@ vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) #endif } +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vzip2q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3}); +#endif +} + __INTERLEAVE_LIST (zip) #undef __INTERLEAVE_LIST diff --git a/gcc/testsuite/gcc.target/aarch64/simd/trn_zip_p64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/trn_zip_p64_1.c new file mode 100644 index 0000000..a47321d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/trn_zip_p64_1.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +#include + +poly64x2_t +foo (poly64x2_t a, poly64x2_t b) +{ + return vtrn1q_p64 (a, b); +} + +poly64x2_t +foo1 (poly64x2_t a, poly64x2_t b) +{ + return 
vtrn2q_p64 (a, b); +} + +poly64x2_t +foo2 (poly64x2_t a, poly64x2_t b) +{ + return vuzp1q_p64 (a, b); +} + +poly64x2_t +foo3 (poly64x2_t a, poly64x2_t b) +{ + return vuzp2q_p64 (a, b); +} + +poly64x2_t +foo4 (poly64x2_t a, poly64x2_t b) +{ + return vzip1q_p64 (a, b); +} + +poly64x2_t +foo5 (poly64x2_t a, poly64x2_t b) +{ + return vzip2q_p64 (a, b); +} + +/* { dg-final { scan-assembler-times {zip1\tv0.2d, v0.2d, v1.2d} 3 } } */ +/* { dg-final { scan-assembler-times {zip2\tv0.2d, v0.2d, v1.2d} 3 } } */ + -- cgit v1.1 From 52e5b3fd8ef1d1b3ff86e26bd0aa7266bd9767fd Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 23 Sep 2020 10:42:48 +0200 Subject: tree-optimization/97173 - extend assert in vectorizable_live_operation The condition we're expecting to eventually run into isn't fully captured by checking for CTORs, instead we can also run into the CTOR element conversion. 2020-09-23 Richard Biener PR tree-optimization/97173 * tree-vect-loop.c (vectorizable_live_operation): Extend assert to also cover element conversions. * gcc.dg/vect/pr97173.c: New testcase.
--- gcc/testsuite/gcc.dg/vect/pr97173.c | 19 +++++++++++++++++++ gcc/tree-vect-loop.c | 6 ++++-- 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr97173.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/pr97173.c b/gcc/testsuite/gcc.dg/vect/pr97173.c new file mode 100644 index 0000000..fd4a889 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97173.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ + +typedef struct { + char *track; + char *clocks; + char *fm; + char *weak; +} disk_t; + +disk_t disk_update_tlens_d; +int disk_update_tlens_d_0; + +void disk_update_tlens() { + disk_update_tlens_d.track = disk_update_tlens_d.clocks = + disk_update_tlens_d.track + disk_update_tlens_d_0; + disk_update_tlens_d.fm = disk_update_tlens_d.clocks + disk_update_tlens_d_0; + disk_update_tlens_d.weak = disk_update_tlens_d.fm; + disk_update_tlens_d.track[2] = 5; +} diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index b1a6e15..46d126c 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -8345,8 +8345,10 @@ vectorizable_live_operation (vec_info *vinfo, if (gimple_code (use_stmt) != GIMPLE_PHI && !vect_stmt_dominates_stmt_p (gsi_stmt (*gsi), use_stmt)) { - gcc_assert (is_gimple_assign (use_stmt) - && gimple_assign_rhs_code (use_stmt) == CONSTRUCTOR); + enum tree_code code = gimple_assign_rhs_code (use_stmt); + gcc_assert (code == CONSTRUCTOR + || code == VIEW_CONVERT_EXPR + || CONVERT_EXPR_CODE_P (code)); if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Using original scalar computation for " -- cgit v1.1 From 02b5377b3766804059b7824330d33d0e1cef2e5b Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Wed, 23 Sep 2020 12:02:29 +0100 Subject: AArch64: Implement missing vrndns_f32 intrinsic This patch implements the missing vrndns_f32 intrinsic. This operates on a scalar float32_t value. It can be mapped down to a __builtin_aarch64_frintnsf builtin. This patch does that. 
Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ PR target/71233 * config/aarch64/aarch64-simd-builtins.def (frintn): Use BUILTIN_VHSDF_HSDF for modes. Remove explicit hf instantiation. * config/aarch64/arm_neon.h (vrndns_f32): Define. gcc/testsuite/ PR target/71233 * gcc.target/aarch64/simd/vrndns_f32_1.c: New test. --- gcc/config/aarch64/aarch64-simd-builtins.def | 3 +-- gcc/config/aarch64/arm_neon.h | 7 +++++++ gcc/testsuite/gcc.target/aarch64/simd/vrndns_f32_1.c | 13 +++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vrndns_f32_1.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index d1b2110..48ecd4a 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -338,12 +338,11 @@ BUILTIN_VHSDF (UNOP, nearbyint, 2, FP) BUILTIN_VHSDF (UNOP, rint, 2, FP) BUILTIN_VHSDF (UNOP, round, 2, FP) - BUILTIN_VHSDF_DF (UNOP, frintn, 2, FP) + BUILTIN_VHSDF_HSDF (UNOP, frintn, 2, FP) VAR1 (UNOP, btrunc, 2, FP, hf) VAR1 (UNOP, ceil, 2, FP, hf) VAR1 (UNOP, floor, 2, FP, hf) - VAR1 (UNOP, frintn, 2, FP, hf) VAR1 (UNOP, nearbyint, 2, FP, hf) VAR1 (UNOP, rint, 2, FP, hf) VAR1 (UNOP, round, 2, FP, hf) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index e8c130f..b3c9b64 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -26073,6 +26073,13 @@ vrndmq_f64 (float64x2_t __a) /* vrndn */ +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrndns_f32 (float32_t __a) +{ + return __builtin_aarch64_frintnsf (__a); +} + __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndn_f32 (float32x2_t __a) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vrndns_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vrndns_f32_1.c new 
file mode 100644 index 0000000..960e4f6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vrndns_f32_1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +#include <arm_neon.h> + +float32_t +test (float32_t a) +{ + return vrndns_f32 (a); +} + +/* { dg-final { scan-assembler-times "frintn\\ts\[0-9\]+, s\[0-9\]+" 1 } } */ + -- cgit v1.1 From 4452a7660b224ff310d246bc7f8c612669c8cd98 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 23 Sep 2020 12:29:40 +0100 Subject: vect: Fix epilogue loop handling of partial vectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the fallout that Kewen reported on Power after the recent change to avoid unnecessary use of partial vectors. As Kewen said, the problem is that vect_analyze_loop_2 doesn't know how many epilogue iterations there will be, and so it cannot make a final decision about whether the number of iterations forces an epilogue loop to use partial vectors. This is similar to the current situation for peeling: we don't know during initial analysis whether an epilogue loop will itself require peeling. Instead we decide that during vect_do_peeling, where the final number of epilogue loop iterations is known. The patch takes a similar approach for the decision about whether to use partial vectors. As the comments in the patch say, the idea is that vect_analyze_loop_2 should make peeling and partial- vector decisions based on the assumption that the loop_vinfo will be used as the main loop, while vect_do_peeling should make them in the knowledge that the loop_vinfo will be used as an epilogue loop. This allows the same analysis to be used for both cases, which we rely on for implementing VECT_COMPARE_COSTS; see the big comment in vect_analyze_loop for details. I hope the patch makes the (mostly preexisting) structure a bit more obvious.
It isn't what anyone would design from scratch, but that's the nature of working with a mature vector framework. Arranging things this way means that vect_verify_full_masking and vect_verify_loop_lens now become part of the “can” rather than “will” test for partial vectors. Also, while splitting out the logic that handles epilogues with constant iterations, I added a check to make sure that we don't try to use partial vectors to vectorise a single-scalar loop. This required some changes to the Power tests. gcc/ * tree-vectorizer.h (determine_peel_for_niter): Delete in favor of... (vect_determine_partial_vectors_and_peeling): ...this new function. * tree-vect-loop-manip.c (vect_update_epilogue_niters): New function. Reject using vector epilogue loops for single iterations. Install the constant number of epilogue loop iterations in the associated loop_vinfo. Rely on vect_determine_partial_vectors_and_peeling to do the main part of the test. (vect_do_peeling): Use vect_update_epilogue_niters to handle epilogue loops with a known number of iterations. Skip recomputing the number of iterations later in that case. Otherwise, use vect_determine_partial_vectors_and_peeling to decide whether the epilogue loop needs to use partial vectors or peeling. * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Set the default can_use_partial_vectors_p to false if partial-vector-usage=0. (determine_peel_for_niter): Remove in favor of... (vect_determine_partial_vectors_and_peeling): ...this new function, split out from... (vect_analyze_loop_2): ...here. Reflect the vect_verify_full_masking and vect_verify_loop_lens results in CAN_USE_PARTIAL_VECTORS_P rather than USING_PARTIAL_VECTORS_P. gcc/testsuite/ * gcc.target/powerpc/p9-vec-length-epil-1.c: Do not expect the single-iteration epilogues of the 64-bit loops to be vectorized. * gcc.target/powerpc/p9-vec-length-epil-7.c: Likewise. * gcc.target/powerpc/p9-vec-length-epil-8.c: Likewise. 
--- .../gcc.target/powerpc/p9-vec-length-epil-1.c | 4 +- .../gcc.target/powerpc/p9-vec-length-epil-7.c | 2 +- .../gcc.target/powerpc/p9-vec-length-epil-8.c | 4 +- gcc/tree-vect-loop-manip.c | 83 ++++++--- gcc/tree-vect-loop.c | 196 ++++++++++++++------- gcc/tree-vectorizer.h | 3 +- 6 files changed, 192 insertions(+), 100 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c index ebb2f45..d248f09 100644 --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-1.c @@ -10,6 +10,6 @@ /* { dg-final { scan-assembler-times {\mlxvx?\M} 20 } } */ /* { dg-final { scan-assembler-times {\mstxvx?\M} 10 } } */ -/* { dg-final { scan-assembler-times {\mlxvl\M} 20 } } */ -/* { dg-final { scan-assembler-times {\mstxvl\M} 10 } } */ +/* { dg-final { scan-assembler-times {\mlxvl\M} 14 } } */ +/* { dg-final { scan-assembler-times {\mstxvl\M} 7 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-7.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-7.c index 9d40328..a27ee34 100644 --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-7.c +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-7.c @@ -8,4 +8,4 @@ #include "p9-vec-length-7.h" -/* { dg-final { scan-assembler-times {\mstxvl\M} 10 } } */ +/* { dg-final { scan-assembler-times {\mstxvl\M} 7 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c index 6b54a29..961df0d 100644 --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-epil-8.c @@ -8,5 +8,5 @@ #include "p9-vec-length-8.h" -/* { dg-final { scan-assembler-times {\mlxvl\M} 30 } } */ -/* { dg-final { scan-assembler-times {\mstxvl\M} 10 } } */ +/* { dg-final { scan-assembler-times {\mlxvl\M} 21 } } */ +/* { dg-final { scan-assembler-times 
{\mstxvl\M} 7 } } */ diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index 47cfa6f..7cf00e6 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -2386,6 +2386,34 @@ slpeel_update_phi_nodes_for_lcssa (class loop *epilog) rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e)); } +/* EPILOGUE_VINFO is an epilogue loop that we now know would need to + iterate exactly CONST_NITERS times. Make a final decision about + whether the epilogue loop should be used, returning true if so. */ + +static bool +vect_update_epilogue_niters (loop_vec_info epilogue_vinfo, + unsigned HOST_WIDE_INT const_niters) +{ + /* Avoid wrap-around when computing const_niters - 1. Also reject + using an epilogue loop for a single scalar iteration, even if + we could in principle implement that using partial vectors. */ + unsigned int gap_niters = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo); + if (const_niters <= gap_niters + 1) + return false; + + /* Install the number of iterations. */ + tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (epilogue_vinfo)); + tree niters_tree = build_int_cst (niters_type, const_niters); + tree nitersm1_tree = build_int_cst (niters_type, const_niters - 1); + + LOOP_VINFO_NITERS (epilogue_vinfo) = niters_tree; + LOOP_VINFO_NITERSM1 (epilogue_vinfo) = nitersm1_tree; + + /* Decide what to do if the number of epilogue iterations is not + a multiple of the epilogue loop's vectorization factor. */ + return vect_determine_partial_vectors_and_peeling (epilogue_vinfo, true); +} + /* Function vect_do_peeling. Input: @@ -2493,6 +2521,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, int estimated_vf; int prolog_peeling = 0; bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0; + bool vect_epilogues_updated_niters = false; /* We currently do not support prolog peeling if the target alignment is not known at compile time. 'vect_gen_prolog_loop_niters' depends on the target alignment being constant. 
*/ @@ -2601,8 +2630,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, if (vect_epilogues && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && prolog_peeling >= 0 - && known_eq (vf, lowest_vf) - && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (epilogue_vinfo)) + && known_eq (vf, lowest_vf)) { unsigned HOST_WIDE_INT eiters = (LOOP_VINFO_INT_NITERS (loop_vinfo) @@ -2612,13 +2640,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, eiters = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo); - unsigned int ratio; - unsigned int epilogue_gaps - = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo); - while (!(constant_multiple_p - (GET_MODE_SIZE (loop_vinfo->vector_mode), - GET_MODE_SIZE (epilogue_vinfo->vector_mode), &ratio) - && eiters >= lowest_vf / ratio + epilogue_gaps)) + while (!vect_update_epilogue_niters (epilogue_vinfo, eiters)) { delete epilogue_vinfo; epilogue_vinfo = NULL; @@ -2629,8 +2651,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, } epilogue_vinfo = loop_vinfo->epilogue_vinfos[0]; loop_vinfo->epilogue_vinfos.ordered_remove (0); - epilogue_gaps = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo); } + vect_epilogues_updated_niters = true; } /* Prolog loop may be skipped. */ bool skip_prolog = (prolog_peeling != 0); @@ -2928,7 +2950,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, skip_e edge. */ if (skip_vector) { - gcc_assert (update_e != NULL && skip_e != NULL); + gcc_assert (update_e != NULL + && skip_e != NULL + && !vect_epilogues_updated_niters); gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)), update_e->dest); tree new_ssa = make_ssa_name (TREE_TYPE (niters)); @@ -2953,25 +2977,32 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, niters = PHI_RESULT (new_phi); } - /* Subtract the number of iterations performed by the vectorized loop - from the number of total iterations. 
*/ - tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), - before_loop_niters, - niters); - - LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters; - LOOP_VINFO_NITERSM1 (epilogue_vinfo) - = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters), - epilogue_niters, - build_one_cst (TREE_TYPE (epilogue_niters))); - /* Set ADVANCE to the number of iterations performed by the previous loop and its prologue. */ *advance = niters; - /* Redo the peeling for niter analysis as the NITERs and alignment - may have been updated to take the main loop into account. */ - determine_peel_for_niter (epilogue_vinfo); + if (!vect_epilogues_updated_niters) + { + /* Subtract the number of iterations performed by the vectorized loop + from the number of total iterations. */ + tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), + before_loop_niters, + niters); + + LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters; + LOOP_VINFO_NITERSM1 (epilogue_vinfo) + = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters), + epilogue_niters, + build_one_cst (TREE_TYPE (epilogue_niters))); + + /* Decide what to do if the number of epilogue iterations is not + a multiple of the epilogue loop's vectorization factor. + We should have rejected the loop during the analysis phase + if this fails. 
*/ + if (!vect_determine_partial_vectors_and_peeling (epilogue_vinfo, + true)) + gcc_unreachable (); + } } adjust_vec.release (); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 46d126c..f1d6bdd 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -814,7 +814,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) vec_outside_cost (0), vec_inside_cost (0), vectorizable (false), - can_use_partial_vectors_p (true), + can_use_partial_vectors_p (param_vect_partial_vector_usage != 0), using_partial_vectors_p (false), epil_using_partial_vectors_p (false), peeling_for_gaps (false), @@ -2003,22 +2003,123 @@ vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo) } } +/* Determine if operating on full vectors for LOOP_VINFO might leave + some scalar iterations still to do. If so, decide how we should + handle those scalar iterations. The possibilities are: -/* Decides whether we need to create an epilogue loop to handle - remaining scalar iterations and sets PEELING_FOR_NITERS accordingly. */ + (1) Make LOOP_VINFO operate on partial vectors instead of full vectors. + In this case: -void -determine_peel_for_niter (loop_vec_info loop_vinfo) + LOOP_VINFO_USING_PARTIAL_VECTORS_P == true + LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false + LOOP_VINFO_PEELING_FOR_NITER == false + + (2) Make LOOP_VINFO operate on full vectors and use an epilogue loop + to handle the remaining scalar iterations. In this case: + + LOOP_VINFO_USING_PARTIAL_VECTORS_P == false + LOOP_VINFO_PEELING_FOR_NITER == true + + There are two choices: + + (2a) Consider vectorizing the epilogue loop at the same VF as the + main loop, but using partial vectors instead of full vectors. + In this case: + + LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == true + + (2b) Consider vectorizing the epilogue loop at lower VFs only. 
+ In this case: + + LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P == false + + When FOR_EPILOGUE_P is true, make this determination based on the + assumption that LOOP_VINFO is an epilogue loop, otherwise make it + based on the assumption that LOOP_VINFO is the main loop. The caller + has made sure that the number of iterations is set appropriately for + this value of FOR_EPILOGUE_P. */ + +opt_result +vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo, + bool for_epilogue_p) { - LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; + /* Determine whether there would be any scalar iterations left over. */ + bool need_peeling_or_partial_vectors_p + = vect_need_peeling_or_partial_vectors_p (loop_vinfo); - if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) - /* The main loop handles all iterations. */ - LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; - else if (vect_need_peeling_or_partial_vectors_p (loop_vinfo)) - LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; -} + /* Decide whether to vectorize the loop with partial vectors. */ + LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; + LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; + if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) + && need_peeling_or_partial_vectors_p) + { + /* For partial-vector-usage=1, try to push the handling of partial + vectors to the epilogue, with the main loop continuing to operate + on full vectors. + + ??? We could then end up failing to use partial vectors if we + decide to peel iterations into a prologue, and if the main loop + then ends up processing fewer than VF iterations. 
*/ + if (param_vect_partial_vector_usage == 1 + && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) + && !vect_known_niters_smaller_than_vf (loop_vinfo)) + LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true; + else + LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true; + } + + if (dump_enabled_p ()) + { + if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) + dump_printf_loc (MSG_NOTE, vect_location, + "operating on partial vectors%s.\n", + for_epilogue_p ? " for epilogue loop" : ""); + else + dump_printf_loc (MSG_NOTE, vect_location, + "operating only on full vectors%s.\n", + for_epilogue_p ? " for epilogue loop" : ""); + } + if (for_epilogue_p) + { + loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); + gcc_assert (orig_loop_vinfo); + if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) + gcc_assert (known_lt (LOOP_VINFO_VECT_FACTOR (loop_vinfo), + LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo))); + } + + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) + { + /* Check that the loop processes at least one full vector. */ + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo); + if (known_lt (wi::to_widest (scalar_niters), vf)) + return opt_result::failure_at (vect_location, + "loop does not have enough iterations" + " to support vectorization.\n"); + + /* If we need to peel an extra epilogue iteration to handle data + accesses with gaps, check that there are enough scalar iterations + available. + + The check above is redundant with this one when peeling for gaps, + but the distinction is useful for diagnostics. 
*/ + tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo); + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) + && known_lt (wi::to_widest (scalar_nitersm1), vf)) + return opt_result::failure_at (vect_location, + "loop does not have enough iterations" + " to support peeling for gaps.\n"); + } + + LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) + = (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) + && need_peeling_or_partial_vectors_p); + + return opt_result::success (); +} /* Function vect_analyze_loop_2. @@ -2272,72 +2373,32 @@ start_over: LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } - /* Decide whether to vectorize a loop with partial vectors for - this vectorization factor. */ - if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) - { - /* Don't use partial vectors if we don't need to peel the loop. */ - if (param_vect_partial_vector_usage == 0 - || !vect_need_peeling_or_partial_vectors_p (loop_vinfo)) - LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; - else if (vect_verify_full_masking (loop_vinfo) - || vect_verify_loop_lens (loop_vinfo)) - { - /* The epilogue and other known niters less than VF - cases can still use vector access with length fully. 
*/ - if (param_vect_partial_vector_usage == 1 - && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) - && !vect_known_niters_smaller_than_vf (loop_vinfo)) - { - LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; - LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo) = true; - } - else - LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = true; - } - else - LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; - } - else - LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) = false; - - if (dump_enabled_p ()) - { - if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) - dump_printf_loc (MSG_NOTE, vect_location, - "operating on partial vectors.\n"); - else - dump_printf_loc (MSG_NOTE, vect_location, - "operating only on full vectors.\n"); - } - - /* If epilog loop is required because of data accesses with gaps, - one additional iteration needs to be peeled. Check if there is - enough iterations for vectorization. */ - if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) - && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) - { - poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); - tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo); - - if (known_lt (wi::to_widest (scalar_niters), vf)) - return opt_result::failure_at (vect_location, - "loop has no enough iterations to" - " support peeling for gaps.\n"); - } + /* If we still have the option of using partial vectors, + check whether we can generate the necessary loop controls. */ + if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) + && !vect_verify_full_masking (loop_vinfo) + && !vect_verify_loop_lens (loop_vinfo)) + LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; /* If we're vectorizing an epilogue loop, the vectorized loop either needs to be able to handle fewer than VF scalars, or needs to have a lower VF than the main loop. 
*/ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo) - && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo) + && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo), LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo))) return opt_result::failure_at (vect_location, "Vectorization factor too high for" " epilogue loop.\n"); + /* Decide whether this loop_vinfo should use partial vectors or peeling, + assuming that the loop will be used as a main loop. We will redo + this analysis later if we instead decide to use the loop as an + epilogue loop. */ + ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false); + if (!ok) + return ok; + /* Check the costings of the loop make vectorizing worthwhile. */ res = vect_analyze_loop_costing (loop_vinfo); if (res < 0) @@ -2350,7 +2411,6 @@ start_over: return opt_result::failure_at (vect_location, "Loop costings not worthwhile.\n"); - determine_peel_for_niter (loop_vinfo); /* If an epilogue loop is required make sure we can create one. */ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 9dffc55..b7fa6bc 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1967,7 +1967,8 @@ extern tree vect_create_addr_base_for_vector_ref (vec_info *, extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo); bool vect_rgroup_iv_might_wrap_p (loop_vec_info, rgroup_controls *); /* Used in tree-vect-loop-manip.c */ -extern void determine_peel_for_niter (loop_vec_info); +extern opt_result vect_determine_partial_vectors_and_peeling (loop_vec_info, + bool); /* Used in gimple-loop-interchange.c and tree-parloops.c. 
*/ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, enum tree_code); -- cgit v1.1 From bc909324bda71543add2229adfa59d8daff5f0db Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 23 Sep 2020 14:20:44 +0200 Subject: middle-end/96466 - fix VEC_COND isel/expansion issue We need to avoid forcing BLKmode for truth vectors, instead do as other code and use VOIDmode so layout_type can pick a suitable and consistent mode. RTL expansion of vect_cond_mask also needs to deal with CONST_INT operands which means passing the mode explicitly. 2020-09-23 Richard Biener PR middle-end/96466 * internal-fn.c (expand_vect_cond_mask_optab_fn): Use appropriate mode for force_reg. * tree.c (build_truth_vector_type_for): Pass VOIDmode to make_vector_type. * gcc.dg/pr96466.c: New testcase. --- gcc/internal-fn.c | 2 +- gcc/testsuite/gcc.dg/pr96466.c | 19 +++++++++++++++++++ gcc/tree.c | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr96466.c (limited to 'gcc') diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 8efc77d..6cf2687 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -2644,7 +2644,7 @@ expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab) rtx_op2 = expand_normal (op2); mask = force_reg (mask_mode, mask); - rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1); + rtx_op1 = force_reg (mode, rtx_op1); rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); create_output_operand (&ops[0], target, mode); diff --git a/gcc/testsuite/gcc.dg/pr96466.c b/gcc/testsuite/gcc.dg/pr96466.c new file mode 100644 index 0000000..a8840f5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr96466.c @@ -0,0 +1,19 @@ +/* PR tree-optimization/96466 */ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-Og -finline-functions-called-once -fno-tree-ccp" } */ + +typedef unsigned long __attribute__ ((__vector_size__ (8))) V; + +V +bar (unsigned long x, V v) 
+{ + v &= x >= v; + return (V) v; +} + +V +foo (void) +{ + return bar (5, (V) 4441221375); +} diff --git a/gcc/tree.c b/gcc/tree.c index 4046deb..a1fc119 100644 --- a/gcc/tree.c +++ b/gcc/tree.c @@ -10952,7 +10952,7 @@ build_truth_vector_type_for (tree vectype) unsigned HOST_WIDE_INT esize = vector_element_size (vsize, nunits); tree bool_type = build_nonstandard_boolean_type (esize); - return make_vector_type (bool_type, nunits, BLKmode); + return make_vector_type (bool_type, nunits, VOIDmode); } /* Like build_vector_type, but builds a variant type with TYPE_VECTOR_OPAQUE -- cgit v1.1 From 67c935c8232f6fe96a4be2dc27287b7ace839c67 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Wed, 23 Sep 2020 15:12:18 +0200 Subject: Minor modref optimization and statistics fix this patch fixes a bug in tracking memory stats and also I have noticed that while the pass takes care to stop tracking things when things are obviously out of hand it still keeps summaries that have no useful info for loads or stores and also many summaries are just copying const/pure attributes. This patch thus also adds logic to detect if summary is useful and drop it early otherwise. This reduces number of queries to the oracle and saves memory/lto streaming. 
For cc1plus LTO build (configured with --disable-plugin --enable-checking=release --with-build-config=lto) I now get: Alias oracle query stats: refs_may_alias_p: 62488734 disambiguations, 72660949 queries ref_maybe_used_by_call_p: 128863 disambiguations, 63393551 queries call_may_clobber_ref_p: 16013 disambiguations, 21776 queries nonoverlapping_component_refs_p: 0 disambiguations, 37628 queries nonoverlapping_refs_since_match_p: 19397 disambiguations, 55370 must overlaps, 75516 queries aliasing_component_refs_p: 54741 disambiguations, 752198 queries TBAA oracle: 21632692 disambiguations 52565147 queries 15656420 are in alias set 0 10108172 queries asked about the same object 124 queries asked about the same alias set 0 access volatile 3640460 are dependent in the DAG 1527279 are aritificially in conflict with void * Modref stats: modref use: 5712 disambiguations, 31221 queries modref clobber: 684316 disambiguations, 1010000 queries 1779717 tbaa queries (1.762096 per modref query) PTA query stats: pt_solution_includes: 947334 disambiguations, 13601373 queries pt_solutions_intersect: 1011662 disambiguations, 13139565 queries The number of queries should change, but the number of disambiguations should not. However comparing with stats here https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554309.html I see about 50% drop in clobber disambiguations. There is however the same drop in other alias oracle stats. I suppose something changed in the meantime on mainline because I was basing that on an older tree. I tried to proofread changes between mainline and branch and they seem all quite obvious. 
This is consistent with what I get on tramp3d: Alias oracle query stats: refs_may_alias_p: 2051320 disambiguations, 2312132 queries ref_maybe_used_by_call_p: 7058 disambiguations, 2088222 queries call_may_clobber_ref_p: 232 disambiguations, 232 queries nonoverlapping_component_refs_p: 0 disambiguations, 4339 queries nonoverlapping_refs_since_match_p: 329 disambiguations, 10200 must overlaps, 10616 queries aliasing_component_refs_p: 857 disambiguations, 34639 queries TBAA oracle: 886768 disambiguations 1670635 queries 131572 are in alias set 0 461689 queries asked about the same object 0 queries asked about the same alias set 0 access volatile 190291 are dependent in the DAG 315 are aritificially in conflict with void * Modref stats: modref use: 430 disambiguations, 1885 queries modref clobber: 9657 disambiguations, 16076 queries 19027 tbaa queries (1.183566 per modref query) PTA query stats: pt_solution_includes: 311756 disambiguations, 524179 queries pt_solutions_intersect: 129689 disambiguations, 415878 queries In both cases the number of disambiguations should be the same (queries are not comparable). Bootstrapped/regtested x86_64-linux, committed. gcc/ChangeLog: 2020-09-23 Jan Hubicka * ipa-modref.c (modref_summary::lto_useful_p): New member function. (modref_summary::useful_p): New member function. (analyze_function): Drop useless summaries. (modref_write): Skip useless summaries. (pass_ipa_modref::execute): Drop useless summaries. * ipa-modref.h (struct GTY): Declare useful_p and lto_useful_p. * tree-ssa-alias.c (dump_alias_stats): Fix. (modref_may_conflict): Fix stats. 
--- gcc/ipa-modref.c | 50 +++++++++++++++++++++++++++++++++++++++++++------- gcc/ipa-modref.h | 2 ++ gcc/tree-ssa-alias.c | 6 +++--- 3 files changed, 48 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index fe277d8..3e65159 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -106,6 +106,36 @@ modref_summary::~modref_summary () ggc_delete (stores_lto); } +/* Return true if lto summary is potentially useful for optimization. */ + +bool +modref_summary::lto_useful_p (int ecf_flags) +{ + if (ecf_flags & (ECF_CONST | ECF_NOVOPS)) + return false; + if (loads_lto && !loads_lto->every_base) + return true; + if (ecf_flags & ECF_PURE) + return false; + return stores_lto && !stores_lto->every_base; +} + +/* Return true if summary is potentially useful for optimization. */ + +bool +modref_summary::useful_p (int ecf_flags) +{ + if (ecf_flags & (ECF_CONST | ECF_NOVOPS)) + return false; + if (lto_useful_p (ecf_flags)) + return true; + if (loads && !loads->every_base) + return true; + if (ecf_flags & ECF_PURE) + return false; + return stores && !stores->every_base; +} + /* Dump records TT to OUT. */ static void @@ -588,8 +618,10 @@ static void analyze_function (function *f, bool ipa) { if (dump_file) - fprintf (dump_file, "modref analyzing '%s' (ipa=%i)...\n", - function_name (f), ipa); + fprintf (dump_file, "modref analyzing '%s' (ipa=%i)%s%s\n", + function_name (f), ipa, + TREE_READONLY (current_function_decl) ? " (const)" : "", + DECL_PURE_P (current_function_decl) ? " (pure)" : ""); /* Don't analyze this function if it's compiled with -fno-strict-aliasing. */ if (!flag_ipa_modref) @@ -646,6 +678,7 @@ analyze_function (function *f, bool ipa) param_modref_max_refs); } summary->finished = false; + int ecf_flags = flags_from_decl_or_type (current_function_decl); /* Analyze each statement in each basic block of the function. 
If the statement cannot be analyzed (for any reason), the entire function cannot @@ -656,7 +689,8 @@ analyze_function (function *f, bool ipa) gimple_stmt_iterator si; for (si = gsi_after_labels (bb); !gsi_end_p (si); gsi_next (&si)) { - if (!analyze_stmt (summary, gsi_stmt (si), ipa)) + if (!analyze_stmt (summary, gsi_stmt (si), ipa) + || !summary->useful_p (ecf_flags)) { cgraph_node *fnode = cgraph_node::get (current_function_decl); summaries->remove (fnode); @@ -927,9 +961,11 @@ modref_write () { symtab_node *snode = lto_symtab_encoder_deref (encoder, i); cgraph_node *cnode = dyn_cast (snode); + modref_summary *r; if (cnode && cnode->definition && !cnode->alias - && summaries->get (cnode)) + && (r = summaries->get (cnode)) + && r->lto_useful_p (flags_from_decl_or_type (cnode->decl))) count++; } streamer_write_uhwi (ob, count); @@ -944,7 +980,7 @@ modref_write () modref_summary *r = summaries->get (cnode); - if (!r) + if (!r || !r->lto_useful_p (flags_from_decl_or_type (cnode->decl))) continue; streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode)); @@ -1233,7 +1269,7 @@ unsigned int pass_ipa_modref::execute (function *) if (dump_file) fprintf (dump_file, " Call to %s\n", - cur->dump_name ()); + callee_edge->callee->dump_name ()); /* We can not safely optimize based on summary of callee if it does not always bind to current def: it is possible that @@ -1278,7 +1314,7 @@ unsigned int pass_ipa_modref::execute (function *) its_hopeless = true; if (dump_file && avail <= AVAIL_INTERPOSABLE) fprintf (dump_file, " Call target interposable" - "or not available\n"); + " or not available\n"); else if (dump_file) fprintf (dump_file, " No call target summary\n"); break; diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h index 6cccdfe..152e715 100644 --- a/gcc/ipa-modref.h +++ b/gcc/ipa-modref.h @@ -41,6 +41,8 @@ struct GTY(()) modref_summary modref_summary (); ~modref_summary (); void dump (FILE *); + bool useful_p (int ecf_flags); + bool lto_useful_p (int 
ecf_flags); }; modref_summary *get_modref_function_summary (cgraph_node *func); diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c index be4d446..18ff529 100644 --- a/gcc/tree-ssa-alias.c +++ b/gcc/tree-ssa-alias.c @@ -170,7 +170,7 @@ dump_alias_stats (FILE *s) fprintf (s, " modref clobber: " HOST_WIDE_INT_PRINT_DEC" disambiguations, " HOST_WIDE_INT_PRINT_DEC" queries\n " - HOST_WIDE_INT_PRINT_DEC" tbaa querries (%f per modref querry)\n", + HOST_WIDE_INT_PRINT_DEC" tbaa queries (%f per modref query)\n", alias_stats.modref_clobber_no_alias, alias_stats.modref_clobber_no_alias + alias_stats.modref_clobber_may_alias, @@ -2448,9 +2448,9 @@ modref_may_conflict (modref_tree *tt, ao_ref *ref, bool tbaa_p) if (tbaa_p && flag_strict_aliasing) { + alias_stats.modref_tests++; if (!alias_sets_conflict_p (base_set, base_node->base)) continue; - alias_stats.modref_tests++; num_tests++; } else @@ -2465,9 +2465,9 @@ modref_may_conflict (modref_tree *tt, ao_ref *ref, bool tbaa_p) return true; if (!flag_strict_aliasing) return true; + alias_stats.modref_tests++; if (alias_sets_conflict_p (ref_set, ref_node->ref)) return true; - alias_stats.modref_tests++; num_tests++; if (num_tests >= max_tests) return true; -- cgit v1.1 From 3457dae55f72bd4ac0f346bbebb02d1613ac4b5c Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 23 Sep 2020 15:03:31 +0200 Subject: middle-end/96453 - relax gimple_expand_vec_cond_expr This relaxes the condition under which we also try NE_EXPR for a fake generated compare in addition to LT_EXPR given the fact the verification ICEd when it failed but obviously was only implemented for constants. Thus the patch removes the verification and the restriction to constant operands. 2020-09-23 Richard Biener PR middle-end/96453 * gimple-isel.cc (gimple_expand_vec_cond_expr): Remove LT_EXPR -> NE_EXPR verification and also apply it for non-constant masks. * gcc.dg/pr96453.c: New testcase. 
--- gcc/gimple-isel.cc | 15 ++------------- gcc/testsuite/gcc.dg/pr96453.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr96453.c (limited to 'gcc') diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc index b330cf4..9792263 100644 --- a/gcc/gimple-isel.cc +++ b/gcc/gimple-isel.cc @@ -138,22 +138,11 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, if (icode == CODE_FOR_nothing) { if (tcode == LT_EXPR - && op0a == op0 - && TREE_CODE (op0) == VECTOR_CST) + && op0a == op0) { /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR into a constant when only get_vcond_eq_icode is supported. - Verify < 0 and != 0 behave the same and change it to NE_EXPR. */ - unsigned HOST_WIDE_INT nelts; - if (!VECTOR_CST_NELTS (op0).is_constant (&nelts)) - { - if (VECTOR_CST_STEPPED_P (op0)) - gcc_unreachable (); - nelts = vector_cst_encoded_nelts (op0); - } - for (unsigned int i = 0; i < nelts; ++i) - if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1) - gcc_unreachable (); + Try changing it to NE_EXPR. 
*/ tcode = NE_EXPR; } if (tcode == EQ_EXPR || tcode == NE_EXPR) diff --git a/gcc/testsuite/gcc.dg/pr96453.c b/gcc/testsuite/gcc.dg/pr96453.c new file mode 100644 index 0000000..f758e7e --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr96453.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-Og -fno-early-inlining -fno-tree-ccp -fno-tree-dce" } */ +/* { dg-additional-options "-mavx -mno-sse4.2" { target x86_64-*-* i?86-*-* } } */ + +typedef int __attribute__ ((__vector_size__ (16))) U; +typedef unsigned long __attribute__ ((__vector_size__ (16))) V; + +static inline int +bar (unsigned long e, V f) +{ + V g = f != e; + (union {U b;}){(U) g}; +} + +void +foo (void) +{ + int j = bar (8, (V) { }); + for (unsigned i;; i[&j]) + ; +} -- cgit v1.1 From c48ffe21f8f3478cf08f9442e3f973df358caf2a Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Sun, 20 Sep 2020 16:11:00 -0400 Subject: c: Fix -Wduplicated-branches ICE [PR97125] We crash here because since r11-3302 the C FE uses codes like SWITCH_STMT in the else branches in the attached test, and inchash::add_expr in do_warn_duplicated_branches doesn't handle these front-end codes. In the C++ FE this works because by the time we get to do_warn_duplicated_branches we've already cp_genericize'd the SWITCH_STMT tree into a SWITCH_EXPR. The fix is to call do_warn_duplicated_branches_r only after loops and other structured control constructs have been lowered. gcc/c-family/ChangeLog: PR c/97125 * c-gimplify.c (c_genericize): Only call do_warn_duplicated_branches_r after loops and other structured control constructs have been lowered. gcc/testsuite/ChangeLog: PR c/97125 * c-c++-common/Wduplicated-branches-15.c: New test. 
--- gcc/c-family/c-gimplify.c | 8 +++--- .../c-c++-common/Wduplicated-branches-15.c | 32 ++++++++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/Wduplicated-branches-15.c (limited to 'gcc') diff --git a/gcc/c-family/c-gimplify.c b/gcc/c-family/c-gimplify.c index 8b326c9..d1e3915 100644 --- a/gcc/c-family/c-gimplify.c +++ b/gcc/c-family/c-gimplify.c @@ -533,10 +533,6 @@ c_genericize (tree fndecl) &pset); } - if (warn_duplicated_branches) - walk_tree_without_duplicates (&DECL_SAVED_TREE (fndecl), - do_warn_duplicated_branches_r, NULL); - /* Genericize loops and other structured control constructs. The C++ front end has already done this in lang-specific code. */ if (!c_dialect_cxx ()) @@ -550,6 +546,10 @@ c_genericize (tree fndecl) pop_cfun (); } + if (warn_duplicated_branches) + walk_tree_without_duplicates (&DECL_SAVED_TREE (fndecl), + do_warn_duplicated_branches_r, NULL); + /* Dump the C-specific tree IR. */ dump_orig = get_dump_info (TDI_original, &local_dump_flags); if (dump_orig) diff --git a/gcc/testsuite/c-c++-common/Wduplicated-branches-15.c b/gcc/testsuite/c-c++-common/Wduplicated-branches-15.c new file mode 100644 index 0000000..d494360 --- /dev/null +++ b/gcc/testsuite/c-c++-common/Wduplicated-branches-15.c @@ -0,0 +1,32 @@ +/* PR c/97125 */ +/* { dg-do compile } */ +/* { dg-options "-Wduplicated-branches" } */ + +void foo (void); + +void +fn1 (void) +{ + if (0) + foo (); + else + switch (0); +} + +void +fn2 (void) +{ + if (0) + foo (); + else + while (0); +} + +void +fn3 (void) +{ + if (0) + foo (); + else + for (;;); +} -- cgit v1.1 From 13f7c5d504b5fe6c233f2e68acd10bd4fc9007ac Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Wed, 23 Sep 2020 07:01:10 -0700 Subject: c++: dependent local extern decl ICE [PR97171] I'd missed the piece of substitution for the uses of a local extern decl. Just grab the local specialization. 
We need to do this regardless of dependentness because we always cloned the local extern. PR c++/97171 gcc/cp/ * pt.c (tsubst_copy) [FUNCTION_DECL,VAR_DECL]: Retrieve local specialization for DECL_LOCAL_P decls. gcc/testsuite/ * g++.dg/template/local10.C: New. --- gcc/cp/pt.c | 8 ++++++++ gcc/testsuite/g++.dg/template/local10.C | 15 +++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 gcc/testsuite/g++.dg/template/local10.C (limited to 'gcc') diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 314bd03..1ec039d 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -16531,6 +16531,14 @@ tsubst_copy (tree t, tree args, tsubst_flags_t complain, tree in_decl) case FUNCTION_DECL: if (DECL_LANG_SPECIFIC (t) && DECL_TEMPLATE_INFO (t)) r = tsubst (t, args, complain, in_decl); + else if (DECL_LOCAL_DECL_P (t)) + { + /* Local specialization will have been created when we + instantiated the DECL_EXPR_DECL. */ + r = retrieve_local_specialization (t); + if (!r) + r = error_mark_node; + } else if (local_variable_p (t) && uses_template_parms (DECL_CONTEXT (t))) { diff --git a/gcc/testsuite/g++.dg/template/local10.C b/gcc/testsuite/g++.dg/template/local10.C new file mode 100644 index 0000000..a2ffc1e --- /dev/null +++ b/gcc/testsuite/g++.dg/template/local10.C @@ -0,0 +1,15 @@ +// PR c++/97171 +// { dg-additional-options -flto } + +template +void transform(_UnaryOperation); + +template +void Apply () +{ + extern T Maker (void); // block-scope extern with dependent type + + transform (Maker); +} + +template void Apply (); -- cgit v1.1 From c9d56eb777552ac5ee0c281e1f6e34b6fe929b77 Mon Sep 17 00:00:00 2001 From: Alex Coplan Date: Wed, 23 Sep 2020 15:20:19 +0100 Subject: aarch64: Add support for Neoverse V1 CPU This adds support for Arm's Neoverse V1 CPU to the AArch64 backend. --- gcc/ChangeLog: * config/aarch64/aarch64-cores.def: Add Neoverse V1. * config/aarch64/aarch64-tune.md: Regenerate. * doc/invoke.texi: Document support for Neoverse V1. 
--- gcc/config/aarch64/aarch64-cores.def | 1 + gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/doc/invoke.texi | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index f30ff35..04dc587 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -134,6 +134,7 @@ AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_ /* Arm ('A') cores. */ AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversen1, 0x41, 0xd40, -1) +AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversen1, 0x41, 0xd40, -1) /* Qualcomm ('Q') cores. 
*/ AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 0e3239c..729eb3e 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index f726ff4..1380146 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -17505,7 +17505,8 @@ performance of the code. 
Permissible values for this option are: @samp{cortex-a76}, @samp{cortex-a76ae}, @samp{cortex-a77}, @samp{cortex-a65}, @samp{cortex-a65ae}, @samp{cortex-a34}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, -@samp{neoverse-e1},@samp{neoverse-n1},@samp{qdf24xx}, @samp{saphira}, +@samp{neoverse-e1},@samp{neoverse-n1},@samp{neoverse-v1},@samp{qdf24xx}, +@samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan}, @samp{octeontx}, @samp{octeontx81}, @samp{octeontx83}, @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96} -- cgit v1.1 From da13b7737662da11f8fefb28eaf4ed7c50c51767 Mon Sep 17 00:00:00 2001 From: Alex Coplan Date: Wed, 23 Sep 2020 15:21:00 +0100 Subject: arm: Add support for Neoverse V1 CPU This adds support for Arm's Neoverse V1 CPU to the AArch32 backend. --- gcc/ChangeLog: * config/arm/arm-cpus.in (neoverse-v1): New. * config/arm/arm-tables.opt: Regenerate. * config/arm/arm-tune.md: Regenerate. * doc/invoke.texi: Document support for Neoverse V1. --- gcc/config/arm/arm-cpus.in | 10 ++++++++++ gcc/config/arm/arm-tables.opt | 3 +++ gcc/config/arm/arm-tune.md | 4 ++-- gcc/doc/invoke.texi | 6 +++--- 4 files changed, 18 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index c98f8ed..4550694 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -1478,6 +1478,16 @@ begin cpu cortex-a76.cortex-a55 costs cortex_a57 end cpu cortex-a76.cortex-a55 +# Armv8.4 A-profile Architecture Processors +begin cpu neoverse-v1 + cname neoversev1 + tune for cortex-a57 + tune flags LDSCHED + architecture armv8.4-a+bf16+i8mm + option crypto add FP_ARMv8 CRYPTO + costs cortex_a57 +end cpu neoverse-v1 + # V8 M-profile implementations. 
begin cpu cortex-m23 cname cortexm23 diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index ce35661..1a7c319 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -250,6 +250,9 @@ EnumValue Enum(processor_type) String(cortex-a76.cortex-a55) Value( TARGET_CPU_cortexa76cortexa55) EnumValue +Enum(processor_type) String(neoverse-v1) Value( TARGET_CPU_neoversev1) + +EnumValue Enum(processor_type) String(cortex-m23) Value( TARGET_CPU_cortexm23) EnumValue diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 8ea9435..3874f42 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -46,6 +46,6 @@ cortexa73cortexa53,cortexa55,cortexa75, cortexa76,cortexa76ae,cortexa77, neoversen1,cortexa75cortexa55,cortexa76cortexa55, - cortexm23,cortexm33,cortexm35p, - cortexm55,cortexr52" + neoversev1,cortexm23,cortexm33, + cortexm35p,cortexm55,cortexr52" (const (symbol_ref "((enum attr_tune) arm_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1380146..c17e5c6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -19353,9 +19353,9 @@ Permissible names are: @samp{arm7tdmi}, @samp{arm7tdmi-s}, @samp{arm710t}, @samp{cortex-m35p}, @samp{cortex-m55}, @samp{cortex-m1.small-multiply}, @samp{cortex-m0.small-multiply}, @samp{cortex-m0plus.small-multiply}, @samp{exynos-m1}, @samp{marvell-pj4}, -@samp{neoverse-n1}, @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, -@samp{ep9312}, @samp{fa526}, @samp{fa626}, @samp{fa606te}, @samp{fa626te}, -@samp{fmp626}, @samp{fa726te}, @samp{xgene1}. +@samp{neoverse-n1}, @samp{neoverse-v1}, @samp{xscale}, @samp{iwmmxt}, +@samp{iwmmxt2}, @samp{ep9312}, @samp{fa526}, @samp{fa626}, @samp{fa606te}, +@samp{fa626te}, @samp{fmp626}, @samp{fa726te}, @samp{xgene1}. Additionally, this option can specify that GCC should tune the performance of the code for a big.LITTLE system. 
Permissible names are: -- cgit v1.1 From c80ee302a8cfff18c5a14f5ca21b9ae35c4cab2e Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Wed, 23 Sep 2020 08:13:25 -0700 Subject: c++: Remove some gratuitous typedefing This is C++, we don't need 'typedef struct foo foo;'. Oh, and bool bitfields are a thing. gcc/cp/ * name-lookup.h (typedef cxx_binding): Delete tdef. (typedef cp_binding_level): Likewise. (struct cxx_binding): Flags are bools. --- gcc/cp/name-lookup.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h index a0815e1..5d2d364f 100644 --- a/gcc/cp/name-lookup.h +++ b/gcc/cp/name-lookup.h @@ -47,12 +47,8 @@ struct GTY(()) binding_entry_s { extern void binding_table_foreach (binding_table, bt_foreach_proc, void *); extern binding_entry binding_table_find (binding_table, tree); -/* Datatype that represents binding established by a declaration between - a name and a C++ entity. */ -typedef struct cxx_binding cxx_binding; - /* The datatype used to implement C++ scope. */ -typedef struct cp_binding_level cp_binding_level; +struct cp_binding_level; /* Nonzero if this binding is for a local scope, as opposed to a class or namespace scope. */ @@ -62,6 +58,8 @@ typedef struct cp_binding_level cp_binding_level; currently being defined. */ #define INHERITED_VALUE_BINDING_P(NODE) ((NODE)->value_is_inherited) +/* Datatype that represents binding established by a declaration between + a name and a C++ entity. */ struct GTY(()) cxx_binding { /* Link to chain together various bindings for this name. */ cxx_binding *previous; @@ -71,8 +69,9 @@ struct GTY(()) cxx_binding { tree type; /* The scope at which this binding was made. 
*/ cp_binding_level *scope; - unsigned value_is_inherited : 1; - unsigned is_local : 1; + + bool value_is_inherited : 1; + bool is_local : 1; }; /* Datatype used to temporarily save C++ bindings (for implicit -- cgit v1.1 From 65c9878641cbe0ed898aa7047b7b994e9d4a5bb1 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Wed, 23 Sep 2020 17:37:58 +0100 Subject: AArch64: Implement missing p128<->f64 reinterpret intrinsics This patch implements the missing reinterprets to and from poly128_t and float64x2_t. I've plugged in the appropriate testing in the advsimd-intrinsics.exp too. Bootstrapped and tested on aarch64-none-linux-gnu. Tested advsimd-intrinsics.exp on arm-none-eabi too to make sure arm testing isn't affected. gcc/ PR target/71233 * config/aarch64/arm_neon.h (vreinterpretq_f64_p128, vreinterpretq_p128_f64): Define. gcc/testsuite/ PR target/71233 * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h (clean_results): Add float64x2_t cleanup. (DECL_VARIABLE_128BITS_VARIANTS): Add float64x2_t variable. * gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c: Add testing of vreinterpretq_f64_p128, vreinterpretq_p128_f64. 
--- gcc/config/aarch64/arm_neon.h | 14 ++++++++++++++ .../aarch64/advsimd-intrinsics/arm-neon-ref.h | 8 ++++++-- .../aarch64/advsimd-intrinsics/vreinterpret_p128.c | 19 +++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index b3c9b64..9a970e7 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -6088,6 +6088,20 @@ vreinterpretq_u32_p128 (poly128_t __a) return (uint32x4_t)__a; } +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_f64_p128 (poly128_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ extern __inline poly128_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_p128_f64 (float64x2_t __a) +{ + return (poly128_t) __a; +} + /* vset_lane */ __extension__ extern __inline float16x4_t diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h index fde6029..791972c 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h @@ -460,6 +460,8 @@ static void clean_results (void) #endif CLEAN(result, float, 32, 4); + AARCH64_ONLY(CLEAN(result, float, 64, 2)); + #if defined(__aarch64__) /* On AArch64, make sure to return DefaultNaN to have the same results as on AArch32. 
*/ @@ -544,7 +546,8 @@ static void clean_results (void) DECL_VARIABLE(VAR, poly, 16, 8); \ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \ DECL_VARIABLE(VAR, float, 16, 8); \ - DECL_VARIABLE(VAR, float, 32, 4) + DECL_VARIABLE(VAR, float, 32, 4); \ + AARCH64_ONLY(DECL_VARIABLE(VAR, float, 64, 2)) #else #define DECL_VARIABLE_128BITS_VARIANTS(VAR) \ DECL_VARIABLE_128BITS_SIGNED_VARIANTS(VAR); \ @@ -552,7 +555,8 @@ static void clean_results (void) DECL_VARIABLE(VAR, poly, 8, 16); \ DECL_VARIABLE(VAR, poly, 16, 8); \ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \ - DECL_VARIABLE(VAR, float, 32, 4) + DECL_VARIABLE(VAR, float, 32, 4); \ + AARCH64_ONLY(DECL_VARIABLE(VAR, float, 64, 2)) #endif /* Declare all variants. */ #define DECL_VARIABLE_ALL_VARIANTS(VAR) \ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c index 25b3482..67f809c 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c @@ -33,6 +33,10 @@ VECT_VAR_DECL(vreint_expected_q_p128_f32,poly,64,2) [] = { 0xc1700000c1800000, 0xc1500000c1600000 }; VECT_VAR_DECL(vreint_expected_q_p128_f16,poly,64,2) [] = { 0xca80cb00cb80cc00, 0xc880c900c980ca00 }; +#ifdef __aarch64__ +VECT_VAR_DECL(vreint_expected_q_p128_f64,poly,64,2) [] = { 0xc030000000000000, + 0xc02e000000000000 }; +#endif /* Expected results: vreinterpretq_*_p128. 
*/ VECT_VAR_DECL(vreint_expected_q_s8_p128,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, @@ -75,6 +79,10 @@ VECT_VAR_DECL(vreint_expected_q_f16_p128,hfloat,16,8) [] = { 0xfff0, 0xffff, 0xffff, 0xffff, 0xfff1, 0xffff, 0xffff, 0xffff }; +#ifdef __aarch64__ +VECT_VAR_DECL(vreint_expected_q_f64_p128,hfloat,64,2) [] = { 0xfffffffffffffff0, + 0xfffffffffffffff1 }; +#endif int main (void) { @@ -90,6 +98,10 @@ int main (void) #endif VLOAD(vreint_vector, buffer, q, float, f, 32, 4); +#ifdef __aarch64__ + VLOAD(vreint_vector, buffer, q, float, f, 64, 2); +#endif + /* vreinterpretq_p128_* tests. */ #undef TEST_MSG #define TEST_MSG "VREINTERPRETQ_P128_*" @@ -121,6 +133,10 @@ int main (void) #endif TEST_VREINTERPRET128(q, poly, p, 128, 1, float, f, 32, 4, vreint_expected_q_p128_f32); +#ifdef __aarch64__ + TEST_VREINTERPRET128(q, poly, p, 128, 1, float, f, 64, 2, vreint_expected_q_p128_f64); +#endif + /* vreinterpretq_*_p128 tests. */ #undef TEST_MSG #define TEST_MSG "VREINTERPRETQ_*_P128" @@ -161,5 +177,8 @@ int main (void) #endif TEST_VREINTERPRET_FP_FROM_P128(q, float, f, 32, 4, poly, p, 128, 1, vreint_expected_q_f32_p128); +#ifdef __aarch64__ + TEST_VREINTERPRET_FP_FROM_P128(q, float, f, 64, 2, poly, p, 128, 1, vreint_expected_q_f64_p128); +#endif return 0; } -- cgit v1.1 From f65ebb5210e2fded0f7b339219685f4480124f0c Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 23 Sep 2020 11:18:43 -0400 Subject: analyzer: fix member call on null seen with ubsan [PR97178] gcc/analyzer/ChangeLog: PR analyzer/97178 * engine.cc (impl_run_checkers): Update for change to ext_state ctor. * program-state.cc (selftest::test_sm_state_map): Pass an engine instance to ext_state ctor. (selftest::test_program_state_1): Likewise. (selftest::test_program_state_2): Likewise. (selftest::test_program_state_merging): Likewise. (selftest::test_program_state_merging_2): Likewise. * program-state.h (extrinsic_state::extrinsic_state): Remove NULL default value for "eng" param. 
--- gcc/analyzer/engine.cc | 2 +- gcc/analyzer/program-state.cc | 12 +++++++----- gcc/analyzer/program-state.h | 4 ++-- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 4374297..b36c198 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -4464,7 +4464,7 @@ impl_run_checkers (logger *logger) } /* Extrinsic state shared by nodes in the graph. */ - const extrinsic_state ext_state (checkers, logger, &eng); + const extrinsic_state ext_state (checkers, &eng, logger); const analysis_plan plan (sg, logger); diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc index 83a6e5b..188fec0 100644 --- a/gcc/analyzer/program-state.cc +++ b/gcc/analyzer/program-state.cc @@ -1140,7 +1140,8 @@ test_sm_state_map () state_machine *sm = make_malloc_state_machine (NULL); auto_delete_vec checkers; checkers.safe_push (sm); - extrinsic_state ext_state (checkers); + engine eng; + extrinsic_state ext_state (checkers, &eng); state_machine::state_t start = sm->get_start_state (); /* Test setting states on svalue_id instances directly. 
*/ @@ -1272,7 +1273,7 @@ test_program_state_1 () checkers.safe_push (sm); engine eng; - extrinsic_state ext_state (checkers, NULL, &eng); + extrinsic_state ext_state (checkers, &eng); region_model_manager *mgr = eng.get_model_manager (); program_state s (ext_state); region_model *model = s.m_region_model; @@ -1301,7 +1302,7 @@ test_program_state_2 () auto_delete_vec checkers; engine eng; - extrinsic_state ext_state (checkers, NULL, &eng); + extrinsic_state ext_state (checkers, &eng); program_state s (ext_state); region_model *model = s.m_region_model; @@ -1324,7 +1325,7 @@ test_program_state_merging () auto_delete_vec checkers; checkers.safe_push (make_malloc_state_machine (NULL)); engine eng; - extrinsic_state ext_state (checkers, NULL, &eng); + extrinsic_state ext_state (checkers, &eng); region_model_manager *mgr = eng.get_model_manager (); program_state s0 (ext_state); @@ -1389,7 +1390,8 @@ test_program_state_merging_2 () program_point point (program_point::origin ()); auto_delete_vec checkers; checkers.safe_push (make_signal_state_machine (NULL)); - extrinsic_state ext_state (checkers); + engine eng; + extrinsic_state ext_state (checkers, &eng); const state_machine::state test_state_0 ("test state 0", 0); const state_machine::state test_state_1 ("test state 1", 1); diff --git a/gcc/analyzer/program-state.h b/gcc/analyzer/program-state.h index a52fbeb..094d256 100644 --- a/gcc/analyzer/program-state.h +++ b/gcc/analyzer/program-state.h @@ -29,8 +29,8 @@ class extrinsic_state { public: extrinsic_state (auto_delete_vec &checkers, - logger *logger = NULL, - engine *eng = NULL) + engine *eng, + logger *logger = NULL) : m_checkers (checkers), m_logger (logger), m_engine (eng) { } -- cgit v1.1 From 0f0b00033a71ff728d6fab6f9d674fb6b3ba4980 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 23 Sep 2020 19:21:56 +0100 Subject: aarch64: Add a couple of extra stack-protector tests These tests were inspired by corresponding arm ones. They already pass. 
gcc/testsuite/ * gcc.target/aarch64/stack-protector-3.c: New test. * gcc.target/aarch64/stack-protector-4.c: Likewise. --- .../gcc.target/aarch64/stack-protector-3.c | 45 ++++++++++++++++++++++ .../gcc.target/aarch64/stack-protector-4.c | 6 +++ 2 files changed, 51 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-3.c create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-4.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-3.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-3.c new file mode 100644 index 0000000..909e26c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-3.c @@ -0,0 +1,45 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fstack_protector } */ +/* { dg-options "-fstack-protector-all -O2" } */ + +extern volatile long *stack_chk_guard_ptr; + +void __attribute__ ((noipa)) +f (void) +{ + volatile int x; + /* Munging the contents of __stack_chk_guard should trigger a + stack-smashing failure for this function. 
*/ + *stack_chk_guard_ptr += 1; +} + +#define CHECK(REG) "\tcmp\tx0, " #REG "\n\tbeq\t1f\n" + +asm ( +" .pushsection .data\n" +" .align 3\n" +"stack_chk_guard_ptr:\n" +#if __ILP32__ +" .word __stack_chk_guard\n" +#else +" .xword __stack_chk_guard\n" +#endif +" .weak __stack_chk_guard\n" +"__stack_chk_guard:\n" +" .word 0xdead4321\n" +" .word 0xbeef8765\n" +" .text\n" +" .type __stack_chk_fail, %function\n" +"__stack_chk_fail:\n" +" mov x0, #0\n" +" b exit\n" +" .size __stack_chk_fail, .-__stack_chk_fail\n" +" .popsection" +); + +int +main (void) +{ + f (); + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-4.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-4.c new file mode 100644 index 0000000..6334dd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-4.c @@ -0,0 +1,6 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fstack_protector } */ +/* { dg-require-effective-target fpic } */ +/* { dg-options "-fstack-protector-all -O2 -fpic" } */ + +#include "stack-protector-3.c" -- cgit v1.1 From 74b27d8eedc7a4c0e8276345107790e6b3c023cb Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 23 Sep 2020 19:25:04 +0100 Subject: aarch64: Prevent canary address being spilled to stack This patch fixes the equivalent of arm bug PR85434/CVE-2018-12886 for aarch64: under high register pressure, the -fstack-protector code might spill the address of the canary onto the stack and reload it at the test site, giving an attacker the opportunity to change the expected canary value. This would happen in two cases: - when generating PIC for -mstack-protector-guard=global (tested by stack-protector-6.c). This is a direct analogue of PR85434, which was also about PIC for the global case. - when using -mstack-protector-guard=sysreg. The two problems were really separate bugs and caused by separate code, but it was more convenient to fix them together. 
The post-patch code still spills _GLOBAL_OFFSET_TABLE_ for stack-protector-6.c, which is a more general problem. However, it no longer spills the canary address itself. The patch also fixes an ICE when using -mstack-protector-guard=sysreg with ILP32: even if the register read is SImode, the address calculation itself should still be DImode. gcc/ * config/aarch64/aarch64-protos.h (aarch64_salt_type): New enum. (aarch64_stack_protect_canary_mem): Declare. * config/aarch64/aarch64.md (UNSPEC_SALT_ADDR): New unspec. (stack_protect_set): Forward to stack_protect_combined_set. (stack_protect_combined_set): New pattern. Use aarch64_stack_protect_canary_mem. (reg_stack_protect_address_): Add a salt operand. (stack_protect_test): Forward to stack_protect_combined_test. (stack_protect_combined_test): New pattern. Use aarch64_stack_protect_canary_mem. * config/aarch64/aarch64.c (strip_salt): New function. (strip_offset_and_salt): Likewise. (tls_symbolic_operand_type): Use strip_offset_and_salt. (aarch64_stack_protect_canary_mem): New function. (aarch64_cannot_force_const_mem): Use strip_offset_and_salt. (aarch64_classify_address): Likewise. (aarch64_symbolic_address_p): Likewise. (aarch64_print_operand): Likewise. (aarch64_output_addr_const_extra): New function. (aarch64_tls_symbol_p): Use strip_salt. (aarch64_classify_symbol): Likewise. (aarch64_legitimate_pic_operand_p): Use strip_offset_and_salt. (aarch64_legitimate_constant_p): Likewise. (aarch64_mov_operand_p): Use strip_salt. (TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA): Override. gcc/testsuite/ * gcc.target/aarch64/stack-protector-5.c: New test. * gcc.target/aarch64/stack-protector-6.c: Likewise. * gcc.target/aarch64/stack-protector-7.c: Likewise. 
--- gcc/config/aarch64/aarch64-protos.h | 20 +++ gcc/config/aarch64/aarch64.c | 164 +++++++++++++++------ gcc/config/aarch64/aarch64.md | 85 +++++------ .../gcc.target/aarch64/stack-protector-5.c | 23 +++ .../gcc.target/aarch64/stack-protector-6.c | 8 + .../gcc.target/aarch64/stack-protector-7.c | 25 ++++ 6 files changed, 228 insertions(+), 97 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-5.c create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-6.c create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-7.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index c7e828d..302e09b 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -136,6 +136,25 @@ enum aarch64_addr_query_type { ADDR_QUERY_ANY }; +/* Enumerates values that can be arbitrarily mixed into a calculation + in order to make the result of the calculation unique to its use case. + + AARCH64_SALT_SSP_SET + AARCH64_SALT_SSP_TEST + Used when calculating the address of the stack protection canary value. + There is a separate value for setting and testing the canary, meaning + that these two operations produce unique addresses: they are different + from each other, and from all other address calculations. + + The main purpose of this is to prevent the SET address being spilled + to the stack and reloaded for the TEST, since that would give an + attacker the opportunity to change the address of the expected + canary value. */ +enum aarch64_salt_type { + AARCH64_SALT_SSP_SET, + AARCH64_SALT_SSP_TEST +}; + /* A set of tuning parameters contains references to size and time cost models and vectors for address cost calculations, register move costs and memory move costs. 
*/ @@ -608,6 +627,7 @@ opt_machine_mode aarch64_ptrue_all_mode (rtx); rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx); rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx); void aarch64_expand_mov_immediate (rtx, rtx); +rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type); rtx aarch64_ptrue_reg (machine_mode); rtx aarch64_pfalse_reg (machine_mode); bool aarch64_sve_pred_dominates_p (rtx *, rtx); diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index b251f39..491fc58 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1935,6 +1935,29 @@ aarch64_sve_abi (void) return sve_abi; } +/* If X is an UNSPEC_SALT_ADDR expression, return the address that it + wraps, otherwise return X itself. */ + +static rtx +strip_salt (rtx x) +{ + rtx search = x; + if (GET_CODE (search) == CONST) + search = XEXP (search, 0); + if (GET_CODE (search) == UNSPEC && XINT (search, 1) == UNSPEC_SALT_ADDR) + x = XVECEXP (search, 0, 0); + return x; +} + +/* Like strip_offset, but also strip any UNSPEC_SALT_ADDR from the + expression. */ + +static rtx +strip_offset_and_salt (rtx addr, poly_int64 *offset) +{ + return strip_salt (strip_offset (addr, offset)); +} + /* Generate code to enable conditional branches in functions over 1 MiB. 
*/ const char * aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest, @@ -2932,14 +2955,9 @@ static enum tls_model tls_symbolic_operand_type (rtx addr) { enum tls_model tls_kind = TLS_MODEL_NONE; - if (GET_CODE (addr) == CONST) - { - poly_int64 addend; - rtx sym = strip_offset (addr, &addend); - if (GET_CODE (sym) == SYMBOL_REF) - tls_kind = SYMBOL_REF_TLS_MODEL (sym); - } - else if (GET_CODE (addr) == SYMBOL_REF) + poly_int64 offset; + addr = strip_offset_and_salt (addr, &offset); + if (GET_CODE (addr) == SYMBOL_REF) tls_kind = SYMBOL_REF_TLS_MODEL (addr); return tls_kind; @@ -5239,6 +5257,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) as_a (mode)); } +/* Return the MEM rtx that provides the canary value that should be used + for stack-smashing protection. MODE is the mode of the memory. + For SSP_GLOBAL, DECL_RTL is the MEM rtx for the canary variable + (__stack_chk_guard), otherwise it has no useful value. SALT_TYPE + indicates whether the caller is performing a SET or a TEST operation. */ + +rtx +aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl, + aarch64_salt_type salt_type) +{ + rtx addr; + if (aarch64_stack_protector_guard == SSP_GLOBAL) + { + gcc_assert (MEM_P (decl_rtl)); + addr = XEXP (decl_rtl, 0); + poly_int64 offset; + rtx base = strip_offset_and_salt (addr, &offset); + if (!SYMBOL_REF_P (base)) + return decl_rtl; + + rtvec v = gen_rtvec (2, base, GEN_INT (salt_type)); + addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_SALT_ADDR); + addr = gen_rtx_CONST (Pmode, addr); + addr = plus_constant (Pmode, addr, offset); + } + else + { + /* Calculate the address from the system register. 
*/ + rtx salt = GEN_INT (salt_type); + addr = gen_reg_rtx (mode); + if (mode == DImode) + emit_insn (gen_reg_stack_protect_address_di (addr, salt)); + else + { + emit_insn (gen_reg_stack_protect_address_si (addr, salt)); + addr = convert_memory_address (Pmode, addr); + } + addr = plus_constant (Pmode, addr, aarch64_stack_protector_guard_offset); + } + return gen_rtx_MEM (mode, force_reg (Pmode, addr)); +} + /* Emit an SVE predicated move from SRC to DEST. PRED is a predicate that is known to contain PTRUE. */ @@ -8677,8 +8737,6 @@ aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) static bool aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { - rtx base, offset; - if (GET_CODE (x) == HIGH) return true; @@ -8688,10 +8746,12 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) if (GET_CODE (*iter) == CONST_POLY_INT) return true; - split_const (x, &base, &offset); + poly_int64 offset; + rtx base = strip_offset_and_salt (x, &offset); if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF) { - if (aarch64_classify_symbol (base, INTVAL (offset)) + /* We checked for POLY_INT_CST offsets above. 
*/ + if (aarch64_classify_symbol (base, offset.to_constant ()) != SYMBOL_FORCE_TO_MEM) return true; else @@ -9217,9 +9277,8 @@ aarch64_classify_address (struct aarch64_address_info *info, && GET_MODE_SIZE (mode).is_constant (&const_size) && const_size >= 4) { - rtx sym, addend; - - split_const (x, &sym, &addend); + poly_int64 offset; + rtx sym = strip_offset_and_salt (x, &offset); return ((GET_CODE (sym) == LABEL_REF || (GET_CODE (sym) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (sym) @@ -9234,10 +9293,12 @@ aarch64_classify_address (struct aarch64_address_info *info, if (allow_reg_index_p && aarch64_base_register_rtx_p (info->base, strict_p)) { - rtx sym, offs; - split_const (info->offset, &sym, &offs); + poly_int64 offset; + HOST_WIDE_INT const_offset; + rtx sym = strip_offset_and_salt (info->offset, &offset); if (GET_CODE (sym) == SYMBOL_REF - && (aarch64_classify_symbol (sym, INTVAL (offs)) + && offset.is_constant (&const_offset) + && (aarch64_classify_symbol (sym, const_offset) == SYMBOL_SMALL_ABSOLUTE)) { /* The symbol and offset must be aligned to the access size. 
*/ @@ -9263,7 +9324,7 @@ aarch64_classify_address (struct aarch64_address_info *info, if (known_eq (ref_size, 0)) ref_size = GET_MODE_SIZE (DImode); - return (multiple_p (INTVAL (offs), ref_size) + return (multiple_p (const_offset, ref_size) && multiple_p (align / BITS_PER_UNIT, ref_size)); } } @@ -9295,9 +9356,8 @@ aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p) bool aarch64_symbolic_address_p (rtx x) { - rtx offset; - - split_const (x, &x, &offset); + poly_int64 offset; + x = strip_offset_and_salt (x, &offset); return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF; } @@ -10028,27 +10088,16 @@ aarch64_print_operand (FILE *f, rtx x, int code) switch (code) { case 'c': - switch (GET_CODE (x)) + if (CONST_INT_P (x)) + fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + else { - case CONST_INT: - fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); - break; - - case SYMBOL_REF: - output_addr_const (f, x); - break; - - case CONST: - if (GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) - { - output_addr_const (f, x); - break; - } - /* Fall through. */ - - default: - output_operand_lossage ("unsupported operand for code '%c'", code); + poly_int64 offset; + rtx base = strip_offset_and_salt (x, &offset); + if (SYMBOL_REF_P (base)) + output_addr_const (f, x); + else + output_operand_lossage ("unsupported operand for code '%c'", code); } break; @@ -10623,6 +10672,19 @@ aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x) output_addr_const (f, x); } +/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ + +static bool +aarch64_output_addr_const_extra (FILE *file, rtx x) +{ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SALT_ADDR) + { + output_addr_const (file, XVECEXP (x, 0, 0)); + return true; + } + return false; +} + bool aarch64_label_mentioned_p (rtx x) { @@ -15932,6 +15994,7 @@ aarch64_tls_symbol_p (rtx x) if (! 
TARGET_HAVE_TLS) return false; + x = strip_salt (x); if (GET_CODE (x) != SYMBOL_REF) return false; @@ -15987,6 +16050,8 @@ aarch64_classify_tls_symbol (rtx x) enum aarch64_symbol_type aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset) { + x = strip_salt (x); + if (GET_CODE (x) == LABEL_REF) { switch (aarch64_cmodel) @@ -16086,11 +16151,10 @@ aarch64_constant_address_p (rtx x) bool aarch64_legitimate_pic_operand_p (rtx x) { - if (GET_CODE (x) == SYMBOL_REF - || (GET_CODE (x) == CONST - && GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) - return false; + poly_int64 offset; + x = strip_offset_and_salt (x, &offset); + if (GET_CODE (x) == SYMBOL_REF) + return false; return true; } @@ -16136,7 +16200,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) /* If an offset is being added to something else, we need to allow the base to be moved into the destination register, meaning that there are no free temporaries for the offset. */ - x = strip_offset (x, &offset); + x = strip_offset_and_salt (x, &offset); if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0) return false; @@ -18035,6 +18099,7 @@ aarch64_mov_operand_p (rtx x, machine_mode mode) return aarch64_simd_valid_immediate (x, NULL); } + x = strip_salt (x); if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) return true; @@ -23890,6 +23955,9 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_PRINT_OPERAND_ADDRESS #define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA aarch64_output_addr_const_extra + #undef TARGET_OPTAB_SUPPORTED_P #define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index dbc6b1d..19ec9e3 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -281,6 +281,7 @@ UNSPEC_GEN_TAG_RND ; Generate a 
random 4-bit MTE tag. UNSPEC_TAG_SPACE ; Translate address to MTE tag address space. UNSPEC_LD1RO + UNSPEC_SALT_ADDR ]) (define_c_enum "unspecv" [ @@ -6881,43 +6882,37 @@ DONE; }) -;; Named patterns for stack smashing protection. +;; Defined for -mstack-protector-guard=sysreg, which goes through this +;; pattern rather than stack_protect_combined_set. Our implementation +;; of the latter can handle both. (define_expand "stack_protect_set" [(match_operand 0 "memory_operand") - (match_operand 1 "memory_operand")] + (match_operand 1 "")] "" { - machine_mode mode = GET_MODE (operands[0]); - if (aarch64_stack_protector_guard != SSP_GLOBAL) - { - /* Generate access through the system register. */ - rtx tmp_reg = gen_reg_rtx (mode); - if (mode == DImode) - { - emit_insn (gen_reg_stack_protect_address_di (tmp_reg)); - emit_insn (gen_adddi3 (tmp_reg, tmp_reg, - GEN_INT (aarch64_stack_protector_guard_offset))); - } - else - { - emit_insn (gen_reg_stack_protect_address_si (tmp_reg)); - emit_insn (gen_addsi3 (tmp_reg, tmp_reg, - GEN_INT (aarch64_stack_protector_guard_offset))); + emit_insn (gen_stack_protect_combined_set (operands[0], operands[1])); + DONE; +}) - } - operands[1] = gen_rtx_MEM (mode, tmp_reg); - } - +(define_expand "stack_protect_combined_set" + [(match_operand 0 "memory_operand") + (match_operand 1 "")] + "" +{ + machine_mode mode = GET_MODE (operands[0]); + operands[1] = aarch64_stack_protect_canary_mem (mode, operands[1], + AARCH64_SALT_SSP_SET); emit_insn ((mode == DImode ? gen_stack_protect_set_di : gen_stack_protect_set_si) (operands[0], operands[1])); DONE; }) +;; Operand 1 is either AARCH64_SALT_SSP_SET or AARCH64_SALT_SSP_TEST. 
(define_insn "reg_stack_protect_address_" [(set (match_operand:PTR 0 "register_operand" "=r") - (unspec:PTR [(const_int 0)] - UNSPEC_SSP_SYSREG))] + (unspec:PTR [(match_operand 1 "const_int_operand")] + UNSPEC_SSP_SYSREG))] "aarch64_stack_protector_guard != SSP_GLOBAL" { char buf[150]; @@ -6940,37 +6935,29 @@ [(set_attr "length" "12") (set_attr "type" "multiple")]) +;; Defined for -mstack-protector-guard=sysreg, which goes through this +;; pattern rather than stack_protect_combined_test. Our implementation +;; of the latter can handle both. (define_expand "stack_protect_test" [(match_operand 0 "memory_operand") - (match_operand 1 "memory_operand") + (match_operand 1 "") (match_operand 2)] "" { - machine_mode mode = GET_MODE (operands[0]); - - if (aarch64_stack_protector_guard != SSP_GLOBAL) - { - /* Generate access through the system register. The - sequence we want here is the access - of the stack offset to come with - mrs scratch_reg, - add scratch_reg, scratch_reg, :lo12:offset. */ - rtx tmp_reg = gen_reg_rtx (mode); - if (mode == DImode) - { - emit_insn (gen_reg_stack_protect_address_di (tmp_reg)); - emit_insn (gen_adddi3 (tmp_reg, tmp_reg, - GEN_INT (aarch64_stack_protector_guard_offset))); - } - else - { - emit_insn (gen_reg_stack_protect_address_si (tmp_reg)); - emit_insn (gen_addsi3 (tmp_reg, tmp_reg, - GEN_INT (aarch64_stack_protector_guard_offset))); + emit_insn (gen_stack_protect_combined_test (operands[0], operands[1], + operands[2])); + DONE; +}) - } - operands[1] = gen_rtx_MEM (mode, tmp_reg); - } +(define_expand "stack_protect_combined_test" + [(match_operand 0 "memory_operand") + (match_operand 1 "") + (match_operand 2)] + "" +{ + machine_mode mode = GET_MODE (operands[0]); + operands[1] = aarch64_stack_protect_canary_mem (mode, operands[1], + AARCH64_SALT_SSP_TEST); emit_insn ((mode == DImode ? 
gen_stack_protect_test_di : gen_stack_protect_test_si) (operands[0], operands[1])); diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-5.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-5.c new file mode 100644 index 0000000..a9cd53b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-5.c @@ -0,0 +1,23 @@ +/* { dg-do compile } */ +/* { dg-options "-fstack-protector-all -O2" } */ + +void __attribute__ ((noipa)) +f (void) +{ + volatile int x; + asm volatile ("" ::: + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28", "x30"); +} + +/* The register clobbers above should not generate any single LDRs or STRs; + all registers should be saved and restored in pairs. The only STRs + should therefore be those associated with the stack protector + tests themselves. + + Make sure the address of the canary value is not spilled and reloaded, + since that would give the attacker an opportunity to change the + canary value. */ +/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-6.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-6.c new file mode 100644 index 0000000..e2ac088 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-6.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target fpic } */ +/* { dg-options "-fstack-protector-all -O2 -fpic" } */ + +#include "stack-protector-5.c" + +/* See the comment in stack-protector-5.c. 
*/ +/* { dg-final { scan-assembler-times {\tldr\t[^\n]*__stack_chk_guard} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-7.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-7.c new file mode 100644 index 0000000..e644768 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-7.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-fstack-protector-all -mstack-protector-guard=sysreg -mstack-protector-guard-offset=16 -mstack-protector-guard-reg=tpidr_el0 -O2" } */ + +void __attribute__ ((noipa)) +f (void) +{ + volatile int x; + asm volatile ("" ::: + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28", "x30"); +} + +/* The register clobbers above should not generate any single LDRs or STRs; + all registers should be saved and restored in pairs. The only LDRs and + STRs should therefore be those associated with the stack protector + tests themselves. + + Make sure the address of the canary value (tpidr_el0 + 16) is not + spilled and reloaded, since that would give the attacker an opportunity + to change the canary value. */ +/* { dg-final { scan-assembler-times {\tmrs\t} 2 } } */ +/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\t} 3 } } */ -- cgit v1.1 From 37c3c297396af3229e9de35ef437f3614e0b4b87 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Wed, 23 Sep 2020 17:35:23 +0200 Subject: [nvptx] Split up function ref plus const With test-case gcc.c-torture/compile/pr92231.c, we run into: ... nvptx-as: ptxas terminated with signal 11 [Segmentation fault], core dumped^M compiler exited with status 1 FAIL: gcc.c-torture/compile/pr92231.c -O0 (test for excess errors) ... due to using a function reference plus constant as operand: ... mov.u64 %r24,bar+4096'; ... Fix this by splitting such an insn into: ... 
mov.u64 %r24,bar'; add.u64 %r24,%r24,4096'; ... Tested on nvptx. gcc/ChangeLog: * config/nvptx/nvptx.md: Don't allow operand containing sum of function ref and const. --- gcc/config/nvptx/nvptx.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'gcc') diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 6178e6a..035f6e0 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -146,6 +146,13 @@ return true; }) +;; Test for a function symbol ref operand +(define_predicate "symbol_ref_function_operand" + (match_code "symbol_ref") +{ + return SYMBOL_REF_FUNCTION_P (op); +}) + (define_attr "predicable" "false,true" (const_string "true")) @@ -241,6 +248,17 @@ } [(set_attr "subregs_ok" "true")]) +;; ptxas segfaults on 'mov.u64 %r24,bar+4096', so break it up. +(define_split + [(set (match_operand:DI 0 "nvptx_register_operand") + (const:DI (plus:DI (match_operand:DI 1 "symbol_ref_function_operand") + (match_operand 2 "const_int_operand"))))] + "" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2))) + ] + "") + (define_insn "*mov_insn" [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m") (match_operand:SDFM 1 "general_operand" "RF,m,R"))] -- cgit v1.1 From e92779db3304bc96a6b861f87c5edde8dd4d4030 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Wed, 23 Sep 2020 15:02:01 -0600 Subject: Avoid assuming input corresponds to valid source code (PR c/97131). gcc/c-family/ChangeLog: PR c/97131 * c-warn.c (warn_parm_ptrarray_mismatch): Handle more invalid input. gcc/testsuite/ChangeLog: PR c/97131 * gcc.dg/Warray-parameter-6.c: New test. 
--- gcc/c-family/c-warn.c | 5 +++++ gcc/testsuite/gcc.dg/Warray-parameter-6.c | 9 +++++++++ 2 files changed, 14 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/Warray-parameter-6.c (limited to 'gcc') diff --git a/gcc/c-family/c-warn.c b/gcc/c-family/c-warn.c index d6db85b..ebd011d 100644 --- a/gcc/c-family/c-warn.c +++ b/gcc/c-family/c-warn.c @@ -3181,11 +3181,16 @@ warn_parm_ptrarray_mismatch (location_t origloc, tree curparms, tree newparms) while (TREE_CODE (curtyp) == POINTER_TYPE && TREE_CODE (newtyp) == POINTER_TYPE); + if (!newtyp) + /* Bail on error. */ + return; + if (TREE_CODE (curtyp) != ARRAY_TYPE || TREE_CODE (newtyp) != ARRAY_TYPE) { if (curtyp == error_mark_node || newtyp == error_mark_node) + /* Bail on error. */ return; continue; diff --git a/gcc/testsuite/gcc.dg/Warray-parameter-6.c b/gcc/testsuite/gcc.dg/Warray-parameter-6.c new file mode 100644 index 0000000..609dac9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Warray-parameter-6.c @@ -0,0 +1,9 @@ +/* PR c/97131 - ICE: Segmentation fault in warn_parm_ptrarray_mismatch + { dg-do compile } + { dg-options "-Wall" } */ + +struct bm { }; + +void ms (struct bm (*at)[1]) { } + +void ms (int f1) { } // { dg-error "conflicting types for 'ms'" } -- cgit v1.1 From e977dd5edbcc3a3b88c3bd7efa1026c845af7487 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Wed, 23 Sep 2020 23:06:05 +0200 Subject: Cleanup modref interfaces. * ipa-fnsummary.c (refs_local_or_readonly_memory_p): New function. (points_to_local_or_readonly_memory_p): New function. * ipa-fnsummary.h (refs_local_or_readonly_memory_p): Declare. (points_to_local_or_readonly_memory_p): Declare. * ipa-modref.c (record_access_p): Use refs_local_or_readonly_memory_p. * ipa-pure-const.c (check_op): Likewise. * gcc.dg/tree-ssa/local-pure-const.c: Update template. 
--- gcc/ipa-fnsummary.c | 41 ++++++++++++++++++++++++ gcc/ipa-fnsummary.h | 2 ++ gcc/ipa-modref.c | 31 +++--------------- gcc/ipa-pure-const.c | 8 ++--- gcc/testsuite/gcc.dg/tree-ssa/local-pure-const.c | 2 +- 5 files changed, 52 insertions(+), 32 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index 86d01ad..bb703f6 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -2430,6 +2430,47 @@ fp_expression_p (gimple *stmt) return false; } +/* Return true if T references memory location that is local + for the function (that means, dead after return) or read-only. */ + +bool +refs_local_or_readonly_memory_p (tree t) +{ + /* Non-escaping memory is fine. */ + t = get_base_address (t); + if ((TREE_CODE (t) == MEM_REF + || TREE_CODE (t) == TARGET_MEM_REF)) + return points_to_local_or_readonly_memory_p (TREE_OPERAND (t, 0)); + + /* Automatic variables are fine. */ + if (DECL_P (t) + && auto_var_in_fn_p (t, current_function_decl)) + return true; + + /* Read-only variables are fine. */ + if (DECL_P (t) && TREE_READONLY (t)) + return true; + + return false; +} + +/* Return true if T is a pointer pointing to memory location that is local + for the function (that means, dead after return) or read-only. */ + +bool +points_to_local_or_readonly_memory_p (tree t) +{ + /* See if memory location is clearly invalid. */ + if (integer_zerop (t)) + return flag_delete_null_pointer_checks; + if (TREE_CODE (t) == SSA_NAME) + return !ptr_deref_may_alias_global_p (t); + if (TREE_CODE (t) == ADDR_EXPR) + return refs_local_or_readonly_memory_p (TREE_OPERAND (t, 0)); + return false; +} + + /* Analyze function body for NODE. EARLY indicates run from early optimization pipeline. 
*/ diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h index c6ddc9f..4e1f841 100644 --- a/gcc/ipa-fnsummary.h +++ b/gcc/ipa-fnsummary.h @@ -357,6 +357,8 @@ void estimate_ipcp_clone_size_and_time (struct cgraph_node *, void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge); void ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset = true); void compute_fn_summary (struct cgraph_node *, bool); +bool refs_local_or_readonly_memory_p (tree); +bool points_to_local_or_readonly_memory_p (tree); void evaluate_properties_for_edge (struct cgraph_edge *e, diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 3e65159..9cc9056 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -62,6 +62,9 @@ along with GCC; see the file COPYING3. If not see #include "calls.h" #include "ipa-modref-tree.h" #include "ipa-modref.h" +#include "value-range.h" +#include "ipa-prop.h" +#include "ipa-fnsummary.h" /* Class (from which there is one global instance) that holds modref summaries for all analyzed functions. */ @@ -347,36 +350,12 @@ record_access_lto (modref_records_lto *tt, ao_ref *ref) static bool record_access_p (tree expr) { - /* Non-escaping memory is fine */ - tree t = get_base_address (expr); - if (t && (INDIRECT_REF_P (t) - || TREE_CODE (t) == MEM_REF - || TREE_CODE (t) == TARGET_MEM_REF) - && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME - && !ptr_deref_may_alias_global_p (TREE_OPERAND (t, 0))) + if (refs_local_or_readonly_memory_p (expr)) { if (dump_file) - fprintf (dump_file, " - Non-escaping memory, ignoring.\n"); + fprintf (dump_file, " - Read-only or local, ignoring.\n"); return false; } - - /* Automatic variables are fine. */ - if (DECL_P (t) - && auto_var_in_fn_p (t, current_function_decl)) - { - if (dump_file) - fprintf (dump_file, " - Automatic variable, ignoring.\n"); - return false; - } - - /* Read-only variables are fine. 
*/ - if (DECL_P (t) && TREE_READONLY (t)) - { - if (dump_file) - fprintf (dump_file, " - Read-only variable, ignoring.\n"); - return false; - } - return true; } diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c index 564c662..bdbccd0 100644 --- a/gcc/ipa-pure-const.c +++ b/gcc/ipa-pure-const.c @@ -381,13 +381,11 @@ check_op (funct_state local, tree t, bool checking_write) fprintf (dump_file, " Volatile indirect ref is not const/pure\n"); return; } - else if (t - && (INDIRECT_REF_P (t) || TREE_CODE (t) == MEM_REF) - && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME - && !ptr_deref_may_alias_global_p (TREE_OPERAND (t, 0))) + else if (refs_local_or_readonly_memory_p (t)) { if (dump_file) - fprintf (dump_file, " Indirect ref to local memory is OK\n"); + fprintf (dump_file, " Indirect ref to local or readonly " + "memory is OK\n"); return; } else if (checking_write) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/local-pure-const.c b/gcc/testsuite/gcc.dg/tree-ssa/local-pure-const.c index 3c358e0..6746758 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/local-pure-const.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/local-pure-const.c @@ -12,5 +12,5 @@ t(int a, int b, int c) p = &c; return *p; } -/* { dg-final { scan-tree-dump-times "local memory is OK" 1 "local-pure-const1"} } */ +/* { dg-final { scan-tree-dump-times "local or readonly memory is OK" 1 "local-pure-const1"} } */ /* { dg-final { scan-tree-dump-times "found to be const" 1 "local-pure-const1"} } */ -- cgit v1.1 From 6edc8f5bfe7d9db8fb8bd37bb8086a69850c6c6d Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Wed, 23 Sep 2020 15:04:32 -0600 Subject: Handle DECLs and EXPRESSIONs consistently (PR middle-end/97175). gcc/ChangeLog: PR middle-end/97175 * builtins.c (maybe_warn_for_bound): Handle both DECLs and EXPRESSIONs in pad->dst.ref, same is pad->src.ref. gcc/testsuite/ChangeLog: PR middle-end/97175 * gcc.dg/Wstringop-overflow-44.c: New test. 
--- gcc/builtins.c | 10 ++- gcc/testsuite/gcc.dg/Wstringop-overflow-44.c | 129 +++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/Wstringop-overflow-44.c (limited to 'gcc') diff --git a/gcc/builtins.c b/gcc/builtins.c index 45efc1c..cac842f 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -3480,8 +3480,14 @@ maybe_warn_for_bound (int opt, location_t loc, tree exp, tree func, if (warned) { if (pad && pad->dst.ref) - inform (DECL_SOURCE_LOCATION (pad->dst.ref), - "destination object declared here"); + { + if (DECL_P (pad->dst.ref)) + inform (DECL_SOURCE_LOCATION (pad->dst.ref), + "destination object declared here"); + else if (EXPR_HAS_LOCATION (pad->dst.ref)) + inform (EXPR_LOCATION (pad->dst.ref), + "destination object allocated here"); + } TREE_NO_WARNING (exp) = true; } diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-44.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-44.c new file mode 100644 index 0000000..9e292a9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-44.c @@ -0,0 +1,129 @@ +/* PR middle-end/97175 - ICE on an excessive strncpy bound + { dg-do compile } + { dg-options "-O -Wall" } */ + +int n; + +char *d; + +void sink (void*); + +/* Exercise calls with a destination of unknown size. */ + +void f0 (const void *s) +{ + if (n > 0) return; + __builtin_memcpy (d, s, n); // eliminated +} + +void f1 (const void *s) +{ + if (n > 0) return; + __builtin_memmove (d, s, n); // eliminated +} + +void f2 (void) +{ + if (n > 0) return; + __builtin_memset (d, 0, n); // eliminated +} + +void f3 (const char *s) +{ + if (n > 0) return; + __builtin_strncpy (d, s, n); // can be eliminated but isn't +} + +void f4 (const char *s) +{ + if (n > 0) return; + *d = 0; + __builtin_strncat (d, s, n); // can be eliminated but isn't +} + + +/* Exercise the same calls but with a declared destination object. 
*/ + +void g0 (const void *s) +{ + if (n > 0) return; + char a[1]; + __builtin_memcpy (a, s, n); // eliminated + sink (a); +} + +void g1 (const void *s) +{ + if (n > 0) return; + char a[1]; + __builtin_memmove (a, s, n); // eliminated + sink (a); +} + +void g2 (void) +{ + if (n > 0) return; + char a[1]; + __builtin_memset (a, 0, n); // eliminated + sink (a); +} + +void g3 (const char *s) +{ + if (n > 0) return; + char a[1]; + __builtin_strncpy (a, s, n); // can be eliminated but isn't + sink (a); +} + +void g4 (const char *s) +{ + if (n > 0) return; + char a[1]; + *a = 0; + __builtin_strncat (a, s, n); // can be eliminated but isn't + sink (a); +} + + +void h0 (const void *s) +{ + if (n > 0) return; + d = __builtin_malloc (1); + __builtin_memcpy (d, s, n); // eliminated +} + +void h1 (const void *s) +{ + if (n > 0) return; + d = __builtin_malloc (1); + __builtin_memmove (d, s, n); // eliminated +} + +void h2 (void) +{ + if (n > 0) return; + d = __builtin_malloc (1); + __builtin_memset (d, 0, n); // eliminated +} + +void h3 (const char *s) +{ + if (n > 0) return; + d = __builtin_malloc (1); + __builtin_strncpy (d, s, n); // can be eliminated but isn't +} + +void h4 (const char *s) +{ + if (n > 0) return; + d = __builtin_malloc (1); + *d = 0; + __builtin_strncat (d, s, n); // can be eliminated but isn't +} + +/* The calls above that aren't eliminated trigger + warning: specified size between INT_MAX and SIZE_MAX exceed maximum + object size PTRDIFF_MAX + { dg-prune-output "-Wstringop-overflow" } + { dg-prune-output "-Wstringop-overread" } */ -- cgit v1.1 From 67aeddb785ddcc8688ee5736ecab3c81de34a214 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Wed, 23 Sep 2020 15:19:13 -0600 Subject: Build a zero element array type that reliably renders as T[0] in diagnostcs. gcc/ChangeLog: * gimple-array-bounds.cc (build_zero_elt_array_type): New function. (array_bounds_checker::check_mem_ref): Call it. 
--- gcc/gimple-array-bounds.cc | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/gimple-array-bounds.cc b/gcc/gimple-array-bounds.cc index b93ef7a..ab638ba 100644 --- a/gcc/gimple-array-bounds.cc +++ b/gcc/gimple-array-bounds.cc @@ -372,6 +372,20 @@ array_bounds_checker::check_array_ref (location_t location, tree ref, return warned; } +/* Hack around the internal representation constraints and build a zero + element array type that actually renders as T[0] in diagnostics. */ + +static tree +build_zero_elt_array_type (tree eltype) +{ + tree idxtype = build_range_type (sizetype, size_zero_node, NULL_TREE); + tree arrtype = build_array_type (eltype, idxtype); + arrtype = build_distinct_type_copy (TYPE_MAIN_VARIANT (arrtype)); + TYPE_SIZE (arrtype) = bitsize_zero_node; + TYPE_SIZE_UNIT (arrtype) = size_zero_node; + return arrtype; +} + /* Checks one MEM_REF in REF, located at LOCATION, for out-of-bounds references to string constants. If VRP can determine that the array subscript is a constant, check if it is outside valid range. @@ -547,7 +561,10 @@ array_bounds_checker::check_mem_ref (location_t location, tree ref, return false; offset_int nelts = arrbounds[1] / eltsize; - reftype = build_array_type_nelts (reftype, nelts.to_uhwi ()); + if (nelts == 0) + reftype = build_zero_elt_array_type (reftype); + else + reftype = build_array_type_nelts (reftype, nelts.to_uhwi ()); } else if (TREE_CODE (arg) == ADDR_EXPR) { -- cgit v1.1 From 27cdb4de83660c20c08663e2b6c47436eeed34a7 Mon Sep 17 00:00:00 2001 From: "Paul A. Clarke" Date: Wed, 23 Sep 2020 11:59:26 -0500 Subject: rs6000: Add 'd' for doubleword variant of vector insert When the "Vector Insert" section was added to the documentation, the doubleword ('d') variant was omitted. Add it. 2020-09-23 Paul A. Clarke gcc/ * doc/extend.texi: Add 'd' for doubleword variant of vector insert instruction.
--- gcc/doc/extend.texi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 5571c4f..7f14a28 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -20961,9 +20961,9 @@ given by the third argument, using natural element order in the second argument. The rest of the second argument is unchanged. If the byte index is greater than 14 for halfwords, greater than 12 for words, or greater than 8 for doublewords the result is undefined. For little-endian, -the generated code will be semantically equivalent to @code{vins[bhw]rx} +the generated code will be semantically equivalent to @code{vins[bhwd]rx} instructions. Similarly for big-endian it will be semantically equivalent -to @code{vins[bhw]lx}. Note that some fairly anomalous results can be +to @code{vins[bhwd]lx}. Note that some fairly anomalous results can be generated if the byte index is not aligned on an element boundary for the type of element being inserted. @findex vec_insertl @@ -20996,8 +20996,8 @@ for halfwords, 12 for words, or 8 for doublewords, the intrinsic will be rejected. Note that the underlying hardware instruction uses the same register for the second argument and the result. For little-endian, the code generation will be semantically equivalent to -@code{vins[bhw]lx}, while for big-endian it will be semantically equivalent to -@code{vins[bhw]rx}. +@code{vins[bhwd]lx}, while for big-endian it will be semantically equivalent to +@code{vins[bhwd]rx}. Note that some fairly anomalous results can be generated if the byte index is not aligned on an element boundary for the sort of element being inserted. @findex vec_inserth -- cgit v1.1 From 82b77dee751c916bcef55e527bffdd82b68fc897 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 24 Sep 2020 00:16:31 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 183 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 26 +++++++ gcc/c-family/ChangeLog | 11 +++ gcc/cp/ChangeLog | 12 ++++ gcc/testsuite/ChangeLog | 102 +++++++++++++++++++++++++++ 6 files changed, 335 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c8ceb4b..9ed3785 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,186 @@ +2020-09-24 Paul A. Clarke + + * doc/extend.texi: Add 'd' for doubleword variant of + vector insert instruction. + +2020-09-23 Martin Sebor + + * gimple-array-bounds.cc (build_zero_elt_array_type): New function. + (array_bounds_checker::check_mem_ref): Call it. + +2020-09-23 Martin Sebor + + PR middle-end/97175 + * builtins.c (maybe_warn_for_bound): Handle both DECLs and EXPRESSIONs + in pad->dst.ref, same is pad->src.ref. + +2020-09-23 Jan Hubicka + + * ipa-fnsummary.c (refs_local_or_readonly_memory_p): New function. + (points_to_local_or_readonly_memory_p): New function. + * ipa-fnsummary.h (refs_local_or_readonly_memory_p): Declare. + (points_to_local_or_readonly_memory_p): Declare. + * ipa-modref.c (record_access_p): Use refs_local_or_readonly_memory_p. + * ipa-pure-const.c (check_op): Likewise. + +2020-09-23 Tom de Vries + + * config/nvptx/nvptx.md: Don't allow operand containing sum of + function ref and const. + +2020-09-23 Richard Sandiford + + * config/aarch64/aarch64-protos.h (aarch64_salt_type): New enum. + (aarch64_stack_protect_canary_mem): Declare. + * config/aarch64/aarch64.md (UNSPEC_SALT_ADDR): New unspec. + (stack_protect_set): Forward to stack_protect_combined_set. + (stack_protect_combined_set): New pattern. Use + aarch64_stack_protect_canary_mem. + (reg_stack_protect_address_): Add a salt operand. + (stack_protect_test): Forward to stack_protect_combined_test. + (stack_protect_combined_test): New pattern. Use + aarch64_stack_protect_canary_mem. + * config/aarch64/aarch64.c (strip_salt): New function. 
+ (strip_offset_and_salt): Likewise. + (tls_symbolic_operand_type): Use strip_offset_and_salt. + (aarch64_stack_protect_canary_mem): New function. + (aarch64_cannot_force_const_mem): Use strip_offset_and_salt. + (aarch64_classify_address): Likewise. + (aarch64_symbolic_address_p): Likewise. + (aarch64_print_operand): Likewise. + (aarch64_output_addr_const_extra): New function. + (aarch64_tls_symbol_p): Use strip_salt. + (aarch64_classify_symbol): Likewise. + (aarch64_legitimate_pic_operand_p): Use strip_offset_and_salt. + (aarch64_legitimate_constant_p): Likewise. + (aarch64_mov_operand_p): Use strip_salt. + (TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA): Override. + +2020-09-23 Kyrylo Tkachov + + PR target/71233 + * config/aarch64/arm_neon.h (vreinterpretq_f64_p128, + vreinterpretq_p128_f64): Define. + +2020-09-23 Alex Coplan + + * config/arm/arm-cpus.in (neoverse-v1): New. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Regenerate. + * doc/invoke.texi: Document support for Neoverse V1. + +2020-09-23 Alex Coplan + + * config/aarch64/aarch64-cores.def: Add Neoverse V1. + * config/aarch64/aarch64-tune.md: Regenerate. + * doc/invoke.texi: Document support for Neoverse V1. + +2020-09-23 Richard Biener + + PR middle-end/96453 + * gimple-isel.cc (gimple_expand_vec_cond_expr): Remove + LT_EXPR -> NE_EXPR verification and also apply it for + non-constant masks. + +2020-09-23 Jan Hubicka + + * ipa-modref.c (modref_summary::lto_useful_p): New member function. + (modref_summary::useful_p): New member function. + (analyze_function): Drop useless summaries. + (modref_write): Skip useless summaries. + (pass_ipa_modref::execute): Drop useless summaries. + * ipa-modref.h (struct GTY): Declare useful_p and lto_useful_p. + * tree-ssa-alias.c (dump_alias_stats): Fix. + (modref_may_conflict): Fix stats. + +2020-09-23 Richard Biener + + PR middle-end/96466 + * internal-fn.c (expand_vect_cond_mask_optab_fn): Use + appropriate mode for force_reg. 
+ * tree.c (build_truth_vector_type_for): Pass VOIDmode to + make_vector_type. + +2020-09-23 Richard Sandiford + + * tree-vectorizer.h (determine_peel_for_niter): Delete in favor of... + (vect_determine_partial_vectors_and_peeling): ...this new function. + * tree-vect-loop-manip.c (vect_update_epilogue_niters): New function. + Reject using vector epilogue loops for single iterations. Install + the constant number of epilogue loop iterations in the associated + loop_vinfo. Rely on vect_determine_partial_vectors_and_peeling + to do the main part of the test. + (vect_do_peeling): Use vect_update_epilogue_niters to handle + epilogue loops with a known number of iterations. Skip recomputing + the number of iterations later in that case. Otherwise, use + vect_determine_partial_vectors_and_peeling to decide whether the + epilogue loop needs to use partial vectors or peeling. + * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Set the + default can_use_partial_vectors_p to false if partial-vector-usage=0. + (determine_peel_for_niter): Remove in favor of... + (vect_determine_partial_vectors_and_peeling): ...this new function, + split out from... + (vect_analyze_loop_2): ...here. Reflect the vect_verify_full_masking + and vect_verify_loop_lens results in CAN_USE_PARTIAL_VECTORS_P + rather than USING_PARTIAL_VECTORS_P. + +2020-09-23 Kyrylo Tkachov + + PR target/71233 + * config/aarch64/aarch64-simd-builtins.def (frintn): Use BUILTIN_VHSDF_HSDF + for modes. Remove explicit hf instantiation. + * config/aarch64/arm_neon.h (vrndns_f32): Define. + +2020-09-23 Richard Biener + + PR tree-optimization/97173 + * tree-vect-loop.c (vectorizable_live_operation): Extend + assert to also conver element conversions. + +2020-09-23 Kyrylo Tkachov + + PR target/71233 + * config/aarch64/arm_neon.h (vtrn1q_p64, vtrn2q_p64, vuzp1q_p64, + vuzp2q_p64, vzip1q_p64, vzip2q_p64): Define. + +2020-09-23 Kyrylo Tkachov + + PR target/71233 + * config/aarch64/arm_neon.h (vldrq_p128): Define. 
+ +2020-09-23 Kyrylo Tkachov + + PR target/71233 + * config/aarch64/arm_neon.h (vstrq_p128): Define. + +2020-09-23 Richard Biener + + PR tree-optimization/97151 + * tree-ssa-structalias.c (find_func_aliases_for_call): + DECL_IS_REPLACEABLE_OPERATOR_DELETE_P has no effect on + arguments. + +2020-09-23 Richard Biener + + PR middle-end/97162 + * alias.c (compare_base_decls): Use DECL_HARD_REGISTER + and guard with VAR_P. + +2020-09-23 Martin Liska + + PR gcov-profile/97069 + * profile.c (branch_prob): Line number must be at least 1. + +2020-09-23 Tom de Vries + + PR target/97158 + * config/nvptx/nvptx.c (nvptx_output_mov_insn): Handle move from + DF subreg to DF reg. + +2020-09-23 David Malcolm + + * Makefile.in: Add $(ZLIBINC) to CFLAGS-analyzer/engine.o. + 2020-09-22 Jan Hubicka * ipa-modref.c (analyze_stmt): Ignore gimple clobber. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 1a983d8..52a894d 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200923 +20200924 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index cd869c2..8bba071 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,29 @@ +2020-09-23 David Malcolm + + PR analyzer/97178 + * engine.cc (impl_run_checkers): Update for change to ext_state + ctor. + * program-state.cc (selftest::test_sm_state_map): Pass an engine + instance to ext_state ctor. + (selftest::test_program_state_1): Likewise. + (selftest::test_program_state_2): Likewise. + (selftest::test_program_state_merging): Likewise. + (selftest::test_program_state_merging_2): Likewise. + * program-state.h (extrinsic_state::extrinsic_state): Remove NULL + default value for "eng" param. + +2020-09-23 Tobias Burnus + + * analyzer-logging.cc: Guard '#pragma ... ignored "-Wformat-diag"' + by '#if __GNUC__ >= 10' + * analyzer.h: Likewise. + * call-string.cc: Likewise. + +2020-09-23 David Malcolm + + * engine.cc (exploded_node::on_stmt): Replace sequence of dyn_cast + with switch. 
+ 2020-09-22 David Malcolm * analysis-plan.cc: Include "json.h". diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 8ee29d8..4dd5822 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,14 @@ +2020-09-23 Martin Sebor + + PR c/97131 + * c-warn.c (warn_parm_ptrarray_mismatch): Handle more invalid input. + +2020-09-23 Marek Polacek + + PR c/97125 + * c-gimplify.c (c_genericize): Only call do_warn_duplicated_branches_r + after loops and other structured control constructs have been lowered. + 2020-09-22 Jakub Jelinek * c.opt (Wbuiltin-declaration-mismatch): Fix typo in variable name: diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index dc1b0b0..e583c64 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,15 @@ +2020-09-23 Nathan Sidwell + + * name-lookup.h (typedef cxx_binding): Delete tdef. + (typedef cp_binding_level): Likewise. + (struct cxx_binding): Flags are bools. + +2020-09-23 Nathan Sidwell + + PR c++/97171 + * pt.c (tsubst_copy) [FUNCTION_DECL,VAR_DECL]: Retrieve local + specialization for DECL_LOCAL_P decls. + 2020-09-22 Patrick Palka PR c++/95310 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 68cdc31..e40c474 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,105 @@ +2020-09-23 Martin Sebor + + PR middle-end/97175 + * gcc.dg/Wstringop-overflow-44.c: New test. + +2020-09-23 Jan Hubicka + + * gcc.dg/tree-ssa/local-pure-const.c: Update template. + +2020-09-23 Martin Sebor + + PR c/97131 + * gcc.dg/Warray-parameter-6.c: New test. + +2020-09-23 Richard Sandiford + + * gcc.target/aarch64/stack-protector-5.c: New test. + * gcc.target/aarch64/stack-protector-6.c: Likewise. + * gcc.target/aarch64/stack-protector-7.c: Likewise. + +2020-09-23 Richard Sandiford + + * gcc.target/aarch64/stack-protector-3.c: New test. + * gcc.target/aarch64/stack-protector-4.c: Likewise. 
+ +2020-09-23 Kyrylo Tkachov + + PR target/71233 + * gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h + (clean_results): Add float64x2_t cleanup. + (DECL_VARIABLE_128BITS_VARIANTS): Add float64x2_t variable. + * gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c: Add + testing of vreinterpretq_f64_p128, vreinterpretq_p128_f64. + +2020-09-23 Nathan Sidwell + + PR c++/97171 + * g++.dg/template/local10.C: New. + +2020-09-23 Marek Polacek + + PR c/97125 + * c-c++-common/Wduplicated-branches-15.c: New test. + +2020-09-23 Richard Biener + + PR middle-end/96453 + * gcc.dg/pr96453.c: New testcase. + +2020-09-23 Richard Biener + + PR middle-end/96466 + * gcc.dg/pr96466.c: New testcase. + +2020-09-23 Richard Sandiford + + * gcc.target/powerpc/p9-vec-length-epil-1.c: Do not expect the + single-iteration epilogues of the 64-bit loops to be vectorized. + * gcc.target/powerpc/p9-vec-length-epil-7.c: Likewise. + * gcc.target/powerpc/p9-vec-length-epil-8.c: Likewise. + +2020-09-23 Kyrylo Tkachov + + PR target/71233 + * gcc.target/aarch64/simd/vrndns_f32_1.c: New test. + +2020-09-23 Richard Biener + + PR tree-optimization/97173 + * gcc.dg/vect/pr97173.c: New testcase. + +2020-09-23 Kyrylo Tkachov + + PR target/71233 + * gcc.target/aarch64/simd/trn_zip_p64_1.c: New test. + +2020-09-23 Kyrylo Tkachov + + PR target/71233 + * gcc.target/aarch64/simd/vldrq_p128_1.c: New test. + +2020-09-23 Kyrylo Tkachov + + PR target/71233 + * gcc.target/aarch64/simd/vstrq_p128_1.c: New test. + +2020-09-23 Richard Biener + + PR tree-optimization/97151 + * g++.dg/cpp1y/new1.C: Adjust for two more handled transforms. + +2020-09-23 Martin Liska + + PR gcov-profile/97069 + * g++.dg/gcov/pr97069.C: New test. + +2020-09-23 Tom de Vries + + * gcc.dg/Warray-bounds-63.c: Add require-effective-target alloca. + * gcc.dg/Warray-bounds-66.c: Same. + * gcc.dg/atomic/stdatomic-vm.c: Same. 
+ 2020-09-22 Patrick Palka PR c++/95310 -- cgit v1.1 From 10a83805e047a583348e8bef18b966ecb8eee5d4 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Tue, 22 Sep 2020 20:30:08 -0700 Subject: libgo: update to Go1.15.2 release Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/256618 --- gcc/go/gofrontend/MERGE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 5d26b7e..f51dac5 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -cfee06e20a172753552b1515dd3a4fde5d5cad7b +6a7648c97c3e0cdbecbec7e760b30246521a6d90 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. -- cgit v1.1 From 84fb35466654ec179fa16e718a5014fbe9f41357 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Wed, 23 Sep 2020 06:55:51 -0400 Subject: analyzer: add -fno-analyzer-feasibility This patch provides a new option "-fno-analyzer-feasibility" as a way to disable feasibility-checking of the constraints along the control flow paths for -fanalyzer diagnostics. I'm adding this in the hope of making it easier to debug issues involving the feasibility-checking logic. The patch adds a new rejected_constraint object which is captured if exploded_path::feasible_p fails, and adds logic that uses this to emit an additional custom_event within the checker_path for the diagnostic, showing where in the control flow path the diagnostic would have been rejected, and giving details of why. gcc/analyzer/ChangeLog: * analyzer.h (struct rejected_constraint): New decl. * analyzer.opt (fanalyzer-feasibility): New option. * diagnostic-manager.cc (path_builder::path_builder): Add "problem" param and use it to initialize new field. (path_builder::get_feasibility_problem): New accessor. (path_builder::m_feasibility_problem): New field. (dedupe_winners::add): Remove inversion of logic in "if" clause, swapping if/else suites. 
In the !feasible_p suite, inspect flag_analyzer_feasibility and add code to handle when this is off, accepting the infeasible path, but recording the feasibility_problem. (diagnostic_manager::emit_saved_diagnostic): Pass the feasibility_problem to the path_builder. (diagnostic_manager::add_events_for_eedge): If we have a feasibility_problem at this edge, use it to add a custom event. * engine.cc (exploded_path::feasible_p): Pass a rejected_constraint ** to model.maybe_update_for_edge and transfer ownership of any created instance to any feasibility_problem. (feasibility_problem::dump_to_pp): New. * exploded-graph.h (feasibility_problem::feasibility_problem): Drop "model" param; add rejected_constraint * param. (feasibility_problem::~feasibility_problem): New. (feasibility_problem::dump_to_pp): New decl. (feasibility_problem::m_model): Drop field. (feasibility_problem::m_rc): New field. * program-point.cc (function_point::get_location): Handle PK_BEFORE_SUPERNODE and PK_AFTER_SUPERNODE. * program-state.cc (program_state::on_edge): Pass NULL to new param of region_model::maybe_update_for_edge. * region-model.cc (region_model::add_constraint): New overload adding a rejected_constraint ** param. (region_model::maybe_update_for_edge): Add rejected_constraint ** param and pass it to the various apply_constraints_for_ calls. (region_model::apply_constraints_for_gcond): Add rejected_constraint ** param and pass it to add_constraint calls. (region_model::apply_constraints_for_gswitch): Likewise. (region_model::apply_constraints_for_exception): Likewise. (rejected_constraint::dump_to_pp): New. * region-model.h (region_model::maybe_update_for_edge): Add rejected_constraint ** param. (region_model::add_constraint): New overload adding a rejected_constraint ** param. (region_model::apply_constraints_for_gcond): Add rejected_constraint ** param. (region_model::apply_constraints_for_gswitch): Likewise. (region_model::apply_constraints_for_exception): Likewise. 
(struct rejected_constraint): New. gcc/ChangeLog: * doc/analyzer.texi (Analyzer Paths): Add note about -fno-analyzer-feasibility. * doc/invoke.texi (Static Analyzer Options): Add -fno-analyzer-feasibility. gcc/testsuite/ChangeLog: * gcc.dg/analyzer/feasibility-2.c: New test. --- gcc/analyzer/analyzer.h | 1 + gcc/analyzer/analyzer.opt | 4 ++ gcc/analyzer/diagnostic-manager.cc | 69 ++++++++++++++++++------ gcc/analyzer/engine.cc | 25 +++++++-- gcc/analyzer/exploded-graph.h | 13 +++-- gcc/analyzer/program-point.cc | 8 ++- gcc/analyzer/program-state.cc | 2 +- gcc/analyzer/region-model.cc | 78 ++++++++++++++++++++------- gcc/analyzer/region-model.h | 33 ++++++++++-- gcc/doc/analyzer.texi | 3 +- gcc/doc/invoke.texi | 12 +++++ gcc/testsuite/gcc.dg/analyzer/feasibility-2.c | 20 +++++++ 12 files changed, 219 insertions(+), 49 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/feasibility-2.c (limited to 'gcc') diff --git a/gcc/analyzer/analyzer.h b/gcc/analyzer/analyzer.h index b85edb1..aa43b7f 100644 --- a/gcc/analyzer/analyzer.h +++ b/gcc/analyzer/analyzer.h @@ -71,6 +71,7 @@ class region_model; class region_model_context; class impl_region_model_context; class call_details; +struct rejected_constraint; class constraint_manager; class equiv_class; diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt index 872fb31..a4d3842 100644 --- a/gcc/analyzer/analyzer.opt +++ b/gcc/analyzer/analyzer.opt @@ -126,6 +126,10 @@ fanalyzer-fine-grained Common Var(flag_analyzer_fine_grained) Init(0) Avoid combining multiple statements into one exploded edge. +fanalyzer-feasibility +Common Var(flag_analyzer_feasibility) Init(1) +Verify that paths are feasible when emitting diagnostics. + fanalyzer-show-duplicate-count Common Var(flag_analyzer_show_duplicate_count) Init(0) Issue a note when diagnostics are deduplicated. 
diff --git a/gcc/analyzer/diagnostic-manager.cc b/gcc/analyzer/diagnostic-manager.cc index 8d7e508..13dd3da 100644 --- a/gcc/analyzer/diagnostic-manager.cc +++ b/gcc/analyzer/diagnostic-manager.cc @@ -160,10 +160,12 @@ class path_builder { public: path_builder (const exploded_graph &eg, - const exploded_path &epath) + const exploded_path &epath, + const feasibility_problem *problem) : m_eg (eg), m_diag_enode (epath.get_final_enode ()), - m_reachability (eg, m_diag_enode) + m_reachability (eg, m_diag_enode), + m_feasibility_problem (problem) {} const exploded_node *get_diag_node () const { return m_diag_enode; } @@ -175,6 +177,11 @@ public: const extrinsic_state &get_ext_state () const { return m_eg.get_ext_state (); } + const feasibility_problem *get_feasibility_problem () const + { + return m_feasibility_problem; + } + private: typedef reachability enode_reachability; @@ -185,6 +192,8 @@ private: /* Precompute all enodes from which the diagnostic is reachable. */ enode_reachability m_reachability; + + const feasibility_problem *m_feasibility_problem; }; /* class diagnostic_manager. 
*/ @@ -436,24 +445,38 @@ public: sd->m_snode->m_index); feasibility_problem *p = NULL; - if (!dc->get_path ().feasible_p (logger, &p, m_engine, eg)) + if (dc->get_path ().feasible_p (logger, &p, m_engine, eg)) { if (logger) - logger->log ("rejecting %qs at EN: %i, SN: %i" - " due to infeasible path", + logger->log ("accepting %qs at EN: %i, SN: %i with feasible path", sd->m_d->get_kind (), sd->m_enode->m_index, sd->m_snode->m_index); - sd->set_infeasible (p); - delete dc; - return; + sd->set_feasible (); } else - if (logger) - logger->log ("accepting %qs at EN: %i, SN: %i with feasible path", - sd->m_d->get_kind (), sd->m_enode->m_index, - sd->m_snode->m_index); - - sd->set_feasible (); + { + if (flag_analyzer_feasibility) + { + if (logger) + logger->log ("rejecting %qs at EN: %i, SN: %i" + " due to infeasible path", + sd->m_d->get_kind (), sd->m_enode->m_index, + sd->m_snode->m_index); + sd->set_infeasible (p); + delete dc; + return; + } + else + { + if (logger) + logger->log ("accepting %qs at EN: %i, SN: %i" + " despite infeasible path (due to %qs)", + sd->m_d->get_kind (), sd->m_enode->m_index, + sd->m_snode->m_index, + "-fno-analyzer-feasibility"); + sd->set_infeasible (p); + } + } dedupe_key *key = new dedupe_key (*sd, dc->get_path ()); if (dedupe_candidate **slot = m_map.get (key)) @@ -598,7 +621,7 @@ diagnostic_manager::emit_saved_diagnostic (const exploded_graph &eg, pretty_printer *pp = global_dc->printer->clone (); /* Precompute all enodes from which the diagnostic is reachable. */ - path_builder pb (eg, epath); + path_builder pb (eg, epath, sd.get_feasibility_problem ()); /* This is the diagnostic_path subclass that will be built for the diagnostic. 
*/ @@ -1043,6 +1066,22 @@ diagnostic_manager::add_events_for_eedge (const path_builder &pb, } break; } + + if (pb.get_feasibility_problem () + && &pb.get_feasibility_problem ()->m_eedge == &eedge) + { + pretty_printer pp; + pp_format_decoder (&pp) = default_tree_printer; + pp_string (&pp, + "this path would have been rejected as infeasible" + " at this edge: "); + pb.get_feasibility_problem ()->dump_to_pp (&pp); + emission_path->add_event (new custom_event + (dst_point.get_location (), + dst_point.get_fndecl (), + dst_stack_depth, + pp_formatted_text (&pp))); + } } /* Return true if EEDGE is a significant edge in the path to the diagnostic diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index b36c198..aa43e4c 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -3284,7 +3284,8 @@ exploded_path::feasible_p (logger *logger, feasibility_problem **out, sedge->get_description (false)); const gimple *last_stmt = src_point.get_supernode ()->get_last_stmt (); - if (!model.maybe_update_for_edge (*sedge, last_stmt, NULL)) + rejected_constraint *rc = NULL; + if (!model.maybe_update_for_edge (*sedge, last_stmt, NULL, &rc)) { if (logger) { @@ -3292,8 +3293,10 @@ exploded_path::feasible_p (logger *logger, feasibility_problem **out, model.dump_to_pp (logger->get_printer (), true, false); } if (out) - *out = new feasibility_problem (edge_idx, model, *eedge, - last_stmt); + *out = new feasibility_problem (edge_idx, *eedge, + last_stmt, rc); + else + delete rc; return false; } } @@ -3399,6 +3402,22 @@ exploded_path::dump () const dump (stderr); } +/* class feasibility_problem. 
*/ + +void +feasibility_problem::dump_to_pp (pretty_printer *pp) const +{ + pp_printf (pp, "edge from EN: %i to EN: %i", + m_eedge.m_src->m_index, m_eedge.m_dest->m_index); + if (m_rc) + { + pp_string (pp, "; rejected constraint: "); + m_rc->dump_to_pp (pp); + pp_string (pp, "; rmodel: "); + m_rc->m_model.dump_to_pp (pp, true, false); + } +} + /* A family of cluster subclasses for use when generating .dot output for exploded graphs (-fdump-analyzer-exploded-graph), for grouping the enodes into hierarchical boxes. diff --git a/gcc/analyzer/exploded-graph.h b/gcc/analyzer/exploded-graph.h index f723d52b..a6ca4b9 100644 --- a/gcc/analyzer/exploded-graph.h +++ b/gcc/analyzer/exploded-graph.h @@ -880,17 +880,20 @@ class feasibility_problem { public: feasibility_problem (unsigned eedge_idx, - const region_model &model, const exploded_edge &eedge, - const gimple *last_stmt) - : m_eedge_idx (eedge_idx), m_model (model), m_eedge (eedge), - m_last_stmt (last_stmt) + const gimple *last_stmt, + rejected_constraint *rc) + : m_eedge_idx (eedge_idx), m_eedge (eedge), + m_last_stmt (last_stmt), m_rc (rc) {} + ~feasibility_problem () { delete m_rc; } + + void dump_to_pp (pretty_printer *pp) const; unsigned m_eedge_idx; - region_model m_model; const exploded_edge &m_eedge; const gimple *m_last_stmt; + rejected_constraint *m_rc; }; /* Finding the shortest exploded_path within an exploded_graph. 
*/ diff --git a/gcc/analyzer/program-point.cc b/gcc/analyzer/program-point.cc index 429d6ec..0aadd73 100644 --- a/gcc/analyzer/program-point.cc +++ b/gcc/analyzer/program-point.cc @@ -199,8 +199,12 @@ function_point::get_location () const const gimple *stmt = get_stmt (); if (stmt) return stmt->location; - - return UNKNOWN_LOCATION; + if (m_kind == PK_BEFORE_SUPERNODE) + return m_supernode->get_start_location (); + else if (m_kind == PK_AFTER_SUPERNODE) + return m_supernode->get_end_location (); + else + return UNKNOWN_LOCATION; } /* Create a function_point representing the entrypoint of function FUN. */ diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc index 188fec0..78b87d5 100644 --- a/gcc/analyzer/program-state.cc +++ b/gcc/analyzer/program-state.cc @@ -872,7 +872,7 @@ program_state::on_edge (exploded_graph &eg, last_stmt); if (!m_region_model->maybe_update_for_edge (*succ, last_stmt, - &ctxt)) + &ctxt, NULL)) { logger * const logger = eg.get_logger (); if (logger) diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 74a96b0..981fb77 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -1810,6 +1810,20 @@ region_model::add_constraint (tree lhs, enum tree_code op, tree rhs, return true; } +/* As above, but when returning false, if OUT is non-NULL, write a + new rejected_constraint to *OUT. */ + +bool +region_model::add_constraint (tree lhs, enum tree_code op, tree rhs, + region_model_context *ctxt, + rejected_constraint **out) +{ + bool sat = add_constraint (lhs, op, rhs, ctxt); + if (!sat && out) + *out = new rejected_constraint (*this, lhs, op, rhs); + return sat; +} + /* Subroutine of region_model::add_constraint for handling optimized && and || conditionals. @@ -2188,6 +2202,8 @@ region_model::update_for_phis (const supernode *snode, /* Attempt to update this model for taking EDGE (where the last statement was LAST_STMT), returning true if the edge can be taken, false otherwise. 
+ When returning false, if OUT is non-NULL, write a new rejected_constraint + to it. For CFG superedges where LAST_STMT is a conditional or a switch statement, attempt to add the relevant conditions for EDGE to this @@ -2207,7 +2223,8 @@ region_model::update_for_phis (const supernode *snode, bool region_model::maybe_update_for_edge (const superedge &edge, const gimple *last_stmt, - region_model_context *ctxt) + region_model_context *ctxt, + rejected_constraint **out) { /* Handle frame updates for interprocedural edges. */ switch (edge.m_kind) @@ -2247,20 +2264,21 @@ region_model::maybe_update_for_edge (const superedge &edge, if (const gcond *cond_stmt = dyn_cast (last_stmt)) { const cfg_superedge *cfg_sedge = as_a (&edge); - return apply_constraints_for_gcond (*cfg_sedge, cond_stmt, ctxt); + return apply_constraints_for_gcond (*cfg_sedge, cond_stmt, ctxt, out); } if (const gswitch *switch_stmt = dyn_cast (last_stmt)) { const switch_cfg_superedge *switch_sedge = as_a (&edge); - return apply_constraints_for_gswitch (*switch_sedge, switch_stmt, ctxt); + return apply_constraints_for_gswitch (*switch_sedge, switch_stmt, + ctxt, out); } /* Apply any constraints due to an exception being thrown. */ if (const cfg_superedge *cfg_sedge = dyn_cast (&edge)) if (cfg_sedge->get_flags () & EDGE_EH) - return apply_constraints_for_exception (last_stmt, ctxt); + return apply_constraints_for_exception (last_stmt, ctxt, out); return true; } @@ -2338,12 +2356,15 @@ region_model::update_for_call_summary (const callgraph_superedge &cg_sedge, If they are feasible, add the constraints and return true. Return false if the constraints contradict existing knowledge - (and so the edge should not be taken). */ + (and so the edge should not be taken). + When returning false, if OUT is non-NULL, write a new rejected_constraint + to it. 
*/ bool region_model::apply_constraints_for_gcond (const cfg_superedge &sedge, const gcond *cond_stmt, - region_model_context *ctxt) + region_model_context *ctxt, + rejected_constraint **out) { ::edge cfg_edge = sedge.get_cfg_edge (); gcc_assert (cfg_edge != NULL); @@ -2354,7 +2375,7 @@ region_model::apply_constraints_for_gcond (const cfg_superedge &sedge, tree rhs = gimple_cond_rhs (cond_stmt); if (cfg_edge->flags & EDGE_FALSE_VALUE) op = invert_tree_comparison (op, false /* honor_nans */); - return add_constraint (lhs, op, rhs, ctxt); + return add_constraint (lhs, op, rhs, ctxt, out); } /* Given an EDGE guarded by SWITCH_STMT, determine appropriate constraints @@ -2363,12 +2384,15 @@ region_model::apply_constraints_for_gcond (const cfg_superedge &sedge, If they are feasible, add the constraints and return true. Return false if the constraints contradict existing knowledge - (and so the edge should not be taken). */ + (and so the edge should not be taken). + When returning false, if OUT is non-NULL, write a new rejected_constraint + to it. */ bool region_model::apply_constraints_for_gswitch (const switch_cfg_superedge &edge, const gswitch *switch_stmt, - region_model_context *ctxt) + region_model_context *ctxt, + rejected_constraint **out) { tree index = gimple_switch_index (switch_stmt); tree case_label = edge.get_case_label (); @@ -2380,13 +2404,13 @@ region_model::apply_constraints_for_gswitch (const switch_cfg_superedge &edge, if (upper_bound) { /* Range. */ - if (!add_constraint (index, GE_EXPR, lower_bound, ctxt)) + if (!add_constraint (index, GE_EXPR, lower_bound, ctxt, out)) return false; - return add_constraint (index, LE_EXPR, upper_bound, ctxt); + return add_constraint (index, LE_EXPR, upper_bound, ctxt, out); } else /* Single-value. 
*/ - return add_constraint (index, EQ_EXPR, lower_bound, ctxt); + return add_constraint (index, EQ_EXPR, lower_bound, ctxt, out); } else { @@ -2406,14 +2430,16 @@ region_model::apply_constraints_for_gswitch (const switch_cfg_superedge &edge, /* Exclude this range-valued case. For now, we just exclude the boundary values. TODO: exclude the values within the region. */ - if (!add_constraint (index, NE_EXPR, other_lower_bound, ctxt)) + if (!add_constraint (index, NE_EXPR, other_lower_bound, + ctxt, out)) return false; - if (!add_constraint (index, NE_EXPR, other_upper_bound, ctxt)) + if (!add_constraint (index, NE_EXPR, other_upper_bound, + ctxt, out)) return false; } else /* Exclude this single-valued case. */ - if (!add_constraint (index, NE_EXPR, other_lower_bound, ctxt)) + if (!add_constraint (index, NE_EXPR, other_lower_bound, ctxt, out)) return false; } return true; @@ -2425,11 +2451,14 @@ region_model::apply_constraints_for_gswitch (const switch_cfg_superedge &edge, If they are feasible, add the constraints and return true. Return false if the constraints contradict existing knowledge - (and so the edge should not be taken). */ + (and so the edge should not be taken). + When returning false, if OUT is non-NULL, write a new rejected_constraint + to it. */ bool region_model::apply_constraints_for_exception (const gimple *last_stmt, - region_model_context *ctxt) + region_model_context *ctxt, + rejected_constraint **out) { gcc_assert (last_stmt); if (const gcall *call = dyn_cast (last_stmt)) @@ -2442,7 +2471,7 @@ region_model::apply_constraints_for_exception (const gimple *last_stmt, leak report due to the result being lost when following the EH edge. */ if (tree lhs = gimple_call_lhs (call)) - return add_constraint (lhs, EQ_EXPR, null_pointer_node, ctxt); + return add_constraint (lhs, EQ_EXPR, null_pointer_node, ctxt, out); return true; } return true; @@ -2862,6 +2891,19 @@ debug (const region_model &rmodel) rmodel.dump (false); } +/* struct rejected_constraint. 
*/ + +void +rejected_constraint::dump_to_pp (pretty_printer *pp) const +{ + region_model m (m_model); + const svalue *lhs_sval = m.get_rvalue (m_lhs, NULL); + const svalue *rhs_sval = m.get_rvalue (m_rhs, NULL); + lhs_sval->dump_to_pp (pp, true); + pp_printf (pp, " %s ", op_symbol_code (m_op)); + rhs_sval->dump_to_pp (pp, true); +} + /* class engine. */ /* Dump the managed objects by class to LOGGER, and the per-class totals. */ diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index 1e8a517..a61aff2 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -2586,7 +2586,8 @@ class region_model bool maybe_update_for_edge (const superedge &edge, const gimple *last_stmt, - region_model_context *ctxt); + region_model_context *ctxt, + rejected_constraint **out); const region *push_frame (function *fun, const vec *arg_sids, region_model_context *ctxt); @@ -2630,6 +2631,9 @@ class region_model region_model_context *ctxt); bool add_constraint (tree lhs, enum tree_code op, tree rhs, region_model_context *ctxt); + bool add_constraint (tree lhs, enum tree_code op, tree rhs, + region_model_context *ctxt, + rejected_constraint **out); const region *create_region_for_heap_alloc (const svalue *size_in_bytes); const region *create_region_for_alloca (const svalue *size_in_bytes); @@ -2699,12 +2703,15 @@ class region_model region_model_context *ctxt); bool apply_constraints_for_gcond (const cfg_superedge &edge, const gcond *cond_stmt, - region_model_context *ctxt); + region_model_context *ctxt, + rejected_constraint **out); bool apply_constraints_for_gswitch (const switch_cfg_superedge &edge, const gswitch *switch_stmt, - region_model_context *ctxt); + region_model_context *ctxt, + rejected_constraint **out); bool apply_constraints_for_exception (const gimple *last_stmt, - region_model_context *ctxt); + region_model_context *ctxt, + rejected_constraint **out); int poison_any_pointers_to_descendents (const region *reg, enum poison_kind 
pkind); @@ -2851,6 +2858,24 @@ struct model_merger region_model *m_merged_model; }; +/* A record that can (optionally) be written out when + region_model::add_constraint fails. */ + +struct rejected_constraint +{ + rejected_constraint (const region_model &model, + tree lhs, enum tree_code op, tree rhs) + : m_model (model), m_lhs (lhs), m_op (op), m_rhs (rhs) + {} + + void dump_to_pp (pretty_printer *pp) const; + + region_model m_model; + tree m_lhs; + enum tree_code m_op; + tree m_rhs; +}; + /* A bundle of state. */ class engine diff --git a/gcc/doc/analyzer.texi b/gcc/doc/analyzer.texi index 6b7d70c..96fe9bb 100644 --- a/gcc/doc/analyzer.texi +++ b/gcc/doc/analyzer.texi @@ -329,7 +329,8 @@ we only emit the simplest path (which could be intraprocedural, if it can be reproduced without a caller). We apply a check that each duplicate warning's shortest path is feasible, rejecting any warnings for which the shortest path is infeasible (which could lead to -false negatives). +false negatives). This check can be suppressed (for debugging purposes) +using @option{-fno-analyzer-feasibility}. We use the shortest feasible @code{exploded_path} through the @code{exploded_graph} (a list of @code{exploded_edge *}) to build a diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index c17e5c6..75203ba 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -393,6 +393,7 @@ Objective-C and Objective-C++ Dialects}. -fanalyzer @gol -fanalyzer-call-summaries @gol -fanalyzer-checker=@var{name} @gol +-fno-analyzer-feasibility @gol -fanalyzer-fine-grained @gol -fanalyzer-state-merge @gol -fanalyzer-state-purge @gol @@ -8993,6 +8994,17 @@ such as the @code{taint} checker that implements @option{-Wanalyzer-tainted-array-index}, and this option is required to enable them. +@item -fno-analyzer-feasibility +@opindex fanalyzer-feasibility +@opindex fno-analyzer-feasibility +This option is intended for analyzer developers. 
+ +By default the analyzer verifies that there is a feasible control flow path +for each diagnostic it emits: that the conditions that hold are not mutually +exclusive. Diagnostics for which no feasible path can be found are rejected. +This filtering can be suppressed with @option{-fno-analyzer-feasibility}, for +debugging issues in this code. + @item -fanalyzer-fine-grained @opindex fanalyzer-fine-grained @opindex fno-analyzer-fine-grained diff --git a/gcc/testsuite/gcc.dg/analyzer/feasibility-2.c b/gcc/testsuite/gcc.dg/analyzer/feasibility-2.c new file mode 100644 index 0000000..9fe62d2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/feasibility-2.c @@ -0,0 +1,20 @@ +/* Verify that -fno-analyzer-feasibility works. */ +/* { dg-additional-options "-fno-analyzer-feasibility" } */ + +#include "analyzer-decls.h" + +void test_1 (int flag) +{ + int a; + if (flag) + a = 1; + else + a = 2; + + if (a == 1) /* (can only be the case when "flag" was true above). */ + if (!flag) + { + __analyzer_dump_path (); /* { dg-message "note: path" "path diag" } */ + /* { dg-message "infeasible" "infeasibility event" { target *-*-* } .-1 } */ + } +} -- cgit v1.1 From 6b828f69519a50e6e2961b62ea552bf89d287199 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 19 Dec 2019 16:15:09 -0500 Subject: analyzer: add testcases for PR 93355 (intl/localealias.c leak) PR analyzer/93355 reports a missing diagnostic about a FILE leak in intl/localealias.c. This appears to be due to an issue in the feasibility-checking code, though there is also a state explosion. This patch adds test cases that I've been using when investigating this, two of them currently requiring -fno-analyzer-feasibility, and one currently requiring -Wno-analyzer-too-complex. gcc/testsuite/ChangeLog: PR analyzer/93355 * gcc.dg/analyzer/pr93355-localealias-feasibility.c: New test. * gcc.dg/analyzer/pr93355-localealias-simplified.c: New test. * gcc.dg/analyzer/pr93355-localealias.c: New test. 
--- .../analyzer/pr93355-localealias-feasibility.c | 79 +++++ .../analyzer/pr93355-localealias-simplified.c | 45 +++ .../gcc.dg/analyzer/pr93355-localealias.c | 390 +++++++++++++++++++++ 3 files changed, 514 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-feasibility.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-simplified.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr93355-localealias.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-feasibility.c b/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-feasibility.c new file mode 100644 index 0000000..0d470d6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-feasibility.c @@ -0,0 +1,79 @@ +/* Simplified version of test to ensure we issue a FILE * leak diagnostic, + reproducing a feasibility issue. + Adapted from intl/localealias.c, with all #includes removed. */ + +/* { dg-do "compile" } */ +/* { dg-additional-options "-fno-analyzer-feasibility" } */ +/* TODO: remove the need for this option. */ + +/* Handle aliases for locale names. + Copyright (C) 1995-1999, 2000-2001, 2003 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, + USA. */ + +/* Minimal version of system headers. 
*/ + +typedef __SIZE_TYPE__ size_t; +#define NULL ((void *)0) + +typedef struct _IO_FILE FILE; +extern FILE *fopen (const char *__restrict __filename, + const char *__restrict __modes); +extern int fclose (FILE *__stream); + +extern int isspace (int) __attribute__((__nothrow__, __leaf__)); + +/* Cleaned-up body of localealias.c follows. */ + +size_t +read_alias_file (const char *fname, int fname_len) +{ + FILE *fp; + size_t added; + char buf[400]; + char *alias; + char *value; + char *cp; + + fp = fopen (fname, "r"); /* { dg-message "opened here" } */ + if (fp == NULL) + return 0; + + cp = buf; + + /* Ignore leading white space. */ + while (isspace ((unsigned char)cp[0])) + ++cp; + + if (cp[0] != '\0' && cp[0] != '#') + { + alias = cp++; + while (cp[0] != '\0' && !isspace ((unsigned char)cp[0])) + ++cp; + if (cp[0] != '\0') + *cp++ = '\0'; + + while (isspace ((unsigned char)cp[0])) + ++cp; + + if (cp[0] != '\0') + return 42; /* { dg-warning "leak of FILE 'fp'" } */ + } + + fclose(fp); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-simplified.c b/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-simplified.c new file mode 100644 index 0000000..6f65add --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias-simplified.c @@ -0,0 +1,45 @@ +/* Simplified version of test for ensuring we issue a FILE * leak diagnostic, + made trivial. + Adapted from intl/localealias.c, with all #includes removed. */ + +/* { dg-do "compile" } */ + +/* Handle aliases for locale names. + Copyright (C) 1995-1999, 2000-2001, 2003 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, + USA. */ + +/* Minimal version of system headers. */ +#define NULL ((void *) 0) +typedef struct _IO_FILE FILE; +extern FILE *fopen(const char *__restrict __filename, + const char *__restrict __modes); +extern int fclose(FILE *__stream); + +void +read_alias_file (int flag) +{ + FILE *fp; + + fp = fopen ("name", "r"); /* { dg-message "opened here" } */ + if (fp == NULL) + return; + + if (flag) + return; /* { dg-warning "leak of FILE 'fp'" } */ + + fclose (fp); +} diff --git a/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias.c b/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias.c new file mode 100644 index 0000000..a5cb0d5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias.c @@ -0,0 +1,390 @@ +/* Integration test to ensure we issue a FILE * leak diagnostic for + this particular non-trivial case. + Adapted from intl/localealias.c, with all #includes removed. */ + +/* { dg-do "compile" } */ +/* { dg-additional-options "-Wno-analyzer-too-complex -fno-analyzer-feasibility" } */ +/* TODO: remove the need for these options. */ + +/* Handle aliases for locale names. + Copyright (C) 1995-1999, 2000-2001, 2003 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, + USA. */ + +/* Minimal version of system headers. */ + +typedef __SIZE_TYPE__ size_t; +#define NULL ((void *) 0) + +#define PATH_SEPARATOR ':' +typedef struct _IO_FILE FILE; +extern FILE *fopen(const char *__restrict __filename, + const char *__restrict __modes); +extern int feof_unlocked(FILE *__stream) __attribute__((__nothrow__, __leaf__)); +extern char *fgets_unlocked(char *__restrict __s, int __n, + FILE *__restrict __stream); +extern int fclose(FILE *__stream); + +#define alloca __builtin_alloca + +extern char *strchr(const char *__s, int __c) + __attribute__((__nothrow__, __leaf__)) __attribute__((__pure__)) + __attribute__((__nonnull__(1))); +extern void *memcpy(void *__restrict __dest, const void *__restrict __src, + size_t __n) __attribute__((__nothrow__, __leaf__)) + __attribute__((__nonnull__(1, 2))); +extern void *mempcpy(void *__restrict __dest, const void *__restrict __src, + size_t __n) __attribute__((__nothrow__, __leaf__)) + __attribute__((__nonnull__(1, 2))); +#define HAVE_MEMPCPY 1 +extern size_t strlen(const char *__s) __attribute__((__nothrow__, __leaf__)) + __attribute__((__pure__)) __attribute__((__nonnull__(1))); + +extern int strcasecmp(const char *__s1, const char *__s2) + __attribute__((__nothrow__, __leaf__)) __attribute__((__pure__)) + __attribute__((__nonnull__(1, 2))); + +extern int isspace(int) __attribute__((__nothrow__, __leaf__)); + +extern void *realloc(void *__ptr, size_t __size) + __attribute__((__nothrow__, __leaf__)) + __attribute__((__warn_unused_result__)); + 
+typedef int (*__compar_fn_t)(const void *, const void *); +extern void *bsearch(const void *__key, const void *__base, size_t __nmemb, + size_t __size, __compar_fn_t __compar) + __attribute__((__nonnull__(1, 2, 5))); + +extern __inline __attribute__((__gnu_inline__)) void * +bsearch(const void *__key, const void *__base, size_t __nmemb, size_t __size, + __compar_fn_t __compar) { + size_t __l, __u, __idx; + const void *__p; + int __comparison; + + __l = 0; + __u = __nmemb; + while (__l < __u) { + __idx = (__l + __u) / 2; + __p = (void *)(((const char *)__base) + (__idx * __size)); + __comparison = (*__compar)(__key, __p); + if (__comparison < 0) + __u = __idx; + else if (__comparison > 0) + __l = __idx + 1; + else + return (void *)__p; + } + + return ((void *)0); +} + +extern void qsort(void *__base, size_t __nmemb, size_t __size, + __compar_fn_t __compar) __attribute__((__nonnull__(1, 4))); + +/* Minimal version of intl headers. */ + +#define PARAMS(args) args + +#define relocate libintl_relocate +extern const char *libintl_relocate(const char *pathname); + +#define LOCALE_ALIAS_PATH "value for LOCALE_ALIAS_PATH" + +/* Cleaned-up body of localealias.c follows. */ + +#ifndef internal_function +# define internal_function +#endif + +/* Some optimizations for glibc. */ +# define FEOF(fp) feof_unlocked (fp) +# define FGETS(buf, n, fp) fgets_unlocked (buf, n, fp) + +/* For those losing systems which don't have `alloca' we have to add + some additional code emulating it. */ +# define freea(p) /* nothing */ + +struct alias_map +{ + const char *alias; + const char *value; +}; + +# define libc_freeres_ptr(decl) decl + +libc_freeres_ptr (static char *string_space); +static size_t string_space_act; +static size_t string_space_max; +libc_freeres_ptr (static struct alias_map *map); +static size_t nmap; +static size_t maxmap; + + +/* Prototypes for local functions. 
*/ +static size_t read_alias_file PARAMS ((const char *fname, int fname_len)) + internal_function; +static int extend_alias_table PARAMS ((void)); +static int alias_compare PARAMS ((const struct alias_map *map1, + const struct alias_map *map2)); + + +const char * +_nl_expand_alias (name) + const char *name; +{ + static const char *locale_alias_path; + struct alias_map *retval; + const char *result = NULL; + size_t added; + +#ifdef _LIBC + __libc_lock_lock (lock); +#endif + + if (locale_alias_path == NULL) + locale_alias_path = LOCALE_ALIAS_PATH; + + do + { + struct alias_map item; + + item.alias = name; + + if (nmap > 0) + retval = (struct alias_map *) bsearch (&item, map, nmap, + sizeof (struct alias_map), + (int (*) PARAMS ((const void *, + const void *)) + ) alias_compare); + else + retval = NULL; + + /* We really found an alias. Return the value. */ + if (retval != NULL) + { + result = retval->value; + break; + } + + /* Perhaps we can find another alias file. */ + added = 0; + while (added == 0 && locale_alias_path[0] != '\0') + { + const char *start; + + while (locale_alias_path[0] == PATH_SEPARATOR) + ++locale_alias_path; + start = locale_alias_path; + + while (locale_alias_path[0] != '\0' + && locale_alias_path[0] != PATH_SEPARATOR) + ++locale_alias_path; + + if (start < locale_alias_path) + added = read_alias_file (start, locale_alias_path - start); + } + } + while (added != 0); + +#ifdef _LIBC + __libc_lock_unlock (lock); +#endif + + return result; +} + + +static size_t +internal_function +read_alias_file (fname, fname_len) + const char *fname; + int fname_len; +{ + FILE *fp; + char *full_fname; + size_t added; + static const char aliasfile[] = "/locale.alias"; + + full_fname = (char *) alloca (fname_len + sizeof aliasfile); +#ifdef HAVE_MEMPCPY + mempcpy (mempcpy (full_fname, fname, fname_len), + aliasfile, sizeof aliasfile); +#else + memcpy (full_fname, fname, fname_len); + memcpy (&full_fname[fname_len], aliasfile, sizeof aliasfile); +#endif + + fp = 
fopen (relocate (full_fname), "r"); /* { dg-message "opened here" } */ + freea (full_fname); + if (fp == NULL) + return 0; + +#ifdef HAVE___FSETLOCKING + /* No threads present. */ + __fsetlocking (fp, FSETLOCKING_BYCALLER); +#endif + + added = 0; + while (!FEOF (fp)) + { + /* It is a reasonable approach to use a fix buffer here because + a) we are only interested in the first two fields + b) these fields must be usable as file names and so must not + be that long + We avoid a multi-kilobyte buffer here since this would use up + stack space which we might not have if the program ran out of + memory. */ + char buf[400]; + char *alias; + char *value; + char *cp; + + if (FGETS (buf, sizeof buf, fp) == NULL) + /* EOF reached. */ + break; + + cp = buf; + /* Ignore leading white space. */ + while (isspace ((unsigned char) cp[0])) + ++cp; + + /* A leading '#' signals a comment line. */ + if (cp[0] != '\0' && cp[0] != '#') + { + alias = cp++; + while (cp[0] != '\0' && !isspace ((unsigned char) cp[0])) + ++cp; + /* Terminate alias name. */ + if (cp[0] != '\0') + *cp++ = '\0'; + + /* Now look for the beginning of the value. */ + while (isspace ((unsigned char) cp[0])) + ++cp; + + if (cp[0] != '\0') + { + size_t alias_len; + size_t value_len; + + value = cp++; + while (cp[0] != '\0' && !isspace ((unsigned char) cp[0])) + ++cp; + /* Terminate value. */ + if (cp[0] == '\n') + { + /* This has to be done to make the following test + for the end of line possible. We are looking for + the terminating '\n' which do not overwrite here. */ + *cp++ = '\0'; + *cp = '\n'; + } + else if (cp[0] != '\0') + *cp++ = '\0'; + + if (nmap >= maxmap) + if (__builtin_expect (extend_alias_table (), 0)) + return added; /* { dg-warning "leak of FILE 'fp'" } */ + + alias_len = strlen (alias) + 1; + value_len = strlen (value) + 1; + + if (string_space_act + alias_len + value_len > string_space_max) + { + /* Increase size of memory pool. 
*/ + size_t new_size = (string_space_max + + (alias_len + value_len > 1024 + ? alias_len + value_len : 1024)); + char *new_pool = (char *) realloc (string_space, new_size); + if (new_pool == NULL) + return added; + + if (__builtin_expect (string_space != new_pool, 0)) + { + size_t i; + + for (i = 0; i < nmap; i++) + { + map[i].alias += new_pool - string_space; + map[i].value += new_pool - string_space; + } + } + + string_space = new_pool; + string_space_max = new_size; + } + + map[nmap].alias = memcpy (&string_space[string_space_act], + alias, alias_len); + string_space_act += alias_len; + + map[nmap].value = memcpy (&string_space[string_space_act], + value, value_len); + string_space_act += value_len; + + ++nmap; + ++added; + } + } + + /* Possibly not the whole line fits into the buffer. Ignore + the rest of the line. */ + while (strchr (buf, '\n') == NULL) + if (FGETS (buf, sizeof buf, fp) == NULL) + /* Make sure the inner loop will be left. The outer loop + will exit at the `feof' test. */ + break; + } + + /* Should we test for ferror()? I think we have to silently ignore + errors. --drepper */ + fclose (fp); + + if (added > 0) + qsort (map, nmap, sizeof (struct alias_map), + (int (*) PARAMS ((const void *, const void *))) alias_compare); + + return added; +} + + +static int +extend_alias_table () +{ + size_t new_size; + struct alias_map *new_map; + + new_size = maxmap == 0 ? 100 : 2 * maxmap; + new_map = (struct alias_map *) realloc (map, (new_size + * sizeof (struct alias_map))); + if (new_map == NULL) + /* Simply don't extend: we don't have any more core. 
*/ + return -1; + + map = new_map; + maxmap = new_size; + return 0; +} + + +static int +alias_compare (map1, map2) + const struct alias_map *map1; + const struct alias_map *map2; +{ + return strcasecmp (map1->alias, map2->alias); +} -- cgit v1.1 From 199baa71f7a6c8f255afdda03cd3b2644f8f0fcd Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Fri, 18 Sep 2020 23:03:11 +0930 Subject: [RS6000] PR97107, libgo fails to build for power10 Calls from split-stack code to non-split-stack code need to expand mapped stack memory via __morestack. Even tail calls. __morestack is quite a surprising function on powerpc in that it calls back to its caller, and a tail call will continue running in the context of extra mapped stack. PR target/97107 * config/rs6000/rs6000-internal.h (struct rs6000_stack): Improve calls_p comment. * config/rs6000/rs6000-logue.c (rs6000_stack_info): Likewise. (rs6000_expand_split_stack_prologue): Emit the prologue for functions that make a sibling call. --- gcc/config/rs6000/rs6000-internal.h | 2 +- gcc/config/rs6000/rs6000-logue.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-internal.h b/gcc/config/rs6000/rs6000-internal.h index 9caef01..32681b6 100644 --- a/gcc/config/rs6000/rs6000-internal.h +++ b/gcc/config/rs6000/rs6000-internal.h @@ -32,7 +32,7 @@ typedef struct rs6000_stack { int cr_save_p; /* true if the CR reg needs to be saved */ unsigned int vrsave_mask; /* mask of vec registers to save */ int push_p; /* true if we need to allocate stack space */ - int calls_p; /* true if the function makes any calls */ + int calls_p; /* true if there are non-sibling calls */ int world_save_p; /* true if we're saving *everything*: r13-r31, cr, f14-f31, vrsave, v20-v31 */ enum rs6000_abi abi; /* which ABI to use */ diff --git a/gcc/config/rs6000/rs6000-logue.c b/gcc/config/rs6000/rs6000-logue.c index 0f88ec1..d90cd57 100644 --- a/gcc/config/rs6000/rs6000-logue.c +++ 
b/gcc/config/rs6000/rs6000-logue.c @@ -714,7 +714,7 @@ rs6000_stack_info (void) info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save); - /* Does this function call anything? */ + /* Does this function call anything (apart from sibling calls)? */ info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame); /* Determine if we need to save the condition code registers. */ @@ -5479,7 +5479,18 @@ rs6000_expand_split_stack_prologue (void) gcc_assert (flag_split_stack && reload_completed); if (!info->push_p) - return; + { + /* We need the -fsplit-stack prologue for functions that make + tail calls. Tail calls don't count against crtl->is_leaf. + Note that we are called inside a sequence. get_insns will + just return that (as yet empty) sequence, so instead we + access the function rtl with get_topmost_sequence. */ + for (insn = get_topmost_sequence ()->first; insn; insn = NEXT_INSN (insn)) + if (CALL_P (insn)) + break; + if (!insn) + return; + } if (global_regs[29]) { -- cgit v1.1 From 677b9150f54a0483d3de1182ac40717b7c4431a5 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Wed, 23 Sep 2020 20:15:39 +0930 Subject: [RS6000] Built-in __PCREL__ define Useful in assembly to know details of power10 function calls. * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Conditionally define __PCREL__. --- gcc/config/rs6000/rs6000-c.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index f5982907..cc1e997 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -597,6 +597,9 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, /* Tell the user if we support the MMA instructions. */ if ((flags & OPTION_MASK_MMA) != 0) rs6000_define_or_undefine_macro (define_p, "__MMA__"); + /* Whether pc-relative code is being generated. 
*/ + if ((flags & OPTION_MASK_PCREL) != 0) + rs6000_define_or_undefine_macro (define_p, "__PCREL__"); } void -- cgit v1.1 From 0075cea0fdccc2efb781f4f3c924007de37a3417 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Wed, 1 Apr 2020 13:34:47 +1030 Subject: [RS6000] Count rldimi constant insns rldimi is generated by rs6000_emit_set_long_const when the high and low 32 bits of a 64-bit constant are equal. PR target/93012 * config/rs6000/rs6000.c (num_insns_constant_gpr): Count rldimi constants correctly. --- gcc/config/rs6000/rs6000.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6f204ca..5f4e292 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5731,7 +5731,7 @@ direct_return (void) /* Helper for num_insns_constant. Calculate number of instructions to load VALUE to a single gpr using combinations of addi, addis, ori, - oris and sldi instructions. */ + oris, sldi and rldimi instructions. */ static int num_insns_constant_gpr (HOST_WIDE_INT value) @@ -5759,7 +5759,7 @@ num_insns_constant_gpr (HOST_WIDE_INT value) high >>= 1; - if (low == 0) + if (low == 0 || low == high) return num_insns_constant_gpr (high) + 1; else if (high == 0) return num_insns_constant_gpr (low) + 1; -- cgit v1.1 From 34cd84890422f79c37cbcbcc594028bb852bcc67 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Thu, 18 Jun 2015 20:19:55 +0930 Subject: [RS6000] rs6000_rtx_costs for PLUS/MINUS constant These functions do behave a little differently for SImode, so the mode should be passed. * config/rs6000/rs6000.c (rs6000_rtx_costs): Pass mode to reg_or_add_cint_operand and reg_or_sub_cint_operand. 
--- gcc/config/rs6000/rs6000.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 5f4e292..375fff5 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -21176,9 +21176,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, return true; } else if ((outer_code == PLUS - && reg_or_add_cint_operand (x, VOIDmode)) + && reg_or_add_cint_operand (x, mode)) || (outer_code == MINUS - && reg_or_sub_cint_operand (x, VOIDmode)) + && reg_or_sub_cint_operand (x, mode)) || ((outer_code == SET || outer_code == IOR || outer_code == XOR) -- cgit v1.1 From 5427bd4d57c0376e51fc7b256e76aa46c43aa8cf Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Thu, 24 Sep 2020 00:40:47 -0500 Subject: test: Adjust case p9-vec-length-full-6.c [PR97075] The commit r11-3230 brings a nice improvement to use full vectors instead of partial vectors when available. This patch is to fix the test failures on p9-vec-length-full-6.c, where 64bit/32bit pairs are able to use full vector instead. Bootstrapped/regtested on powerpc64le-linux-gnu P9. gcc/testsuite/ChangeLog: PR tree-optimization/97075 * gcc.target/powerpc/p9-vec-length-full-6.c: Adjust. --- gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c index cfae9bb..5d2357a 100644 --- a/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c +++ b/gcc/testsuite/gcc.target/powerpc/p9-vec-length-full-6.c @@ -9,8 +9,7 @@ #include "p9-vec-length-6.h" /* It can use normal vector load for constant vector load. 
*/ -/* { dg-final { scan-assembler-not {\mstxv\M} } } */ -/* { dg-final { scan-assembler-not {\mlxvx\M} } } */ -/* { dg-final { scan-assembler-not {\mstxvx\M} } } */ -/* { dg-final { scan-assembler-times {\mlxvl\M} 16 } } */ -/* { dg-final { scan-assembler-times {\mstxvl\M} 16 } } */ +/* { dg-final { scan-assembler-times {\mstxvx?\M} 6 } } */ +/* 64bit/32bit pairs won't use partial vectors. */ +/* { dg-final { scan-assembler-times {\mlxvl\M} 10 } } */ +/* { dg-final { scan-assembler-times {\mstxvl\M} 10 } } */ -- cgit v1.1 From 2d5fb576bdda843da47aea2e025bb5d45e883827 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Thu, 24 Sep 2020 08:02:29 +0200 Subject: [testsuite] Check target alias in builtin-has-attribute-3.c When running test-case c-c++-common/builtin-has-attribute-3.c on nvptx, I get: ... FAIL: c-c++-common/builtin-has-attribute-3.c -Wc++-compat \ (test for excess errors) Excess errors: src/gcc/testsuite/c-c++-common/builtin-has-attribute-3.c:33:33: error: \ alias definitions not supported in this configuration ... Fix this by adding -DSKIP_ALIAS to the compilation options for effective target ! alias. Tested on nvptx. gcc/testsuite/ChangeLog: * c-c++-common/builtin-has-attribute-3.c: Compile with -DSKIP_ALIAS for effective target ! alias. 
--- gcc/testsuite/c-c++-common/builtin-has-attribute-3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/c-c++-common/builtin-has-attribute-3.c b/gcc/testsuite/c-c++-common/builtin-has-attribute-3.c index 5736bab..45806d5 100644 --- a/gcc/testsuite/c-c++-common/builtin-has-attribute-3.c +++ b/gcc/testsuite/c-c++-common/builtin-has-attribute-3.c @@ -2,7 +2,7 @@ { dg-do compile } { dg-options "-Wall -ftrack-macro-expansion=0" } { dg-options "-Wall -Wno-narrowing -Wno-unused-local-typedefs -ftrack-macro-expansion=0" { target c++ } } - { dg-additional-options "-DSKIP_ALIAS" { target *-*-darwin* hppa*-*-hpux* } } + { dg-additional-options "-DSKIP_ALIAS" { target { { *-*-darwin* hppa*-*-hpux } || { ! alias } } } } */ #define ATTR(...) __attribute__ ((__VA_ARGS__)) -- cgit v1.1 From c9da53d6987af5f8ff68b58dd76a9fbc900a6a21 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 24 Sep 2020 08:28:09 +0200 Subject: Fix memory allocations in ipa-modref. Pair ggc_delete with ggc_alloc_no_dtor. I copy same scheme as used by Martin in ipa-fnsummary, that is creating a static member function create_ggc hidding the ugly bits and using it in ipa-modref.c. I also noticed that modref-tree leaks memory on destruction/collapse method and fixed that. Bootstrapped/regtested x86_64-linux. gcc/ChangeLog: 2020-09-24 Jan Hubicka * ipa-modref-tree.h (modref_base::collapse): Release memory. (modref_tree::create_ggc): New member function. (modref_tree::colapse): Release memory. (modref_tree::~modref_tree): New destructor. * ipa-modref.c (modref_summaries::create_ggc): New function. (analyze_function): Use create_ggc. (modref_summaries::duplicate): Likewise. (read_modref_records): Likewise. (modref_read): Likewise. 
--- gcc/ipa-modref-tree.h | 36 ++++++++++++++++++++++++-- gcc/ipa-modref.c | 71 ++++++++++++++++++++++----------------------------- 2 files changed, 65 insertions(+), 42 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-modref-tree.h b/gcc/ipa-modref-tree.h index 3bdd305..82e959a 100644 --- a/gcc/ipa-modref-tree.h +++ b/gcc/ipa-modref-tree.h @@ -95,7 +95,15 @@ struct GTY((user)) modref_base_node void collapse () { - vec_free (refs); + size_t i; + modref_ref_node *r; + + if (refs) + { + FOR_EACH_VEC_SAFE_ELT (refs, i, r) + ggc_free (r); + vec_free (refs); + } refs = NULL; every_ref = true; } @@ -214,12 +222,36 @@ struct GTY((user)) modref_tree return NULL; } + /* Return ggc allocated instance. We explicitly call destructors via + ggc_delete and do not want finalizers to be registered and + called at the garbage collection time. */ + static modref_tree *create_ggc (size_t max_bases, size_t max_refs) + { + return new (ggc_alloc_no_dtor> ()) + modref_tree (max_bases, max_refs); + } + void collapse () { - vec_free (bases); + size_t i; + modref_base_node *n; + + if (bases) + { + FOR_EACH_VEC_SAFE_ELT (bases, i, n) + { + n->collapse (); + ggc_free (n); + } + vec_free (bases); + } bases = NULL; every_base = true; } + ~modref_tree () + { + collapse (); + } }; void modref_c_tests (); diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 9cc9056..43545c1 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -84,6 +84,11 @@ public: ipa-modref pass execution needs to be analyzed in IPA mode while all other insertions leads to normal analysis. */ bool ipa; + static modref_summaries *create_ggc (symbol_table *symtab) + { + return new (ggc_alloc_no_dtor ()) + modref_summaries (symtab); + } }; /* Global variable holding all modref summaries. */ @@ -608,8 +613,7 @@ analyze_function (function *f, bool ipa) /* Initialize the summary. 
*/ if (!summaries) - summaries = new (ggc_alloc ()) - modref_summaries (symtab); + summaries = modref_summaries::create_ggc (symtab); else /* Remove existing summary if we are re-running the pass. */ summaries->remove (cgraph_node::get (f->decl)); @@ -633,28 +637,22 @@ analyze_function (function *f, bool ipa) if (nolto) { gcc_assert (!summary->loads); - summary->loads - = new (ggc_alloc > ()) - modref_records (param_modref_max_bases, - param_modref_max_refs); + summary->loads = modref_records::create_ggc (param_modref_max_bases, + param_modref_max_refs); gcc_assert (!summary->stores); - summary->stores - = new (ggc_alloc > ()) - modref_records (param_modref_max_bases, - param_modref_max_refs); + summary->stores = modref_records::create_ggc (param_modref_max_bases, + param_modref_max_refs); } if (lto) { gcc_assert (!summary->loads_lto); - summary->loads_lto - = new (ggc_alloc > ()) - modref_records_lto (param_modref_max_bases, - param_modref_max_refs); + summary->loads_lto = modref_records_lto::create_ggc + (param_modref_max_bases, + param_modref_max_refs); gcc_assert (!summary->stores_lto); - summary->stores_lto - = new (ggc_alloc > ()) - modref_records_lto (param_modref_max_bases, - param_modref_max_refs); + summary->stores_lto = modref_records_lto::create_ggc + (param_modref_max_bases, + param_modref_max_refs); } summary->finished = false; int ecf_flags = flags_from_decl_or_type (current_function_decl); @@ -730,34 +728,30 @@ modref_summaries::duplicate (cgraph_node *, cgraph_node *, dst_data->finished = src_data->finished; if (src_data->stores) { - dst_data->stores = new (ggc_alloc > ()) - modref_records - (src_data->stores->max_bases, - src_data->stores->max_refs); + dst_data->stores = modref_records::create_ggc + (src_data->stores->max_bases, + src_data->stores->max_refs); dst_data->stores->merge (src_data->stores); } if (src_data->loads) { - dst_data->loads = new (ggc_alloc > ()) - modref_records - (src_data->loads->max_bases, - src_data->loads->max_refs); + 
dst_data->loads = modref_records::create_ggc + (src_data->loads->max_bases, + src_data->loads->max_refs); dst_data->loads->merge (src_data->loads); } if (src_data->stores_lto) { - dst_data->stores_lto = new (ggc_alloc > ()) - modref_records_lto - (src_data->stores_lto->max_bases, - src_data->stores_lto->max_refs); + dst_data->stores_lto = modref_records_lto::create_ggc + (src_data->stores_lto->max_bases, + src_data->stores_lto->max_refs); dst_data->stores_lto->merge (src_data->stores_lto); } if (src_data->loads_lto) { - dst_data->loads_lto = new (ggc_alloc > ()) - modref_records_lto - (src_data->stores_lto->max_bases, - src_data->stores_lto->max_refs); + dst_data->loads_lto = modref_records_lto::create_ggc + (src_data->loads_lto->max_bases, + src_data->loads_lto->max_refs); dst_data->loads_lto->merge (src_data->loads_lto); } } @@ -838,11 +832,9 @@ read_modref_records (lto_input_block *ib, struct data_in *data_in, /* Decide whether we want to turn LTO data types to non-LTO (i.e. when LTO re-streaming is not going to happen). 
*/ if (flag_wpa || flag_incremental_link == INCREMENTAL_LINK_LTO) - *lto_ret = new (ggc_alloc ()) modref_records_lto - (max_bases, max_refs); + *lto_ret = modref_records_lto::create_ggc (max_bases, max_refs); else - *nolto_ret = new (ggc_alloc ()) modref_records - (max_bases, max_refs); + *nolto_ret = modref_records::create_ggc (max_bases, max_refs); size_t every_base = streamer_read_uhwi (ib); size_t nbase = streamer_read_uhwi (ib); @@ -1048,8 +1040,7 @@ modref_read (void) unsigned int j = 0; if (!summaries) - summaries = new (ggc_alloc ()) - modref_summaries (symtab); + summaries = modref_summaries::create_ggc (symtab); ((modref_summaries *)summaries)->ipa = true; while ((file_data = file_data_vec[j++])) -- cgit v1.1 From 7e437162001f258c8db4eae25da3bca812dd557a Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Thu, 24 Sep 2020 10:03:10 +0200 Subject: [testsuite] Require non_strict_align in pr94600-{1,3}.c With the nvptx target, we run into: ... FAIL: gcc.dg/pr94600-1.c scan-rtl-dump-times final "\\(mem/v" 6 FAIL: gcc.dg/pr94600-1.c scan-rtl-dump-times final "\\(set \\(mem/v" 6 FAIL: gcc.dg/pr94600-3.c scan-rtl-dump-times final "\\(mem/v" 1 FAIL: gcc.dg/pr94600-3.c scan-rtl-dump-times final "\\(set \\(mem/v" 1 ... The scans attempt to check for volatile stores, but on nvptx we have memcpy instead. This is due to nvptx being a STRICT_ALIGNMENT target, which has the effect that the TYPE_MODE for the store target is set to BKLmode in compute_record_mode. Fix the FAILs by requiring effective target non_strict_align. Tested on nvptx. gcc/testsuite/ChangeLog: 2020-09-24 Tom de Vries * gcc.dg/pr94600-1.c: Require effective target non_strict_align for scan-rtl-dump-times. * gcc.dg/pr94600-3.c: Same. 
--- gcc/testsuite/gcc.dg/pr94600-1.c | 4 ++-- gcc/testsuite/gcc.dg/pr94600-3.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/pr94600-1.c b/gcc/testsuite/gcc.dg/pr94600-1.c index b5913a0..38f939a 100644 --- a/gcc/testsuite/gcc.dg/pr94600-1.c +++ b/gcc/testsuite/gcc.dg/pr94600-1.c @@ -32,5 +32,5 @@ foo(void) } /* The only volatile accesses should be the obvious writes. */ -/* { dg-final { scan-rtl-dump-times {\(mem/v} 6 "final" } } */ -/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 6 "final" } } */ +/* { dg-final { scan-rtl-dump-times {\(mem/v} 6 "final" { target { non_strict_align } } } } */ +/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 6 "final" { target { non_strict_align } } } } */ diff --git a/gcc/testsuite/gcc.dg/pr94600-3.c b/gcc/testsuite/gcc.dg/pr94600-3.c index 7537f6c..e8776fb 100644 --- a/gcc/testsuite/gcc.dg/pr94600-3.c +++ b/gcc/testsuite/gcc.dg/pr94600-3.c @@ -31,5 +31,5 @@ foo(void) } /* The loop isn't unrolled. */ -/* { dg-final { scan-rtl-dump-times {\(mem/v} 1 "final" } } */ -/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 1 "final" } } */ +/* { dg-final { scan-rtl-dump-times {\(mem/v} 1 "final" { target { non_strict_align } } } } */ +/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 1 "final" { target { non_strict_align } } } } */ -- cgit v1.1 From 10843f8303509fcba880c6c05c08e4b4ccd24f36 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 24 Sep 2020 10:14:33 +0200 Subject: tree-optimization/97085 - fold some trivial bool vector ?: The following aovids the ICE in the testcase by doing some additional simplification of VEC_COND_EXPRs for VECTOR_BOOLEAN_TYPE_P which we don't really expect, esp. when they are not classical vectors, thus AVX512 or SVE masks. 2020-09-24 Richard Biener PR tree-optimization/97085 * match.pd (mask ? { false,..} : { true, ..} -> ~mask): New. * gcc.dg/vect/pr97085.c: New testcase. 
--- gcc/match.pd | 11 +++++++++++ gcc/testsuite/gcc.dg/vect/pr97085.c | 13 +++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/pr97085.c (limited to 'gcc') diff --git a/gcc/match.pd b/gcc/match.pd index 7d63bb9..e6dcdd0 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3521,6 +3521,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (if (optimize_vectors_before_lowering_p () && types_match (@0, @1)) (vec_cond (bit_and (bit_not @0) @1) @2 @3))) +/* Canonicalize mask ? { 0, ... } : { -1, ...} to ~mask if the mask + types are compatible. */ +(simplify + (vec_cond @0 VECTOR_CST@1 VECTOR_CST@2) + (if (VECTOR_BOOLEAN_TYPE_P (type) + && types_match (type, TREE_TYPE (@0))) + (if (integer_zerop (@1) && integer_all_onesp (@2)) + (bit_not @0) + (if (integer_all_onesp (@1) && integer_zerop (@2)) + @0)))) + /* Simplification moved from fold_cond_expr_with_comparison. It may also be extended. */ /* This pattern implements two kinds simplification: diff --git a/gcc/testsuite/gcc.dg/vect/pr97085.c b/gcc/testsuite/gcc.dg/vect/pr97085.c new file mode 100644 index 0000000..ffde9f1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97085.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv8.2-a+sve" { target aarch64-*-* } } */ + +int a, b, c, d; +short e, g; +unsigned short f; +void h() { + for (; d; d++) { + g = d; + e = b == 0 ? 1 : a % b; + c ^= (f = e) > (g == 5); + } +} -- cgit v1.1 From e94797250b403d66cb3624a594e41faf0dd76617 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 24 Sep 2020 10:06:11 +0100 Subject: arm: Fix canary address calculation for non-PIC For non-PIC, the stack protector patterns did: rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0); emit_move_insn (operands[2], mem); Here, operands[1] is the address of the canary (&__stack_chk_guard) and operands[2] is the register that we want to move that address into. 
However, the code above instead sets operands[2] to the address of a constant pool entry that contains &__stack_chk_guard, rather than to &__stack_chk_guard itself. The sequence therefore does one less pointer indirection than it should. The net effect was to use &__stack_chk_guard for stack-smash detection, instead of using __stack_chk_guard itself. gcc/ * config/arm/arm.md (*stack_protect_combined_set_insn): For non-PIC, load the address of the canary rather than the address of the constant pool entry that points to it. (*stack_protect_combined_test_insn): Likewise. gcc/testsuite/ * gcc.target/arm/stack-protector-3.c: New test. * gcc.target/arm/stack-protector-4.c: Likewise. --- gcc/config/arm/arm.md | 4 +-- gcc/testsuite/gcc.target/arm/stack-protector-3.c | 38 ++++++++++++++++++++++++ gcc/testsuite/gcc.target/arm/stack-protector-4.c | 6 ++++ 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/stack-protector-3.c create mode 100644 gcc/testsuite/gcc.target/arm/stack-protector-4.c (limited to 'gcc') diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index bffdb0b..c4fa116 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -9212,7 +9212,7 @@ operands[2] = operands[1]; else { - rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0); + rtx mem = force_const_mem (SImode, operands[1]); emit_move_insn (operands[2], mem); } } @@ -9295,7 +9295,7 @@ operands[3] = operands[1]; else { - rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0); + rtx mem = force_const_mem (SImode, operands[1]); emit_move_insn (operands[3], mem); } } diff --git a/gcc/testsuite/gcc.target/arm/stack-protector-3.c b/gcc/testsuite/gcc.target/arm/stack-protector-3.c new file mode 100644 index 0000000..b8f77fa --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/stack-protector-3.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fstack_protector } */ +/* { dg-options "-fstack-protector-all -O2" } */ + 
+extern volatile long *stack_chk_guard_ptr; + +void __attribute__ ((noipa)) +f (void) +{ + volatile int x; + /* Munging the contents of __stack_chk_guard should trigger a + stack-smashing failure for this function. */ + *stack_chk_guard_ptr += 1; +} + +asm ( +" .data\n" +" .align 3\n" +" .globl stack_chk_guard_ptr\n" +"stack_chk_guard_ptr:\n" +" .word __stack_chk_guard\n" +" .weak __stack_chk_guard\n" +"__stack_chk_guard:\n" +" .word 0xdead4321\n" +" .text\n" +" .type __stack_chk_fail, %function\n" +"__stack_chk_fail:\n" +" movs r0, #0\n" +" b exit\n" +" .size __stack_chk_fail, .-__stack_chk_fail" +); + +int +main (void) +{ + f (); + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/arm/stack-protector-4.c b/gcc/testsuite/gcc.target/arm/stack-protector-4.c new file mode 100644 index 0000000..6334dd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/stack-protector-4.c @@ -0,0 +1,6 @@ +/* { dg-do run } */ +/* { dg-require-effective-target fstack_protector } */ +/* { dg-require-effective-target fpic } */ +/* { dg-options "-fstack-protector-all -O2 -fpic" } */ + +#include "stack-protector-3.c" -- cgit v1.1 From 59c8329389751ec39985a89a673768b4d4fdb338 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 24 Sep 2020 10:06:11 +0100 Subject: arm: Add a couple of extra stack-protector tests These tests were inspired by corresponding aarch64 ones. They already pass. gcc/testsuite/ * gcc.target/arm/stack-protector-5.c: New test. * gcc.target/arm/stack-protector-6.c: Likewise. 
--- gcc/testsuite/gcc.target/arm/stack-protector-5.c | 21 +++++++++++++++++++++ gcc/testsuite/gcc.target/arm/stack-protector-6.c | 8 ++++++++ 2 files changed, 29 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arm/stack-protector-5.c create mode 100644 gcc/testsuite/gcc.target/arm/stack-protector-6.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/arm/stack-protector-5.c b/gcc/testsuite/gcc.target/arm/stack-protector-5.c new file mode 100644 index 0000000..b808b11 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/stack-protector-5.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-fstack-protector-all -O2" } */ + +void __attribute__ ((noipa)) +f (void) +{ + volatile int x; + asm volatile ("" ::: + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r14"); +} + +/* The register clobbers above should not generate any single LDRs or STRs; + all registers should be pushed and popped using register lists. The only + STRs should therefore be those associated with the stack protector tests + themselves. + + Make sure the address of the canary is not spilled and reloaded, + since that would give the attacker an opportunity to change the + canary value. */ +/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/stack-protector-6.c b/gcc/testsuite/gcc.target/arm/stack-protector-6.c new file mode 100644 index 0000000..f8eec87 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/stack-protector-6.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target fpic } */ +/* { dg-options "-fstack-protector-all -O2 -fpic" } */ + +#include "stack-protector-5.c" + +/* See the comment in stack-protector-5.c. 
*/ +/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */ -- cgit v1.1 From fe28d34079aad7d3bf8d9bfd78d0ba43110b7906 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Thu, 24 Sep 2020 12:22:13 +0200 Subject: [testsuite, nvptx] Fix string matching in gcc.dg/pr87314-1.c with nvptx we run into: ... FAIL: gcc.dg/pr87314-1.c scan-assembler hellooo ... The required string is part of the assembly, just in a different format than expected: ... .const .align 1 .u8 $LC0[12] = { 104, 101, 108, 108, 111, 111, 111, 111, 98, 121, 101, 0 }; ... Fix this by adding an nvptx-specific scan-assembler directive. Tested on nvptx and x86_64. gcc/testsuite/ChangeLog: 2020-09-24 Tom de Vries * gcc.dg/pr87314-1.c: Add nvptx-specific scan-assembler directive. --- gcc/testsuite/gcc.dg/pr87314-1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/pr87314-1.c b/gcc/testsuite/gcc.dg/pr87314-1.c index 9bc9056..0cb9c07 100644 --- a/gcc/testsuite/gcc.dg/pr87314-1.c +++ b/gcc/testsuite/gcc.dg/pr87314-1.c @@ -8,4 +8,6 @@ int h() { return "bye"=="helloooobye"+8; } /* { dg-final { scan-tree-dump-times "hello" 1 "original" } } */ /* The test in h() should be retained because the result depends on string merging. */ -/* { dg-final { scan-assembler "hellooo" } } */ +/* { dg-final { scan-assembler "hellooo" { target { ! nvptx*-*-* } } } } */ +/* { dg-final { scan-assembler "104, 101, 108, 108, 111, 111, 111" { target { nvptx*-*-* } } } } */ + -- cgit v1.1 From e86a02f87d8a11480c1421ef2dd71b8b5f43d938 Mon Sep 17 00:00:00 2001 From: Paul Thomas Date: Thu, 24 Sep 2020 11:52:30 +0100 Subject: This patch fixes PR96495 - frees result components outside loop. 2020-24-09 Paul Thomas gcc/fortran PR fortran/96495 * trans-expr.c (gfc_conv_procedure_call): Take the deallocation of allocatable result components of a scalar result outside the scalarization loop. Find and use the stored result. 
gcc/testsuite/ PR fortran/96495 * gfortran.dg/alloc_comp_result_2.f90 : New test. --- gcc/fortran/trans-expr.c | 26 ++++++- gcc/testsuite/gfortran.dg/alloc_comp_result_2.f90 | 93 +++++++++++++++++------ 2 files changed, 95 insertions(+), 24 deletions(-) (limited to 'gcc') diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index 36ff9b5..a690839 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -6421,6 +6421,26 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, if (!finalized && !e->must_finalize) { + bool scalar_res_outside_loop; + scalar_res_outside_loop = e->expr_type == EXPR_FUNCTION + && parm_rank == 0 + && parmse.loop; + + if (scalar_res_outside_loop) + { + /* Go through the ss chain to find the argument and use + the stored value. */ + gfc_ss *tmp_ss = parmse.loop->ss; + for (; tmp_ss; tmp_ss = tmp_ss->next) + if (tmp_ss->info + && tmp_ss->info->expr == e + && tmp_ss->info->data.scalar.value != NULL_TREE) + { + tmp = tmp_ss->info->data.scalar.value; + break; + } + } + if ((e->ts.type == BT_CLASS && GFC_CLASS_TYPE_P (TREE_TYPE (tmp))) || e->ts.type == BT_DERIVED) @@ -6429,7 +6449,11 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, else if (e->ts.type == BT_CLASS) tmp = gfc_deallocate_alloc_comp (CLASS_DATA (e)->ts.u.derived, tmp, parm_rank); - gfc_prepend_expr_to_block (&post, tmp); + + if (scalar_res_outside_loop) + gfc_add_expr_to_block (&parmse.loop->post, tmp); + else + gfc_prepend_expr_to_block (&post, tmp); } } diff --git a/gcc/testsuite/gfortran.dg/alloc_comp_result_2.f90 b/gcc/testsuite/gfortran.dg/alloc_comp_result_2.f90 index 89ff5ac..6b09187 100644 --- a/gcc/testsuite/gfortran.dg/alloc_comp_result_2.f90 +++ b/gcc/testsuite/gfortran.dg/alloc_comp_result_2.f90 @@ -1,28 +1,75 @@ ! { dg-do run } -! Tests the fix for PR40440, in which gfortran tried to deallocate -! the allocatable components of the actual argument of CALL SUB ! -! Contributed by Juergen Reuter -! 
Reduced testcase from Tobias Burnus +! Test the fix for PR96495 - segfaults at runtime at locations below. ! +! Contributed by Paul Luckner +! +module foo_m + implicit none - type t - integer, allocatable :: A(:) - end type t - type (t) :: arg - arg = t ([1,2,3]) - call sub (func (arg)) + + type foo + integer, allocatable :: j(:) + end type + + interface operator(.unary.) + module procedure neg_foo + end interface + + interface operator(.binary.) + module procedure foo_sub_foo + end interface + + interface operator(.binaryElemental.) + module procedure foo_add_foo + end interface + contains - function func (a) - type(t), pointer :: func - type(t), target :: a - integer, save :: i = 0 - if (i /= 0) STOP 1! multiple calls would cause this abort - i = i + 1 - func => a - end function func - subroutine sub (a) - type(t), intent(IN), target :: a - if (any (a%A .ne. [1,2,3])) STOP 2 - end subroutine sub -end + + elemental function foo_add_foo(f, g) result(h) + !! an example for an elemental binary operator + type(foo), intent(in) :: f, g + type(foo) :: h + + allocate (h%j(size(f%j)), source = f%j+g%j) + end function + + elemental function foo_sub_foo(f, g) result(h) + !! an example for an elemental binary operator + type(foo), intent(in) :: f, g + type(foo) :: h + + allocate (h%j(size(f%j)), source = f%j-3*g%j) + end function + + pure function neg_foo(f) result(g) + !! an example for a unary operator + type(foo), intent(in) :: f + type(foo) :: g + + allocate (g%j(size(f%j)), source = -f%j) + end function + +end module + +program main_tmp + + use foo_m + + implicit none + + type(foo) f, g(2) + + allocate (f%j(3)) + f%j = [2, 3, 4] + + g = f + if (any (g(2)%j .ne. [2, 3, 4])) stop 1 + + g = g .binaryElemental. (f .binary. f) ! threw "Segmentation fault" + if (any (g(2)%j .ne. [-2,-3,-4])) stop 2 + + g = g .binaryElemental. ( .unary. f) ! threw "Segmentation fault" + if (any (g(2)%j .ne. 
[-4,-6,-8])) stop 3 + +end program \ No newline at end of file -- cgit v1.1 From a8d5c28233f95e3474ee8cbc4d341cbb43ab7bb6 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 24 Sep 2020 13:27:49 +0200 Subject: target/97192 - new testcase for fixed PR This adds another testcase for the PR97085 fix. 2020-09-24 Richard Biener PR tree-optimization/97085 * gcc.dg/pr97192.c: New testcase. --- gcc/testsuite/gcc.dg/pr97192.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/pr97192.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/pr97192.c b/gcc/testsuite/gcc.dg/pr97192.c new file mode 100644 index 0000000..16647ca --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr97192.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O -ftracer" } */ +/* { dg-additional-options "-mavx512vl" { target x86_64-*-* i?86-*-* } } */ + +typedef int __attribute__ ((__vector_size__ (32))) V; + +int a, b; +V v; + +int +foo (void) +{ + b -= 4 - !a; + V u = 0 != v == a; + return u[0]; +} -- cgit v1.1 From c4d283b142790063693be2d517f0209ce2f84499 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Thu, 24 Sep 2020 13:30:11 +0200 Subject: [testsuite] Scan final instead of asm in independent-cloneids-1.c When running test-case gcc.dg/independent-cloneids-1.c for nvptx, we get: ... FAIL: scan-assembler-times (?n)^_*bar[.$_]constprop[.$_]0: 1 FAIL: scan-assembler-times (?n)^_*bar[.$_]constprop[.$_]1: 1 FAIL: scan-assembler-times (?n)^_*bar[.$_]constprop[.$_]2: 1 FAIL: scan-assembler-times (?n)^_*foo[.$_]constprop[.$_]0: 1 FAIL: scan-assembler-times (?n)^_*foo[.$_]constprop[.$_]1: 1 FAIL: scan-assembler-times (?n)^_*foo[.$_]constprop[.$_]2: 1 ... The test expects to find something like: ... bar.constprop.0: ... but instead on nvptx we have: ... .func (.param.u32 %value_out) bar$constprop$0 ... Fix this by rewriting the scans to use the final dump instead. Tested on x86_64. 
gcc/testsuite/ChangeLog: 2020-09-24 Tom de Vries * gcc.dg/independent-cloneids-1.c: Use scan-rtl-dump instead of scan-assembler. --- gcc/testsuite/gcc.dg/independent-cloneids-1.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/independent-cloneids-1.c b/gcc/testsuite/gcc.dg/independent-cloneids-1.c index 516211a..efbc1c5 100644 --- a/gcc/testsuite/gcc.dg/independent-cloneids-1.c +++ b/gcc/testsuite/gcc.dg/independent-cloneids-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3 -fipa-cp -fipa-cp-clone" } */ +/* { dg-options "-O3 -fipa-cp -fipa-cp-clone -fdump-rtl-final" } */ /* { dg-skip-if "Odd label definition syntax" { mmix-*-* } } */ extern int printf (const char *, ...); @@ -29,11 +29,11 @@ baz (int arg) return foo (8); } -/* { dg-final { scan-assembler-times {(?n)^_*bar[.$_]constprop[.$_]0:} 1 } } */ -/* { dg-final { scan-assembler-times {(?n)^_*bar[.$_]constprop[.$_]1:} 1 } } */ -/* { dg-final { scan-assembler-times {(?n)^_*bar[.$_]constprop[.$_]2:} 1 } } */ -/* { dg-final { scan-assembler-times {(?n)^_*foo[.$_]constprop[.$_]0:} 1 } } */ -/* { dg-final { scan-assembler-times {(?n)^_*foo[.$_]constprop[.$_]1:} 1 } } */ -/* { dg-final { scan-assembler-times {(?n)^_*foo[.$_]constprop[.$_]2:} 1 } } */ -/* { dg-final { scan-assembler-not {(?n)^_*foo[.$_]constprop[.$_]3:} } } */ -/* { dg-final { scan-assembler-not {(?n)^_*foo[.$_]constprop[.$_]4:} } } */ +/* { dg-final { scan-rtl-dump-times {(?n)^;; Function bar.constprop \(bar[.$_]constprop[.$_]0,} 1 "final" } } */ +/* { dg-final { scan-rtl-dump-times {(?n)^;; Function bar.constprop \(bar[.$_]constprop[.$_]1,} 1 "final" } } */ +/* { dg-final { scan-rtl-dump-times {(?n)^;; Function bar.constprop \(bar[.$_]constprop[.$_]2,} 1 "final" } } */ +/* { dg-final { scan-rtl-dump-times {(?n)^;; Function foo.constprop \(foo[.$_]constprop[.$_]0,} 1 "final" } } */ +/* { dg-final { scan-rtl-dump-times {(?n)^;; Function foo.constprop 
\(foo[.$_]constprop[.$_]1,} 1 "final" } } */ +/* { dg-final { scan-rtl-dump-times {(?n)^;; Function foo.constprop \(foo[.$_]constprop[.$_]2,} 1 "final" } } */ +/* { dg-final { scan-rtl-dump-times {(?n)^;; Function foo.constprop \(foo[.$_]constprop[.$_]3,} 0 "final" } } */ +/* { dg-final { scan-rtl-dump-times {(?n)^;; Function foo.constprop \(foo[.$_]constprop[.$_]4,} 0 "final" } } */ -- cgit v1.1 From 329851416e698f4455b3021e297a13c248273618 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Thu, 24 Sep 2020 14:07:42 +0200 Subject: [testsuite, nvptx] Fix gcc.dg/tls/thr-cse-1.c With nvptx, we run into: ... FAIL: gcc.dg/tls/thr-cse-1.c scan-assembler-not \ emutls_get_address.*emutls_get_address.* ... because the nvptx assembly looks like: ... call (%value_in), __emutls_get_address, (%out_arg1); ... // BEGIN GLOBAL FUNCTION DECL: __emutls_get_address .extern .func (.param.u64 %value_out) __emutls_get_address (.param.u64 %in_ar0); ... Fix this by checking the slim final dump instead, where we have just: ... 12: r35:DI=call [`__emutls_get_address'] argc:0 ... gcc/testsuite/ChangeLog: 2020-09-24 Tom de Vries * gcc.dg/tls/thr-cse-1.c: Scan final dump instead of assembly for nvptx. --- gcc/testsuite/gcc.dg/tls/thr-cse-1.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/tls/thr-cse-1.c b/gcc/testsuite/gcc.dg/tls/thr-cse-1.c index 84eedfd..7145671 100644 --- a/gcc/testsuite/gcc.dg/tls/thr-cse-1.c +++ b/gcc/testsuite/gcc.dg/tls/thr-cse-1.c @@ -4,6 +4,7 @@ registers and thus getting the counts wrong. */ /* { dg-additional-options "-mshort-calls" { target epiphany-*-* } } */ /* { dg-require-effective-target tls_emulated } */ +/* { dg-additional-options "-fdump-rtl-final-slim" { target nvptx-*-* } }*/ /* Test that we only get one call to emutls_get_address when CSE is active. 
Note that the var _must_ be initialized for the scan asm @@ -18,10 +19,12 @@ int foo (int b, int c, int d) return a; } -/* { dg-final { scan-assembler-not "emutls_get_address.*emutls_get_address.*" { target { ! { "*-wrs-vxworks" "*-*-darwin8" "hppa*-*-hpux*" "i?86-*-mingw*" "x86_64-*-mingw*" visium-*-* } } } } } */ +/* { dg-final { scan-assembler-not "emutls_get_address.*emutls_get_address.*" { target { ! { "*-wrs-vxworks" "*-*-darwin8" "hppa*-*-hpux*" "i?86-*-mingw*" "x86_64-*-mingw*" visium-*-* nvptx-*-* } } } } } */ /* { dg-final { scan-assembler-not "call\tL___emutls_get_address.stub.*call\tL___emutls_get_address.stub.*" { target "*-*-darwin8" } } } */ /* { dg-final { scan-assembler-not "(b,l|bl) __emutls_get_address.*(b,l|bl) __emutls_get_address.*" { target "hppa*-*-hpux*" } } } */ /* { dg-final { scan-assembler-not "tls_lookup.*tls_lookup.*" { target *-wrs-vxworks } } } */ /* { dg-final { scan-assembler-not "call\t___emutls_get_address.*call\t___emutls_get_address" { target "i?86-*-mingw*" } } } */ /* { dg-final { scan-assembler-not "call\t__emutls_get_address.*call\t__emutls_get_address" { target "x86_64-*-mingw*" } } } */ /* { dg-final { scan-assembler-not "%l __emutls_get_address.*%l __emutls_get_address" { target visium-*-* } } } */ + +/* { dg-final { scan-rtl-dump-times "emutls_get_address" 1 "final" { target nvptx-*-* } } } */ -- cgit v1.1 From c33f474239308d81bf96cfdb2520d25488ad8724 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 24 Sep 2020 15:09:17 +0200 Subject: Add access through parameter derference tracking to modref re-add tracking of accesses which was unfinished in David's patch. At the moment I only implemented tracking of the fact that access is based on derefernece of the parameter (so we track THIS pointers). 
The patch does not implement IPA propagation since it needs a bit more work, which I will post shortly: ipa-fnsummary needs to track when a parameter points to local memory, summaries need to be merged when a function is inlined (because jump functions are), and propagation needs to be turned into iterative dataflow on SCC components. The patch also adds documentation of -fipa-modref and params that was left uncommitted in my branch :(. Even without this change it does lead to a nice increase of disambiguations for the cc1plus build. Alias oracle query stats: refs_may_alias_p: 62758323 disambiguations, 72935683 queries ref_maybe_used_by_call_p: 139511 disambiguations, 63654045 queries call_may_clobber_ref_p: 23502 disambiguations, 29242 queries nonoverlapping_component_refs_p: 0 disambiguations, 37654 queries nonoverlapping_refs_since_match_p: 19417 disambiguations, 55555 must overlaps, 75721 queries aliasing_component_refs_p: 54665 disambiguations, 752449 queries TBAA oracle: 21917926 disambiguations 53054678 queries 15763411 are in alias set 0 10162238 queries asked about the same object 124 queries asked about the same alias set 0 access volatile 3681593 are dependent in the DAG 1529386 are aritificially in conflict with void * Modref stats: modref use: 8311 disambiguations, 32527 queries modref clobber: 742126 disambiguations, 1036986 queries 1987054 tbaa queries (1.916182 per modref query) 125479 base compares (0.121004 per modref query) PTA query stats: pt_solution_includes: 968314 disambiguations, 13609584 queries pt_solutions_intersect: 1019136 disambiguations, 13147139 queries So compared to https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554605.html we get 41% more use disambiguations (with a similar number of queries) and 8% more clobber disambiguations.
For tramp3d: Alias oracle query stats: refs_may_alias_p: 2052256 disambiguations, 2312703 queries ref_maybe_used_by_call_p: 7122 disambiguations, 2089118 queries call_may_clobber_ref_p: 234 disambiguations, 234 queries nonoverlapping_component_refs_p: 0 disambiguations, 4299 queries nonoverlapping_refs_since_match_p: 329 disambiguations, 10200 must overlaps, 10616 queries aliasing_component_refs_p: 857 disambiguations, 34555 queries TBAA oracle: 885546 disambiguations 1677080 queries 132105 are in alias set 0 469030 queries asked about the same object 0 queries asked about the same alias set 0 access volatile 190084 are dependent in the DAG 315 are aritificially in conflict with void * Modref stats: modref use: 426 disambiguations, 1881 queries modref clobber: 10042 disambiguations, 16202 queries 19405 tbaa queries (1.197692 per modref query) 2775 base compares (0.171275 per modref query) PTA query stats: pt_solution_includes: 313908 disambiguations, 526183 queries pt_solutions_intersect: 130510 disambiguations, 416084 queries Here uses decrease by 4 disambiguations and clobbers improve by 3.5%. I think the difference is caused by the fact that gcc has many more alias set 0 accesses originating from gimple and tree unions, as I mentioned in the original mail. After pushing out the IPA propagation I will re-add code to track offsets and sizes that further improve disambiguation. On tramp3d it enables a lot of DSE for structure fields not accessed by uninlined functions. gcc/ * doc/invoke.texi: Document -fipa-modref, ipa-modref-max-bases, ipa-modref-max-refs, ipa-modref-max-accesses, ipa-modref-max-tests. * ipa-modref-tree.c (test_insert_search_collapse): Update. (test_merge): Update. (gt_ggc_mx): New function. * ipa-modref-tree.h (struct modref_access_node): New structure. (struct modref_ref_node): Add every_access and accesses array. (modref_ref_node::modref_ref_node): Update ctor. (modref_ref_node::search): New member function.
(modref_ref_node::collapse): New member function. (modref_ref_node::insert_access): New member function. (modref_base_node::insert_ref): Do not collapse base if ref is 0. (modref_base_node::collapse): Collapse also refs. (modref_tree): Add accesses. (modref_tree::modref_tree): Initialize max_accesses. (modref_tree::insert): Add access parameter. (modref_tree::cleanup): New member function. (modref_tree::merge): Add parm_map; merge accesses. (modref_tree::copy_from): New member function. (modref_tree::create_ggc): Add max_accesses. * ipa-modref.c (dump_access): New function. (dump_records): Dump accesses. (dump_lto_records): Dump accesses. (get_access): New function. (record_access): Record access. (record_access_lto): Record access. (analyze_call): Compute parm_map. (analyze_function): Update construction of modref records. (modref_summaries::duplicate): Likewise; use copy_from. (write_modref_records): Stream accesses. (read_modref_records): Stream accesses. (pass_ipa_modref::execute): Update call of merge. * params.opt (-param=modref-max-accesses): New. * tree-ssa-alias.c (alias_stats): Add modref_baseptr_tests. (dump_alias_stats): Update. (base_may_alias_with_dereference_p): New function. (modref_may_conflict): Check accesses. (ref_maybe_used_by_call_p_1): Update call to modref_may_conflict. (call_may_clobber_ref_p_1): Update call to modref_may_conflict. --- gcc/doc/invoke.texi | 28 ++++++- gcc/ipa-modref-tree.c | 70 +++++++++------- gcc/ipa-modref-tree.h | 216 ++++++++++++++++++++++++++++++++++++++++++----- gcc/ipa-modref.c | 228 ++++++++++++++++++++++++++++++++++++++++---------- gcc/params.opt | 6 +- gcc/tree-ssa-alias.c | 75 +++++++++++------ 6 files changed, 501 insertions(+), 122 deletions(-) (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 75203ba..623dfb8 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -486,7 +486,7 @@ Objective-C and Objective-C++ Dialects}.
-fgcse-sm -fhoist-adjacent-loads -fif-conversion @gol -fif-conversion2 -findirect-inlining @gol -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol --finline-small-functions -fipa-cp -fipa-cp-clone @gol +-finline-small-functions -fipa-modref -fipa-cp -fipa-cp-clone @gol -fipa-bit-cp -fipa-vrp -fipa-pta -fipa-profile -fipa-pure-const @gol -fipa-reference -fipa-reference-addressable @gol -fipa-stack-alignment -fipa-icf -fira-algorithm=@var{algorithm} @gol @@ -9688,6 +9688,7 @@ compilation time. -fif-conversion @gol -fif-conversion2 @gol -finline-functions-called-once @gol +-fipa-modref @gol -fipa-profile @gol -fipa-pure-const @gol -fipa-reference @gol @@ -10783,11 +10784,18 @@ default at any optimization level. @opindex fipa-profile Perform interprocedural profile propagation. The functions called only from cold functions are marked as cold. Also functions executed once (such as -@code{cold}, @code{noreturn}, static constructors or destructors) are identified. Cold -functions and loop less parts of functions executed once are then optimized for -size. +@code{cold}, @code{noreturn}, static constructors or destructors) are +identified. Cold functions and loop less parts of functions executed once are +then optimized for size. Enabled by default at @option{-O} and higher. +@item -fipa-modref +@opindex fipa-modref +Perform interprocedural mod/ref analysis. This optimization analyzes the side +effects of functions (memory locations that are modified or referenced) and +enables better optimization across the function call boundary. This flag is +enabled by default at @option{-O} and higher. + @item -fipa-cp @opindex fipa-cp Perform interprocedural constant propagation. @@ -12764,6 +12772,18 @@ Deeper chains are still handled by late inlining. Probability (in percent) that C++ inline function with comdat visibility are shared across multiple compilation units. 
+@item ipa-modref-max-bases +@item ipa-modref-max-refs +@item ipa-modref-max-accesses +Specifies the maximal number of base pointers, referneces and accesses stored +for a single function by mod/ref analysis. + +@item ipa-modref-max-tests +Specifies the maxmal number of tests alias oracle can perform to disambiguate +memory locations using the mod/ref information. This parameter ought to be +bigger than @option{--param ipa-modref-max-bases} and @option{--param +ipa-modref-max-refs}. + @item profile-func-internal-id A parameter to control whether to use function internal id in profile database lookup. If the value is 0, the compiler uses an id that diff --git a/gcc/ipa-modref-tree.c b/gcc/ipa-modref-tree.c index a84508a..499dc60 100644 --- a/gcc/ipa-modref-tree.c +++ b/gcc/ipa-modref-tree.c @@ -35,12 +35,13 @@ test_insert_search_collapse () { modref_base_node *base_node; modref_ref_node *ref_node; + modref_access_node a = { -1 }; - modref_tree *t = new modref_tree(1, 2); + modref_tree *t = new modref_tree(1, 2, 2); ASSERT_FALSE (t->every_base); /* Insert into an empty tree. */ - t->insert (1, 2); + t->insert (1, 2, a); ASSERT_NE (t->bases, NULL); ASSERT_EQ (t->bases->length (), 1); ASSERT_FALSE (t->every_base); @@ -58,7 +59,7 @@ test_insert_search_collapse () ASSERT_EQ (ref_node->ref, 2); /* Insert when base exists but ref does not. */ - t->insert (1, 3); + t->insert (1, 3, a); ASSERT_NE (t->bases, NULL); ASSERT_EQ (t->bases->length (), 1); ASSERT_EQ (t->search (1), base_node); @@ -71,7 +72,7 @@ test_insert_search_collapse () /* Insert when base and ref exist, but access is not dominated by nor dominates other accesses. */ - t->insert (1, 2); + t->insert (1, 2, a); ASSERT_EQ (t->bases->length (), 1); ASSERT_EQ (t->search (1), base_node); @@ -79,12 +80,12 @@ test_insert_search_collapse () ASSERT_NE (ref_node, NULL); /* Insert when base and ref exist and access is dominated. 
*/ - t->insert (1, 2); + t->insert (1, 2, a); ASSERT_EQ (t->search (1), base_node); ASSERT_EQ (base_node->search (2), ref_node); /* Insert ref to trigger ref list collapse for base 1. */ - t->insert (1, 4); + t->insert (1, 4, a); ASSERT_EQ (t->search (1), base_node); ASSERT_EQ (base_node->refs, NULL); ASSERT_EQ (base_node->search (2), NULL); @@ -92,7 +93,7 @@ test_insert_search_collapse () ASSERT_TRUE (base_node->every_ref); /* Further inserts to collapsed ref list are ignored. */ - t->insert (1, 5); + t->insert (1, 5, a); ASSERT_EQ (t->search (1), base_node); ASSERT_EQ (base_node->refs, NULL); ASSERT_EQ (base_node->search (2), NULL); @@ -100,13 +101,13 @@ test_insert_search_collapse () ASSERT_TRUE (base_node->every_ref); /* Insert base to trigger base list collapse. */ - t->insert (5, 6); + t->insert (5, 6, a); ASSERT_TRUE (t->every_base); ASSERT_EQ (t->bases, NULL); ASSERT_EQ (t->search (1), NULL); /* Further inserts to collapsed base list are ignored. */ - t->insert (7, 8); + t->insert (7, 8, a); ASSERT_TRUE (t->every_base); ASSERT_EQ (t->bases, NULL); ASSERT_EQ (t->search (1), NULL); @@ -117,24 +118,25 @@ test_merge () { modref_tree *t1, *t2; modref_base_node *base_node; - - t1 = new modref_tree(3, 4); - t1->insert (1, 1); - t1->insert (1, 2); - t1->insert (1, 3); - t1->insert (2, 1); - t1->insert (3, 1); - - t2 = new modref_tree(10, 10); - t2->insert (1, 2); - t2->insert (1, 3); - t2->insert (1, 4); - t2->insert (3, 2); - t2->insert (3, 3); - t2->insert (3, 4); - t2->insert (3, 5); - - t1->merge (t2); + modref_access_node a = { -1 }; + + t1 = new modref_tree(3, 4, 1); + t1->insert (1, 1, a); + t1->insert (1, 2, a); + t1->insert (1, 3, a); + t1->insert (2, 1, a); + t1->insert (3, 1, a); + + t2 = new modref_tree(10, 10, 10); + t2->insert (1, 2, a); + t2->insert (1, 3, a); + t2->insert (1, 4, a); + t2->insert (3, 2, a); + t2->insert (3, 3, a); + t2->insert (3, 4, a); + t2->insert (3, 5, a); + + t1->merge (t2, NULL); ASSERT_FALSE (t1->every_base); ASSERT_NE 
(t1->bases, NULL); @@ -222,11 +224,21 @@ void gt_pch_nx (modref_base_node*, gt_pointer_operator, void *) {} void gt_ggc_mx (modref_ref_node* &r) { ggc_test_and_set_mark (r); + if (r->accesses) + { + ggc_test_and_set_mark (r->accesses); + gt_ggc_mx (r->accesses); + } } void gt_ggc_mx (modref_ref_node* &r) { ggc_test_and_set_mark (r); + if (r->accesses) + { + ggc_test_and_set_mark (r->accesses); + gt_ggc_mx (r->accesses); + } if (r->ref) gt_ggc_mx (r->ref); } @@ -236,4 +248,6 @@ void gt_pch_nx (modref_ref_node*) {} void gt_pch_nx (modref_ref_node*, gt_pointer_operator, void *) {} void gt_pch_nx (modref_ref_node*, gt_pointer_operator, void *) {} - +void gt_ggc_mx (modref_access_node &) +{ +} diff --git a/gcc/ipa-modref-tree.h b/gcc/ipa-modref-tree.h index 82e959a..caf5d34 100644 --- a/gcc/ipa-modref-tree.h +++ b/gcc/ipa-modref-tree.h @@ -18,20 +18,101 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ +/* modref_tree represent a decision tree that can be used by alias analysis + oracle to determine whether given memory access can be affected by a function + call. For every function we collect two trees, one for loads and other + for stores. Tree consist of following levels: + + 1) Base: this level represent base alias set of the acecess and refers + to sons (ref nodes). Flag all_refs means that all possible references + are aliasing. + + Because for LTO streaming we need to stream types rahter than alias sets + modref_base_node is implemented as a template. + 2) Ref: this level represent ref alias set and links to acesses unless + all_refs flag is et. + Again ref is an template to allow LTO streaming. + 3) Access: this level represent info about individual accesses. Presently + we record whether access is trhough a dereference of a function parameter +*/ + #ifndef GCC_MODREF_TREE_H #define GCC_MODREF_TREE_H struct ipa_modref_summary; +/* Memory access. 
*/ +struct GTY(()) modref_access_node +{ + /* Index of parameter which specifies the base of access. -1 if base is not + a function parameter. */ + int parm_index; + + /* Return true if access node holds no useful info. */ + bool useful_p () + { + return parm_index != -1; + } +}; template struct GTY((user)) modref_ref_node { T ref; + bool every_access; + vec *accesses; modref_ref_node (T ref): - ref (ref) + ref (ref), + every_access (false), + accesses (NULL) {} + + /* Search REF; return NULL if failed. */ + modref_access_node *search (modref_access_node access) + { + size_t i; + modref_access_node *a; + FOR_EACH_VEC_SAFE_ELT (accesses, i, a) + if (a->parm_index == access.parm_index) + return a; + return NULL; + } + + /* Collapse the tree. */ + void collapse () + { + vec_free (accesses); + accesses = NULL; + every_access = true; + } + + /* Insert access with OFFSET and SIZE. + Collapse tree if it has more than MAX_ACCESSES entries. */ + void insert_access (modref_access_node a, size_t max_accesses) + { + /* If this base->ref pair has no access information, bail out. */ + if (every_access) + return; + + /* Otherwise, insert a node for the ref of the access under the base. */ + modref_access_node *access_node = search (a); + if (access_node) + return; + + /* If this base->ref pair has too many accesses stored, we will clear + all accesses and bail out. */ + if ((accesses && accesses->length () >= max_accesses) + || !a.useful_p ()) + { + if (dump_file && a.useful_p ()) + fprintf (dump_file, + "--param param=modref-max-accesses limit reached\n"); + collapse (); + return; + } + vec_safe_push (accesses, a); + } }; /* Base of an access. */ @@ -67,12 +148,6 @@ struct GTY((user)) modref_base_node if (every_ref) return NULL; - if (!ref) - { - collapse (); - return NULL; - } - /* Otherwise, insert a node for the ref of the access under the base. 
*/ ref_node = search (ref); if (ref_node) @@ -101,7 +176,10 @@ struct GTY((user)) modref_base_node if (refs) { FOR_EACH_VEC_SAFE_ELT (refs, i, r) - ggc_free (r); + { + r->collapse (); + ggc_free (r); + } vec_free (refs); } refs = NULL; @@ -116,12 +194,14 @@ struct GTY((user)) modref_tree vec *, va_gc> *bases; size_t max_bases; size_t max_refs; + size_t max_accesses; bool every_base; - modref_tree (size_t max_bases, size_t max_refs): + modref_tree (size_t max_bases, size_t max_refs, size_t max_accesses): bases (NULL), max_bases (max_bases), max_refs (max_refs), + max_accesses (max_accesses), every_base (false) {} modref_base_node *insert_base (T base) @@ -153,31 +233,92 @@ struct GTY((user)) modref_tree } /* Insert memory access to the tree. */ - void insert (T base, T ref) + void insert (T base, T ref, modref_access_node a) { - modref_base_node *base_node; - - base_node = insert_base (base); - - if (!base && !ref) + /* No useful information tracked; collapse everything. */ + if (!base && !ref && !a.useful_p ()) { collapse (); return; } + + modref_base_node *base_node = insert_base (base); if (!base_node) return; gcc_assert (search (base) != NULL); - base_node->insert_ref (ref, max_refs); + modref_ref_node *ref_node = base_node->insert_ref (ref, max_refs); + + /* No useful ref information and no useful base; collapse everyting. */ if (!base && base_node->every_ref) { collapse (); return; } + if (ref_node) + { + /* No useful ref and access; collapse ref. */ + if (!ref && !a.useful_p ()) + ref_node->collapse (); + else + { + ref_node->insert_access (a, max_accesses); + /* If ref has collapses and there is no useful base; collapse + everything. */ + if (!base && !ref && ref_node->every_access) + collapse (); + } + } } - /* Merge OTHER into the tree. */ - void merge (modref_tree *other) + /* Remove tree branches that are not useful (i.e. they will allways pass). 
*/ + + void cleanup () + { + size_t i, j; + modref_base_node *base_node; + modref_ref_node *ref_node; + + if (!bases) + return; + + for (i = 0; vec_safe_iterate (bases, i, &base_node);) + { + if (base_node->refs) + for (j = 0; vec_safe_iterate (base_node->refs, j, &ref_node);) + { + if (!ref_node->every_access + && (!ref_node->accesses + || !ref_node->accesses->length ())) + { + base_node->refs->unordered_remove (j); + vec_free (ref_node->accesses); + ggc_delete (ref_node); + } + else + j++; + } + if (!base_node->every_ref + && (!base_node->refs || !base_node->refs->length ())) + { + bases->unordered_remove (i); + vec_free (base_node->refs); + ggc_delete (base_node); + } + else + i++; + } + if (bases && !bases->length ()) + { + vec_free (bases); + bases = NULL; + } + } + + /* Merge OTHER into the tree. + PARM_MAP, if non-NULL, maps parm indexes of callee to caller. -2 is used + to signalize that parameter is local and does not need to be tracked. */ + void merge (modref_tree *other, vec *parm_map) { if (!other) return; @@ -187,9 +328,10 @@ struct GTY((user)) modref_tree return; } - size_t i, j; + size_t i, j, k; modref_base_node *base_node, *my_base_node; modref_ref_node *ref_node, *my_ref_node; + modref_access_node *access_node; FOR_EACH_VEC_SAFE_ELT (other->bases, i, base_node) { my_base_node = insert_base (base_node->base); @@ -207,8 +349,36 @@ struct GTY((user)) modref_tree my_ref_node = my_base_node->insert_ref (ref_node->ref, max_refs); if (!my_ref_node) continue; + + if (ref_node->every_access) + { + my_ref_node->collapse (); + continue; + } + FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node) + { + modref_access_node a = *access_node; + if (a.parm_index != -1 && parm_map) + { + if (a.parm_index >= (int)parm_map->length ()) + a.parm_index = -1; + else if ((*parm_map) [a.parm_index] == -2) + continue; + else + a.parm_index = (*parm_map) [a.parm_index]; + } + my_ref_node->insert_access (a, max_accesses); + } } } + if (parm_map) + cleanup (); + } + + /* 
Copy OTHER to THIS. */ + void copy_from (modref_tree *other) + { + merge (other, NULL); } /* Search BASE in tree; return NULL if failed. */ @@ -225,12 +395,14 @@ struct GTY((user)) modref_tree /* Return ggc allocated instance. We explicitly call destructors via ggc_delete and do not want finalizers to be registered and called at the garbage collection time. */ - static modref_tree *create_ggc (size_t max_bases, size_t max_refs) + static modref_tree *create_ggc (size_t max_bases, size_t max_refs, + size_t max_accesses) { return new (ggc_alloc_no_dtor> ()) - modref_tree (max_bases, max_refs); + modref_tree (max_bases, max_refs, max_accesses); } + /* Remove all records and mark tree to alias with everything. */ void collapse () { size_t i; @@ -248,6 +420,8 @@ struct GTY((user)) modref_tree bases = NULL; every_base = true; } + + /* Release memory. */ ~modref_tree () { collapse (); diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 43545c1..aa6929f 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -20,14 +20,8 @@ along with GCC; see the file COPYING3. If not see /* Mod/ref pass records summary about loads and stores performed by the function. This is later used by alias analysis to disambiguate memory - accesses across function calls. The summary has a form of decision tree and - contains: - - - base alias set - and for each: - - ref alias set - - In future more information will be tracked. + accesses across function calls. The summary has a form of decision tree + described in ipa-modref-tree.h. This file contains a tree pass and an IPA pass. Both performs the same analys however tree pass is executed during early and late optimization @@ -144,6 +138,14 @@ modref_summary::useful_p (int ecf_flags) return stores && !loads->every_base; } +/* Dump A to OUT. */ + +static void +dump_access (modref_access_node *a, FILE *out) +{ + fprintf (out, " Parm %i\n", a->parm_index); +} + /* Dump records TT to OUT. 
*/ static void @@ -171,6 +173,15 @@ dump_records (modref_records *tt, FILE *out) FOR_EACH_VEC_SAFE_ELT (n->refs, j, r) { fprintf (out, " Ref %i: alias set %i\n", (int)j, r->ref); + if (r->every_access) + { + fprintf (out, " Every access\n"); + continue; + } + size_t k; + modref_access_node *a; + FOR_EACH_VEC_SAFE_ELT (r->accesses, k, a) + dump_access (a, out); } } } @@ -208,6 +219,15 @@ dump_lto_records (modref_records_lto *tt, FILE *out) print_generic_expr (dump_file, r->ref); fprintf (out, " (alias set %i)\n", r->ref ? get_alias_set (r->ref) : 0); + if (r->every_access) + { + fprintf (out, " Every access\n"); + continue; + } + size_t k; + modref_access_node *a; + FOR_EACH_VEC_SAFE_ELT (r->accesses, k, a) + dump_access (a, out); } } } @@ -268,6 +288,43 @@ get_modref_function_summary (cgraph_node *func) return NULL; } +/* Construct modref_access_node from REF. */ +static modref_access_node +get_access (ao_ref *ref) +{ + modref_access_node a; + tree base; + + base = ref->ref; + while (handled_component_p (base)) + base = TREE_OPERAND (base, 0); + if (TREE_CODE (base) == MEM_REF || TREE_CODE (base) == TARGET_MEM_REF) + { + base = TREE_OPERAND (base, 0); + if (TREE_CODE (base) == SSA_NAME + && SSA_NAME_IS_DEFAULT_DEF (base) + && TREE_CODE (SSA_NAME_VAR (base)) == PARM_DECL) + { + a.parm_index = 0; + for (tree t = DECL_ARGUMENTS (current_function_decl); + t != SSA_NAME_VAR (base); t = DECL_CHAIN (t)) + { + if (!t) + { + a.parm_index = -1; + break; + } + a.parm_index++; + } + } + else + a.parm_index = -1; + } + else + a.parm_index = -1; + return a; +} + /* Record access into the modref_records data structure. */ static void @@ -277,12 +334,13 @@ record_access (modref_records *tt, ao_ref *ref) : ao_ref_base_alias_set (ref); alias_set_type ref_set = !flag_strict_aliasing ? 
0 : (ao_ref_alias_set (ref)); + modref_access_node a = get_access (ref); if (dump_file) { - fprintf (dump_file, " - Recording base_set=%i ref_set=%i\n", - base_set, ref_set); + fprintf (dump_file, " - Recording base_set=%i ref_set=%i parm=%i\n", + base_set, ref_set, a.parm_index); } - tt->insert (base_set, ref_set); + tt->insert (base_set, ref_set, a); } /* IPA version of record_access_tree. */ @@ -335,6 +393,7 @@ record_access_lto (modref_records_lto *tt, ao_ref *ref) || variably_modified_type_p (ref_type, NULL_TREE))) ref_type = NULL_TREE; } + modref_access_node a = get_access (ref); if (dump_file) { fprintf (dump_file, " - Recording base type:"); @@ -342,11 +401,12 @@ record_access_lto (modref_records_lto *tt, ao_ref *ref) fprintf (dump_file, " (alias set %i) ref type:", base_type ? get_alias_set (base_type) : 0); print_generic_expr (dump_file, ref_type); - fprintf (dump_file, " (alias set %i)\n", - ref_type ? get_alias_set (ref_type) : 0); + fprintf (dump_file, " (alias set %i) parm:%i\n", + ref_type ? get_alias_set (ref_type) : 0, + a.parm_index); } - tt->insert (base_type, ref_type); + tt->insert (base_type, ref_type, a); } /* Returns true if and only if we should store the access to EXPR. @@ -490,17 +550,47 @@ analyze_call (modref_summary *cur_summary, return false; } + auto_vec parm_map; + + parm_map.safe_grow (gimple_call_num_args (stmt)); + for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) + { + tree op = gimple_call_arg (stmt, i); + STRIP_NOPS (op); + if (TREE_CODE (op) == SSA_NAME + && SSA_NAME_IS_DEFAULT_DEF (op) + && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL) + { + int index = 0; + for (tree t = DECL_ARGUMENTS (current_function_decl); + t != SSA_NAME_VAR (op); t = DECL_CHAIN (t)) + { + if (!t) + { + index = -1; + break; + } + index++; + } + parm_map[i] = index; + } + else if (points_to_local_or_readonly_memory_p (op)) + parm_map[i] = -2; + else + parm_map[i] = -1; + } + /* Merge with callee's summary. 
*/ if (cur_summary->loads) - cur_summary->loads->merge (callee_summary->loads); + cur_summary->loads->merge (callee_summary->loads, &parm_map); if (cur_summary->loads_lto) - cur_summary->loads_lto->merge (callee_summary->loads_lto); + cur_summary->loads_lto->merge (callee_summary->loads_lto, &parm_map); if (!ignore_stores) { if (cur_summary->stores) - cur_summary->stores->merge (callee_summary->stores); + cur_summary->stores->merge (callee_summary->stores, &parm_map); if (cur_summary->stores_lto) - cur_summary->stores_lto->merge (callee_summary->stores_lto); + cur_summary->stores_lto->merge (callee_summary->stores_lto, &parm_map); } return true; @@ -638,21 +728,25 @@ analyze_function (function *f, bool ipa) { gcc_assert (!summary->loads); summary->loads = modref_records::create_ggc (param_modref_max_bases, - param_modref_max_refs); + param_modref_max_refs, + param_modref_max_accesses); gcc_assert (!summary->stores); summary->stores = modref_records::create_ggc (param_modref_max_bases, - param_modref_max_refs); + param_modref_max_refs, + param_modref_max_accesses); } if (lto) { gcc_assert (!summary->loads_lto); summary->loads_lto = modref_records_lto::create_ggc (param_modref_max_bases, - param_modref_max_refs); + param_modref_max_refs, + param_modref_max_accesses); gcc_assert (!summary->stores_lto); summary->stores_lto = modref_records_lto::create_ggc (param_modref_max_bases, - param_modref_max_refs); + param_modref_max_refs, + param_modref_max_accesses); } summary->finished = false; int ecf_flags = flags_from_decl_or_type (current_function_decl); @@ -730,29 +824,33 @@ modref_summaries::duplicate (cgraph_node *, cgraph_node *, { dst_data->stores = modref_records::create_ggc (src_data->stores->max_bases, - src_data->stores->max_refs); - dst_data->stores->merge (src_data->stores); + src_data->stores->max_refs, + src_data->stores->max_accesses); + dst_data->stores->copy_from (src_data->stores); } if (src_data->loads) { dst_data->loads = modref_records::create_ggc 
(src_data->loads->max_bases, - src_data->loads->max_refs); - dst_data->loads->merge (src_data->loads); + src_data->loads->max_refs, + src_data->loads->max_accesses); + dst_data->loads->copy_from (src_data->loads); } if (src_data->stores_lto) { dst_data->stores_lto = modref_records_lto::create_ggc (src_data->stores_lto->max_bases, - src_data->stores_lto->max_refs); - dst_data->stores_lto->merge (src_data->stores_lto); + src_data->stores_lto->max_refs, + src_data->stores_lto->max_accesses); + dst_data->stores_lto->copy_from (src_data->stores_lto); } if (src_data->loads_lto) { dst_data->loads_lto = modref_records_lto::create_ggc (src_data->loads_lto->max_bases, - src_data->loads_lto->max_refs); - dst_data->loads_lto->merge (src_data->loads_lto); + src_data->loads_lto->max_refs, + src_data->stores_lto->max_accesses); + dst_data->loads_lto->copy_from (src_data->loads_lto); } } @@ -796,6 +894,7 @@ write_modref_records (modref_records_lto *tt, struct output_block *ob) { streamer_write_uhwi (ob, tt->max_bases); streamer_write_uhwi (ob, tt->max_refs); + streamer_write_uhwi (ob, tt->max_accesses); streamer_write_uhwi (ob, tt->every_base); streamer_write_uhwi (ob, vec_safe_length (tt->bases)); @@ -807,11 +906,19 @@ write_modref_records (modref_records_lto *tt, struct output_block *ob) streamer_write_uhwi (ob, base_node->every_ref); streamer_write_uhwi (ob, vec_safe_length (base_node->refs)); + size_t j; modref_ref_node *ref_node; FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node) { stream_write_tree (ob, ref_node->ref, true); + streamer_write_uhwi (ob, ref_node->every_access); + streamer_write_uhwi (ob, vec_safe_length (ref_node->accesses)); + + size_t k; + modref_access_node *access_node; + FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node) + streamer_write_uhwi (ob, access_node->parm_index); } } } @@ -828,13 +935,16 @@ read_modref_records (lto_input_block *ib, struct data_in *data_in, { size_t max_bases = streamer_read_uhwi (ib); size_t max_refs = 
streamer_read_uhwi (ib); + size_t max_accesses = streamer_read_uhwi (ib); /* Decide whether we want to turn LTO data types to non-LTO (i.e. when LTO re-streaming is not going to happen). */ if (flag_wpa || flag_incremental_link == INCREMENTAL_LINK_LTO) - *lto_ret = modref_records_lto::create_ggc (max_bases, max_refs); + *lto_ret = modref_records_lto::create_ggc (max_bases, max_refs, + max_accesses); else - *nolto_ret = modref_records::create_ggc (max_bases, max_refs); + *nolto_ret = modref_records::create_ggc (max_bases, max_refs, + max_accesses); size_t every_base = streamer_read_uhwi (ib); size_t nbase = streamer_read_uhwi (ib); @@ -897,16 +1007,43 @@ read_modref_records (lto_input_block *ib, struct data_in *data_in, print_generic_expr (dump_file, ref_tree); fprintf (dump_file, "\n"); } - base_tree = NULL; + ref_tree = NULL; } + modref_ref_node *nolto_ref_node = NULL; + modref_ref_node *lto_ref_node = NULL; + if (nolto_base_node) - nolto_base_node->insert_ref (ref_tree ? get_alias_set (ref_tree) - : 0, max_refs); + nolto_ref_node + = nolto_base_node->insert_ref (ref_tree + ? get_alias_set (ref_tree) : 0, + max_refs); if (lto_base_node) - lto_base_node->insert_ref (ref_tree, max_refs); + lto_ref_node = lto_base_node->insert_ref (ref_tree, max_refs); + + size_t every_access = streamer_read_uhwi (ib); + size_t naccesses = streamer_read_uhwi (ib); + + if (nolto_ref_node) + nolto_ref_node->every_access = every_access; + if (lto_ref_node) + lto_ref_node->every_access = every_access; + + for (size_t k = 0; k < naccesses; k++) + { + int parm_index = streamer_read_uhwi (ib); + modref_access_node a = {parm_index}; + if (nolto_ref_node) + nolto_ref_node->insert_access (a, max_accesses); + if (lto_ref_node) + lto_ref_node->insert_access (a, max_accesses); + } } } + if (*lto_ret) + (*lto_ret)->cleanup (); + if (*nolto_ret) + (*nolto_ret)->cleanup (); } /* Callback for write_summary. 
*/ @@ -1305,19 +1442,22 @@ unsigned int pass_ipa_modref::execute (function *) } } + auto_vec parm_map; + /* TODO: compute parm_map. */ + /* Merge in callee's information. */ if (callee_summary->loads && callee_summary->loads != loads) - loads->merge (callee_summary->loads); + loads->merge (callee_summary->loads, &parm_map); if (callee_summary->stores && callee_summary->stores != stores) - stores->merge (callee_summary->stores); + stores->merge (callee_summary->stores, &parm_map); if (callee_summary->loads_lto && callee_summary->loads_lto != loads_lto) - loads_lto->merge (callee_summary->loads_lto); + loads_lto->merge (callee_summary->loads_lto, &parm_map); if (callee_summary->stores_lto && callee_summary->stores_lto != stores_lto) - stores_lto->merge (callee_summary->stores_lto); + stores_lto->merge (callee_summary->stores_lto, &parm_map); } } @@ -1351,13 +1491,13 @@ unsigned int pass_ipa_modref::execute (function *) else { if (loads) - cur_summary->loads->merge (loads); + cur_summary->loads->merge (loads, NULL); if (stores) - cur_summary->stores->merge (stores); + cur_summary->stores->merge (stores, NULL); if (loads_lto) - cur_summary->loads_lto->merge (loads_lto); + cur_summary->loads_lto->merge (loads_lto, NULL); if (stores_lto) - cur_summary->stores_lto->merge (stores_lto); + cur_summary->stores_lto->merge (stores_lto, NULL); } cur_summary->finished = true; if (dump_file) diff --git a/gcc/params.opt b/gcc/params.opt index 5f2e11d..5bc7e16 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -882,7 +882,11 @@ Maximum number of bases stored in each modref tree. -param=modref-max-refs= Common Joined UInteger Var(param_modref_max_refs) Init(16) -Maximum number of refs stored in each modref tree. +Maximum number of references stored in each modref base. + +-param=modref-max-accesses= +Common Joined UInteger Var(param_modref_max_accesses) Init(16) +Maximum number of accesses stored in each modref reference. 
-param=modref-max-tests= Common Joined UInteger Var(param_modref_max_tests) Init(64) diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c index 18ff529..fe390d4 100644 --- a/gcc/tree-ssa-alias.c +++ b/gcc/tree-ssa-alias.c @@ -114,6 +114,7 @@ static struct { unsigned HOST_WIDE_INT modref_clobber_may_alias; unsigned HOST_WIDE_INT modref_clobber_no_alias; unsigned HOST_WIDE_INT modref_tests; + unsigned HOST_WIDE_INT modref_baseptr_tests; } alias_stats; void @@ -169,14 +170,19 @@ dump_alias_stats (FILE *s) + alias_stats.modref_use_may_alias); fprintf (s, " modref clobber: " HOST_WIDE_INT_PRINT_DEC" disambiguations, " - HOST_WIDE_INT_PRINT_DEC" queries\n " - HOST_WIDE_INT_PRINT_DEC" tbaa queries (%f per modref query)\n", + HOST_WIDE_INT_PRINT_DEC" queries\n" + " " HOST_WIDE_INT_PRINT_DEC" tbaa queries (%f per modref query)\n" + " " HOST_WIDE_INT_PRINT_DEC" base compares (%f per modref query)\n", alias_stats.modref_clobber_no_alias, alias_stats.modref_clobber_no_alias + alias_stats.modref_clobber_may_alias, alias_stats.modref_tests, ((double)alias_stats.modref_tests) / (alias_stats.modref_clobber_no_alias + + alias_stats.modref_clobber_may_alias), + alias_stats.modref_baseptr_tests, + ((double)alias_stats.modref_baseptr_tests) + / (alias_stats.modref_clobber_no_alias + alias_stats.modref_clobber_may_alias)); } @@ -2423,12 +2429,13 @@ refs_output_dependent_p (tree store1, tree store2) IF TBAA_P is true, use TBAA oracle. 
*/ static bool -modref_may_conflict (modref_tree *tt, ao_ref *ref, bool tbaa_p) +modref_may_conflict (const gimple *stmt, + modref_tree *tt, ao_ref *ref, bool tbaa_p) { alias_set_type base_set, ref_set; modref_base_node *base_node; modref_ref_node *ref_node; - size_t i, j; + size_t i, j, k; if (tt->every_base) return true; @@ -2440,37 +2447,57 @@ modref_may_conflict (modref_tree *tt, ao_ref *ref, bool tbaa_p) int num_tests = 0, max_tests = param_modref_max_tests; FOR_EACH_VEC_SAFE_ELT (tt->bases, i, base_node) { - if (base_node->every_ref) - return true; - - if (!base_node->base) - return true; - if (tbaa_p && flag_strict_aliasing) { + if (num_tests >= max_tests) + return true; alias_stats.modref_tests++; if (!alias_sets_conflict_p (base_set, base_node->base)) continue; num_tests++; } - else - return true; - if (num_tests >= max_tests) + + if (base_node->every_ref) return true; FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node) { /* Do not repeat same test as before. */ - if (ref_set == base_set && base_node->base == ref_node->ref) - return true; - if (!flag_strict_aliasing) - return true; - alias_stats.modref_tests++; - if (alias_sets_conflict_p (ref_set, ref_node->ref)) - return true; - num_tests++; - if (num_tests >= max_tests) + if ((ref_set != base_set || base_node->base != ref_node->ref) + && tbaa_p && flag_strict_aliasing) + { + if (num_tests >= max_tests) + return true; + alias_stats.modref_tests++; + if (!alias_sets_conflict_p (ref_set, ref_node->ref)) + continue; + num_tests++; + } + + /* TBAA checks did not disambiguate, try to use base pointer, for + that we however need to have ref->ref. 
*/ + if (ref_node->every_access || !ref->ref) return true; + + modref_access_node *access_node; + FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node) + { + if (num_tests >= max_tests) + return true; + + if (access_node->parm_index == -1 + || (unsigned)access_node->parm_index + >= gimple_call_num_args (stmt)) + return true; + + + alias_stats.modref_baseptr_tests++; + + if (ptr_deref_may_alias_ref_p_1 + (gimple_call_arg (stmt, access_node->parm_index), ref)) + return true; + num_tests++; + } } } return false; @@ -2510,7 +2537,7 @@ ref_maybe_used_by_call_p_1 (gcall *call, ao_ref *ref, bool tbaa_p) modref_summary *summary = get_modref_function_summary (node); if (summary) { - if (!modref_may_conflict (summary->loads, ref, tbaa_p)) + if (!modref_may_conflict (call, summary->loads, ref, tbaa_p)) { alias_stats.modref_use_no_alias++; if (dump_file && (dump_flags & TDF_DETAILS)) @@ -2934,7 +2961,7 @@ call_may_clobber_ref_p_1 (gcall *call, ao_ref *ref, bool tbaa_p) modref_summary *summary = get_modref_function_summary (node); if (summary) { - if (!modref_may_conflict (summary->stores, ref, tbaa_p)) + if (!modref_may_conflict (call, summary->stores, ref, tbaa_p)) { alias_stats.modref_clobber_no_alias++; if (dump_file && (dump_flags & TDF_DETAILS)) -- cgit v1.1 From 9de8fa8052154a83b82f8b3785ec100d8cb24261 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 24 Sep 2020 15:10:04 +0200 Subject: Add modref testcase * gcc.dg/tree-ssa/modref-1.c: New test. 
--- gcc/testsuite/gcc.dg/tree-ssa/modref-1.c | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/modref-1.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-1.c b/gcc/testsuite/gcc.dg/tree-ssa/modref-1.c new file mode 100644 index 0000000..a80ca6b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-1.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +int p,q,r,s,*ptr=&q, *ptr2=&p; +__attribute__ ((noinline)) +int +test (int *p) +{ + *p = 1; +} +int +test1() +{ + q = 123; + test(&p); + return q; +} +int +test2() +{ + int *ptr = p ? &q : &s; + *ptr = 124; + test(&p); + return *ptr; +} +int +test3() +{ + int *ptr = p ? &p : &s; + q = 125; + test(ptr); + return q; +} +int +test4() +{ + int *ptr1 = p ? &q : &s; + int *ptr = p ? &r : &p; + *ptr1 = 126; + test(ptr); + return *ptr1; +} +/* { dg-final { scan-tree-dump "return 123" "optimized"} } */ +/* { dg-final { scan-tree-dump "return 124" "optimized"} } */ +/* { dg-final { scan-tree-dump "return 125" "optimized"} } */ +/* { dg-final { scan-tree-dump "return 126" "optimized"} } */ -- cgit v1.1 From 2e66e53b1efb98f5cf6b0a123990c1ca999affd7 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Thu, 24 Sep 2020 06:17:00 -0700 Subject: c++: local-decls are never member fns [PR97186] This fixes an ICE in noexcept instantiation. It was presuming functions always have template_info, but that changed with my DECL_LOCAL_DECL_P changes. Fortunately DECL_LOCAL_DECL_P fns are never member fns, so we don't need to go fishing out a this pointer. Also I realized I'd misnamed local10.C, so renaming it local-fn3.C, and while there adding the effective-target lto that David E pointed out was missing. PR c++/97186 gcc/cp/ * pt.c (maybe_instantiate_noexcept): Local externs are never member fns. gcc/testsuite/ * g++.dg/template/local10.C: Rename ... * g++.dg/template/local-fn3.C: .. here. Require lto. 
* g++.dg/template/local-fn4.C: New. --- gcc/cp/pt.c | 21 +++++++++++++-------- gcc/testsuite/g++.dg/template/local-fn3.C | 17 +++++++++++++++++ gcc/testsuite/g++.dg/template/local-fn4.C | 21 +++++++++++++++++++++ gcc/testsuite/g++.dg/template/local10.C | 15 --------------- 4 files changed, 51 insertions(+), 23 deletions(-) create mode 100644 gcc/testsuite/g++.dg/template/local-fn3.C create mode 100644 gcc/testsuite/g++.dg/template/local-fn4.C delete mode 100644 gcc/testsuite/g++.dg/template/local10.C (limited to 'gcc') diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 1ec039d..62e8509 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -25397,15 +25397,20 @@ maybe_instantiate_noexcept (tree fn, tsubst_flags_t complain) push_deferring_access_checks (dk_no_deferred); input_location = DECL_SOURCE_LOCATION (fn); - /* If needed, set current_class_ptr for the benefit of - tsubst_copy/PARM_DECL. */ - tree tdecl = DECL_TEMPLATE_RESULT (DECL_TI_TEMPLATE (fn)); - if (DECL_NONSTATIC_MEMBER_FUNCTION_P (tdecl)) + if (!DECL_LOCAL_DECL_P (fn)) { - tree this_parm = DECL_ARGUMENTS (tdecl); - current_class_ptr = NULL_TREE; - current_class_ref = cp_build_fold_indirect_ref (this_parm); - current_class_ptr = this_parm; + /* If needed, set current_class_ptr for the benefit of + tsubst_copy/PARM_DECL. The exception pattern will + refer to the parm of the template, not the + instantiation. 
*/ + tree tdecl = DECL_TEMPLATE_RESULT (DECL_TI_TEMPLATE (fn)); + if (DECL_NONSTATIC_MEMBER_FUNCTION_P (tdecl)) + { + tree this_parm = DECL_ARGUMENTS (tdecl); + current_class_ptr = NULL_TREE; + current_class_ref = cp_build_fold_indirect_ref (this_parm); + current_class_ptr = this_parm; + } } /* If this function is represented by a TEMPLATE_DECL, then diff --git a/gcc/testsuite/g++.dg/template/local-fn3.C b/gcc/testsuite/g++.dg/template/local-fn3.C new file mode 100644 index 0000000..2affe23 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/local-fn3.C @@ -0,0 +1,17 @@ +// PR c++/97171 + +// { dg-require-effective-target lto } +// { dg-additional-options -flto } + +template +void transform(_UnaryOperation); + +template +void Apply () +{ + extern T Maker (void); // block-scope extern with dependent type + + transform (Maker); +} + +template void Apply (); diff --git a/gcc/testsuite/g++.dg/template/local-fn4.C b/gcc/testsuite/g++.dg/template/local-fn4.C new file mode 100644 index 0000000..4699012 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/local-fn4.C @@ -0,0 +1,21 @@ +// PR c++/97186 +// ICE in exception spec substitution + + +template +struct no { + static void + tg () + { + void + hk () noexcept (tg); // { dg-error "convert" } + + hk (); + } +}; + +void +os () +{ + no ().tg (); +} diff --git a/gcc/testsuite/g++.dg/template/local10.C b/gcc/testsuite/g++.dg/template/local10.C deleted file mode 100644 index a2ffc1e..0000000 --- a/gcc/testsuite/g++.dg/template/local10.C +++ /dev/null @@ -1,15 +0,0 @@ -// PR c++/97171 -// { dg-additional-options -flto } - -template -void transform(_UnaryOperation); - -template -void Apply () -{ - extern T Maker (void); // block-scope extern with dependent type - - transform (Maker); -} - -template void Apply (); -- cgit v1.1 From 4b9d61f79c0c0185a33048ae6cc72269cf7efa31 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 6 Aug 2020 14:50:56 +0200 Subject: add move CTOR to auto_vec, use auto_vec for get_loop_exit_edges This 
adds a move CTOR to auto_vec and makes use of an auto_vec return value for get_loop_exit_edges denoting that lifetime management of the vector is handed to the caller. The move CTOR prompted the hash_table change because it apparently makes the copy CTOR implicitly deleted (good) and hash-table expansion of the odr_enum_map which is hash_map where odr_enum has an auto_vec member triggers this. Not sure if there's a latent bug there before this (I think we're not invoking DTORs, but we're invoking copy-CTORs). 2020-08-06 Richard Biener * vec.h (auto_vec::auto_vec (auto_vec &&)): New move CTOR. (auto_vec::operator=(auto_vec &&)): Delete. * hash-table.h (hash_table::expand): Use std::move when expanding. * cfgloop.h (get_loop_exit_edges): Return auto_vec. * cfgloop.c (get_loop_exit_edges): Adjust. * cfgloopmanip.c (fix_loop_placement): Likewise. * ipa-fnsummary.c (analyze_function_body): Likewise. * ira-build.c (create_loop_tree_nodes): Likewise. (create_loop_tree_node_allocnos): Likewise. (loop_with_complex_edge_p): Likewise. * ira-color.c (ira_loop_edge_freq): Likewise. * loop-unroll.c (analyze_insns_in_loop): Likewise. * predict.c (predict_loops): Likewise. * tree-predcom.c (last_always_executed_block): Likewise. * tree-ssa-loop-ch.c (ch_base::copy_headers): Likewise. * tree-ssa-loop-im.c (store_motion_loop): Likewise. * tree-ssa-loop-ivcanon.c (loop_edge_to_cancel): Likewise. (canonicalize_loop_induction_variables): Likewise. * tree-ssa-loop-manip.c (get_loops_exits): Likewise. * tree-ssa-loop-niter.c (find_loop_niter): Likewise. (finite_loop_p): Likewise. (find_loop_niter_by_eval): Likewise. (estimate_numbers_of_iterations): Likewise. * tree-ssa-loop-prefetch.c (emit_mfence_after_loop): Likewise. (may_use_storent_in_loop_p): Likewise. 
--- gcc/cfgloop.c | 4 ++-- gcc/cfgloop.h | 2 +- gcc/cfgloopmanip.c | 3 +-- gcc/hash-table.h | 2 +- gcc/ipa-fnsummary.c | 4 +--- gcc/ira-build.c | 12 +++--------- gcc/ira-color.c | 4 +--- gcc/loop-unroll.c | 3 +-- gcc/predict.c | 9 ++------- gcc/tree-predcom.c | 3 +-- gcc/tree-ssa-loop-ch.c | 3 +-- gcc/tree-ssa-loop-im.c | 3 +-- gcc/tree-ssa-loop-ivcanon.c | 9 ++------- gcc/tree-ssa-loop-manip.c | 3 +-- gcc/tree-ssa-loop-niter.c | 20 +++++--------------- gcc/tree-ssa-loop-prefetch.c | 7 ++----- gcc/vec.h | 7 +++++++ 17 files changed, 33 insertions(+), 65 deletions(-) (limited to 'gcc') diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c index 7720e6e..33a26cc 100644 --- a/gcc/cfgloop.c +++ b/gcc/cfgloop.c @@ -1202,10 +1202,10 @@ release_recorded_exits (function *fn) /* Returns the list of the exit edges of a LOOP. */ -vec +auto_vec get_loop_exit_edges (const class loop *loop, basic_block *body) { - vec edges = vNULL; + auto_vec edges; edge e; unsigned i; edge_iterator ei; diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index be97828..d14689d 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -383,7 +383,7 @@ extern basic_block *get_loop_body_in_custom_order (const class loop *, extern basic_block *get_loop_body_in_custom_order (const class loop *, void *, int (*) (const void *, const void *, void *)); -extern vec get_loop_exit_edges (const class loop *, basic_block * = NULL); +extern auto_vec get_loop_exit_edges (const class loop *, basic_block * = NULL); extern edge single_exit (const class loop *); extern edge single_likely_exit (class loop *loop, vec); extern unsigned num_loop_branches (const class loop *); diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index 73134a2..3c9e2a0 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -126,7 +126,7 @@ fix_loop_placement (class loop *loop, bool *irred_invalidated) { unsigned i; edge e; - vec exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); class loop *father = current_loops->tree_root, 
*act; bool ret = false; @@ -157,7 +157,6 @@ fix_loop_placement (class loop *loop, bool *irred_invalidated) ret = true; } - exits.release (); return ret; } diff --git a/gcc/hash-table.h b/gcc/hash-table.h index 32f3a63..487003c 100644 --- a/gcc/hash-table.h +++ b/gcc/hash-table.h @@ -819,7 +819,7 @@ hash_table::expand () if (!is_empty (x) && !is_deleted (x)) { value_type *q = find_empty_slot_for_expand (Descriptor::hash (x)); - new ((void*) q) value_type (x); + new ((void*) q) value_type (std::move (x)); } p++; diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index bb703f6..cbcf0c4 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -2808,7 +2808,6 @@ analyze_function_body (struct cgraph_node *node, bool early) scev_initialize (); FOR_EACH_LOOP (loop, 0) { - vec exits; edge ex; unsigned int j; class tree_niter_desc niter_desc; @@ -2817,7 +2816,7 @@ analyze_function_body (struct cgraph_node *node, bool early) else bb_predicate = false; - exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); FOR_EACH_VEC_ELT (exits, j, ex) if (number_of_iterations_exit (loop, ex, &niter_desc, false) && !is_gimple_min_invariant (niter_desc.niter)) @@ -2835,7 +2834,6 @@ analyze_function_body (struct cgraph_node *node, bool early) loop with independent predicate. 
*/ loop_iterations &= will_be_nonconstant; } - exits.release (); } /* To avoid quadratic behavior we analyze stride predicates only diff --git a/gcc/ira-build.c b/gcc/ira-build.c index 0bbdb4d..9b35d0e 100644 --- a/gcc/ira-build.c +++ b/gcc/ira-build.c @@ -128,7 +128,6 @@ create_loop_tree_nodes (void) bool skip_p; edge_iterator ei; edge e; - vec edges; loop_p loop; ira_bb_nodes @@ -173,14 +172,13 @@ create_loop_tree_nodes (void) } if (skip_p) continue; - edges = get_loop_exit_edges (loop); + auto_vec edges = get_loop_exit_edges (loop); FOR_EACH_VEC_ELT (edges, j, e) if ((e->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (e)) { skip_p = true; break; } - edges.release (); if (skip_p) continue; } @@ -1964,17 +1962,15 @@ create_loop_tree_node_allocnos (ira_loop_tree_node_t loop_node) int i; edge_iterator ei; edge e; - vec edges; ira_assert (current_loops != NULL); FOR_EACH_EDGE (e, ei, loop_node->loop->header->preds) if (e->src != loop_node->loop->latch) create_loop_allocnos (e); - edges = get_loop_exit_edges (loop_node->loop); + auto_vec edges = get_loop_exit_edges (loop_node->loop); FOR_EACH_VEC_ELT (edges, i, e) create_loop_allocnos (e); - edges.release (); } } @@ -2167,13 +2163,12 @@ loop_with_complex_edge_p (class loop *loop) int i; edge_iterator ei; edge e; - vec edges; bool res; FOR_EACH_EDGE (e, ei, loop->header->preds) if (e->flags & EDGE_EH) return true; - edges = get_loop_exit_edges (loop); + auto_vec edges = get_loop_exit_edges (loop); res = false; FOR_EACH_VEC_ELT (edges, i, e) if (e->flags & EDGE_COMPLEX) @@ -2181,7 +2176,6 @@ loop_with_complex_edge_p (class loop *loop) res = true; break; } - edges.release (); return res; } #endif diff --git a/gcc/ira-color.c b/gcc/ira-color.c index dbb3b7a..d3f8e23 100644 --- a/gcc/ira-color.c +++ b/gcc/ira-color.c @@ -2539,7 +2539,6 @@ ira_loop_edge_freq (ira_loop_tree_node_t loop_node, int regno, bool exit_p) int freq, i; edge_iterator ei; edge e; - vec edges; ira_assert (current_loops != NULL && loop_node->loop != NULL && 
(regno < 0 || regno >= FIRST_PSEUDO_REGISTER)); @@ -2555,13 +2554,12 @@ ira_loop_edge_freq (ira_loop_tree_node_t loop_node, int regno, bool exit_p) } else { - edges = get_loop_exit_edges (loop_node->loop); + auto_vec edges = get_loop_exit_edges (loop_node->loop); FOR_EACH_VEC_ELT (edges, i, e) if (regno < 0 || (bitmap_bit_p (df_get_live_out (e->src), regno) && bitmap_bit_p (df_get_live_in (e->dest), regno))) freq += EDGE_FREQUENCY (e); - edges.release (); } return REG_FREQ_FROM_EDGE_FREQ (freq); diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c index 693c776..e1efe62 100644 --- a/gcc/loop-unroll.c +++ b/gcc/loop-unroll.c @@ -1580,7 +1580,7 @@ analyze_insns_in_loop (class loop *loop) struct var_to_expand *ves = NULL; iv_to_split **slot1; var_to_expand **slot2; - vec edges = get_loop_exit_edges (loop); + auto_vec edges = get_loop_exit_edges (loop); edge exit; bool can_apply = false; @@ -1656,7 +1656,6 @@ analyze_insns_in_loop (class loop *loop) } } - edges.release (); free (body); return opt_info; } diff --git a/gcc/predict.c b/gcc/predict.c index 3c7b46f..5983889 100644 --- a/gcc/predict.c +++ b/gcc/predict.c @@ -1916,7 +1916,6 @@ predict_loops (void) { basic_block bb, *bbs; unsigned j, n_exits = 0; - vec exits; class tree_niter_desc niter_desc; edge ex; class nb_iter_bound *nb_iter; @@ -1927,15 +1926,12 @@ predict_loops (void) gcond *stmt = NULL; bool recursion = with_recursion.contains (loop); - exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); FOR_EACH_VEC_ELT (exits, j, ex) if (!unlikely_executed_edge_p (ex) && !(ex->flags & EDGE_ABNORMAL_CALL)) n_exits ++; if (!n_exits) - { - exits.release (); - continue; - } + continue; if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Predicting loop %i%s with %i exits.\n", @@ -2049,7 +2045,6 @@ predict_loops (void) probability = RDIV (REG_BR_PROB_BASE, nitercst); predict_edge (ex, predictor, probability); } - exits.release (); /* Find information about loop bound variables. 
*/ for (nb_iter = loop->bounds; nb_iter; diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c index b1d6e63..7a5990a 100644 --- a/gcc/tree-predcom.c +++ b/gcc/tree-predcom.c @@ -737,13 +737,12 @@ static basic_block last_always_executed_block (class loop *loop) { unsigned i; - vec exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); edge ex; basic_block last = loop->latch; FOR_EACH_VEC_ELT (exits, i, ex) last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src); - exits.release (); return last; } diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c index b9002d8..b86acf7 100644 --- a/gcc/tree-ssa-loop-ch.c +++ b/gcc/tree-ssa-loop-ch.c @@ -504,14 +504,13 @@ ch_base::copy_headers (function *fun) { edge entry = copied[i].first; loop_p loop = copied[i].second; - vec exit_edges = get_loop_exit_edges (loop); + auto_vec exit_edges = get_loop_exit_edges (loop); bitmap exit_bbs = BITMAP_ALLOC (NULL); for (unsigned j = 0; j < exit_edges.length (); ++j) bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index); bitmap_set_bit (exit_bbs, loop->header->index); do_rpo_vn (cfun, entry, exit_bbs); BITMAP_FREE (exit_bbs); - exit_edges.release (); } } free (bbs); diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c index 139c7e7..6bb07e1 100644 --- a/gcc/tree-ssa-loop-im.c +++ b/gcc/tree-ssa-loop-im.c @@ -2868,7 +2868,7 @@ loop_suitable_for_sm (class loop *loop ATTRIBUTE_UNUSED, static void store_motion_loop (class loop *loop, bitmap sm_executed) { - vec exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); class loop *subloop; bitmap sm_in_loop = BITMAP_ALLOC (&lim_bitmap_obstack); @@ -2878,7 +2878,6 @@ store_motion_loop (class loop *loop, bitmap sm_executed) if (!bitmap_empty_p (sm_in_loop)) hoist_memory_references (loop, sm_in_loop, exits); } - exits.release (); bitmap_ior_into (sm_executed, sm_in_loop); for (subloop = loop->inner; subloop != NULL; subloop = subloop->next) diff --git 
a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c index 298ab21..5bb781d 100644 --- a/gcc/tree-ssa-loop-ivcanon.c +++ b/gcc/tree-ssa-loop-ivcanon.c @@ -444,7 +444,6 @@ estimated_unrolled_size (struct loop_size *size, static edge loop_edge_to_cancel (class loop *loop) { - vec exits; unsigned i; edge edge_to_cancel; gimple_stmt_iterator gsi; @@ -453,7 +452,7 @@ loop_edge_to_cancel (class loop *loop) if (EDGE_COUNT (loop->latch->preds) > 1) return NULL; - exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); FOR_EACH_VEC_ELT (exits, i, edge_to_cancel) { @@ -477,8 +476,6 @@ loop_edge_to_cancel (class loop *loop) if (edge_to_cancel->dest != loop->latch) continue; - exits.release (); - /* Verify that the code in loop latch does nothing that may end program execution without really reaching the exit. This may include non-pure/const function calls, EH statements, volatile ASMs etc. */ @@ -487,7 +484,6 @@ loop_edge_to_cancel (class loop *loop) return NULL; return edge_to_cancel; } - exits.release (); return NULL; } @@ -1222,10 +1218,9 @@ canonicalize_loop_induction_variables (class loop *loop, by find_loop_niter_by_eval. Be sure to keep it for future. */ if (niter && TREE_CODE (niter) == INTEGER_CST) { - vec exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); record_niter_bound (loop, wi::to_widest (niter), exit == single_likely_exit (loop, exits), true); - exits.release (); } /* Force re-computation of loop bounds so we can remove redundant exits. 
*/ diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c index a2717a4..cdd1ac7 100644 --- a/gcc/tree-ssa-loop-manip.c +++ b/gcc/tree-ssa-loop-manip.c @@ -368,11 +368,10 @@ get_loops_exits (bitmap *loop_exits) FOR_EACH_LOOP (loop, 0) { - vec exit_edges = get_loop_exit_edges (loop); + auto_vec exit_edges = get_loop_exit_edges (loop); loop_exits[loop->num] = BITMAP_ALLOC (&loop_renamer_obstack); FOR_EACH_VEC_ELT (exit_edges, j, e) bitmap_set_bit (loop_exits[loop->num], e->dest->index); - exit_edges.release (); } } diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index b3647d9e..45747e1 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -2752,7 +2752,7 @@ tree find_loop_niter (class loop *loop, edge *exit) { unsigned i; - vec exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); edge ex; tree niter = NULL_TREE, aniter; class tree_niter_desc desc; @@ -2803,7 +2803,6 @@ find_loop_niter (class loop *loop, edge *exit) continue; } } - exits.release (); return niter ? niter : chrec_dont_know; } @@ -2837,21 +2836,18 @@ finite_loop_p (class loop *loop) if (loop->finite_p) { unsigned i; - vec exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); edge ex; /* If the loop has a normal exit, we can assume it will terminate. 
*/ FOR_EACH_VEC_ELT (exits, i, ex) if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_FAKE))) { - exits.release (); if (dump_file) fprintf (dump_file, "Assume loop %i to be finite: it has an exit " "and -ffinite-loops is on.\n", loop->num); return true; } - - exits.release (); } return false; @@ -3114,7 +3110,7 @@ tree find_loop_niter_by_eval (class loop *loop, edge *exit) { unsigned i; - vec exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); edge ex; tree niter = NULL_TREE, aniter; @@ -3123,10 +3119,7 @@ find_loop_niter_by_eval (class loop *loop, edge *exit) /* Loops with multiple exits are expensive to handle and less important. */ if (!flag_expensive_optimizations && exits.length () > 1) - { - exits.release (); - return chrec_dont_know; - } + return chrec_dont_know; FOR_EACH_VEC_ELT (exits, i, ex) { @@ -3144,7 +3137,6 @@ find_loop_niter_by_eval (class loop *loop, edge *exit) niter = aniter; *exit = ex; } - exits.release (); return niter ? niter : chrec_dont_know; } @@ -4236,7 +4228,6 @@ get_upper_bound_based_on_builtin_expr_with_prob (gcond *cond) void estimate_numbers_of_iterations (class loop *loop) { - vec exits; tree niter, type; unsigned i; class tree_niter_desc niter_desc; @@ -4275,7 +4266,7 @@ estimate_numbers_of_iterations (class loop *loop) number_of_latch_executions (loop); basic_block *body = get_loop_body (loop); - exits = get_loop_exit_edges (loop, body); + auto_vec exits = get_loop_exit_edges (loop, body); likely_exit = single_likely_exit (loop, exits); FOR_EACH_VEC_ELT (exits, i, ex) { @@ -4311,7 +4302,6 @@ estimate_numbers_of_iterations (class loop *loop) true, ex == likely_exit, true); record_control_iv (loop, &niter_desc); } - exits.release (); if (flag_aggressive_loop_optimizations) infer_loop_bounds_from_undefined (loop, body); diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index d19ece6..5e94a19 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1289,7 
+1289,7 @@ mark_nontemporal_store (struct mem_ref *ref) static void emit_mfence_after_loop (class loop *loop) { - vec exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); edge exit; gcall *call; gimple_stmt_iterator bsi; @@ -1309,7 +1309,6 @@ emit_mfence_after_loop (class loop *loop) gsi_insert_before (&bsi, call, GSI_NEW_STMT); } - exits.release (); update_ssa (TODO_update_ssa_only_virtuals); } @@ -1327,7 +1326,7 @@ may_use_storent_in_loop_p (class loop *loop) is a suitable place for it at each of the loop exits. */ if (FENCE_FOLLOWING_MOVNT != NULL_TREE) { - vec exits = get_loop_exit_edges (loop); + auto_vec exits = get_loop_exit_edges (loop); unsigned i; edge exit; @@ -1335,8 +1334,6 @@ may_use_storent_in_loop_p (class loop *loop) if ((exit->flags & EDGE_ABNORMAL) && exit->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) ret = false; - - exits.release (); } return ret; diff --git a/gcc/vec.h b/gcc/vec.h index 48e756e..d73d865 100644 --- a/gcc/vec.h +++ b/gcc/vec.h @@ -1540,6 +1540,13 @@ public: auto_vec () { this->m_vec = NULL; } auto_vec (size_t n) { this->create (n); } ~auto_vec () { this->release (); } + + auto_vec (auto_vec&& r) + { + this->m_vec = r.m_vec; + r.m_vec = NULL; + } + void operator= (auto_vec&&) = delete; }; -- cgit v1.1 From 50d9db203bc3b05c781a0da91be5ba21a1256f21 Mon Sep 17 00:00:00 2001 From: Alex Coplan Date: Thu, 24 Sep 2020 17:21:47 +0100 Subject: aarch64: Add support for Neoverse N2 CPU This patch adds support for Arm's Neoverse N2 CPU to the AArch64 backend. gcc/ChangeLog: * config/aarch64/aarch64-cores.def: Add Neoverse N2. * config/aarch64/aarch64-tune.md: Regenerate. * doc/invoke.texi: Document AArch64 support for Neoverse N2. 
--- gcc/config/aarch64/aarch64-cores.def | 3 +++ gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/doc/invoke.texi | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 04dc587..469ee99 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -136,6 +136,9 @@ AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_ AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversen1, 0x41, 0xd40, -1) AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversen1, 0x41, 0xd40, -1) +/* Armv8.5-A Architecture Processors. */ +AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoversen1, 0x41, 0xd49, -1) + /* Qualcomm ('Q') cores. 
*/ AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 729eb3e..3cf69ce 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoversen2,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 623dfb8..3dc2553 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -17537,8 +17537,8 @@ performance of the code. 
Permissible values for this option are: @samp{cortex-a76}, @samp{cortex-a76ae}, @samp{cortex-a77}, @samp{cortex-a65}, @samp{cortex-a65ae}, @samp{cortex-a34}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, -@samp{neoverse-e1},@samp{neoverse-n1},@samp{neoverse-v1},@samp{qdf24xx}, -@samp{saphira}, +@samp{neoverse-e1}, @samp{neoverse-n1}, @samp{neoverse-n2}, +@samp{neoverse-v1}, @samp{qdf24xx}, @samp{saphira}, @samp{phecda}, @samp{xgene1}, @samp{vulcan}, @samp{octeontx}, @samp{octeontx81}, @samp{octeontx83}, @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96} -- cgit v1.1 From 637ad78cdf4026234308973bb87839f10f3d39cf Mon Sep 17 00:00:00 2001 From: Alex Coplan Date: Thu, 24 Sep 2020 17:22:44 +0100 Subject: arm: Add support for Neoverse N2 CPU This adds support for Arm's Neoverse N2 CPU to the AArch32 backend. Neoverse N2 builds AArch32 at EL0 and therefore needs support in AArch32 GCC. gcc/ChangeLog: * config/arm/arm-cpus.in (neoverse-n2): New. * config/arm/arm-tables.opt: Regenerate. * config/arm/arm-tune.md: Regenerate. * doc/invoke.texi: Document support for Neoverse N2. 
--- gcc/config/arm/arm-cpus.in | 11 +++++++++++ gcc/config/arm/arm-tables.opt | 3 +++ gcc/config/arm/arm-tune.md | 7 ++++--- gcc/doc/invoke.texi | 6 +++--- 4 files changed, 21 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index 4550694..be563b7 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -1459,6 +1459,17 @@ begin cpu neoverse-n1 part d0c end cpu neoverse-n1 +begin cpu neoverse-n2 + cname neoversen2 + tune for cortex-a57 + tune flags LDSCHED + architecture armv8.5-a+fp16+bf16+i8mm + option crypto add FP_ARMv8 CRYPTO + costs cortex_a57 + vendor 41 + part 0xd49 +end cpu neoverse-n2 + # ARMv8.2 A-profile ARM DynamIQ big.LITTLE implementations begin cpu cortex-a75.cortex-a55 cname cortexa75cortexa55 diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index 1a7c319..b572063 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -244,6 +244,9 @@ EnumValue Enum(processor_type) String(neoverse-n1) Value( TARGET_CPU_neoversen1) EnumValue +Enum(processor_type) String(neoverse-n2) Value( TARGET_CPU_neoversen2) + +EnumValue Enum(processor_type) String(cortex-a75.cortex-a55) Value( TARGET_CPU_cortexa75cortexa55) EnumValue diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 3874f42..2377037 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -45,7 +45,8 @@ cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35, cortexa73cortexa53,cortexa55,cortexa75, cortexa76,cortexa76ae,cortexa77, - neoversen1,cortexa75cortexa55,cortexa76cortexa55, - neoversev1,cortexm23,cortexm33, - cortexm35p,cortexm55,cortexr52" + neoversen1,neoversen2,cortexa75cortexa55, + cortexa76cortexa55,neoversev1,cortexm23, + cortexm33,cortexm35p,cortexm55, + cortexr52" (const (symbol_ref "((enum attr_tune) arm_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 3dc2553..2091e0c 100644 --- 
a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -19385,9 +19385,9 @@ Permissible names are: @samp{arm7tdmi}, @samp{arm7tdmi-s}, @samp{arm710t}, @samp{cortex-m35p}, @samp{cortex-m55}, @samp{cortex-m1.small-multiply}, @samp{cortex-m0.small-multiply}, @samp{cortex-m0plus.small-multiply}, @samp{exynos-m1}, @samp{marvell-pj4}, -@samp{neoverse-n1}, @samp{neoverse-v1}, @samp{xscale}, @samp{iwmmxt}, -@samp{iwmmxt2}, @samp{ep9312}, @samp{fa526}, @samp{fa626}, @samp{fa606te}, -@samp{fa626te}, @samp{fmp626}, @samp{fa726te}, @samp{xgene1}. +@samp{neoverse-n1}, @samp{neoverse-n2}, @samp{neoverse-v1}, @samp{xscale}, +@samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}, @samp{fa526}, @samp{fa626}, +@samp{fa606te}, @samp{fa626te}, @samp{fmp626}, @samp{fa726te}, @samp{xgene1}. Additionally, this option can specify that GCC should tune the performance of the code for a big.LITTLE system. Permissible names are: -- cgit v1.1 From 1b8a23fc97de65f3188d0cdd5bfb56060defa84c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Chigot?= Date: Wed, 23 Sep 2020 16:08:21 +0200 Subject: runtime: remove __go_ptrace on AIX The AIX ptrace syscall doesn't have the same semantics as the glibc one. The syscall package is already handling it correctly so disable the new __go_ptrace C function for AIX. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/256777 --- gcc/go/gofrontend/MERGE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index f51dac5..daa0d2d 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -6a7648c97c3e0cdbecbec7e760b30246521a6d90 +2357468ae9b071de0e2ebe6574d78572967b7183 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. 
-- cgit v1.1 From d482c07b34558998658eac7fede023a853561314 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Thu, 24 Sep 2020 11:34:10 -0700 Subject: c++: restrict test to c++>=11 [pr97171] I'd missed an important restriction on use of noexcept. Fixed thusly gcc/testsuite/ * g++.dg/template/local-fn4.C: Add target c++11 --- gcc/testsuite/g++.dg/template/local-fn4.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/template/local-fn4.C b/gcc/testsuite/g++.dg/template/local-fn4.C index 4699012..f8522dd 100644 --- a/gcc/testsuite/g++.dg/template/local-fn4.C +++ b/gcc/testsuite/g++.dg/template/local-fn4.C @@ -1,6 +1,6 @@ // PR c++/97186 // ICE in exception spec substitution - +// { dg-do compile { target c++11 } } template struct no { -- cgit v1.1 From 6b6c89b37bc26791943ea79191891e77591de2b8 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Thu, 24 Sep 2020 12:13:28 -0700 Subject: c++: add testcase [PR97177] Pr97177 is the local-var duplicate of pr97171. So just adding the testcase. gcc/testsuite/ * g++.dg/template/local-var1.C: New. 
--- gcc/testsuite/g++.dg/template/local-var1.C | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 gcc/testsuite/g++.dg/template/local-var1.C (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/template/local-var1.C b/gcc/testsuite/g++.dg/template/local-var1.C new file mode 100644 index 0000000..4b3854b --- /dev/null +++ b/gcc/testsuite/g++.dg/template/local-var1.C @@ -0,0 +1,20 @@ +// PR c++/97186, related to c++/97171 but with a variable +// { dg-do compile { target c++11 } } + +namespace +{ + template + void + ml () + { + extern WF cr; + + static_assert (sizeof (cr) == 12, ""); + } + + void + qc () + { + ml (); + } +} -- cgit v1.1 From d13c0ae859fbb5bb937692548e5860e4cb39e26b Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Thu, 24 Sep 2020 12:50:29 -0700 Subject: c++: Cleanup some decl pushing apis In cleaning up local decl handling, here's an initial patch that takes advantage of C++'s default args for the is_friend parm of pushdecl, duplicate_decls and push_template_decl_real and the scope & tpl_header parms of xref_tag. Then many of the calls simply do not mention these. I also rename push_template_decl_real to push_template_decl, deleting the original forwarding function. This'll make my later patches changing their types less intrusive. There are 2 functional changes: 1) push_template_decl requires is_friend to be correct, it doesn't go checking for a friend function (an assert is added). 2) debug_overload prints out Hidden and Using markers for the overload set. gcc/cp/ * cp-tree.h (duplicate_decls): Default is_friend to false. (xref_tag): Default tag_scope & tpl_header_p to ts_current & false. (push_template_decl_real): Default is_friend to false. Rename to ... (push_template_decl): ... here. Delete original decl. * name-lookup.h (pushdecl_namespace_level): Default is_friend to false. (pushtag): Default tag_scope to ts_current. * coroutines.cc (morph_fn_to_coro): Drop default args to xref_tag. 
* decl.c (start_decl): Drop default args to duplicate_decls. (start_enum): Drop default arg to pushtag & xref_tag. (start_preparsed_function): Pass DECL_FRIEND_P to push_template_decl. (grokmethod): Likewise. * friend.c (do_friend): Rename push_template_decl_real calls. * lambda.c (begin_lambda_type): Drop default args to xref_tag. (vla_capture_type): Likewise. * name-lookup.c (maybe_process_template_type_declaration): Rename push_template_decl_real call. (pushdecl_top_level_and_finish): Drop default arg to pushdecl_namespace_level. * pt.c (push_template_decl_real): Assert no surprising friend functions. Rename to ... (push_template_decl): ... here. Delete original function. (lookup_template_class_1): Drop default args from pushtag. (instantiate_class_template_1): Likewise. * ptree.c (debug_overload): Print hidden and using markers. * rtti.c (init_rtti_processing): Drop default args from xref_tag. (build_dynamic_cast_1, tinfo_base_init): Likewise. * semantics.c (begin_class_definition): Drop default args to pushtag. gcc/objcp/ * objcp-decl.c (objcp_start_struct): Drop default args to xref_tag. (objcp_xref_tag): Likewise. libcc1/ * libcp1plugin.cc (supplement_binding): Drop default args to duplicate_decls. (safe_pushtag): Drop scope parm. Drop default args to pushtag. (safe_pushdecl_maybe_friend): Rename to ... (safe_pushdecl): ... here. Drop is_friend parm. Drop default args to pushdecl. (plugin_build_decl): Adjust safe_pushdecl & safe_pushtag calls. (plugin_build_constant): Adjust safe_pushdecl call. 
--- gcc/cp/coroutines.cc | 2 +- gcc/cp/cp-tree.h | 10 ++++++---- gcc/cp/decl.c | 15 ++++++--------- gcc/cp/friend.c | 4 ++-- gcc/cp/lambda.c | 5 ++--- gcc/cp/name-lookup.c | 8 ++++---- gcc/cp/name-lookup.h | 4 ++-- gcc/cp/pt.c | 19 ++++++++----------- gcc/cp/ptree.c | 6 +++++- gcc/cp/rtti.c | 11 +++-------- gcc/cp/semantics.c | 4 ++-- gcc/objcp/objcp-decl.c | 4 ++-- 12 files changed, 43 insertions(+), 49 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index 898b88b..ba81345 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -4011,7 +4011,7 @@ morph_fn_to_coro (tree orig, tree *resumer, tree *destroyer) /* 2. Types we need to define or look up. */ tree fr_name = get_fn_local_identifier (orig, "frame"); - tree coro_frame_type = xref_tag (record_type, fr_name, ts_current, false); + tree coro_frame_type = xref_tag (record_type, fr_name); DECL_CONTEXT (TYPE_NAME (coro_frame_type)) = current_scope (); tree coro_frame_ptr = build_pointer_type (coro_frame_type); tree act_des_fn_type diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 029a165..3ae4874 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6461,7 +6461,8 @@ extern void note_iteration_stmt_body_end (bool); extern void determine_local_discriminator (tree); extern int decls_match (tree, tree, bool = true); extern bool maybe_version_functions (tree, tree, bool); -extern tree duplicate_decls (tree, tree, bool); +extern tree duplicate_decls (tree, tree, + bool is_friend = false); extern tree declare_local_label (tree); extern tree define_label (location_t, tree); extern void check_goto (tree); @@ -6501,7 +6502,9 @@ extern tree get_scope_of_declarator (const cp_declarator *); extern void grok_special_member_properties (tree); extern bool grok_ctor_properties (const_tree, const_tree); extern bool grok_op_properties (tree, bool); -extern tree xref_tag (enum tag_types, tree, tag_scope, bool); +extern tree xref_tag (tag_types, tree, + tag_scope = ts_current, + 
bool tpl_header_p = false); extern void xref_basetypes (tree, tree); extern tree start_enum (tree, tree, tree, tree, bool, bool *); extern void finish_enum_value_list (tree); @@ -6849,8 +6852,7 @@ extern void end_template_parm_list (void); extern void end_template_decl (void); extern tree maybe_update_decl_type (tree, tree); extern bool check_default_tmpl_args (tree, tree, bool, bool, int); -extern tree push_template_decl (tree); -extern tree push_template_decl_real (tree, bool); +extern tree push_template_decl (tree, bool is_friend = false); extern tree add_inherited_template_parms (tree, tree); extern void template_parm_level_and_index (tree, int*, int*); extern bool redeclare_class_template (tree, tree, tree); diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index f3fdfe3..6019051 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -5361,8 +5361,7 @@ start_decl (const cp_declarator *declarator, about this situation, and so we check here. */ if (initialized && DECL_INITIALIZED_IN_CLASS_P (field)) error ("duplicate initialization of %qD", decl); - field = duplicate_decls (decl, field, - /*newdecl_is_friend=*/false); + field = duplicate_decls (decl, field); if (field == error_mark_node) return error_mark_node; else if (field) @@ -5376,8 +5375,7 @@ start_decl (const cp_declarator *declarator, ? current_template_parms : NULL_TREE); if (field && field != error_mark_node - && duplicate_decls (decl, field, - /*newdecl_is_friend=*/false)) + && duplicate_decls (decl, field)) decl = field; } @@ -15476,7 +15474,7 @@ start_enum (tree name, tree enumtype, tree underlying_type, || TREE_CODE (enumtype) != ENUMERAL_TYPE) { enumtype = cxx_make_type (ENUMERAL_TYPE); - enumtype = pushtag (name, enumtype, /*tag_scope=*/ts_current); + enumtype = pushtag (name, enumtype); /* std::byte aliases anything. 
*/ if (enumtype != error_mark_node @@ -15485,8 +15483,7 @@ start_enum (tree name, tree enumtype, tree underlying_type, TYPE_ALIAS_SET (enumtype) = 0; } else - enumtype = xref_tag (enum_type, name, /*tag_scope=*/ts_current, - false); + enumtype = xref_tag (enum_type, name); if (enumtype == error_mark_node) return error_mark_node; @@ -16257,7 +16254,7 @@ start_preparsed_function (tree decl1, tree attrs, int flags) by push_nested_class.) */ if (processing_template_decl) { - tree newdecl1 = push_template_decl (decl1); + tree newdecl1 = push_template_decl (decl1, DECL_FRIEND_P (decl1)); if (newdecl1 == error_mark_node) { if (ctype || DECL_STATIC_FUNCTION_P (decl1)) @@ -17362,7 +17359,7 @@ grokmethod (cp_decl_specifier_seq *declspecs, /* We process method specializations in finish_struct_1. */ if (processing_template_decl && !DECL_TEMPLATE_SPECIALIZATION (fndecl)) { - fndecl = push_template_decl (fndecl); + fndecl = push_template_decl (fndecl, DECL_FRIEND_P (fndecl)); if (fndecl == error_mark_node) return fndecl; } diff --git a/gcc/cp/friend.c b/gcc/cp/friend.c index fa20a93..e484134 100644 --- a/gcc/cp/friend.c +++ b/gcc/cp/friend.c @@ -558,7 +558,7 @@ do_friend (tree ctype, tree declarator, tree decl, else if (class_template_depth) /* We rely on tsubst_friend_function to check the validity of the declaration later. */ - decl = push_template_decl_real (decl, /*is_friend=*/true); + decl = push_template_decl (decl, /*is_friend=*/true); else decl = check_classfn (ctype, decl, template_member_p @@ -611,7 +611,7 @@ do_friend (tree ctype, tree declarator, tree decl, general, such a declaration depends on template parameters. Instead, we call pushdecl when the class is instantiated. */ - decl = push_template_decl_real (decl, /*is_friend=*/true); + decl = push_template_decl (decl, /*is_friend=*/true); else if (current_function_decl) /* pushdecl will check there's a local decl already. 
*/ decl = pushdecl (decl, /*is_friend=*/true); diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c index 7fccccc..07a5401 100644 --- a/gcc/cp/lambda.c +++ b/gcc/cp/lambda.c @@ -134,8 +134,7 @@ begin_lambda_type (tree lambda) IDENTIFIER_LAMBDA_P (name) = true; /* Create the new RECORD_TYPE for this lambda. */ - tree type = xref_tag (/*tag_code=*/record_type, name, - /*scope=*/ts_current, /*template_header_p=*/false); + tree type = xref_tag (/*tag_code=*/record_type, name); if (type == error_mark_node) return error_mark_node; @@ -476,7 +475,7 @@ static GTY(()) tree max_id; static tree vla_capture_type (tree array_type) { - tree type = xref_tag (record_type, make_anon_name (), ts_current, false); + tree type = xref_tag (record_type, make_anon_name ()); xref_basetypes (type, NULL_TREE); type = begin_class_definition (type); if (!ptr_id) diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index bbeaf64..e7764ab 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -6723,11 +6723,11 @@ maybe_process_template_type_declaration (tree type, int is_friend, if (processing_template_decl) { - /* This may change after the call to - push_template_decl_real, but we want the original value. */ + /* This may change after the call to push_template_decl, but + we want the original value. 
*/ tree name = DECL_NAME (decl); - decl = push_template_decl_real (decl, is_friend); + decl = push_template_decl (decl, is_friend); if (decl == error_mark_node) return error_mark_node; @@ -7301,7 +7301,7 @@ pushdecl_top_level_and_finish (tree x, tree init) { bool subtime = timevar_cond_start (TV_NAME_LOOKUP); do_push_to_top_level (); - x = pushdecl_namespace_level (x, false); + x = pushdecl_namespace_level (x); cp_finish_decl (x, init, false, NULL_TREE, 0); do_pop_from_top_level (); timevar_cond_stop (TV_NAME_LOOKUP, subtime); diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h index 5d2d364f..76ec8f2 100644 --- a/gcc/cp/name-lookup.h +++ b/gcc/cp/name-lookup.h @@ -341,7 +341,7 @@ extern tree lookup_qualified_name (tree scope, const char *name, bool = true); extern bool is_local_extern (tree); extern bool pushdecl_class_level (tree); -extern tree pushdecl_namespace_level (tree, bool); +extern tree pushdecl_namespace_level (tree, bool is_friend = false); extern bool push_class_level_binding (tree, tree); extern tree get_local_decls (); extern int function_parm_depth (void); @@ -371,7 +371,7 @@ extern tree pushdecl (tree, bool is_friend = false); extern tree pushdecl_outermost_localscope (tree); extern tree pushdecl_top_level (tree, bool is_friend = false); extern tree pushdecl_top_level_and_finish (tree, tree); -extern tree pushtag (tree, tree, tag_scope); +extern tree pushtag (tree, tree, tag_scope = ts_current); extern int push_namespace (tree, bool make_inline = false); extern void pop_namespace (void); extern void push_nested_namespace (tree); diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 62e8509..6f8dbc3 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -5669,7 +5669,7 @@ template_parm_outer_level (tree t, void *data) If IS_FRIEND is true, DECL is a friend declaration. 
*/ tree -push_template_decl_real (tree decl, bool is_friend) +push_template_decl (tree decl, bool is_friend) { tree tmpl; tree args; @@ -5694,8 +5694,10 @@ push_template_decl_real (tree decl, bool is_friend) && DECL_TEMPLATE_SPECIALIZATION (decl) && TINFO_USED_TEMPLATE_ID (DECL_TEMPLATE_INFO (decl)))); - if (TREE_CODE (decl) == FUNCTION_DECL && DECL_FRIEND_P (decl)) - is_friend = true; + /* No surprising friend functions. */ + gcc_checking_assert (is_friend + || !(TREE_CODE (decl) == FUNCTION_DECL + && DECL_FRIEND_P (decl))); if (is_friend) /* For a friend, we want the context of the friend, not @@ -6096,12 +6098,6 @@ push_template_decl_real (tree decl, bool is_friend) return DECL_TEMPLATE_RESULT (tmpl); } -tree -push_template_decl (tree decl) -{ - return push_template_decl_real (decl, false); -} - /* FN is an inheriting constructor that inherits from the constructor template INHERITED; turn FN into a constructor template with a matching template header. */ @@ -9943,7 +9939,7 @@ lookup_template_class_1 (tree d1, tree arglist, tree in_decl, tree context, /* A local class. Make sure the decl gets registered properly. */ if (context == current_function_decl) - if (pushtag (DECL_NAME (gen_tmpl), t, /*tag_scope=*/ts_current) + if (pushtag (DECL_NAME (gen_tmpl), t) == error_mark_node) return error_mark_node; @@ -11897,7 +11893,7 @@ instantiate_class_template_1 (tree type) tsubst_enum. 
*/ if (name) SET_IDENTIFIER_TYPE_VALUE (name, newtag); - pushtag (name, newtag, /*tag_scope=*/ts_current); + pushtag (name, newtag); } } else if (DECL_DECLARES_FUNCTION_P (t)) @@ -12077,6 +12073,7 @@ instantiate_class_template_1 (tree type) /* friend class C; */ friend_type = tsubst (friend_type, args, tf_warning_or_error, NULL_TREE); + /* Otherwise it's friend class C; diff --git a/gcc/cp/ptree.c b/gcc/cp/ptree.c index 11833e3..a28b722 100644 --- a/gcc/cp/ptree.c +++ b/gcc/cp/ptree.c @@ -332,8 +332,12 @@ debug_overload (tree node) tree decl = *iter; auto xloc = expand_location (DECL_SOURCE_LOCATION (decl)); auto fullname = decl_as_string (decl, 0); + bool using_p = iter.using_p (); + bool hidden_p = iter.hidden_p (); - fprintf (file, "%p: %s:%d:%d \"%s\"\n", (void *)decl, + fprintf (file, "%p:%c%c %s:%d:%d \"%s\"\n", (void *)decl, + hidden_p ? 'H' : '-', + using_p ? 'U' : '-', xloc.file, xloc.line, xloc.column, fullname); } } diff --git a/gcc/cp/rtti.c b/gcc/cp/rtti.c index 0ab3c42..7c4bff7 100644 --- a/gcc/cp/rtti.c +++ b/gcc/cp/rtti.c @@ -169,8 +169,7 @@ init_rtti_processing (void) tree type_info_type; push_nested_namespace (std_node); - type_info_type = xref_tag (class_type, get_identifier ("type_info"), - /*tag_scope=*/ts_current, false); + type_info_type = xref_tag (class_type, get_identifier ("type_info")); pop_nested_namespace (std_node); const_type_info_type_node = cp_build_qualified_type (type_info_type, TYPE_QUAL_CONST); @@ -761,9 +760,7 @@ build_dynamic_cast_1 (location_t loc, tree type, tree expr, push_abi_namespace (); tinfo_ptr = xref_tag (class_type, - get_identifier ("__class_type_info"), - /*tag_scope=*/ts_current, false); - + get_identifier ("__class_type_info")); tinfo_ptr = build_pointer_type (cp_build_qualified_type (tinfo_ptr, TYPE_QUAL_CONST)); @@ -948,10 +945,8 @@ tinfo_base_init (tinfo_s *ti, tree target) vtable_ptr = ti->vtable; if (!vtable_ptr) { - tree real_type; push_abi_namespace (); - real_type = xref_tag (class_type, ti->name, - 
/*tag_scope=*/ts_current, false); + tree real_type = xref_tag (class_type, ti->name); pop_abi_namespace (); if (!COMPLETE_TYPE_P (real_type)) diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 11996c9..b093044 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -3216,13 +3216,13 @@ begin_class_definition (tree t) if (t == error_mark_node || ! MAYBE_CLASS_TYPE_P (t)) { t = make_class_type (RECORD_TYPE); - pushtag (make_anon_name (), t, /*tag_scope=*/ts_current); + pushtag (make_anon_name (), t); } if (TYPE_BEING_DEFINED (t)) { t = make_class_type (TREE_CODE (t)); - pushtag (TYPE_IDENTIFIER (t), t, /*tag_scope=*/ts_current); + pushtag (TYPE_IDENTIFIER (t), t); } maybe_process_partial_specialization (t); pushclass (t); diff --git a/gcc/objcp/objcp-decl.c b/gcc/objcp/objcp-decl.c index 9ae7f23..087b5d5 100644 --- a/gcc/objcp/objcp-decl.c +++ b/gcc/objcp/objcp-decl.c @@ -41,7 +41,7 @@ objcp_start_struct (location_t loc ATTRIBUTE_UNUSED, if (!name) name = make_anon_name (); - s = xref_tag (record_type, name, ts_global, 0); + s = xref_tag (record_type, name, ts_global); CLASSTYPE_DECLARED_CLASS (s) = 0; /* this is a 'struct', not a 'class'. */ xref_basetypes (s, NULL_TREE); /* no base classes here! */ @@ -84,7 +84,7 @@ objcp_finish_function (void) tree objcp_xref_tag (enum tree_code code ATTRIBUTE_UNUSED, tree name) { - return xref_tag (record_type, name, ts_global, false); + return xref_tag (record_type, name, ts_global); } int -- cgit v1.1 From a2b7397b501378815d1b6c5beb7cbda21f2e4ad7 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Fri, 25 Sep 2020 00:16:27 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 139 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 50 +++++++++++++++++ gcc/cp/ChangeLog | 40 ++++++++++++++ gcc/fortran/ChangeLog | 7 +++ gcc/objcp/ChangeLog | 6 +++ gcc/testsuite/ChangeLog | 85 +++++++++++++++++++++++++++++ 7 files changed, 328 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9ed3785..b8ececa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,142 @@ +2020-09-24 Alex Coplan + + * config/arm/arm-cpus.in (neoverse-n2): New. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Regenerate. + * doc/invoke.texi: Document support for Neoverse N2. + +2020-09-24 Alex Coplan + + * config/aarch64/aarch64-cores.def: Add Neoverse N2. + * config/aarch64/aarch64-tune.md: Regenerate. + * doc/invoke.texi: Document AArch64 support for Neoverse N2. + +2020-09-24 Richard Biener + + * vec.h (auto_vec::auto_vec (auto_vec &&)): New move CTOR. + (auto_vec::operator=(auto_vec &&)): Delete. + * hash-table.h (hash_table::expand): Use std::move when expanding. + * cfgloop.h (get_loop_exit_edges): Return auto_vec. + * cfgloop.c (get_loop_exit_edges): Adjust. + * cfgloopmanip.c (fix_loop_placement): Likewise. + * ipa-fnsummary.c (analyze_function_body): Likewise. + * ira-build.c (create_loop_tree_nodes): Likewise. + (create_loop_tree_node_allocnos): Likewise. + (loop_with_complex_edge_p): Likewise. + * ira-color.c (ira_loop_edge_freq): Likewise. + * loop-unroll.c (analyze_insns_in_loop): Likewise. + * predict.c (predict_loops): Likewise. + * tree-predcom.c (last_always_executed_block): Likewise. + * tree-ssa-loop-ch.c (ch_base::copy_headers): Likewise. + * tree-ssa-loop-im.c (store_motion_loop): Likewise. + * tree-ssa-loop-ivcanon.c (loop_edge_to_cancel): Likewise. + (canonicalize_loop_induction_variables): Likewise. + * tree-ssa-loop-manip.c (get_loops_exits): Likewise. + * tree-ssa-loop-niter.c (find_loop_niter): Likewise. 
+ (finite_loop_p): Likewise. + (find_loop_niter_by_eval): Likewise. + (estimate_numbers_of_iterations): Likewise. + * tree-ssa-loop-prefetch.c (emit_mfence_after_loop): Likewise. + (may_use_storent_in_loop_p): Likewise. + +2020-09-24 Jan Hubicka + + * doc/invoke.texi: Document -fipa-modref, ipa-modref-max-bases, + ipa-modref-max-refs, ipa-modref-max-accesses, ipa-modref-max-tests. + * ipa-modref-tree.c (test_insert_search_collapse): Update. + (test_merge): Update. + (gt_ggc_mx): New function. + * ipa-modref-tree.h (struct modref_access_node): New structure. + (struct modref_ref_node): Add every_access and accesses array. + (modref_ref_node::modref_ref_node): Update ctor. + (modref_ref_node::search): New member function. + (modref_ref_node::collapse): New member function. + (modref_ref_node::insert_access): New member function. + (modref_base_node::insert_ref): Do not collapse base if ref is 0. + (modref_base_node::collapse): Copllapse also refs. + (modref_tree): Add accesses. + (modref_tree::modref_tree): Initialize max_accesses. + (modref_tree::insert): Add access parameter. + (modref_tree::cleanup): New member function. + (modref_tree::merge): Add parm_map; merge accesses. + (modref_tree::copy_from): New member function. + (modref_tree::create_ggc): Add max_accesses. + * ipa-modref.c (dump_access): New function. + (dump_records): Dump accesses. + (dump_lto_records): Dump accesses. + (get_access): New function. + (record_access): Record access. + (record_access_lto): Record access. + (analyze_call): Compute parm_map. + (analyze_function): Update construction of modref records. + (modref_summaries::duplicate): Likewise; use copy_from. + (write_modref_records): Stream accesses. + (read_modref_records): Sream accesses. + (pass_ipa_modref::execute): Update call of merge. + * params.opt (-param=modref-max-accesses): New. + * tree-ssa-alias.c (alias_stats): Add modref_baseptr_tests. + (dump_alias_stats): Update. + (base_may_alias_with_dereference_p): New function. 
+ (modref_may_conflict): Check accesses. + (ref_maybe_used_by_call_p_1): Update call to modref_may_conflict. + (call_may_clobber_ref_p_1): Update call to modref_may_conflict. + +2020-09-24 Richard Sandiford + + * config/arm/arm.md (*stack_protect_combined_set_insn): For non-PIC, + load the address of the canary rather than the address of the + constant pool entry that points to it. + (*stack_protect_combined_test_insn): Likewise. + +2020-09-24 Richard Biener + + PR tree-optimization/97085 + * match.pd (mask ? { false,..} : { true, ..} -> ~mask): New. + +2020-09-24 Jan Hubicka + + * ipa-modref-tree.h (modref_base::collapse): Release memory. + (modref_tree::create_ggc): New member function. + (modref_tree::colapse): Release memory. + (modref_tree::~modref_tree): New destructor. + * ipa-modref.c (modref_summaries::create_ggc): New function. + (analyze_function): Use create_ggc. + (modref_summaries::duplicate): Likewise. + (read_modref_records): Likewise. + (modref_read): Likewise. + +2020-09-24 Alan Modra + + * config/rs6000/rs6000.c (rs6000_rtx_costs): Pass mode to + reg_or_add_cint_operand and reg_or_sub_cint_operand. + +2020-09-24 Alan Modra + + PR target/93012 + * config/rs6000/rs6000.c (num_insns_constant_gpr): Count rldimi + constants correctly. + +2020-09-24 Alan Modra + + * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): + Conditionally define __PCREL__. + +2020-09-24 Alan Modra + + PR target/97107 + * config/rs6000/rs6000-internal.h (struct rs6000_stack): Improve + calls_p comment. + * config/rs6000/rs6000-logue.c (rs6000_stack_info): Likewise. + (rs6000_expand_split_stack_prologue): Emit the prologue for + functions that make a sibling call. + +2020-09-24 David Malcolm + + * doc/analyzer.texi (Analyzer Paths): Add note about + -fno-analyzer-feasibility. + * doc/invoke.texi (Static Analyzer Options): Add + -fno-analyzer-feasibility. + 2020-09-24 Paul A. 
Clarke * doc/extend.texi: Add 'd' for doubleword variant of diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 52a894d..cfe4a2e 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200924 +20200925 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 8bba071..2047917 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,53 @@ +2020-09-24 David Malcolm + + * analyzer.h (struct rejected_constraint): New decl. + * analyzer.opt (fanalyzer-feasibility): New option. + * diagnostic-manager.cc (path_builder::path_builder): Add + "problem" param and use it to initialize new field. + (path_builder::get_feasibility_problem): New accessor. + (path_builder::m_feasibility_problem): New field. + (dedupe_winners::add): Remove inversion of logic in "if" clause, + swapping if/else suites. In the !feasible_p suite, inspect + flag_analyzer_feasibility and add code to handle when this + is off, accepting the infeasible path, but recording the + feasibility_problem. + (diagnostic_manager::emit_saved_diagnostic): Pass the + feasibility_problem to the path_builder. + (diagnostic_manager::add_events_for_eedge): If we have + a feasibility_problem at this edge, use it to add a custom event. + * engine.cc (exploded_path::feasible_p): Pass a + rejected_constraint ** to model.maybe_update_for_edge and transfer + ownership of any created instance to any feasibility_problem. + (feasibility_problem::dump_to_pp): New. + * exploded-graph.h (feasibility_problem::feasibility_problem): + Drop "model" param; add rejected_constraint * param. + (feasibility_problem::~feasibility_problem): New. + (feasibility_problem::dump_to_pp): New decl. + (feasibility_problem::m_model): Drop field. + (feasibility_problem::m_rc): New field. + * program-point.cc (function_point::get_location): Handle + PK_BEFORE_SUPERNODE and PK_AFTER_SUPERNODE. + * program-state.cc (program_state::on_edge): Pass NULL to new + param of region_model::maybe_update_for_edge. 
+ * region-model.cc (region_model::add_constraint): New overload + adding a rejected_constraint ** param. + (region_model::maybe_update_for_edge): Add rejected_constraint ** + param and pass it to the various apply_constraints_for_ calls. + (region_model::apply_constraints_for_gcond): Add + rejected_constraint ** param and pass it to add_constraint calls. + (region_model::apply_constraints_for_gswitch): Likewise. + (region_model::apply_constraints_for_exception): Likewise. + (rejected_constraint::dump_to_pp): New. + * region-model.h (region_model::maybe_update_for_edge): + Add rejected_constraint ** param. + (region_model::add_constraint): New overload adding a + rejected_constraint ** param. + (region_model::apply_constraints_for_gcond): Add + rejected_constraint ** param. + (region_model::apply_constraints_for_gswitch): Likewise. + (region_model::apply_constraints_for_exception): Likewise. + (struct rejected_constraint): New. + 2020-09-23 David Malcolm PR analyzer/97178 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index e583c64..e411f34 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,43 @@ +2020-09-24 Nathan Sidwell + + * cp-tree.h (duplicate_decls): Default is_friend to false. + (xref_tag): Default tag_scope & tpl_header_p to ts_current & false. + (push_template_decl_real): Default is_friend to false. Rename to + ... + (push_template_decl): ... here. Delete original decl. + * name-lookup.h (pushdecl_namespace_level): Default is_friend to + false. + (pushtag): Default tag_scope to ts_current. + * coroutines.cc (morph_fn_to_coro): Drop default args to xref_tag. + * decl.c (start_decl): Drop default args to duplicate_decls. + (start_enum): Drop default arg to pushtag & xref_tag. + (start_preparsed_function): Pass DECL_FRIEND_P to + push_template_decl. + (grokmethod): Likewise. + * friend.c (do_friend): Rename push_template_decl_real calls. + * lambda.c (begin_lamnbda_type): Drop default args to xref_tag. + (vla_capture_type): Likewise. 
+ * name-lookup.c (maybe_process_template_type_declaration): Rename + push_template_decl_real call. + (pushdecl_top_level_and_finish): Drop default arg to + pushdecl_namespace_level. + * pt.c (push_template_decl_real): Assert no surprising friend + functions. Rename to ... + (push_template_decl): ... here. Delete original function. + (lookup_template_class_1): Drop default args from pushtag. + (instantiate_class_template_1): Likewise. + * ptree.c (debug_overload): Print hidden and using markers. + * rtti.c (init_rtti_processing): Drop refault args from xref_tag. + (build_dynamic_cast_1, tinfo_base_init): Likewise. + * semantics.c (begin_class_definition): Drop default args to + pushtag. + +2020-09-24 Nathan Sidwell + + PR c++/97186 + * pt.c (maybe_instantiate_noexcept): Local externs are never + member fns. + 2020-09-23 Nathan Sidwell * name-lookup.h (typedef cxx_binding): Delete tdef. diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index e826128..19f2e6a 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,10 @@ +2020-09-24 Paul Thomas + + PR fortran/96495 + * trans-expr.c (gfc_conv_procedure_call): Take the deallocation + of allocatable result components of a scalar result outside the + scalarization loop. Find and use the stored result. + 2020-09-21 Harald Anlauf Paul Thomas diff --git a/gcc/objcp/ChangeLog b/gcc/objcp/ChangeLog index c8d5075..993c36d5 100644 --- a/gcc/objcp/ChangeLog +++ b/gcc/objcp/ChangeLog @@ -1,3 +1,9 @@ +2020-09-24 Nathan Sidwell + + * objcp-decl.c (objcp_start_struct): Drop default args to + xref_tag. + (objcp_xref_tag): Likewise. + 2020-01-01 Jakub Jelinek Update copyright years. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e40c474..0a190f8 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,88 @@ +2020-09-24 Nathan Sidwell + + * g++.dg/template/local-var1.C: New. 
+ +2020-09-24 Nathan Sidwell + + * g++.dg/template/local-fn4.C: Add target c++11 + +2020-09-24 Nathan Sidwell + + PR c++/97186 + * g++.dg/template/local10.C: Rename ... + * g++.dg/template/local-fn3.C: .. here. Require lto. + * g++.dg/template/local-fn4.C: New. + +2020-09-24 Jan Hubicka + + * gcc.dg/tree-ssa/modref-1.c: New test. + +2020-09-24 Tom de Vries + + * gcc.dg/tls/thr-cse-1.c: Scan final dump instead of assembly for + nvptx. + +2020-09-24 Tom de Vries + + * gcc.dg/independent-cloneids-1.c: Use scan-rtl-dump instead of + scan-assembler. + +2020-09-24 Richard Biener + + PR tree-optimization/97085 + * gcc.dg/pr97192.c: New testcase. + +2020-09-24 Paul Thomas + + PR fortran/96495 + * gfortran.dg/alloc_comp_result_2.f90 : New test. + +2020-09-24 Tom de Vries + + * gcc.dg/pr87314-1.c: Add nvptx-specific scan-assembler directive. + +2020-09-24 Richard Sandiford + + * gcc.target/arm/stack-protector-5.c: New test. + * gcc.target/arm/stack-protector-6.c: Likewise. + +2020-09-24 Richard Sandiford + + * gcc.target/arm/stack-protector-3.c: New test. + * gcc.target/arm/stack-protector-4.c: Likewise. + +2020-09-24 Richard Biener + + PR tree-optimization/97085 + * gcc.dg/vect/pr97085.c: New testcase. + +2020-09-24 Tom de Vries + + * gcc.dg/pr94600-1.c: Require effective target non_strict_align for + scan-rtl-dump-times. + * gcc.dg/pr94600-3.c: Same. + +2020-09-24 Tom de Vries + + * c-c++-common/builtin-has-attribute-3.c: Compile with -DSKIP_ALIAS + for effective target ! alias. + +2020-09-24 Kewen Lin + + PR tree-optimization/97075 + * gcc.target/powerpc/p9-vec-length-full-6.c: Adjust. + +2020-09-24 David Malcolm + + PR analyzer/93355 + * gcc.dg/analyzer/pr93355-localealias-feasibility.c: New test. + * gcc.dg/analyzer/pr93355-localealias-simplified.c: New test. + * gcc.dg/analyzer/pr93355-localealias.c: New test. + +2020-09-24 David Malcolm + + * gcc.dg/analyzer/feasibility-2.c: New test. 
+ 2020-09-23 Martin Sebor PR middle-end/97175 -- cgit v1.1 From fa91ca7f506b162d5f0afcffd2d7d562da6aa7fa Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Thu, 24 Sep 2020 10:49:02 +0200 Subject: [testsuite] Add effective target ident_directive On nvptx we run into: ... FAIL: c-c++-common/ident-1b.c -Wc++-compat scan-assembler GCC: FAIL: c-c++-common/ident-2b.c -Wc++-compat scan-assembler GCC: ... Using a scan-assembler directive adds -fno-ident to the compile options. The test c-c++-common/ident-1b.c adds dg-options "-fident", and intends to check that the -fident overrides the -fno-ident, by means of the scan-assembler. But for nvptx, there's no .ident directive, both with -fident and -fno-ident. Fix this by adding an effective target ident_directive, and requiring it in both test-cases. Tested on nvptx and x86_64. gcc/testsuite/ChangeLog: 2020-09-24 Tom de Vries * lib/target-supports.exp (check_effective_target_ident_directive): New proc. * c-c++-common/ident-1b.c: Require effective target ident_directive. * c-c++-common/ident-2b.c: Same. --- gcc/testsuite/c-c++-common/ident-1b.c | 1 + gcc/testsuite/c-c++-common/ident-2b.c | 1 + gcc/testsuite/lib/target-supports.exp | 9 +++++++++ 3 files changed, 11 insertions(+) (limited to 'gcc') diff --git a/gcc/testsuite/c-c++-common/ident-1b.c b/gcc/testsuite/c-c++-common/ident-1b.c index 6956744..b8b83e6 100644 --- a/gcc/testsuite/c-c++-common/ident-1b.c +++ b/gcc/testsuite/c-c++-common/ident-1b.c @@ -2,6 +2,7 @@ * Make sure scan-assembler turns off .ident unless -fident in testcase */ /* { dg-do compile } */ /* { dg-options "-fident" } */ +/* { dg-require-effective-target ident_directive }*/ int i; /* { dg-final { scan-assembler "GCC: " { xfail { { hppa*-*-hpux* && { ! 
lp64 } } || { powerpc-ibm-aix* || powerpc*-*-darwin* } } } } } */ diff --git a/gcc/testsuite/c-c++-common/ident-2b.c b/gcc/testsuite/c-c++-common/ident-2b.c index fae6a03..52f0693 100644 --- a/gcc/testsuite/c-c++-common/ident-2b.c +++ b/gcc/testsuite/c-c++-common/ident-2b.c @@ -2,6 +2,7 @@ * Make sure scan-assembler-times turns off .ident unless -fident in testcase */ /* { dg-do compile } */ /* { dg-options "-fident" } */ +/* { dg-require-effective-target ident_directive }*/ int ident; /* { dg-final { scan-assembler "GCC: " { xfail { { hppa*-*-hpux* && { ! lp64 } } || { powerpc-ibm-aix* || powerpc*-*-darwin* } } } } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 5cbe32f..0a00972 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -10510,3 +10510,12 @@ proc check_symver_available { } { } }] } + +# Return 1 if emitted assembly contains .ident directive. + +proc check_effective_target_ident_directive {} { + return [check_no_messages_and_pattern ident_directive \ + "(?n)^\[\t\]+\\.ident" assembly { + int i; + }] +} -- cgit v1.1 From 7ac25ab39897e83f38e1246b0330cd2354ae0f91 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Fri, 25 Sep 2020 08:42:10 +0200 Subject: [testsuite] Add missing require-effective-target alloca Add missing require-effective-target alloca directives. Tested on nvptx. gcc/testsuite/ChangeLog: 2020-09-25 Tom de Vries * gcc.dg/analyzer/pr93355-localealias.c: Require effective target alloca. 
--- gcc/testsuite/gcc.dg/analyzer/pr93355-localealias.c | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias.c b/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias.c index a5cb0d5..043e45f 100644 --- a/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias.c +++ b/gcc/testsuite/gcc.dg/analyzer/pr93355-localealias.c @@ -5,6 +5,7 @@ /* { dg-do "compile" } */ /* { dg-additional-options "-Wno-analyzer-too-complex -fno-analyzer-feasibility" } */ /* TODO: remove the need for these options. */ +/* { dg-require-effective-target alloca } */ /* Handle aliases for locale names. Copyright (C) 1995-1999, 2000-2001, 2003 Free Software Foundation, Inc. -- cgit v1.1 From e9e2953ceddb804e551d75725f4f603aaf71cc0f Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Thu, 24 Sep 2020 16:37:41 +0200 Subject: Fix spacing in cgraph_node::dump. gcc/ChangeLog: * cgraph.c (cgraph_node::dump): Always print space at the end of a message. Remove one extra space. --- gcc/cgraph.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/cgraph.c b/gcc/cgraph.c index b43adaa..eb5f1a5 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -2272,18 +2272,17 @@ cgraph_node::dump (FILE *f) edge->dump_edge_flags (f); if (edge->indirect_info->param_index != -1) { - fprintf (f, " of param:%i", edge->indirect_info->param_index); + fprintf (f, "of param:%i ", edge->indirect_info->param_index); if (edge->indirect_info->agg_contents) - fprintf (f, " loaded from %s %s at offset %i", + fprintf (f, "loaded from %s %s at offset %i ", edge->indirect_info->member_ptr ? "member ptr" : "aggregate", edge->indirect_info->by_ref ? 
"passed by reference":"", (int)edge->indirect_info->offset); if (edge->indirect_info->vptr_changed) - fprintf (f, " (vptr maybe changed)"); + fprintf (f, "(vptr maybe changed) "); } - fprintf (f, " Num speculative call targets: %i", + fprintf (f, "num speculative call targets: %i\n", edge->indirect_info->num_speculative_call_targets); - fprintf (f, "\n"); if (edge->indirect_info->polymorphic) edge->indirect_info->context.dump (f); } -- cgit v1.1 From b2784a9698ffdd6cd8434694c11336e9f7905be5 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Thu, 24 Sep 2020 16:29:49 +0200 Subject: Add cgraph_edge::debug function. gcc/ChangeLog: * cgraph.c (cgraph_edge::debug): New. * cgraph.h (cgraph_edge::debug): New. --- gcc/cgraph.c | 14 ++++++++++++++ gcc/cgraph.h | 3 +++ 2 files changed, 17 insertions(+) (limited to 'gcc') diff --git a/gcc/cgraph.c b/gcc/cgraph.c index eb5f1a5..f018020 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -2072,6 +2072,20 @@ cgraph_edge::dump_edge_flags (FILE *f) fprintf (f, "(can throw external) "); } +/* Dump edge to stderr. */ + +void +cgraph_edge::debug (void) +{ + fprintf (stderr, "%s -> %s ", caller->dump_asm_name (), + callee == NULL ? "(null)" : callee->dump_asm_name ()); + dump_edge_flags (stderr); + fprintf (stderr, "\n\n"); + caller->debug (); + if (callee != NULL) + callee->debug (); +} + /* Dump call graph node to file F. */ void diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 0211f08..96d6cf6 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -2022,6 +2022,9 @@ private: /* Output flags of edge to a file F. */ void dump_edge_flags (FILE *f); + /* Dump edge to stderr. */ + void DEBUG_FUNCTION debug (void); + /* Verify that call graph edge corresponds to DECL from the associated statement. Return true if the verification should fail. 
*/ bool verify_corresponds_to_fndecl (tree decl); -- cgit v1.1 From c2ebf4f10de9257baffbe29cd0074893a01cfd83 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 25 Sep 2020 10:43:37 +0200 Subject: openmp: Add support for non-rect simd and improve collapsed simd support The following change adds support for non-rectangular simd loops. While working on that, I've noticed we actually don't vectorize collapsed simd loops at all, because the code that I thought would be vectorizable actually is not vectorized. While for the constant lower/upper bounds and constant step of all but the outermost loop we could in theory vectorize by computing the separate iterators using vectorized division and modulo for each of them from the single iterator that increments by 1 from 0 to total iteration count in the loop nest, I think that would be fairly expensive and the chances of the loop body being vectorizable would be low e.g. because of array indices unlikely to be linear and would need scatters/gathers. This patch changes the generated code to vectorize only the innermost loop which has a higher chance of being vectorized. Below is the list of tests and function names in which the patch resulted in vectorizing something that hasn't been vectorized before (ok, the first line is a new test). I've also found that the vectorizer will not vectorize loops with non-constant steps, I plan to do something about those incrementally on the omp-expand.c side (basically, compute number of iterations before the loop and use a 0 to number_of_iterations step 1 IV as the main one). I have a problem with the composite simd vectorization though. The point is that each thread (or task etc.) is given only a range of consecutive iterations, so somewhere earlier it computes total number of iterations and splits the work between the workers and then the intent is to try to vectorize it. So, each thread is then given a begin ... end-1 range that it would handle. 
This means that from the single begin value I need to compute the individual iteration vars I should start at and then goto into the loop nest to begin iterating there (and actually compute how many iterations the innermost loop should do each time so that it stops before end). Very roughly the IL I emit is something like: int t[100][100][100]; void foo (int a, int b, int c, int d, int e, int f, int g, int h, int u, int v, int w, int x) { int i, j, k; int cnt; if (x) { i = u; j = v; k = w; goto doit; } for (i = a; i < b; i += c) for (j = d; j < e; j += f) { k = g; doit: for (; k < h; k++) t[i][j][k] += i + j + k; } } Unfortunately, some pass then turns the innermost loop to have more than 2 basic blocks and it isn't vectorized because of that. Also, I have disabled (for now) SIMTization of collapsed simd loops, because for SIMT it would be using a single thread anyway and I didn't want to bother with checking SIMT on all places I've been changing. If SIMT support is added for some or all collapsed loops, that omp-low.c change needs to be reverted. 
Here is that list of what hasn't been vectorized before and is now: gcc/testsuite/gcc.dg/vect/vect-simd-17.c doit gcc/testsuite/gfortran.dg/gomp/openmp-simd-6.f90 bar libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-10.c f28_taskloop_simd_normal._omp_fn.0 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-10.c _Z24f28_taskloop_simd_normalv._omp_fn.0 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-11.c f25_t_simd_normal._omp_fn.0 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-11.c f26_t_simd_normal._omp_fn.0 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-11.c f27_t_simd_normal._omp_fn.0 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-11.c f28_tpf_simd_guided32._omp_fn.1 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-11.c f28_tpf_simd_runtime._omp_fn.1 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-11.c _Z17f25_t_simd_normaliiiiiii._omp_fn.0 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-11.c _Z17f26_t_simd_normaliiiixxi._omp_fn.0 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-11.c _Z17f27_t_simd_normalv._omp_fn.0 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-11.c _Z20f28_tpf_simd_runtimev._omp_fn.1 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-11.c _Z21f28_tpf_simd_guided32v._omp_fn.1 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-2.c f7_simd_normal libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-2.c f7_simd_normal libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-2.c f8_f_simd_guided32 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-2.c f8_f_simd_guided32 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-2.c f8_f_simd_runtime libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-2.c f8_f_simd_runtime libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-2.c f8_pf_simd_guided32._omp_fn.0 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-2.c f8_pf_simd_runtime._omp_fn.0 
libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-2.c _Z18f8_pf_simd_runtimev._omp_fn.0 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-2.c _Z19f8_pf_simd_guided32v._omp_fn.0 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-4.c f8_taskloop_simd_normal._omp_fn.0 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-4.c _Z23f8_taskloop_simd_normalv._omp_fn.0 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-5.c f7_t_simd_normal._omp_fn.0 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-5.c f8_tpf_simd_guided32._omp_fn.1 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-5.c f8_tpf_simd_runtime._omp_fn.1 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-5.c _Z16f7_t_simd_normalv._omp_fn.0 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-5.c _Z19f8_tpf_simd_runtimev._omp_fn.1 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-5.c _Z20f8_tpf_simd_guided32v._omp_fn.1 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-8.c f25_simd_normal libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-8.c f25_simd_normal libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-8.c f26_simd_normal libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-8.c f26_simd_normal libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-8.c f27_simd_normal libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-8.c f27_simd_normal libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-8.c f28_f_simd_guided32 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-8.c f28_f_simd_guided32 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-8.c f28_f_simd_runtime libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-8.c f28_f_simd_runtime libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-8.c f28_pf_simd_guided32._omp_fn.0 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/for-8.c f28_pf_simd_runtime._omp_fn.0 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-8.c 
_Z19f28_pf_simd_runtimev._omp_fn.0 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/for-8.c _Z20f28_pf_simd_guided32v._omp_fn.0 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/master-combined-1.c main._omp_fn.9 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/master-combined-1.c main._omp_fn.9 libgomp/testsuite/libgomp.c++/../libgomp.c-c++-common/simd-1.c f2 libgomp/testsuite/libgomp.c/../libgomp.c-c++-common/simd-1.c f2 libgomp/testsuite/libgomp.c/pr70680-2.c f1._omp_fn.0 libgomp/testsuite/libgomp.c/pr70680-2.c f2._omp_fn.0 libgomp/testsuite/libgomp.c/pr70680-2.c f3._omp_fn.0 libgomp/testsuite/libgomp.c/pr70680-2.c f4._omp_fn.0 libgomp/testsuite/libgomp.c/simd-8.c foo libgomp/testsuite/libgomp.c/simd-9.c bar libgomp/testsuite/libgomp.c/simd-9.c foo 2020-09-25 Jakub Jelinek gcc/ * omp-low.c (scan_omp_1_stmt): Don't call scan_omp_simd for collapse > 1 loops as simt doesn't support collapsed loops yet. * omp-expand.c (expand_omp_for_init_counts, expand_omp_for_init_vars): Small tweaks to function comment. (expand_omp_simd): Rewritten collapse > 1 support to only attempt to vectorize the innermost loop and emit set of outer loops around it. For non-composite simd with collapse > 1 without broken loop don't even try to compute number of iterations first. Add support for non-rectangular simd loops. (expand_omp_for): Don't sorry_at on non-rectangular simd loops. gcc/testsuite/ * gcc.dg/vect/vect-simd-17.c: New test. libgomp/ * testsuite/libgomp.c/loop-25.c: New test. 
--- gcc/omp-expand.c | 412 ++++++++++++++++++++++--------- gcc/omp-low.c | 3 +- gcc/testsuite/gcc.dg/vect/vect-simd-17.c | 304 +++++++++++++++++++++++ 3 files changed, 608 insertions(+), 111 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-simd-17.c (limited to 'gcc') diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index 8f1286e..9160022 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -1700,8 +1700,8 @@ expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner, loops, do this only for the rectangular loops. Then pick the loops which reference outer vars in their bound expressions and the loops which they refer to and for this sub-nest compute - number of iterations. For triangular loops use Faulhaber's formula - (TBD.), otherwise as a fallback, compute by iterating the loops. + number of iterations. For triangular loops use Faulhaber's formula, + otherwise as a fallback, compute by iterating the loops. If e.g. the sub-nest is for (I = N11; I COND1 N12; I += STEP1) for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2) @@ -2383,7 +2383,7 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, into its _looptemp_ temporaries instead. For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect inclusive), use the count of all those loops together, and either - find quadratic etc. equation roots (TBD), or as a fallback, do: + find quadratic etc. 
equation roots, or as a fallback, do: COUNT = 0; for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1) for (tmpj = M21 * tmpi + N21; @@ -6203,49 +6203,8 @@ expand_omp_for_static_chunk (struct omp_region *region, if (V cond N2) goto L0; else goto L2; L2: - For collapsed loops, given parameters: - collapse(3) - for (V1 = N11; V1 cond1 N12; V1 += STEP1) - for (V2 = N21; V2 cond2 N22; V2 += STEP2) - for (V3 = N31; V3 cond3 N32; V3 += STEP3) - BODY; - - we generate pseudocode - - if (cond3 is <) - adj = STEP3 - 1; - else - adj = STEP3 + 1; - count3 = (adj + N32 - N31) / STEP3; - if (cond2 is <) - adj = STEP2 - 1; - else - adj = STEP2 + 1; - count2 = (adj + N22 - N21) / STEP2; - if (cond1 is <) - adj = STEP1 - 1; - else - adj = STEP1 + 1; - count1 = (adj + N12 - N11) / STEP1; - count = count1 * count2 * count3; - V = 0; - V1 = N11; - V2 = N21; - V3 = N31; - goto L1; - L0: - BODY; - V += 1; - V3 += STEP3; - V2 += (V3 cond3 N32) ? 0 : STEP2; - V3 = (V3 cond3 N32) ? V3 : N31; - V1 += (V2 cond2 N22) ? 0 : STEP1; - V2 = (V2 cond2 N22) ? V2 : N21; - L1: - if (V < count) goto L0; else goto L2; - L2: - - */ + For collapsed loops, emit the outer loops as scalar + and only try to vectorize the innermost loop. */ static void expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) @@ -6319,7 +6278,9 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); /* Not needed in SSA form right now. 
*/ gcc_assert (!gimple_in_ssa_p (cfun)); - if (fd->collapse > 1) + if (fd->collapse > 1 + && (gimple_omp_for_combined_into_p (fd->for_stmt) + || broken_loop)) { int first_zero_iter = -1, dummy = -1; basic_block zero_iter_bb = l2_bb, dummy_bb = NULL; @@ -6383,25 +6344,114 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); } - expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); + tree n2var = NULL_TREE; + tree n2v = NULL_TREE; + tree *nonrect_bounds = NULL; if (fd->collapse > 1) { - if (gimple_omp_for_combined_into_p (fd->for_stmt)) + if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt)) { + if (fd->non_rect) + { + nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1); + memset (nonrect_bounds, 0, + sizeof (tree) * (fd->last_nonrect + 1)); + } + expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); + gcc_assert (entry_bb == gsi_bb (gsi)); + gcc_assert (fd->for_stmt == gsi_stmt (gsi)); gsi_prev (&gsi); - expand_omp_for_init_vars (fd, &gsi, counts, NULL, NULL, n1); - gsi_next (&gsi); + entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest; + expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, + NULL, n1); + gsi = gsi_for_stmt (fd->for_stmt); + } + if (broken_loop) + ; + else if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + /* Compute in n2var the limit for the first innermost loop, + i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt) + where cnt is how many iterations would the loop have if + all further iterations were assigned to the current task. */ + n2var = create_tmp_var (type); + i = fd->collapse - 1; + tree itype = TREE_TYPE (fd->loops[i].v); + if (POINTER_TYPE_P (itype)) + itype = signed_type_for (itype); + t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR + ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, fd->loops[i].step), t); + t = fold_build2 (PLUS_EXPR, itype, t, + fold_convert (itype, fd->loops[i].n2)); + if (fd->loops[i].m2) + { + tree t2 = fold_convert (itype, + fd->loops[i - fd->loops[i].outer].v); + tree t3 = fold_convert (itype, fd->loops[i].m2); + t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); + t = fold_build2 (PLUS_EXPR, itype, t, t2); + } + t = fold_build2 (MINUS_EXPR, itype, t, + fold_convert (itype, fd->loops[i].v)); + if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, + fold_convert (itype, + fd->loops[i].step))); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, + fold_convert (itype, fd->loops[i].step)); + t = fold_convert (type, t); + tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1); + t = fold_build2 (MIN_EXPR, type, t2, t); + t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t); + expand_omp_build_assign (&gsi, n2var, t); } else - for (i = 0; i < fd->collapse; i++) - { - tree itype = TREE_TYPE (fd->loops[i].v); - if (POINTER_TYPE_P (itype)) - itype = signed_type_for (itype); - t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); - expand_omp_build_assign (&gsi, fd->loops[i].v, t); - } + { + if (TREE_CODE (n2) == INTEGER_CST) + { + /* Indicate for lastprivate handling that at least one iteration + has been performed, without wasting runtime. */ + if (integer_nonzerop (n2)) + expand_omp_build_assign (&gsi, fd->loop.v, + fold_convert (type, n2)); + else + /* Indicate that no iteration has been performed. 
*/ + expand_omp_build_assign (&gsi, fd->loop.v, + build_one_cst (type)); + } + else + { + expand_omp_build_assign (&gsi, fd->loop.v, + build_zero_cst (type)); + expand_omp_build_assign (&gsi, n2, build_one_cst (type)); + } + for (i = 0; i < fd->collapse; i++) + { + t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); + if (fd->loops[i].m1) + { + tree t2 + = fold_convert (TREE_TYPE (t), + fd->loops[i - fd->loops[i].outer].v); + tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1); + t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2); + } + expand_omp_build_assign (&gsi, fd->loops[i].v, t); + /* For normal non-combined collapsed loops just initialize + the outermost iterator in the entry_bb. */ + if (!broken_loop) + break; + } + } } + else + expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); if (cond_var) { if (POINTER_TYPE_P (type) @@ -6425,11 +6475,17 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) stmt = gsi_stmt (gsi); gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (fd->loop.v, step); - else - t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); - expand_omp_build_assign (&gsi, fd->loop.v, t); + if (fd->collapse == 1 + || gimple_omp_for_combined_into_p (fd->for_stmt)) + { + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (fd->loop.v, step); + else + t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); + expand_omp_build_assign (&gsi, fd->loop.v, t); + } + else if (TREE_CODE (n2) != INTEGER_CST) + expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type)); if (fd->collapse > 1) { @@ -6447,37 +6503,6 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) fd->loops[i].v, t); } expand_omp_build_assign (&gsi, fd->loops[i].v, t); - - for (i = fd->collapse - 1; i > 0; i--) - { - tree itype = TREE_TYPE (fd->loops[i].v); - tree itype2 = TREE_TYPE (fd->loops[i - 
1].v); - if (POINTER_TYPE_P (itype2)) - itype2 = signed_type_for (itype2); - t = fold_convert (itype2, fd->loops[i - 1].step); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, - GSI_SAME_STMT); - t = build3 (COND_EXPR, itype2, - build2 (fd->loops[i].cond_code, boolean_type_node, - fd->loops[i].v, - fold_convert (itype, fd->loops[i].n2)), - build_int_cst (itype2, 0), t); - if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v))) - t = fold_build_pointer_plus (fd->loops[i - 1].v, t); - else - t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t); - expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t); - - t = fold_convert (itype, fd->loops[i].n1); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, - GSI_SAME_STMT); - t = build3 (COND_EXPR, itype, - build2 (fd->loops[i].cond_code, boolean_type_node, - fd->loops[i].v, - fold_convert (itype, fd->loops[i].n2)), - fd->loops[i].v, t); - expand_omp_build_assign (&gsi, fd->loops[i].v, t); - } } if (cond_var) { @@ -6500,14 +6525,38 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) /* Emit the condition in L1_BB. 
*/ gsi = gsi_start_bb (l1_bb); - t = fold_convert (type, n2); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - tree v = fd->loop.v; - if (DECL_P (v) && TREE_ADDRESSABLE (v)) - v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - t = build2 (fd->loop.cond_code, boolean_type_node, v, t); + if (fd->collapse > 1 + && !gimple_omp_for_combined_into_p (fd->for_stmt) + && !broken_loop) + { + i = fd->collapse - 1; + tree itype = TREE_TYPE (fd->loops[i].v); + if (fd->loops[i].m2) + t = n2v = create_tmp_var (itype); + else + t = fold_convert (itype, fd->loops[i].n2); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + tree v = fd->loops[i].v; + if (DECL_P (v) && TREE_ADDRESSABLE (v)) + v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t); + } + else + { + if (fd->collapse > 1 && !broken_loop) + t = n2var; + else + t = fold_convert (type, n2); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + tree v = fd->loop.v; + if (DECL_P (v) && TREE_ADDRESSABLE (v)) + v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + t = build2 (fd->loop.cond_code, boolean_type_node, v, t); + } cond_stmt = gimple_build_cond_empty (t); gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, @@ -6572,12 +6621,160 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; FALLTHRU_EDGE (entry_bb)->probability = profile_probability::guessed_always ().apply_scale (7, 8); - BRANCH_EDGE (entry_bb)->probability + BRANCH_EDGE (entry_bb)->probability = FALLTHRU_EDGE (entry_bb)->probability.invert (); l2_dom_bb = entry_bb; } set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); + 
if (!broken_loop && fd->collapse > 1) + { + basic_block last_bb = l1_bb; + basic_block init_bb = NULL; + for (i = fd->collapse - 2; i >= 0; i--) + { + tree nextn2v = NULL_TREE; + if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE) + e = EDGE_SUCC (last_bb, 0); + else + e = EDGE_SUCC (last_bb, 1); + basic_block bb = split_edge (e); + if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) + { + t = fold_convert (sizetype, fd->loops[i].step); + t = fold_build_pointer_plus (fd->loops[i].v, t); + } + else + { + t = fold_convert (TREE_TYPE (fd->loops[i].v), + fd->loops[i].step); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), + fd->loops[i].v, t); + } + gsi = gsi_after_labels (bb); + expand_omp_build_assign (&gsi, fd->loops[i].v, t); + + bb = split_block (bb, last_stmt (bb))->dest; + gsi = gsi_start_bb (bb); + tree itype = TREE_TYPE (fd->loops[i].v); + if (fd->loops[i].m2) + t = nextn2v = create_tmp_var (itype); + else + t = fold_convert (itype, fd->loops[i].n2); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + tree v = fd->loops[i].v; + if (DECL_P (v) && TREE_ADDRESSABLE (v)) + v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t); + cond_stmt = gimple_build_cond_empty (t); + gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); + if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL) + || walk_tree (gimple_cond_rhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL)) + { + gsi = gsi_for_stmt (cond_stmt); + gimple_regimplify_operands (cond_stmt, &gsi); + } + ne = single_succ_edge (bb); + ne->flags = EDGE_FALSE_VALUE; + + init_bb = create_empty_bb (bb); + set_immediate_dominator (CDI_DOMINATORS, init_bb, bb); + add_bb_to_loop (init_bb, bb->loop_father); + e = make_edge (bb, init_bb, EDGE_TRUE_VALUE); + e->probability + = profile_probability::guessed_always ().apply_scale (7, 8); + 
ne->probability = e->probability.invert (); + + gsi = gsi_after_labels (init_bb); + t = fold_convert (TREE_TYPE (fd->loops[i + 1].v), + fd->loops[i + 1].n1); + if (fd->loops[i + 1].m1) + { + tree t2 = fold_convert (TREE_TYPE (t), + fd->loops[i + 1 + - fd->loops[i + 1].outer].v); + tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1); + t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2); + } + expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t); + if (fd->loops[i + 1].m2) + { + if (i + 2 == fd->collapse && n2var) + { + gcc_assert (n2v == NULL_TREE); + n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v)); + } + t = fold_convert (TREE_TYPE (fd->loops[i + 1].v), + fd->loops[i + 1].n2); + tree t2 = fold_convert (TREE_TYPE (t), + fd->loops[i + 1 + - fd->loops[i + 1].outer].v); + tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2); + t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2); + expand_omp_build_assign (&gsi, n2v, t); + } + if (i + 2 == fd->collapse && n2var) + { + /* For composite simd, n2 is the first iteration the current + task shouldn't already handle, so we effectively want to use + for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3) + as the vectorized loop. Except the vectorizer will not + vectorize that, so instead compute N2VAR as + N2VAR = V + MIN (N2 - V, COUNTS3) and use + for (V3 = N31; V < N2VAR; V++, V3 += STEP3) + as the loop to vectorize. */ + tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v); + if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2) + { + t = build_int_cst (itype, (fd->loops[i + 1].cond_code + == LT_EXPR ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, + fd->loops[i + 1].step), t); + if (fd->loops[i + 1].m2) + t = fold_build2 (PLUS_EXPR, itype, t, n2v); + else + t = fold_build2 (PLUS_EXPR, itype, t, + fold_convert (itype, + fd->loops[i + 1].n2)); + t = fold_build2 (MINUS_EXPR, itype, t, + fold_convert (itype, fd->loops[i + 1].v)); + tree step = fold_convert (itype, fd->loops[i + 1].step); + if (TYPE_UNSIGNED (itype) + && fd->loops[i + 1].cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + t = fold_convert (type, t); + } + else + t = counts[i + 1]; + t = fold_build2 (MIN_EXPR, type, t2, t); + t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t); + expand_omp_build_assign (&gsi, n2var, t); + } + n2v = nextn2v; + + make_edge (init_bb, last_bb, EDGE_FALLTHRU); + if (!gimple_omp_for_combined_into_p (fd->for_stmt)) + { + e = find_edge (entry_bb, last_bb); + redirect_edge_succ (e, bb); + set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb); + } + + last_bb = bb; + } + } if (!broken_loop) { class loop *loop = alloc_loop (); @@ -7643,12 +7840,7 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt) loops_state_set (LOOPS_NEED_FIXUP); if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD) - { - if (fd.non_rect) - sorry_at (gimple_location (fd.for_stmt), - "non-rectangular % not supported yet"); - expand_omp_simd (region, &fd); - } + expand_omp_simd (region, &fd); else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) { gcc_assert (!inner_stmt && !fd.non_rect); diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 3d2a9d7..b054961 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -3729,7 +3729,8 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, if ((gimple_omp_for_kind (as_a (stmt)) == 
GF_OMP_FOR_KIND_SIMD) && omp_maybe_offloaded_ctx (ctx) - && omp_max_simt_vf ()) + && omp_max_simt_vf () + && gimple_omp_for_collapse (stmt) == 1) scan_omp_simd (gsi, as_a (stmt), ctx); else scan_omp_for (as_a (stmt), ctx); diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-17.c b/gcc/testsuite/gcc.dg/vect/vect-simd-17.c new file mode 100644 index 0000000..9330aaa --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-17.c @@ -0,0 +1,304 @@ +/* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump "vectorized \(\[4-9]\|1\[0-2]\) loops" "vect" { target i?86-*-* x86_64-*-* } } } */ + +#include "tree-vect.h" + +int x, i, j; +volatile int a, b, c, d, e, f, g, h; +int k[11][101]; + +__attribute__((noipa)) void +doit (void) +{ + int niters, err = 0; + for (i = 1; i <= 10; i++) + for (j = 1; j <= 10 * i; j++) + { + k[i][j] = 1; + asm volatile ("" : : : "memory"); + } + a = 1; b = 11; c = 1; d = 0; e = 1; f = 10; g = 1; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = 1; i <= 10; i++) + for (j = 1; j <= 10 * i; j++) + { + err |= (i < 1); + err |= (i > 10); + err |= (j < 1); + err |= (j > 10 * i); + err |= (k[i][j] != 1); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 11 || j != 101 || x != 10340 || niters != 550 || err) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + err |= (i < 1); + err |= (i > 10); + err |= (j < 1); + err |= (j > 10 * i); + err |= (k[i][j] != 2); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 11 || j != 101 || x != 10340 || niters != 550 || err) + abort (); + for (i = 1; i <= 10; i++) + for (j = 1; j <= 10 * i; j++) + if (k[i][j] 
== 3) + k[i][j] = 0; + else + abort (); + for (i = 0; i < 11; i++) + for (j = 0; j < 101; j++) + if (k[i][j] != 0) + abort (); + for (i = 0; i < 10; i++) + for (j = 0; j < 10 * i; j++) + { + k[i][j] = 1; + asm volatile ("" : : : "memory"); + } + a = 0; b = 10; c = 1; d = 0; e = 0; f = 10; g = 0; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = 0; i < 10; i++) + for (j = 0; j < 10 * i; j++) + { + err |= (i < 0); + err |= (i >= 10); + err |= (j < 0); + err |= (j >= 10 * i); + err |= (k[i][j] != 1); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 10 || j != 90 || x != 9305 || niters != 450 || err) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + err |= (i < 0); + err |= (i >= 10); + err |= (j < 0); + err |= (j >= 10 * i); + err |= (k[i][j] != 2); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 10 || j != 90 || x != 9305 || niters != 450 || err) + abort (); + for (i = 0; i < 10; i++) + for (j = 0; j < 10 * i; j++) + if (k[i][j] == 3) + k[i][j] = 0; + else + abort (); + for (i = 0; i < 11; i++) + for (j = 0; j < 101; j++) + if (k[i][j] != 0) + abort (); + for (i = 4; i < 10; i++) + for (j = -9 + 2 * i; j < i; j++) + { + k[i][j + 1] = 1; + asm volatile ("" : : : "memory"); + } + a = 4; b = 10; c = 1; d = 2; e = -9; f = 1; g = 0; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = 4; i < 10; i++) + for (j = -9 + 2 * i; j < i; j++) + { + err |= (i < 4); + err |= (i >= 10); + err |= (j < -9 + 2 * i); + err |= (j >= i); + err |= (k[i][j + 1] != 1); + k[i][j + 1]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != 10 || j != 9 || */x != 8199 || niters 
!= 15 || err) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + err |= (i < 4); + err |= (i >= 10); + err |= (j < -9 + 2 * i); + err |= (j >= i); + err |= (k[i][j + 1] != 2); + k[i][j + 1]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != 10 || j != 9 || */x != 8199 || niters != 15 || err) + abort (); + for (i = 4; i < 10; i++) + for (j = -9 + 2 * i; j < i; j++) + if (k[i][j + 1] == 3) + k[i][j + 1] = 0; + else + abort (); + for (i = 0; i < 11; i++) + for (j = 0; j < 101; j++) + if (k[i][j] != 0) + abort (); + for (i = 1; i < 10; i += 2) + for (j = 1; j < i + 1; j++) + { + k[i][j] = 1; + asm volatile ("" : : : "memory"); + } + a = 1; b = 10; c = 2; d = 0; e = 1; f = 1; g = 1; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = 1; i < 10; i += 2) + for (j = 1; j < i + 1; j++) + { + err |= (i < 1); + err |= (i >= 10); + err |= (j < 1); + err |= (j >= i + 1); + err |= (k[i][j] != 1); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 11 || j != 10 || x != 9225 || niters != 25 || err) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + err |= (i < 1); + err |= (i >= 10); + err |= (j < 1); + err |= (j >= i + 1); + err |= (k[i][j] != 2); + k[i][j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 11 || j != 10 || x != 9225 || niters != 25 || err) + abort (); + for (i = 1; i < 10; i += 2) + for (j = 1; j < i + 1; j++) + if (k[i][j] == 3) + k[i][j] = 0; + else + abort (); + for (i = 0; i < 11; i++) + for (j = 0; j < 101; j++) + if (k[i][j] != 0) + abort (); + for (j = -11; j >= -41; j -= 15) + 
{ + k[0][-j] = 1; + asm volatile ("" : : : "memory"); + } + a = 4; b = 8; c = 12; d = -8; e = -9; f = -3; g = 6; h = 15; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = 4; i < 8; i += 12) + for (j = -8 * i - 9; j < i * -3 + 6; j += 15) + { + err |= (i != 4); + err |= (j < -41); + err |= (j > -11); + err |= (k[0][-j] != 1); + k[0][-j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 16 || j != 4 || x != 5109 || niters != 3 || err) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + err |= (i != 4); + err |= (j < -41); + err |= (j > -11); + err |= (k[0][-j] != 2); + k[0][-j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (i != 16 || j != 4 || x != 5109 || niters != 3 || err) + abort (); + for (j = -11; j >= -41; j -= 15) + if (k[0][-j] == 3) + k[0][-j] = 0; + else + abort (); + for (j = -11; j >= -41; j--) + if (k[0][-j] != 0) + abort (); + for (j = -34; j <= -7; j++) + { + k[0][-j] = 1; + asm volatile ("" : : : "memory"); + } + a = -13; b = 7; c = 12; d = 3; e = 5; f = 0; g = -6; h = 1; + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = -13; i < 7; i += 12) + for (j = 3 * i + 5; j < -6; j++) + { + err |= (i != -13); + err |= (j < -34); + err |= (j > -7); + err |= (k[0][-j] != 1); + k[0][-j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != 11 || j != 2 || */x != -12295 || niters != 28 || err) + abort (); + niters = 0; i = -100; j = -100; x = -100; + #pragma omp simd collapse(2) lastprivate (i, j, x) reduction(+:niters) reduction(|:err) + for (i = a; i < b; i += c) + for (j = d * i + e; j < g + i * f; j += h) + { + err |= (i != -13); + err |= (j < -34); + err |= (j > -7); + err |= 
(k[0][-j] != 2); + k[0][-j]++; + x = i * 1024 + (j & 1023); + niters++; + } + if (/*i != 11 || j != 2 || */x != -12295 || niters != 28 || err) + abort (); + for (j = -34; j <= -7; j++) + if (k[0][-j] == 3) + k[0][-j] = 0; + else + abort (); +} + +int +main () +{ + check_vect (); + doit (); + return 0; +} -- cgit v1.1 From 499b63048acd5e9ffd3c04061b531f6bf851dc00 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 25 Sep 2020 11:43:43 +0200 Subject: testsuite/97204 - fix gcc.target/i386/sse2-mmx-pinsrw.c This fixes the testcase writing to adjacent stack vars, exposed by IPA modref. 2020-09-25 Richard Biener PR testsuite/97204 * gcc.target/i386/sse2-mmx-pinsrw.c: Fix. --- gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c index c25ddd9..fd93355 100644 --- a/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c +++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-pinsrw.c @@ -42,7 +42,7 @@ compute_correct_result (__m64 *src_p, int val, unsigned int imm, static void sse2_test (void) { - int r, ck; + int r[2], ck[2]; int i; int failed = 0; __v4hi y = { 3320, -3339, 48, 4392 }; @@ -50,9 +50,9 @@ sse2_test (void) /* Run the MMX tests */ for (i = 0; i < 4; i++) { - test_pinsrw ((__m64 *) &y, 0x1234, i, &r); - compute_correct_result ((__m64 *) &y, 0x1234, i, &ck); - if (r != ck) + test_pinsrw ((__m64 *) &y, 0x1234, i, r); + compute_correct_result ((__m64 *) &y, 0x1234, i, ck); + if (r[0] != ck[0] || r[1] != ck[1]) failed++; } -- cgit v1.1 From 8c775bf447e190024fa08c55e38db94dd013a393 Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Fri, 25 Sep 2020 10:40:18 +0000 Subject: testsuite: [aarch64] Fix aarch64/advsimd-intrinsics/v{trn,uzp,zip}_half.c Since r11-3402 (g:65c9878641cbe0ed898aa7047b7b994e9d4a5bb1), the vtrn_half, vuzp_half and vzip_half started failing with vtrn_half.c:76:17: error:
redeclaration of 'vector_float64x2' with no linkage vtrn_half.c:77:17: error: redeclaration of 'vector2_float64x2' with no linkage vtrn_half.c:80:17: error: redeclaration of 'vector_res_float64x2' with no linkage This is because r11-3402 now always declares float64x2 variables for aarch64, leading to a duplicate declaration in these testcases. The fix is simply to remove these now useless declarations. These tests are skipped on arm*, so there is no impact on that target. 2020-09-25 Christophe Lyon gcc/testsuite/ PR target/71233 * gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c: Remove declarations of vector, vector2, vector_res for float64x2 type. * gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c: Likewise. * gcc.target/aarch64/advsimd-intrinsics/vzip_half.c: Likewise. --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c | 3 --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c | 3 --- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c | 3 --- 3 files changed, 9 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c index 63f820f..25a0f19 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c @@ -73,11 +73,8 @@ void exec_vtrn_half (void) /* Input vector can only have 64 bits. */ DECL_VARIABLE_ALL_VARIANTS(vector); DECL_VARIABLE_ALL_VARIANTS(vector2); - DECL_VARIABLE(vector, float, 64, 2); - DECL_VARIABLE(vector2, float, 64, 2); DECL_VARIABLE_ALL_VARIANTS(vector_res); - DECL_VARIABLE(vector_res, float, 64, 2); clean_results (); /* We don't have vtrn1_T64x1, so set expected to the clean value. 
*/ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c index 8706f24..2e6b666 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c @@ -70,11 +70,8 @@ void exec_vuzp_half (void) /* Input vector can only have 64 bits. */ DECL_VARIABLE_ALL_VARIANTS(vector); DECL_VARIABLE_ALL_VARIANTS(vector2); - DECL_VARIABLE(vector, float, 64, 2); - DECL_VARIABLE(vector2, float, 64, 2); DECL_VARIABLE_ALL_VARIANTS(vector_res); - DECL_VARIABLE(vector_res, float, 64, 2); clean_results (); /* We don't have vuzp1_T64x1, so set expected to the clean value. */ diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c index 619d6b2..ef42451 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c @@ -73,11 +73,8 @@ void exec_vzip_half (void) /* Input vector can only have 64 bits. */ DECL_VARIABLE_ALL_VARIANTS(vector); DECL_VARIABLE_ALL_VARIANTS(vector2); - DECL_VARIABLE(vector, float, 64, 2); - DECL_VARIABLE(vector2, float, 64, 2); DECL_VARIABLE_ALL_VARIANTS(vector_res); - DECL_VARIABLE(vector_res, float, 64, 2); clean_results (); /* We don't have vzip1_T64x1, so set expected to the clean value. */ -- cgit v1.1 From 4dcc7f03b54087638e084ac69d40d7507fe83bd8 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 25 Sep 2020 13:08:48 +0200 Subject: tree-optimization/97199 - fix virtual operand update in if-conversion This fixes a corner case with virtual operand update in if-conversion by re-organizing the code to remove edges only after the last point we need virtual PHI operands to be available. 2020-09-25 Richard Biener PR tree-optimization/97199 * tree-if-conv.c (combine_blocks): Remove edges only after looking at virtual PHI args. 
--- gcc/tree-if-conv.c | 107 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 44 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index 4b8d457..2062758 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -2544,8 +2544,7 @@ combine_blocks (class loop *loop) if (need_to_predicate) predicate_statements (loop); - /* Merge basic blocks: first remove all the edges in the loop, - except for those from the exit block. */ + /* Merge basic blocks. */ exit_bb = NULL; bool *predicated = XNEWVEC (bool, orig_loop_num_nodes); for (i = 0; i < orig_loop_num_nodes; i++) @@ -2561,43 +2560,6 @@ combine_blocks (class loop *loop) } gcc_assert (exit_bb != loop->latch); - for (i = 1; i < orig_loop_num_nodes; i++) - { - bb = ifc_bbs[i]; - - for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));) - { - if (e->src == exit_bb) - ei_next (&ei); - else - remove_edge (e); - } - } - - if (exit_bb != NULL) - { - if (exit_bb != loop->header) - { - /* Connect this node to loop header. */ - make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU); - set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header); - } - - /* Redirect non-exit edges to loop->latch. */ - FOR_EACH_EDGE (e, ei, exit_bb->succs) - { - if (!loop_exit_edge_p (loop, e)) - redirect_edge_and_branch (e, loop->latch); - } - set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb); - } - else - { - /* If the loop does not have an exit, reconnect header and latch. 
*/ - make_edge (loop->header, loop->latch, EDGE_FALLTHRU); - set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header); - } - merge_target_bb = loop->header; /* Get at the virtual def valid for uses starting at the first block @@ -2682,13 +2644,9 @@ combine_blocks (class loop *loop) last = gsi_last_bb (merge_target_bb); gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT); set_bb_seq (bb, NULL); - - delete_basic_block (bb); } - /* If possible, merge loop header to the block with the exit edge. - This reduces the number of basic blocks to two, to please the - vectorizer that handles only loops with two nodes. */ + /* Fixup virtual operands in the exit block. */ if (exit_bb && exit_bb != loop->header) { @@ -2698,6 +2656,11 @@ combine_blocks (class loop *loop) vphi = get_virtual_phi (exit_bb); if (vphi) { + /* When there's just loads inside the loop a stray virtual + PHI merging the uses can appear, update last_vdef from + it. */ + if (!last_vdef) + last_vdef = gimple_phi_arg_def (vphi, 0); imm_use_iterator iter; use_operand_p use_p; gimple *use_stmt; @@ -2711,7 +2674,63 @@ combine_blocks (class loop *loop) gimple_stmt_iterator gsi = gsi_for_stmt (vphi); remove_phi_node (&gsi, true); } + } + + /* Now remove all the edges in the loop, except for those from the exit + block and delete the blocks we elided. */ + for (i = 1; i < orig_loop_num_nodes; i++) + { + bb = ifc_bbs[i]; + + for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));) + { + if (e->src == exit_bb) + ei_next (&ei); + else + remove_edge (e); + } + } + for (i = 1; i < orig_loop_num_nodes; i++) + { + bb = ifc_bbs[i]; + + if (bb == exit_bb || bb == loop->latch) + continue; + + delete_basic_block (bb); + } + + /* Re-connect the exit block. */ + if (exit_bb != NULL) + { + if (exit_bb != loop->header) + { + /* Connect this node to loop header. 
*/ + make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU); + set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header); + } + /* Redirect non-exit edges to loop->latch. */ + FOR_EACH_EDGE (e, ei, exit_bb->succs) + { + if (!loop_exit_edge_p (loop, e)) + redirect_edge_and_branch (e, loop->latch); + } + set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb); + } + else + { + /* If the loop does not have an exit, reconnect header and latch. */ + make_edge (loop->header, loop->latch, EDGE_FALLTHRU); + set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header); + } + + /* If possible, merge loop header to the block with the exit edge. + This reduces the number of basic blocks to two, to please the + vectorizer that handles only loops with two nodes. */ + if (exit_bb + && exit_bb != loop->header) + { if (can_merge_blocks_p (loop->header, exit_bb)) merge_blocks (loop->header, exit_bb); } -- cgit v1.1 From 6abd428605e3a279e533fde1cecbc9735ce03b66 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 25 Sep 2020 12:45:25 +0100 Subject: arm: Fix fp16 move patterns for base MVE This patch fixes ICEs in gcc.dg/torture/float16-basic.c for -march=armv8.1-m.main+mve -mfloat-abi=hard. The problem was that an fp16 argument was (rightly) being passed in FPRs, but the fp16 move patterns only handled GPRs. LRA then cycled trying to look for a way of handling the FPR. It looks like there are three related problems here: (1) We're using the wrong fp16 move pattern for base MVE. *mov_vfp_16 (the pattern we use for +mve.fp) works for base MVE too. (2) The fp16 MVE load and store patterns are separate from the main move patterns. The loads and stores should instead be alternatives of the main move patterns, so that LRA knows what to do with pseudo registers that become stack slots. (3) The range restrictions for the loads and stores were wrong for fp16: we were enforcing a multiple of 4 in [-255*4, 255*4] instead of a multiple of 2 in [-255*2, 255*2]. 
(2) came from a patch to prevent writeback being used for MVE. That patch also added a Uj constraint to enforce the correct memory types for MVE. I think the simplest fix is therefore to merge the loads and stores back into the main pattern and extend the Uj constraint so that it acts like Um for non-MVE. The testcase for that patch was mve-vldstr16-no-writeback.c, whose main function is: void fn1 (__fp16 *pSrc) { __fp16 high; __fp16 *pDst = 0; unsigned i; for (i = 0;; i++) if (pSrc[i]) pDst[i] = high; } Fixing (2) causes the store part to fail, not because we're using writeback, but because we decide to use GPRs to store high (which is uninitialised, and so gets replaced with zero). This patch therefore adds some scan-assembler-nots instead. (I wondered about changing the testcase to initialise high, but that seemed like a bad idea for a regression test.) For (3): MVE seems to be the only thing to use arm_coproc_mem_operand_wb (and its various interfaces) for 16-bit scalars: the Neon patterns only use it for 32-bit scalars. I've added new tests to try the various FPR alternatives of the move patterns. The range of offsets that GCC uses for FPR loads and stores is the intersection of the range allowed for GPRs and FPRs, so the tests include GPR<->memory tests as well. The fp32 and fp64 tests already pass, they're just there for completeness. gcc/ * config/arm/arm-protos.h (arm_mve_mode_and_operands_type_check): Delete. * config/arm/arm.c (arm_coproc_mem_operand_wb): Use a scale factor of 2 rather than 4 for 16-bit modes. (arm_mve_mode_and_operands_type_check): Delete. * config/arm/constraints.md (Uj): Allow writeback for Neon, but continue to disallow it for MVE. * config/arm/arm.md (*arm32_mov<mode>): Add !TARGET_HAVE_MVE. * config/arm/vfp.md (*mov_load_vfp_hf16, *mov_store_vfp_hf16): Fold back into... (*mov<mode>_vfp_<mode>16): ...here but use Uj for the FPR memory constraints. Use for base MVE too.
gcc/testsuite/ * gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: Allow the store to use GPRs instead of FPRs. Add scan-assembler-nots for writeback. * gcc.target/arm/armv8_1m-fp16-move-1.c: New test. * gcc.target/arm/armv8_1m-fp32-move-1.c: Likewise. * gcc.target/arm/armv8_1m-fp64-move-1.c: Likewise. --- gcc/config/arm/arm-protos.h | 1 - gcc/config/arm/arm.c | 25 +- gcc/config/arm/arm.md | 4 +- gcc/config/arm/constraints.md | 9 +- gcc/config/arm/vfp.md | 32 +- .../gcc.target/arm/armv8_1m-fp16-move-1.c | 418 ++++++++++++++++++++ .../gcc.target/arm/armv8_1m-fp32-move-1.c | 420 ++++++++++++++++++++ .../gcc.target/arm/armv8_1m-fp64-move-1.c | 426 +++++++++++++++++++++ .../arm/mve/intrinsics/mve-vldstr16-no-writeback.c | 5 +- 9 files changed, 1295 insertions(+), 45 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/armv8_1m-fp16-move-1.c create mode 100644 gcc/testsuite/gcc.target/arm/armv8_1m-fp32-move-1.c create mode 100644 gcc/testsuite/gcc.target/arm/armv8_1m-fp64-move-1.c (limited to 'gcc') diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 0cc0ae7..9bb9c61 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -120,7 +120,6 @@ extern int arm_coproc_mem_operand_no_writeback (rtx); extern int arm_coproc_mem_operand_wb (rtx, int); extern int neon_vector_mem_operand (rtx, int, bool); extern int mve_vector_mem_operand (machine_mode, rtx, bool); -bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx); extern int neon_struct_mem_operand (rtx); extern rtx *neon_vcmla_lane_prepare_operands (rtx *); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 022ef6c..8105b39 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -13277,14 +13277,18 @@ arm_coproc_mem_operand_wb (rtx op, int wb_level) /* Match: (plus (reg) - (const)). 
*/ + (const)) + + The encoded immediate for 16-bit modes is multiplied by 2, + while the encoded immediate for 32-bit and 64-bit modes is + multiplied by 4. */ + int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4); if (GET_CODE (ind) == PLUS && REG_P (XEXP (ind, 0)) && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) && CONST_INT_P (XEXP (ind, 1)) - && INTVAL (XEXP (ind, 1)) > -1024 - && INTVAL (XEXP (ind, 1)) < 1024 - && (INTVAL (XEXP (ind, 1)) & 3) == 0) + && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor) + && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0) return TRUE; return FALSE; @@ -33578,17 +33582,4 @@ arm_mode_base_reg_class (machine_mode mode) struct gcc_target targetm = TARGET_INITIALIZER; -bool -arm_mve_mode_and_operands_type_check (machine_mode mode, rtx op0, rtx op1) -{ - if (!(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT)) - return true; - else if (mode == E_BFmode) - return false; - else if ((s_register_operand (op0, mode) && MEM_P (op1)) - || (s_register_operand (op1, mode) && MEM_P (op0))) - return false; - return true; -} - #include "gt-arm.h" diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index c4fa116..147c4a5 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -7289,7 +7289,9 @@ (define_insn "*arm32_mov" [(set (match_operand:HFBF 0 "nonimmediate_operand" "=r,m,r,r") (match_operand:HFBF 1 "general_operand" " m,r,r,F"))] - "TARGET_32BIT && !TARGET_HARD_FLOAT + "TARGET_32BIT + && !TARGET_HARD_FLOAT + && !TARGET_HAVE_MVE && ( s_register_operand (operands[0], mode) || s_register_operand (operands[1], mode))" "* diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index ff229aa..789e333 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -454,10 +454,13 @@ (define_memory_constraint "Uj" "@internal - In ARM/Thumb-2 state an VFP load/store address which does not support - writeback at all (eg vldr.16)." 
+ In ARM/Thumb-2 state a VFP load/store address that supports writeback + for Neon but not for MVE" (and (match_code "mem") - (match_test "TARGET_32BIT && arm_coproc_mem_operand_no_writeback (op)"))) + (match_test "TARGET_32BIT") + (match_test "TARGET_HAVE_MVE + ? arm_coproc_mem_operand_no_writeback (op) + : neon_vector_mem_operand (op, 2, true)"))) (define_memory_constraint "Uy" "@internal diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 6a2bc5a..72707c1 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -387,31 +387,15 @@ (set_attr "arch" "t2,any,any,any,a,t2,any,any,any,any,any,any")] ) -(define_insn "*mov_load_vfp_hf16" - [(set (match_operand:HF 0 "s_register_operand" "=t") - (match_operand:HF 1 "memory_operand" "Uj"))] - "TARGET_HAVE_MVE_FLOAT" - "vldr.16\\t%0, %E1" -) - -(define_insn "*mov_store_vfp_hf16" - [(set (match_operand:HF 0 "memory_operand" "=Uj") - (match_operand:HF 1 "s_register_operand" "t"))] - "TARGET_HAVE_MVE_FLOAT" - "vstr.16\\t%1, %E0" -) - ;; HFmode and BFmode moves (define_insn "*mov_vfp_16" [(set (match_operand:HFBF 0 "nonimmediate_operand" - "= ?r,?m,t,r,t,r,t, t, Um,r") + "= ?r,?m,t,r,t,r,t, t, Uj,r") (match_operand:HFBF 1 "general_operand" - " m,r,t,r,r,t,Dv,Um,t, F"))] + " m,r,t,r,r,t,Dv,Uj,t, F"))] "TARGET_32BIT - && TARGET_VFP_FP16INST - && arm_mve_mode_and_operands_type_check (mode, operands[0], - operands[1]) + && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE) && (s_register_operand (operands[0], mode) || s_register_operand (operands[1], mode))" { @@ -430,9 +414,15 @@ case 6: /* S register from immediate. */ return \"vmov.f16\\t%0, %1\t%@ __\"; case 7: /* S register from memory. */ - return \"vld1.16\\t{%z0}, %A1\"; + if (TARGET_HAVE_MVE) + return \"vldr.16\\t%0, %1\"; + else + return \"vld1.16\\t{%z0}, %A1\"; case 8: /* Memory from S register. 
*/ - return \"vst1.16\\t{%z1}, %A0\"; + if (TARGET_HAVE_MVE) + return \"vstr.16\\t%1, %0\"; + else + return \"vst1.16\\t{%z1}, %A0\"; case 9: /* ARM register from constant. */ { long bits; diff --git a/gcc/testsuite/gcc.target/arm/armv8_1m-fp16-move-1.c b/gcc/testsuite/gcc.target/arm/armv8_1m-fp16-move-1.c new file mode 100644 index 0000000..67a9f41 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/armv8_1m-fp16-move-1.c @@ -0,0 +1,418 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mfloat-abi=hard -mfp16-format=ieee" } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** r_w: +** vmov.f16 r0, s0 @ __fp16 +** bx lr +*/ +void +r_w (_Float16 s0) +{ + register _Float16 r0 asm ("r0"); + r0 = s0; + asm volatile ("" :: "r" (r0)); +} + +/* +** w_r: +** vmov.f16 s0, r0 @ __fp16 +** bx lr +*/ +_Float16 +w_r () +{ + register _Float16 r0 asm ("r0"); + asm volatile ("" : "=r" (r0)); + return r0; +} + +/* +** w_w: +** vmov s1, s0 @ __fp16 +** bx lr +*/ +void +w_w (_Float16 s0) +{ + register _Float16 s1 asm ("s1"); + s1 = s0; + asm volatile ("" :: "w" (s1)); +} + +/* +** r_m_m128: +** sub (r[0-9]+), r0, #256 +** ldrh r1, \[\1\] @ __fp16 +** bx lr +*/ +void +r_m_m128 (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + r1 = r0[-128]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_m127: +** ldrh r1, \[r0, #-254\] @ __fp16 +** bx lr +*/ +void +r_m_m127 (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + r1 = r0[-127]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_m1: +** ldrh r1, \[r0, #-2\] @ __fp16 +** bx lr +*/ +void +r_m_m1 (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + r1 = r0[-1]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_0: +** ldrh r1, \[r0\] @ __fp16 +** bx lr +*/ +void +r_m_0 (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + r1 = r0[0]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_1: +** ldrh r1, \[r0, #2\] @ __fp16 +** bx lr 
+*/ +void +r_m_1 (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + r1 = r0[1]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_255: +** ldrh r1, \[r0, #510\] @ __fp16 +** bx lr +*/ +void +r_m_255 (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + r1 = r0[255]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_256: +** ldrh r1, \[r0, #512\] @ __fp16 +** bx lr +*/ +void +r_m_256 (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + r1 = r0[256]; + asm volatile ("" :: "r" (r1)); +} + +/* ??? This could be done in one instruction, but without mve.fp, + it makes more sense for memory_operand to enforce the GPR range. */ +/* +** w_m_m128: +** sub (r[0-9]+), r0, #256 +** vldr.16 s0, \[\1\] +** bx lr +*/ +void +w_m_m128 (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + s0 = r0[-128]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_m127: +** vldr.16 s0, \[r0, #-254\] +** bx lr +*/ +void +w_m_m127 (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + s0 = r0[-127]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_m1: +** vldr.16 s0, \[r0, #-2\] +** bx lr +*/ +void +w_m_m1 (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + s0 = r0[-1]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_0: +** vldr.16 s0, \[r0\] +** bx lr +*/ +void +w_m_0 (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + s0 = r0[0]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_1: +** vldr.16 s0, \[r0, #2\] +** bx lr +*/ +void +w_m_1 (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + s0 = r0[1]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_255: +** vldr.16 s0, \[r0, #510\] +** bx lr +*/ +void +w_m_255 (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + s0 = r0[255]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_256: +** add (r[0-9]+), r0, #512 +** vldr.16 s0, \[\1\] +** bx lr +*/ +void +w_m_256 (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + s0 = r0[256]; + asm volatile ("" :: "w" (s0)); +} + +/* +** m_m128_r: +** sub (r[0-9]+), r0, #256 +** strh r1, 
\[\1\] @ __fp16 +** bx lr +*/ +void +m_m128_r (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[-128] = r1; +} + +/* +** m_m127_r: +** strh r1, \[r0, #-254\] @ __fp16 +** bx lr +*/ +void +m_m127_r (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[-127] = r1; +} + +/* +** m_m1_r: +** strh r1, \[r0, #-2\] @ __fp16 +** bx lr +*/ +void +m_m1_r (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[-1] = r1; +} + +/* +** m_0_r: +** strh r1, \[r0\] @ __fp16 +** bx lr +*/ +void +m_0_r (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[0] = r1; +} + +/* +** m_1_r: +** strh r1, \[r0, #2\] @ __fp16 +** bx lr +*/ +void +m_1_r (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[1] = r1; +} + +/* +** m_255_r: +** strh r1, \[r0, #510\] @ __fp16 +** bx lr +*/ +void +m_255_r (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[255] = r1; +} + +/* +** m_256_r: +** strh r1, \[r0, #512\] @ __fp16 +** bx lr +*/ +void +m_256_r (_Float16 *r0) +{ + register _Float16 r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[256] = r1; +} + +/* ??? This could be done in one instruction, but without mve.fp, + it makes more sense for memory_operand to enforce the GPR range. 
*/ +/* +** m_m128_w: +** sub (r[0-9]+), r0, #256 +** vstr.16 s0, \[\1\] +** bx lr +*/ +void +m_m128_w (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[-128] = s0; +} + +/* +** m_m127_w: +** vstr.16 s0, \[r0, #-254\] +** bx lr +*/ +void +m_m127_w (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[-127] = s0; +} + +/* +** m_m1_w: +** vstr.16 s0, \[r0, #-2\] +** bx lr +*/ +void +m_m1_w (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[-1] = s0; +} + +/* +** m_0_w: +** vstr.16 s0, \[r0\] +** bx lr +*/ +void +m_0_w (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[0] = s0; +} + +/* +** m_1_w: +** vstr.16 s0, \[r0, #2\] +** bx lr +*/ +void +m_1_w (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[1] = s0; +} + +/* +** m_255_w: +** vstr.16 s0, \[r0, #510\] +** bx lr +*/ +void +m_255_w (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[255] = s0; +} + +/* +** m_256_w: +** add (r[0-9]+), r0, #512 +** vstr.16 s0, \[\1\] +** bx lr +*/ +void +m_256_w (_Float16 *r0) +{ + register _Float16 s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[256] = s0; +} diff --git a/gcc/testsuite/gcc.target/arm/armv8_1m-fp32-move-1.c b/gcc/testsuite/gcc.target/arm/armv8_1m-fp32-move-1.c new file mode 100644 index 0000000..1ecb839 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/armv8_1m-fp32-move-1.c @@ -0,0 +1,420 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mfloat-abi=hard" } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** r_w: +** vmov r0, s0 +** bx lr +*/ +void +r_w (float s0) +{ + register float r0 asm ("r0"); + r0 = s0; + asm volatile ("" :: "r" (r0)); +} + +/* +** w_r: +** vmov s0, r0 +** bx lr +*/ +float +w_r () +{ + register 
float r0 asm ("r0"); + asm volatile ("" : "=r" (r0)); + return r0; +} + +/* +** w_w: +** vmov.f32 s1, s0 +** bx lr +*/ +void +w_w (float s0) +{ + register float s1 asm ("s1"); + s1 = s0; + asm volatile ("" :: "w" (s1)); +} + +/* +** r_m_m64: +** sub (r[0-9]+), r0, #256 +** ldr r1, \[\1\] @ float +** bx lr +*/ +void +r_m_m64 (float *r0) +{ + register float r1 asm ("r1"); + r1 = r0[-64]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_m63: +** ldr r1, \[r0, #-252\] @ float +** bx lr +*/ +void +r_m_m63 (float *r0) +{ + register float r1 asm ("r1"); + r1 = r0[-63]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_m1: +** ldr r1, \[r0, #-4\] @ float +** bx lr +*/ +void +r_m_m1 (float *r0) +{ + register float r1 asm ("r1"); + r1 = r0[-1]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_0: +** ldr r1, \[r0\] @ float +** bx lr +*/ +void +r_m_0 (float *r0) +{ + register float r1 asm ("r1"); + r1 = r0[0]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_1: +** ldr r1, \[r0, #4\] @ float +** bx lr +*/ +void +r_m_1 (float *r0) +{ + register float r1 asm ("r1"); + r1 = r0[1]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_255: +** ldr r1, \[r0, #1020\] @ float +** bx lr +*/ +void +r_m_255 (float *r0) +{ + register float r1 asm ("r1"); + r1 = r0[255]; + asm volatile ("" :: "r" (r1)); +} + +/* +** r_m_256: +** add (r[0-9]+), r0, #1024 +** ldr r1, \[r0\] @ float +** bx lr +*/ +void +r_m_256 (float *r0) +{ + register float r1 asm ("r1"); + r1 = r0[256]; + asm volatile ("" :: "r" (r1)); +} + +/* ??? This could be done in one instruction, but without mve.fp, + it makes more sense for memory_operand to enforce the GPR range. 
*/ +/* +** w_m_m64: +** sub (r[0-9]+), r0, #256 +** vldr.32 s0, \[\1\] +** bx lr +*/ +void +w_m_m64 (float *r0) +{ + register float s0 asm ("s0"); + s0 = r0[-64]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_m63: +** vldr.32 s0, \[r0, #-252\] +** bx lr +*/ +void +w_m_m63 (float *r0) +{ + register float s0 asm ("s0"); + s0 = r0[-63]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_m1: +** vldr.32 s0, \[r0, #-4\] +** bx lr +*/ +void +w_m_m1 (float *r0) +{ + register float s0 asm ("s0"); + s0 = r0[-1]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_0: +** vldr.32 s0, \[r0\] +** bx lr +*/ +void +w_m_0 (float *r0) +{ + register float s0 asm ("s0"); + s0 = r0[0]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_1: +** vldr.32 s0, \[r0, #4\] +** bx lr +*/ +void +w_m_1 (float *r0) +{ + register float s0 asm ("s0"); + s0 = r0[1]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_255: +** vldr.32 s0, \[r0, #1020\] +** bx lr +*/ +void +w_m_255 (float *r0) +{ + register float s0 asm ("s0"); + s0 = r0[255]; + asm volatile ("" :: "w" (s0)); +} + +/* +** w_m_256: +** add (r[0-9]+), r0, #1024 +** vldr.32 s0, \[\1\] +** bx lr +*/ +void +w_m_256 (float *r0) +{ + register float s0 asm ("s0"); + s0 = r0[256]; + asm volatile ("" :: "w" (s0)); +} + +/* +** m_m64_r: +** sub (r[0-9]+), r0, #256 +** str r1, \[\1\] @ float +** bx lr +*/ +void +m_m64_r (float *r0) +{ + register float r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[-64] = r1; +} + +/* +** m_m63_r: +** str r1, \[r0, #-252\] @ float +** bx lr +*/ +void +m_m63_r (float *r0) +{ + register float r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[-63] = r1; +} + +/* +** m_m1_r: +** str r1, \[r0, #-4\] @ float +** bx lr +*/ +void +m_m1_r (float *r0) +{ + register float r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[-1] = r1; +} + +/* +** m_0_r: +** str r1, \[r0\] @ float +** bx lr +*/ +void +m_0_r (float *r0) +{ + register float r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[0] = r1; +} + +/* +** m_1_r: 
+** str r1, \[r0, #4\] @ float +** bx lr +*/ +void +m_1_r (float *r0) +{ + register float r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[1] = r1; +} + +/* +** m_255_r: +** str r1, \[r0, #1020\] @ float +** bx lr +*/ +void +m_255_r (float *r0) +{ + register float r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[255] = r1; +} + +/* +** m_256_r: +** add (r[0-9]+), r0, #1024 +** str r1, \[r0\] @ float +** bx lr +*/ +void +m_256_r (float *r0) +{ + register float r1 asm ("r1"); + asm volatile ("" : "=r" (r1)); + r0[256] = r1; +} + +/* ??? This could be done in one instruction, but without mve.fp, + it makes more sense for memory_operand to enforce the GPR range. */ +/* +** m_m64_w: +** sub (r[0-9]+), r0, #256 +** vstr.32 s0, \[\1\] +** bx lr +*/ +void +m_m64_w (float *r0) +{ + register float s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[-64] = s0; +} + +/* +** m_m63_w: +** vstr.32 s0, \[r0, #-252\] +** bx lr +*/ +void +m_m63_w (float *r0) +{ + register float s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[-63] = s0; +} + +/* +** m_m1_w: +** vstr.32 s0, \[r0, #-4\] +** bx lr +*/ +void +m_m1_w (float *r0) +{ + register float s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[-1] = s0; +} + +/* +** m_0_w: +** vstr.32 s0, \[r0\] +** bx lr +*/ +void +m_0_w (float *r0) +{ + register float s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[0] = s0; +} + +/* +** m_1_w: +** vstr.32 s0, \[r0, #4\] +** bx lr +*/ +void +m_1_w (float *r0) +{ + register float s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[1] = s0; +} + +/* +** m_255_w: +** vstr.32 s0, \[r0, #1020\] +** bx lr +*/ +void +m_255_w (float *r0) +{ + register float s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[255] = s0; +} + +/* +** m_256_w: +** add (r[0-9]+), r0, #1024 +** vstr.32 s0, \[\1\] +** bx lr +*/ +void +m_256_w (float *r0) +{ + register float s0 asm ("s0"); + asm volatile ("" : "=w" (s0)); + r0[256] = s0; +} diff --git a/gcc/testsuite/gcc.target/arm/armv8_1m-fp64-move-1.c 
b/gcc/testsuite/gcc.target/arm/armv8_1m-fp64-move-1.c new file mode 100644 index 0000000..3f81350 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/armv8_1m-fp64-move-1.c @@ -0,0 +1,426 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mfloat-abi=hard" } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** r_w: +** vmov r0, r1, d0 +** bx lr +*/ +void +r_w (double d0) +{ + register double r0 asm ("r0"); + r0 = d0; + asm volatile ("" :: "r" (r0)); +} + +/* +** w_r: +** vmov d0, r0, r1 +** bx lr +*/ +double +w_r () +{ + register double r0 asm ("r0"); + asm volatile ("" : "=r" (r0)); + return r0; +} + +/* +** w_w: +** ( +** vmov.f32 s2, s0 +** vmov.f32 s3, s1 +** | +** vmov.f32 s3, s1 +** vmov.f32 s2, s0 +** ) +** bx lr +*/ +void +w_w (double d0) +{ + register double d1 asm ("d1"); + d1 = d0; + asm volatile ("" :: "w" (d1)); +} + +/* +** r_m_m32: +** sub (r[0-9]+), r0, #256 +** ldrd r2, \[\1\] +** bx lr +*/ +void +r_m_m32 (double *r0) +{ + register double r2 asm ("r2"); + r2 = r0[-32]; + asm volatile ("" :: "r" (r2)); +} + +/* +** r_m_m31: +** ldrd r2, \[r0, #-248\] +** bx lr +*/ +void +r_m_m31 (double *r0) +{ + register double r2 asm ("r2"); + r2 = r0[-31]; + asm volatile ("" :: "r" (r2)); +} + +/* +** r_m_m1: +** ldrd r2, \[r0, #-8\] +** bx lr +*/ +void +r_m_m1 (double *r0) +{ + register double r2 asm ("r2"); + r2 = r0[-1]; + asm volatile ("" :: "r" (r2)); +} + +/* +** r_m_0: +** ldrd r2, \[r0\] +** bx lr +*/ +void +r_m_0 (double *r0) +{ + register double r2 asm ("r2"); + r2 = r0[0]; + asm volatile ("" :: "r" (r2)); +} + +/* +** r_m_1: +** ldrd r2, \[r0, #8\] +** bx lr +*/ +void +r_m_1 (double *r0) +{ + register double r2 asm ("r2"); + r2 = r0[1]; + asm volatile ("" :: "r" (r2)); +} + +/* +** r_m_127: +** ldrd r2, \[r0, #1016\] +** bx lr +*/ +void +r_m_127 (double *r0) +{ + register double r2 asm ("r2"); + r2 = r0[127]; + asm volatile ("" :: "r" (r2)); +} + 
+/* +** r_m_128: +** add (r[0-9]+), r0, #1024 +** ldrd r2, \[r0\] +** bx lr +*/ +void +r_m_128 (double *r0) +{ + register double r2 asm ("r2"); + r2 = r0[128]; + asm volatile ("" :: "r" (r2)); +} + +/* ??? This could be done in one instruction, but without mve.fp, + it makes more sense for memory_operand to enforce the GPR range. */ +/* +** w_m_m32: +** sub (r[0-9]+), r0, #256 +** vldr.64 d0, \[\1\] +** bx lr +*/ +void +w_m_m32 (double *r0) +{ + register double d0 asm ("d0"); + d0 = r0[-32]; + asm volatile ("" :: "w" (d0)); +} + +/* +** w_m_m31: +** vldr.64 d0, \[r0, #-248\] +** bx lr +*/ +void +w_m_m31 (double *r0) +{ + register double d0 asm ("d0"); + d0 = r0[-31]; + asm volatile ("" :: "w" (d0)); +} + +/* +** w_m_m1: +** vldr.64 d0, \[r0, #-8\] +** bx lr +*/ +void +w_m_m1 (double *r0) +{ + register double d0 asm ("d0"); + d0 = r0[-1]; + asm volatile ("" :: "w" (d0)); +} + +/* +** w_m_0: +** vldr.64 d0, \[r0\] +** bx lr +*/ +void +w_m_0 (double *r0) +{ + register double d0 asm ("d0"); + d0 = r0[0]; + asm volatile ("" :: "w" (d0)); +} + +/* +** w_m_1: +** vldr.64 d0, \[r0, #8\] +** bx lr +*/ +void +w_m_1 (double *r0) +{ + register double d0 asm ("d0"); + d0 = r0[1]; + asm volatile ("" :: "w" (d0)); +} + +/* +** w_m_127: +** vldr.64 d0, \[r0, #1016\] +** bx lr +*/ +void +w_m_127 (double *r0) +{ + register double d0 asm ("d0"); + d0 = r0[127]; + asm volatile ("" :: "w" (d0)); +} + +/* +** w_m_128: +** add (r[0-9]+), r0, #1024 +** vldr.64 d0, \[\1\] +** bx lr +*/ +void +w_m_128 (double *r0) +{ + register double d0 asm ("d0"); + d0 = r0[128]; + asm volatile ("" :: "w" (d0)); +} + +/* +** m_m32_r: +** sub (r[0-9]+), r0, #256 +** strd r2, \[\1\] +** bx lr +*/ +void +m_m32_r (double *r0) +{ + register double r2 asm ("r2"); + asm volatile ("" : "=r" (r2)); + r0[-32] = r2; +} + +/* +** m_m31_r: +** strd r2, \[r0, #-248\] +** bx lr +*/ +void +m_m31_r (double *r0) +{ + register double r2 asm ("r2"); + asm volatile ("" : "=r" (r2)); + r0[-31] = r2; +} + +/* +** m_m1_r: +** 
strd r2, \[r0, #-8\] +** bx lr +*/ +void +m_m1_r (double *r0) +{ + register double r2 asm ("r2"); + asm volatile ("" : "=r" (r2)); + r0[-1] = r2; +} + +/* +** m_0_r: +** strd r2, \[r0\] +** bx lr +*/ +void +m_0_r (double *r0) +{ + register double r2 asm ("r2"); + asm volatile ("" : "=r" (r2)); + r0[0] = r2; +} + +/* +** m_1_r: +** strd r2, \[r0, #8\] +** bx lr +*/ +void +m_1_r (double *r0) +{ + register double r2 asm ("r2"); + asm volatile ("" : "=r" (r2)); + r0[1] = r2; +} + +/* +** m_127_r: +** strd r2, \[r0, #1016\] +** bx lr +*/ +void +m_127_r (double *r0) +{ + register double r2 asm ("r2"); + asm volatile ("" : "=r" (r2)); + r0[127] = r2; +} + +/* +** m_128_r: +** add (r[0-9]+), r0, #1024 +** strd r2, \[r0\] +** bx lr +*/ +void +m_128_r (double *r0) +{ + register double r2 asm ("r2"); + asm volatile ("" : "=r" (r2)); + r0[128] = r2; +} + +/* ??? This could be done in one instruction, but without mve.fp, + it makes more sense for memory_operand to enforce the GPR range. */ +/* +** m_m32_w: +** sub (r[0-9]+), r0, #256 +** vstr.64 d0, \[\1\] +** bx lr +*/ +void +m_m32_w (double *r0) +{ + register double d0 asm ("d0"); + asm volatile ("" : "=w" (d0)); + r0[-32] = d0; +} + +/* +** m_m31_w: +** vstr.64 d0, \[r0, #-248\] +** bx lr +*/ +void +m_m31_w (double *r0) +{ + register double d0 asm ("d0"); + asm volatile ("" : "=w" (d0)); + r0[-31] = d0; +} + +/* +** m_m1_w: +** vstr.64 d0, \[r0, #-8\] +** bx lr +*/ +void +m_m1_w (double *r0) +{ + register double d0 asm ("d0"); + asm volatile ("" : "=w" (d0)); + r0[-1] = d0; +} + +/* +** m_0_w: +** vstr.64 d0, \[r0\] +** bx lr +*/ +void +m_0_w (double *r0) +{ + register double d0 asm ("d0"); + asm volatile ("" : "=w" (d0)); + r0[0] = d0; +} + +/* +** m_1_w: +** vstr.64 d0, \[r0, #8\] +** bx lr +*/ +void +m_1_w (double *r0) +{ + register double d0 asm ("d0"); + asm volatile ("" : "=w" (d0)); + r0[1] = d0; +} + +/* +** m_127_w: +** vstr.64 d0, \[r0, #1016\] +** bx lr +*/ +void +m_127_w (double *r0) +{ + register double d0 asm 
("d0"); + asm volatile ("" : "=w" (d0)); + r0[127] = d0; +} + +/* +** m_128_w: +** add (r[0-9]+), r0, #1024 +** vstr.64 d0, \[\1\] +** bx lr +*/ +void +m_128_w (double *r0) +{ + register double d0 asm ("d0"); + asm volatile ("" : "=w" (d0)); + r0[128] = d0; +} diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c index 0a69ace..50b1953 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c @@ -13,5 +13,6 @@ fn1 (__fp16 *pSrc) pDst[i] = high; } -/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */ -/* { dg-final { scan-assembler {vstr\.16\ts[0-9]+, \[r[0-9]+\]\n} } } */ +/* { dg-final { scan-assembler {vldr\.16\ts[0-9]+, \[r[0-9]+(, #-?[0-9]+)?\]\n} } } */ +/* { dg-final { scan-assembler-not {vldr\.16\t[^\n]*\]!} } } */ +/* { dg-final { scan-assembler-not {vstr\.16\t[^\n]*\]!} } } */ -- cgit v1.1 From 7bfc4cd2c812a3197c09797796828459714f8849 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 25 Sep 2020 13:59:15 +0200 Subject: middle-end/97207 - implement move assign for auto_vec<> This implements the missing move assignment to make std::swap work on auto_vec<> 2020-09-25 Richard Biener PR middle-end/97207 * vec.h (auto_vec::operator=(auto_vec&&)): Implement. 
--- gcc/vec.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/vec.h b/gcc/vec.h index d73d865..d8c7cda 100644 --- a/gcc/vec.h +++ b/gcc/vec.h @@ -1546,7 +1546,13 @@ public: this->m_vec = r.m_vec; r.m_vec = NULL; } - void operator= (auto_vec&&) = delete; + auto_vec& operator= (auto_vec&& r) + { + this->release (); + this->m_vec = r.m_vec; + r.m_vec = NULL; + return *this; + } }; -- cgit v1.1 From d16b5975ca985cbe97698479fc38b6a636886978 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 25 Sep 2020 11:13:13 +0200 Subject: middle-end/96814 - fix VECTOR_BOOLEAN_TYPE_P CTOR RTL expansion The RTL expansion code for CTORs doesn't handle VECTOR_BOOLEAN_TYPE_P with bit-precision elements correctly as the testcase shows before the PR97085 fix. The following makes it do the correct thing (not 100% sure for CTOR of sub-vectors due to the lack of a testcase). The alternative would be to assert such CTORs do not happen (and also add IL verification for this). The GIMPLE FE needs a way to declare the VECTOR_BOOLEAN_TYPE_P vectors (thus the C FE needs that). 2020-09-25 Richard Biener PR middle-end/96814 * expr.c (store_constructor): Handle VECTOR_BOOLEAN_TYPE_P CTORs correctly. * gcc.target/i386/pr96814.c: New testcase. 
--- gcc/expr.c | 18 +++++++++++------- gcc/testsuite/gcc.target/i386/pr96814.c | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr96814.c (limited to 'gcc') diff --git a/gcc/expr.c b/gcc/expr.c index 1a15f24..1c79518 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -6922,7 +6922,7 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size, insn_code icode = CODE_FOR_nothing; tree elt; tree elttype = TREE_TYPE (type); - int elt_size = tree_to_uhwi (TYPE_SIZE (elttype)); + int elt_size = vector_element_bits (type); machine_mode eltmode = TYPE_MODE (elttype); HOST_WIDE_INT bitsize; HOST_WIDE_INT bitpos; @@ -6987,6 +6987,15 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size, } } + /* Compute the size of the elements in the CTOR. It differs + from the size of the vector type elements only when the + CTOR elements are vectors themselves. */ + tree val_type = TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value); + if (VECTOR_TYPE_P (val_type)) + bitsize = tree_to_uhwi (TYPE_SIZE (val_type)); + else + bitsize = elt_size; + /* If the constructor has fewer elements than the vector, clear the whole array first. Similarly if this is static constructor of a non-BLKmode object. 
*/ @@ -7001,11 +7010,7 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size, FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), idx, value) { - tree sz = TYPE_SIZE (TREE_TYPE (value)); - int n_elts_here - = tree_to_uhwi (int_const_binop (TRUNC_DIV_EXPR, sz, - TYPE_SIZE (elttype))); - + int n_elts_here = bitsize / elt_size; count += n_elts_here; if (mostly_zeros_p (value)) zero_count += n_elts_here; @@ -7045,7 +7050,6 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size, HOST_WIDE_INT eltpos; tree value = ce->value; - bitsize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (value))); if (cleared && initializer_zerop (value)) continue; diff --git a/gcc/testsuite/gcc.target/i386/pr96814.c b/gcc/testsuite/gcc.target/i386/pr96814.c new file mode 100644 index 0000000..b280c73 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr96814.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-options "-mavx512vl -mavx512bw" } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target avx512vl } */ + +typedef unsigned char __attribute__ ((__vector_size__ (32))) V; + +void +test (void) +{ + V x = ((V){8} > 0) == 0; + for (unsigned i = 0; i < sizeof (x); i++) + if (x[i] != (i ? 0xff : 0)) __builtin_abort(); +} + +#define DO_TEST test +#define AVX512VL +#define AVX512BW +#include "avx512-check.h" -- cgit v1.1 From 69cf7decf1be230739b1e309115832373025b41d Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Fri, 25 Sep 2020 15:23:49 +0200 Subject: [nvptx] Fix Wimplicit-fallthrough in nvptx.c with -save-temps When compiling nvptx.c using -save-temps, I ran into Wimplicit-fallthrough warnings. The fallthrough locations have been marked with a fallthrough comment, but that doesn't work with -save-temps, something that has been filed as PR78497. Work around this by using gcc_fallthrough () in addition to the comment. Tested by building target nvptx, copying nvptx.c compile line and adding -save-temps. 
gcc/ChangeLog: 2020-09-25 Tom de Vries * config/nvptx/nvptx.c (nvptx_assemble_integer, nvptx_print_operand): Use gcc_fallthrough (). --- gcc/config/nvptx/nvptx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 54b1fdf..de82f9a 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -2101,7 +2101,7 @@ nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p)) val = INTVAL (XEXP (x, 1)); x = XEXP (x, 0); gcc_assert (GET_CODE (x) == SYMBOL_REF); - /* FALLTHROUGH */ + gcc_fallthrough (); /* FALLTHROUGH */ case SYMBOL_REF: gcc_assert (size == init_frag.size); @@ -2603,7 +2603,7 @@ nvptx_print_operand (FILE *file, rtx x, int code) { case 'A': x = XEXP (x, 0); - /* FALLTHROUGH. */ + gcc_fallthrough (); /* FALLTHROUGH. */ case 'D': if (GET_CODE (x) == CONST) -- cgit v1.1 From a28542df4d069d9937070b5456a54d5e10bcfe56 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Fri, 25 Sep 2020 06:53:06 -0700 Subject: c++: DECL_BUILTIN_P for builtins We currently detect builtin decls via DECL_ARTIFICIAL && !DECL_HIDDEN_FUNCTION_P, which, besides being clunky, is a problem as hiddenness is a property of the symbol table -- not the decl being hidden. This adds DECL_BUILTIN_P, which just looks at the SOURCE_LOCATION -- we have a magic one for builtins. One of the consequential changes is to make function-scope omp udrs have function context (needed because otherwise duplicate-decls thinks the types don't match at the point we check). This is also morally better, because that's what they are -- nested functions, stop lying. (That's actually my plan for all DECL_LOCAL_DECL_P decls, as they are distinct decls to the namespace-scope decl they alias.) gcc/cp/ * cp-tree.h (DECL_BUILTIN_P): New. * decl.c (duplicate_decls): Use it. Do not treat omp-udr as a builtin. * name-lookup.c (anticipated_builtin): Use it. 
(set_decl_context_in_fn): Function-scope OMP UDRs have function context. (do_nonmember_using_decl): Use DECL_BUILTIN_P. * parser.c (cp_parser_omp_declare_reduction): Function-scope OMP UDRs have function context. Assert we never find a valid duplicate. * pt.c (tsubst_expr): Function-scope OMP UDRs have function context. libcc1/ * libcp1plugin.cc (supplement_binding): Use DECL_BUILTIN_P. --- gcc/cp/cp-tree.h | 4 ++++ gcc/cp/decl.c | 24 ++++++++++++------------ gcc/cp/name-lookup.c | 15 +++++++++------ gcc/cp/parser.c | 6 ++++-- gcc/cp/pt.c | 11 +++-------- 5 files changed, 32 insertions(+), 28 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 3ae4874..bd78f00 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -4040,6 +4040,10 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter) #define FNDECL_USED_AUTO(NODE) \ TREE_LANG_FLAG_2 (FUNCTION_DECL_CHECK (NODE)) +/* True if NODE is a builtin decl. */ +#define DECL_BUILTIN_P(NODE) \ + (DECL_SOURCE_LOCATION(NODE) == BUILTINS_LOCATION) + /* Nonzero if NODE is a DECL which we know about but which has not been explicitly declared, such as a built-in function or a friend declared inside a class. In the latter case DECL_HIDDEN_FRIEND_P diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 6019051..1709dd9 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -1464,9 +1464,7 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) /* Check for redeclaration and other discrepancies. */ if (TREE_CODE (olddecl) == FUNCTION_DECL - && DECL_ARTIFICIAL (olddecl) - /* A C++20 implicit friend operator== uses the normal path (94462). 
*/ - && !DECL_HIDDEN_FRIEND_P (olddecl)) + && DECL_BUILTIN_P (olddecl)) { if (TREE_CODE (newdecl) != FUNCTION_DECL) { @@ -1508,15 +1506,6 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) "declaration %q#D", newdecl, olddecl); return NULL_TREE; } - else if (DECL_OMP_DECLARE_REDUCTION_P (olddecl)) - { - gcc_assert (DECL_OMP_DECLARE_REDUCTION_P (newdecl)); - error_at (newdecl_loc, - "redeclaration of %"); - inform (olddecl_loc, - "previous % declaration"); - return error_mark_node; - } else if (!types_match) { /* Avoid warnings redeclaring built-ins which have not been @@ -1816,6 +1805,17 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) } } else if (TREE_CODE (newdecl) == FUNCTION_DECL + && DECL_OMP_DECLARE_REDUCTION_P (newdecl)) + { + /* OMP UDRs are never duplicates. */ + gcc_assert (DECL_OMP_DECLARE_REDUCTION_P (olddecl)); + error_at (newdecl_loc, + "redeclaration of %"); + inform (olddecl_loc, + "previous % declaration"); + return error_mark_node; + } + else if (TREE_CODE (newdecl) == FUNCTION_DECL && ((DECL_TEMPLATE_SPECIALIZATION (olddecl) && (!DECL_TEMPLATE_INFO (newdecl) || (DECL_TI_TEMPLATE (newdecl) diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index e7764ab..dbc6cc3 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -2119,10 +2119,10 @@ anticipated_builtin_p (tree ovl) tree fn = OVL_FUNCTION (ovl); gcc_checking_assert (DECL_ANTICIPATED (fn)); - if (DECL_HIDDEN_FRIEND_P (fn)) - return false; + if (DECL_BUILTIN_P (fn)) + return true; - return true; + return false; } /* BINDING records an existing declaration for a name in the current scope. @@ -2857,9 +2857,12 @@ set_decl_context_in_fn (tree ctx, tree decl) { if (TREE_CODE (decl) == FUNCTION_DECL || (VAR_P (decl) && DECL_EXTERNAL (decl))) - /* Make sure local externs are marked as such. */ + /* Make sure local externs are marked as such. OMP UDRs really + are nested functions. 
*/ gcc_checking_assert (DECL_LOCAL_DECL_P (decl) - && DECL_NAMESPACE_SCOPE_P (decl)); + && (DECL_NAMESPACE_SCOPE_P (decl) + || (TREE_CODE (decl) == FUNCTION_DECL + && DECL_OMP_DECLARE_REDUCTION_P (decl)))); if (!DECL_CONTEXT (decl) /* When parsing the parameter list of a function declarator, @@ -3934,7 +3937,7 @@ do_nonmember_using_decl (name_lookup &lookup, bool fn_scope_p, } else if (old.using_p ()) continue; /* This is a using decl. */ - else if (old.hidden_p () && !DECL_HIDDEN_FRIEND_P (old_fn)) + else if (old.hidden_p () && DECL_BUILTIN_P (old_fn)) continue; /* This is an anticipated builtin. */ else if (!matching_fn_p (new_fn, old_fn)) continue; /* Parameters do not match. */ diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index fba3fcc..ccfae78 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -42567,7 +42567,7 @@ cp_parser_omp_declare_reduction (cp_parser *parser, cp_token *pragma_tok, if (current_function_decl) { block_scope = true; - DECL_CONTEXT (fndecl) = global_namespace; + DECL_CONTEXT (fndecl) = current_function_decl; DECL_LOCAL_DECL_P (fndecl) = true; if (!processing_template_decl) pushdecl (fndecl); @@ -42592,7 +42592,9 @@ cp_parser_omp_declare_reduction (cp_parser *parser, cp_token *pragma_tok, else { DECL_CONTEXT (fndecl) = current_namespace; - pushdecl (fndecl); + tree d = pushdecl (fndecl); + /* We should never meet a matched duplicate decl. */ + gcc_checking_assert (d == error_mark_node || d == fndecl); } if (!block_scope) start_preparsed_function (fndecl, NULL_TREE, SF_PRE_PARSED); diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 6f8dbc3..a4530db 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -18109,16 +18109,11 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl, else if (DECL_IMPLICIT_TYPEDEF_P (t)) /* We already did a pushtag. 
*/; else if (TREE_CODE (decl) == FUNCTION_DECL - && DECL_OMP_DECLARE_REDUCTION_P (decl) - && DECL_FUNCTION_SCOPE_P (pattern_decl)) + && DECL_LOCAL_DECL_P (decl) + && DECL_OMP_DECLARE_REDUCTION_P (decl)) { - /* We pretend this is regular local extern decl of - a namespace-scope fn. Then we make it really - local, it is a nested function. */ - gcc_checking_assert (DECL_LOCAL_DECL_P (decl)); - DECL_CONTEXT (decl) = global_namespace; - pushdecl (decl); DECL_CONTEXT (decl) = current_function_decl; + pushdecl (decl); if (cp_check_omp_declare_reduction (decl)) instantiate_body (pattern_decl, args, decl, true); } -- cgit v1.1 From 1921ebcaf6467996aede69e1bbe32400d8a20fe7 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Fri, 25 Sep 2020 16:21:34 +0200 Subject: gcov: fix streaming of HIST_TYPE_IOR histogram type. gcc/ChangeLog: PR gcov-profile/64636 * value-prof.c (stream_out_histogram_value): Allow negative values for HIST_TYPE_IOR. --- gcc/value-prof.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/value-prof.c b/gcc/value-prof.c index ea1b1a8..95d33c6 100644 --- a/gcc/value-prof.c +++ b/gcc/value-prof.c @@ -331,7 +331,10 @@ stream_out_histogram_value (struct output_block *ob, histogram_value hist) /* When user uses an unsigned type with a big value, constant converted to gcov_type (a signed type) can be negative. */ gcov_type value = hist->hvalue.counters[i]; - if (hist->type == HIST_TYPE_TOPN_VALUES) + if (hist->type == HIST_TYPE_TOPN_VALUES + || hist->type == HIST_TYPE_IOR) + /* Note that the IOR counter tracks pointer values and these can have + sign bit set. */ ; else gcc_assert (value >= 0); -- cgit v1.1 From 4af3472517b7914c5f68da0f2b5ea962812368a1 Mon Sep 17 00:00:00 2001 From: Alex Coplan Date: Fri, 25 Sep 2020 17:16:34 +0100 Subject: arm: Add missing Neoverse V1 feature This adds a missing feature (FP16) to the Neoverse V1 description in AArch32 GCC. gcc/ChangeLog: * config/arm/arm-cpus.in (neoverse-v1): Add FP16. 
--- gcc/config/arm/arm-cpus.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index be563b7..bf460dd 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -1494,7 +1494,7 @@ begin cpu neoverse-v1 cname neoversev1 tune for cortex-a57 tune flags LDSCHED - architecture armv8.4-a+bf16+i8mm + architecture armv8.4-a+fp16+bf16+i8mm option crypto add FP_ARMv8 CRYPTO costs cortex_a57 end cpu neoverse-v1 -- cgit v1.1 From 0d8f3f612d662ea3007c184a11ea5eb7d58760e7 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Fri, 25 Sep 2020 17:32:43 +0100 Subject: AArch64: Add Linux cpuinfo string for rng feature The Linux kernel has defined the cpuinfo string for the +rng feature, so this patch adds that to GCC so that -march=native can pick it up. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ * config/aarch64/aarch64-option-extensions.def (rng): Add cpuinfo string. --- gcc/config/aarch64/aarch64-option-extensions.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 8257df9..ca08642 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -155,7 +155,7 @@ AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | \ AARCH64_OPT_EXTENSION("profile", AARCH64_FL_PROFILE, 0, 0, false, "") /* Enabling/Disabling "rng" only changes "rng". */ -AARCH64_OPT_EXTENSION("rng", AARCH64_FL_RNG, 0, 0, false, "") +AARCH64_OPT_EXTENSION("rng", AARCH64_FL_RNG, 0, 0, false, "rng") /* Enabling/Disabling "memtag" only changes "memtag". 
*/ AARCH64_OPT_EXTENSION("memtag", AARCH64_FL_MEMTAG, 0, 0, false, "") -- cgit v1.1 From 00aaae03db249e61dde41facbf373c3fcde38154 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Fri, 25 Sep 2020 10:24:09 -0700 Subject: c++: Replace tag_scope with TAG_how I always found tag_scope confusing, as it is not a scope, but a direction of how to look up or insert an elaborated type tag. This replaces it with an enum class TAG_how. I also add a new value, HIDDEN_FRIEND, to distinguish the two cases of innermost-non-class insertion that we currently conflate. Also renamed 'lookup_type_scope' to 'lookup_elaborated_type', because again, we're not providing a scope to look up in. gcc/cp/ * name-lookup.h (enum tag_scope): Replace with ... (enum class TAG_how): ... this. Add HIDDEN_FRIEND value. (lookup_type_scope): Replace with ... (lookup_elaborated_type): ... this. (pushtag): Use TAG_how, not tag_scope. * cp-tree.h (xref_tag): Parameter is TAG_how, not tag_scope. * decl.c (lookup_and_check_tag): Likewise. Adjust. (xref_tag_1, xref_tag): Likewise. Adjust. (start_enum): Adjust lookup_and_check_tag call. * name-lookup.c (lookup_type_scope_1): Rename to ... (lookup_elaborated_type_1): ... here. Use TAG_how, not tag_scope. (lookup_type_scope): Rename to ... (lookup_elaborated_type): ... here. Use TAG_how, not tag_scope. (do_pushtag): Use TAG_how, not tag_scope. Adjust. (pushtag): Likewise. * parser.c (cp_parser_elaborated_type_specifier): Adjust. (cp_parser_class_head): Likewise. gcc/objcp/ * objcp-decl.c (objcp_start_struct): Use TAG_how not tag_scope. (objcp_xref_tag): Likewise.
--- gcc/cp/cp-tree.h | 2 +- gcc/cp/decl.c | 58 ++++++++++++++++++++++------------------------ gcc/cp/name-lookup.c | 62 +++++++++++++++++++++++--------------------------- gcc/cp/name-lookup.h | 32 ++++++++++++-------------- gcc/cp/parser.c | 22 +++++++++--------- gcc/objcp/objcp-decl.c | 4 ++-- 6 files changed, 84 insertions(+), 96 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index bd78f00..321bb95 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6507,7 +6507,7 @@ extern void grok_special_member_properties (tree); extern bool grok_ctor_properties (const_tree, const_tree); extern bool grok_op_properties (tree, bool); extern tree xref_tag (tag_types, tree, - tag_scope = ts_current, + TAG_how = TAG_how::CURRENT_ONLY, bool tpl_header_p = false); extern void xref_basetypes (tree, tree); extern tree start_enum (tree, tree, tree, tree, bool, bool *); diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 1709dd9..b481bbd 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -75,7 +75,7 @@ static void record_unknown_type (tree, const char *); static int member_function_or_else (tree, tree, enum overload_flags); static tree local_variable_p_walkfn (tree *, int *, void *); static const char *tag_name (enum tag_types); -static tree lookup_and_check_tag (enum tag_types, tree, tag_scope, bool); +static tree lookup_and_check_tag (enum tag_types, tree, TAG_how, bool); static void maybe_deduce_size_from_array_init (tree, tree); static void layout_var_decl (tree); static tree check_initializer (tree, tree, int, vec **); @@ -14862,11 +14862,10 @@ check_elaborated_type_specifier (enum tag_types tag_code, static tree lookup_and_check_tag (enum tag_types tag_code, tree name, - tag_scope scope, bool template_header_p) + TAG_how how, bool template_header_p) { - tree t; tree decl; - if (scope == ts_global) + if (how == TAG_how::GLOBAL) { /* First try ordinary name lookup, ignoring hidden class name injected via friend declaration. 
*/ @@ -14879,16 +14878,16 @@ lookup_and_check_tag (enum tag_types tag_code, tree name, If we find one, that name will be made visible rather than creating a new tag. */ if (!decl) - decl = lookup_type_scope (name, ts_within_enclosing_non_class); + decl = lookup_elaborated_type (name, TAG_how::INNERMOST_NON_CLASS); } else - decl = lookup_type_scope (name, scope); + decl = lookup_elaborated_type (name, how); if (decl && (DECL_CLASS_TEMPLATE_P (decl) - /* If scope is ts_current we're defining a class, so ignore a - template template parameter. */ - || (scope != ts_current + /* If scope is TAG_how::CURRENT_ONLY we're defining a class, + so ignore a template template parameter. */ + || (how != TAG_how::CURRENT_ONLY && DECL_TEMPLATE_TEMPLATE_PARM_P (decl)))) decl = DECL_TEMPLATE_RESULT (decl); @@ -14898,11 +14897,10 @@ lookup_and_check_tag (enum tag_types tag_code, tree name, class C { class C {}; }; */ - if (scope == ts_current && DECL_SELF_REFERENCE_P (decl)) + if (how == TAG_how::CURRENT_ONLY && DECL_SELF_REFERENCE_P (decl)) { error ("%qD has the same name as the class in which it is " - "declared", - decl); + "declared", decl); return error_mark_node; } @@ -14922,10 +14920,10 @@ lookup_and_check_tag (enum tag_types tag_code, tree name, class C *c2; // DECL_SELF_REFERENCE_P is true }; */ - t = check_elaborated_type_specifier (tag_code, - decl, - template_header_p - | DECL_SELF_REFERENCE_P (decl)); + tree t = check_elaborated_type_specifier (tag_code, + decl, + template_header_p + | DECL_SELF_REFERENCE_P (decl)); if (template_header_p && t && CLASS_TYPE_P (t) && (!CLASSTYPE_TEMPLATE_INFO (t) || (!PRIMARY_TEMPLATE_P (CLASSTYPE_TI_TEMPLATE (t))))) @@ -14969,7 +14967,7 @@ lookup_and_check_tag (enum tag_types tag_code, tree name, static tree xref_tag_1 (enum tag_types tag_code, tree name, - tag_scope scope, bool template_header_p) + TAG_how how, bool template_header_p) { enum tree_code code; tree context = NULL_TREE; @@ -14996,22 +14994,22 @@ xref_tag_1 (enum tag_types 
tag_code, tree name, make type node and push name. Name lookup is not required. */ tree t = NULL_TREE; if (!IDENTIFIER_ANON_P (name)) - t = lookup_and_check_tag (tag_code, name, scope, template_header_p); + t = lookup_and_check_tag (tag_code, name, how, template_header_p); if (t == error_mark_node) return error_mark_node; - if (scope != ts_current && t && current_class_type + if (how != TAG_how::CURRENT_ONLY && t && current_class_type && template_class_depth (current_class_type) && template_header_p) { if (TREE_CODE (t) == TEMPLATE_TEMPLATE_PARM) return t; - /* Since SCOPE is not TS_CURRENT, we are not looking at a - definition of this tag. Since, in addition, we are currently - processing a (member) template declaration of a template - class, we must be very careful; consider: + /* Since HOW is not TAG_how::CURRENT_ONLY, we are not looking at + a definition of this tag. Since, in addition, we are + currently processing a (member) template declaration of a + template class, we must be very careful; consider: template struct S1 @@ -15057,7 +15055,7 @@ xref_tag_1 (enum tag_types tag_code, tree name, /* Mark it as a lambda type right now. Our caller will correct the value. */ CLASSTYPE_LAMBDA_EXPR (t) = error_mark_node; - t = pushtag (name, t, scope); + t = pushtag (name, t, how); } else { @@ -15083,7 +15081,7 @@ xref_tag_1 (enum tag_types tag_code, tree name, return error_mark_node; } - if (scope != ts_within_enclosing_non_class && TYPE_HIDDEN_P (t)) + if (how != TAG_how::HIDDEN_FRIEND && TYPE_HIDDEN_P (t)) { /* This is no longer an invisible friend. Make it visible. 
*/ @@ -15108,12 +15106,10 @@ xref_tag_1 (enum tag_types tag_code, tree name, tree xref_tag (enum tag_types tag_code, tree name, - tag_scope scope, bool template_header_p) + TAG_how how, bool template_header_p) { - tree ret; - bool subtime; - subtime = timevar_cond_start (TV_NAME_LOOKUP); - ret = xref_tag_1 (tag_code, name, scope, template_header_p); + bool subtime = timevar_cond_start (TV_NAME_LOOKUP); + tree ret = xref_tag_1 (tag_code, name, how, template_header_p); timevar_cond_stop (TV_NAME_LOOKUP, subtime); return ret; } @@ -15412,7 +15408,7 @@ start_enum (tree name, tree enumtype, tree underlying_type, forward reference. */ if (!enumtype) enumtype = lookup_and_check_tag (enum_type, name, - /*tag_scope=*/ts_current, + /*tag_scope=*/TAG_how::CURRENT_ONLY, /*template_header_p=*/false); /* In case of a template_decl, the only check that should be deferred diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index dbc6cc3..0115a4b 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -6583,22 +6583,20 @@ lookup_name (tree name) } /* Look up NAME for type used in elaborated name specifier in - the scopes given by SCOPE. SCOPE can be either TS_CURRENT or - TS_WITHIN_ENCLOSING_NON_CLASS. Although not implied by the - name, more scopes are checked if cleanup or template parameter - scope is encountered. + the scopes given by HOW. Unlike lookup_name_1, we make sure that NAME is actually declared in the desired scope, not from inheritance, nor using directive. For using declaration, there is DR138 still waiting to be resolved. Hidden name coming from an earlier friend - declaration is also returned. + declaration is also returned, and will be made visible unless HOW + is TAG_how::HIDDEN_FRIEND. A TYPE_DECL best matching the NAME is returned. Catching error and issuing diagnostics are caller's responsibility. 
*/ static tree -lookup_type_scope_1 (tree name, tag_scope scope) +lookup_elaborated_type_1 (tree name, TAG_how how) { cp_binding_level *b = current_binding_level; @@ -6613,28 +6611,28 @@ lookup_type_scope_1 (tree name, tag_scope scope) if (!(b->kind == sk_cleanup || b->kind == sk_template_parms || b->kind == sk_function_parms - || (b->kind == sk_class - && scope == ts_within_enclosing_non_class))) + || (b->kind == sk_class && how != TAG_how::CURRENT_ONLY))) return NULL_TREE; /* Check if this is the kind of thing we're looking for. If - SCOPE is TS_CURRENT, also make sure it doesn't come from - base class. For ITER->VALUE, we can simply use - INHERITED_VALUE_BINDING_P. For ITER->TYPE, we have to - use our own check. + HOW is TAG_how::CURRENT_ONLY, also make sure it doesn't + come from base class. For ITER->VALUE, we can simply use + INHERITED_VALUE_BINDING_P. For ITER->TYPE, we have to use + our own check. We check ITER->TYPE before ITER->VALUE in order to handle typedef struct C {} C; correctly. */ + if (tree type = iter->type) if (qualify_lookup (type, LOOK_want::TYPE) - && (scope != ts_current + && (how != TAG_how::CURRENT_ONLY || LOCAL_BINDING_P (iter) || DECL_CONTEXT (type) == iter->scope->this_entity)) return type; if (qualify_lookup (iter->value, LOOK_want::TYPE) - && (scope != ts_current + && (how != TAG_how::CURRENT_ONLY || !INHERITED_VALUE_BINDING_P (iter))) return iter->value; } @@ -6644,8 +6642,7 @@ lookup_type_scope_1 (tree name, tag_scope scope) if (!(b->kind == sk_cleanup || b->kind == sk_template_parms || b->kind == sk_function_parms - || (b->kind == sk_class - && scope == ts_within_enclosing_non_class))) + || (b->kind == sk_class && how != TAG_how::CURRENT_ONLY))) return NULL_TREE; /* Look in the innermost namespace. */ @@ -6664,15 +6661,14 @@ lookup_type_scope_1 (tree name, tag_scope scope) return NULL_TREE; } - + /* Wrapper for lookup_type_scope_1. 
*/ tree -lookup_type_scope (tree name, tag_scope scope) +lookup_elaborated_type (tree name, TAG_how how) { - tree ret; bool subtime = timevar_cond_start (TV_NAME_LOOKUP); - ret = lookup_type_scope_1 (name, scope); + tree ret = lookup_elaborated_type_1 (name, how); timevar_cond_stop (TV_NAME_LOOKUP, subtime); return ret; } @@ -6782,7 +6778,7 @@ maybe_process_template_type_declaration (tree type, int is_friend, Returns TYPE upon success and ERROR_MARK_NODE otherwise. */ static tree -do_pushtag (tree name, tree type, tag_scope scope) +do_pushtag (tree name, tree type, TAG_how how) { tree decl; @@ -6799,10 +6795,9 @@ do_pushtag (tree name, tree type, tag_scope scope) declaration, these scopes are not scopes from the point of view of the language. */ || (b->kind == sk_template_parms - && (b->explicit_spec_p || scope == ts_global))) + && (b->explicit_spec_p || how == TAG_how::GLOBAL))) b = b->level_chain; - else if (b->kind == sk_class - && scope != ts_current) + else if (b->kind == sk_class && how != TAG_how::CURRENT_ONLY) { b = b->level_chain; if (b->kind == sk_template_parms) @@ -6836,7 +6831,7 @@ do_pushtag (tree name, tree type, tag_scope scope) : TYPE_P (cs) ? cs == current_class_type : cs == current_namespace); - if (scope == ts_current + if (how == TAG_how::CURRENT_ONLY || (cs && TREE_CODE (cs) == FUNCTION_DECL)) context = cs; else if (cs && TYPE_P (cs)) @@ -6856,18 +6851,19 @@ do_pushtag (tree name, tree type, tag_scope scope) tdef = create_implicit_typedef (name, type); DECL_CONTEXT (tdef) = FROB_CONTEXT (context); - if (scope == ts_within_enclosing_non_class) + bool is_friend = how == TAG_how::HIDDEN_FRIEND; + if (is_friend) { + // FIXME: can go away /* This is a friend. Make this TYPE_DECL node hidden from ordinary name lookup. Its corresponding TEMPLATE_DECL - will be marked in push_template_decl_real. */ + will be marked in push_template_decl. 
*/ retrofit_lang_decl (tdef); DECL_ANTICIPATED (tdef) = 1; DECL_FRIEND_P (tdef) = 1; } - decl = maybe_process_template_type_declaration - (type, scope == ts_within_enclosing_non_class, b); + decl = maybe_process_template_type_declaration (type, is_friend, b); if (decl == error_mark_node) return decl; @@ -6888,7 +6884,8 @@ do_pushtag (tree name, tree type, tag_scope scope) } else if (b->kind != sk_template_parms) { - decl = do_pushdecl_with_scope (decl, b, /*is_friend=*/false); + decl = do_pushdecl_with_scope + (decl, b, /*hiding=*/(how == TAG_how::HIDDEN_FRIEND)); if (decl == error_mark_node) return decl; @@ -6954,11 +6951,10 @@ do_pushtag (tree name, tree type, tag_scope scope) /* Wrapper for do_pushtag. */ tree -pushtag (tree name, tree type, tag_scope scope) +pushtag (tree name, tree type, TAG_how how) { - tree ret; bool subtime = timevar_cond_start (TV_NAME_LOOKUP); - ret = do_pushtag (name, type, scope); + tree ret = do_pushtag (name, type, how); timevar_cond_stop (TV_NAME_LOOKUP, subtime); return ret; } diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h index 76ec8f2..82f4d51 100644 --- a/gcc/cp/name-lookup.h +++ b/gcc/cp/name-lookup.h @@ -124,22 +124,6 @@ enum scope_kind { sk_omp /* An OpenMP structured block. */ }; -/* The scope where the class/struct/union/enum tag applies. */ -enum tag_scope { - ts_current = 0, /* Current scope only. This is for the - class-key identifier; - case mentioned in [basic.lookup.elab]/2, - or the class/enum definition - class-key identifier { ... }; */ - ts_global = 1, /* All scopes. This is the 3.4.1 - [basic.lookup.unqual] lookup mentioned - in [basic.lookup.elab]/2. */ - ts_within_enclosing_non_class = 2, /* Search within enclosing non-class - only, for friend class lookup - according to [namespace.memdef]/3 - and [class.friend]/9. */ -}; - struct GTY(()) cp_class_binding { cxx_binding *base; /* The bound name. 
*/ @@ -326,7 +310,19 @@ inline tree lookup_name (tree name, LOOK_want want) return lookup_name (name, LOOK_where::ALL, want); } -extern tree lookup_type_scope (tree, tag_scope); +enum class TAG_how +{ + CURRENT_ONLY = 0, // Look and insert only in current scope + + GLOBAL = 1, // Unqualified lookup, innermost-non-class insertion + + INNERMOST_NON_CLASS = 2, // Look and insert only into + // innermost-non-class + + HIDDEN_FRIEND = 3, // As INNERMOST_NON_CLASS, but hide it +}; + +extern tree lookup_elaborated_type (tree, TAG_how); extern tree get_namespace_binding (tree ns, tree id); extern void set_global_binding (tree decl); inline tree get_global_binding (tree id) @@ -371,7 +367,7 @@ extern tree pushdecl (tree, bool is_friend = false); extern tree pushdecl_outermost_localscope (tree); extern tree pushdecl_top_level (tree, bool is_friend = false); extern tree pushdecl_top_level_and_finish (tree, tree); -extern tree pushtag (tree, tree, tag_scope = ts_current); +extern tree pushtag (tree, tree, TAG_how = TAG_how::CURRENT_ONLY); extern int push_namespace (tree, bool make_inline = false); extern void pop_namespace (void); extern void push_nested_namespace (tree); diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index ccfae78..8905833 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -19057,21 +19057,20 @@ cp_parser_elaborated_type_specifier (cp_parser* parser, definition of a new type; a new type can only be declared in a declaration context. */ - tag_scope ts; - bool template_p; + TAG_how how; if (is_friend) /* Friends have special name lookup rules. 
*/ - ts = ts_within_enclosing_non_class; + how = TAG_how::HIDDEN_FRIEND; else if (is_declaration && cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON)) /* This is a `class-key identifier ;' */ - ts = ts_current; + how = TAG_how::CURRENT_ONLY; else - ts = ts_global; + how = TAG_how::GLOBAL; - template_p = + bool template_p = (template_parm_lists_apply && (cp_parser_next_token_starts_class_definition_p (parser) || cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON))); @@ -19084,7 +19083,8 @@ cp_parser_elaborated_type_specifier (cp_parser* parser, token->location, /*declarator=*/NULL)) return error_mark_node; - type = xref_tag (tag_type, identifier, ts, template_p); + + type = xref_tag (tag_type, identifier, how, template_p); } } @@ -24708,10 +24708,10 @@ cp_parser_class_head (cp_parser* parser, /* If the class was unnamed, create a dummy name. */ if (!id) id = make_anon_name (); - tag_scope tag_scope = (parser->in_type_id_in_expr_p - ? ts_within_enclosing_non_class - : ts_current); - type = xref_tag (class_key, id, tag_scope, + TAG_how how = (parser->in_type_id_in_expr_p + ? TAG_how::INNERMOST_NON_CLASS + : TAG_how::CURRENT_ONLY); + type = xref_tag (class_key, id, how, parser->num_template_parameter_lists); } diff --git a/gcc/objcp/objcp-decl.c b/gcc/objcp/objcp-decl.c index 087b5d5..c6c4ee5 100644 --- a/gcc/objcp/objcp-decl.c +++ b/gcc/objcp/objcp-decl.c @@ -41,7 +41,7 @@ objcp_start_struct (location_t loc ATTRIBUTE_UNUSED, if (!name) name = make_anon_name (); - s = xref_tag (record_type, name, ts_global); + s = xref_tag (record_type, name, TAG_how::GLOBAL); CLASSTYPE_DECLARED_CLASS (s) = 0; /* this is a 'struct', not a 'class'. */ xref_basetypes (s, NULL_TREE); /* no base classes here! 
*/ @@ -84,7 +84,7 @@ objcp_finish_function (void) tree objcp_xref_tag (enum tree_code code ATTRIBUTE_UNUSED, tree name) { - return xref_tag (record_type, name, ts_global); + return xref_tag (record_type, name, TAG_how::GLOBAL); } int -- cgit v1.1 From c74e6f7cfd7a741fc0477fe3660eec57581b22c5 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Fri, 25 Sep 2020 11:58:26 -0700 Subject: c++: Adjust pushdecl/duplicate_decls API The decl pushing APIs and duplicate_decls take an 'is_friend' parm, when what they actually mean is 'hide this from name lookup'. That conflation has gotten more anachronistic as time moved on. We now have anticipated builtins, and I plan to have injected extern decls soon. So this patch is mainly a renaming exercise. is_friend -> hiding. duplicate_decls gets an additional 'was_hidden' parm. As I've already said, hiddenness is a property of the symbol table, not the decl. Builtins are now pushed requesting hiding, and pushdecl asserts that we don't attempt to push a thing that should be hidden without asking for it to be hidden. This is the final piece of groundwork to get rid of a bunch of 'this is hidden' markers on decls and move the hiding management entirely into name lookup. gcc/cp/ * cp-tree.h (duplicate_decls): Replace 'is_friend' with 'hiding' and add 'was_hidden'. * name-lookup.h (pushdecl_namespace_level): Replace 'is_friend' with 'hiding'. (pushdecl): Likewise. (pushdecl_top_level): Drop is_friend parm. * decl.c (check_no_redeclaration_friend_default_args): Rename parm olddecl_hidden_p. (duplicate_decls): Replace 'is_friend' with 'hiding' and 'was_hidden'. Do minimal adjustments in body. (cxx_builtin_function): Pass 'hiding' to pushdecl. * friend.c (do_friend): Pass 'hiding' to pushdecl. * name-lookup.c (supplement_binding_1): Drop defaulted arg to duplicate_decls. (update_binding): Replace 'is_friend' with 'hiding'. Drop defaulted arg to duplicate_decls. (do_pushdecl): Replace 'is_friend' with 'hiding'. Assert no surprise hiding.
Adjust duplicate_decls calls to inform of old decl's hiddenness. (pushdecl): Replace 'is_friend' with 'hiding'. (set_identifier_type_value_with_scope): Adjust update_binding call. (do_pushdecl_with_scope): Replace 'is_friend' with 'hiding'. (pushdecl_outermost_localscope): Drop default arg to do_pushdecl_with_scope. (pushdecl_namespace_level): Replace 'is_friend' with 'hiding'. (pushdecl_top_level): Drop is_friend parm. * pt.c (register_specialization): Comment duplicate_decls call args. (push_template_decl): Comment pushdecl_namespace_level. (tsubst_friend_function, tsubst_friend_class): Likewise. --- gcc/cp/cp-tree.h | 3 ++- gcc/cp/decl.c | 62 +++++++++++++++++++++++++++++----------------------- gcc/cp/friend.c | 8 +++---- gcc/cp/name-lookup.c | 52 +++++++++++++++++++++++++------------------ gcc/cp/name-lookup.h | 6 ++--- gcc/cp/pt.c | 12 +++++----- 6 files changed, 81 insertions(+), 62 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 321bb95..b7f5b6b 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6466,7 +6466,8 @@ extern void determine_local_discriminator (tree); extern int decls_match (tree, tree, bool = true); extern bool maybe_version_functions (tree, tree, bool); extern tree duplicate_decls (tree, tree, - bool is_friend = false); + bool hiding = false, + bool was_hidden = false); extern tree declare_local_label (tree); extern tree define_label (location_t, tree); extern void check_goto (tree); diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index b481bbd..c00b996 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -1341,17 +1341,16 @@ check_redeclaration_no_default_args (tree decl) static void check_no_redeclaration_friend_default_args (tree olddecl, tree newdecl, - bool olddecl_hidden_friend_p) + bool olddecl_hidden_p) { - if (!olddecl_hidden_friend_p && !DECL_FRIEND_P (newdecl)) return; - tree t1 = FUNCTION_FIRST_USER_PARMTYPE (olddecl); - tree t2 =
FUNCTION_FIRST_USER_PARMTYPE (newdecl); - - for (; t1 && t1 != void_list_node; + for (tree t1 = FUNCTION_FIRST_USER_PARMTYPE (olddecl), + t2 = FUNCTION_FIRST_USER_PARMTYPE (newdecl); + t1 && t1 != void_list_node; t1 = TREE_CHAIN (t1), t2 = TREE_CHAIN (t2)) - if ((olddecl_hidden_friend_p && TREE_PURPOSE (t1)) + if ((olddecl_hidden_p && TREE_PURPOSE (t1)) || (DECL_FRIEND_P (newdecl) && TREE_PURPOSE (t2))) { auto_diagnostic_group d; @@ -1435,10 +1434,14 @@ duplicate_function_template_decls (tree newdecl, tree olddecl) If NEWDECL is not a redeclaration of OLDDECL, NULL_TREE is returned. - NEWDECL_IS_FRIEND is true if NEWDECL was declared as a friend. */ + HIDING is true if the new decl is being hidden. WAS_HIDDEN is true + if the old decl was hidden. + + Hidden decls can be anticipated builtins, injected friends, or + (coming soon) injected from a local-extern decl. */ tree -duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) +duplicate_decls (tree newdecl, tree olddecl, bool hiding, bool was_hidden) { unsigned olddecl_uid = DECL_UID (olddecl); int olddecl_friend = 0, types_match = 0, hidden_friend = 0; @@ -1510,7 +1513,7 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) { /* Avoid warnings redeclaring built-ins which have not been explicitly declared. */ - if (DECL_ANTICIPATED (olddecl)) + if (was_hidden) { tree t1, t2; @@ -1550,7 +1553,7 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) types_match = decls_match (newdecl, olddecl); if (types_match) return duplicate_decls (newdecl, olddecl, - newdecl_is_friend); + hiding, was_hidden); TYPE_ARG_TYPES (TREE_TYPE (olddecl)) = oldargs; } goto next_arg; @@ -1985,7 +1988,7 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) declaration of the function or function template in the translation unit." 
*/ check_no_redeclaration_friend_default_args - (olddecl, newdecl, DECL_HIDDEN_FRIEND_P (olddecl)); + (olddecl, newdecl, was_hidden); } } } @@ -2075,8 +2078,8 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) && !(new_defines_function && DECL_INITIAL (olddecl) == NULL_TREE) /* Don't warn about extern decl followed by definition. */ && !(DECL_EXTERNAL (olddecl) && ! DECL_EXTERNAL (newdecl)) - /* Don't warn about friends, let add_friend take care of it. */ - && ! (newdecl_is_friend || DECL_FRIEND_P (olddecl)) + /* Don't warn if at least one is/was hidden. */ + && !(hiding || was_hidden) /* Don't warn about declaration followed by specialization. */ && (! DECL_TEMPLATE_SPECIALIZATION (newdecl) || DECL_TEMPLATE_SPECIALIZATION (olddecl))) @@ -2134,11 +2137,9 @@ duplicate_decls (tree newdecl, tree olddecl, bool newdecl_is_friend) if (DECL_DECLARES_FUNCTION_P (olddecl)) { - olddecl_friend = DECL_FRIEND_P (olddecl); - olddecl_hidden_friend = DECL_HIDDEN_FRIEND_P (olddecl); - hidden_friend = (DECL_ANTICIPATED (olddecl) - && DECL_HIDDEN_FRIEND_P (olddecl) - && newdecl_is_friend); + olddecl_friend = DECL_FRIEND_P (STRIP_TEMPLATE (olddecl)); + olddecl_hidden_friend = olddecl_friend && was_hidden; + hidden_friend = olddecl_hidden_friend && hiding; if (!hidden_friend) { DECL_ANTICIPATED (olddecl) = 0; @@ -4714,16 +4715,23 @@ cxx_builtin_function (tree decl) tree id = DECL_NAME (decl); const char *name = IDENTIFIER_POINTER (id); + bool hiding = false; if (name[0] != '_' || name[1] != '_') - /* In the user's namespace, it must be declared before use. */ - DECL_ANTICIPATED (decl) = 1; + { + /* In the user's namespace, it must be declared before use. 
*/ + DECL_ANTICIPATED (decl) = 1; + hiding = true; + } else if (IDENTIFIER_LENGTH (id) > strlen ("___chk") && 0 != strncmp (name + 2, "builtin_", strlen ("builtin_")) && 0 == memcmp (name + IDENTIFIER_LENGTH (id) - strlen ("_chk"), "_chk", strlen ("_chk") + 1)) - /* Treat __*_chk fortification functions as anticipated as well, - unless they are __builtin_*_chk. */ - DECL_ANTICIPATED (decl) = 1; + { + /* Treat __*_chk fortification functions as anticipated as well, + unless they are __builtin_*_chk. */ + DECL_ANTICIPATED (decl) = 1; + hiding = true; + } /* All builtins that don't begin with an '_' should additionally go in the 'std' namespace. */ @@ -4733,12 +4741,12 @@ cxx_builtin_function (tree decl) push_nested_namespace (std_node); DECL_CONTEXT (std_decl) = FROB_CONTEXT (std_node); - pushdecl (std_decl); + pushdecl (std_decl, hiding); pop_nested_namespace (std_node); } DECL_CONTEXT (decl) = FROB_CONTEXT (current_namespace); - decl = pushdecl (decl); + decl = pushdecl (decl, hiding); return decl; } @@ -9925,7 +9933,7 @@ grokfndecl (tree ctype, /* Attempt to merge the declarations. This can fail, in the case of some invalid specialization declarations. */ pushed_scope = push_scope (ctype); - ok = duplicate_decls (decl, old_decl, friendp); + ok = duplicate_decls (decl, old_decl); if (pushed_scope) pop_scope (pushed_scope); if (!ok) diff --git a/gcc/cp/friend.c b/gcc/cp/friend.c index e484134..6a783a9 100644 --- a/gcc/cp/friend.c +++ b/gcc/cp/friend.c @@ -598,8 +598,8 @@ do_friend (tree ctype, tree declarator, tree decl, if (! DECL_USE_TEMPLATE (decl)) { /* We must check whether the decl refers to template - arguments before push_template_decl_real adds a - reference to the containing template class. */ + arguments before push_template_decl adds a reference to + the containing template class. */ int warn = (warn_nontemplate_friend && ! funcdef_flag && ! 
is_friend_template && current_template_parms @@ -614,7 +614,7 @@ do_friend (tree ctype, tree declarator, tree decl, decl = push_template_decl (decl, /*is_friend=*/true); else if (current_function_decl) /* pushdecl will check there's a local decl already. */ - decl = pushdecl (decl, /*is_friend=*/true); + decl = pushdecl (decl, /*hiding=*/true); else { /* We can't use pushdecl, as we might be in a template @@ -624,7 +624,7 @@ do_friend (tree ctype, tree declarator, tree decl, tree ns = decl_namespace_context (decl); push_nested_namespace (ns); - decl = pushdecl_namespace_level (decl, /*is_friend=*/true); + decl = pushdecl_namespace_level (decl, /*hiding=*/true); pop_nested_namespace (ns); } diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index 0115a4b..184e9c8 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -2232,7 +2232,7 @@ supplement_binding_1 (cxx_binding *binding, tree decl) && DECL_EXTERNAL (target_decl) && DECL_EXTERNAL (target_bval) && !DECL_CLASS_SCOPE_P (target_decl)) { - duplicate_decls (decl, binding->value, /*newdecl_is_friend=*/false); + duplicate_decls (decl, binding->value); ok = false; } else if (TREE_CODE (decl) == NAMESPACE_DECL @@ -2354,7 +2354,7 @@ matching_fn_p (tree one, tree two) static tree update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, - tree old, tree decl, bool is_friend) + tree old, tree decl, bool hiding = false) { tree to_val = decl; tree old_type = slot ? MAYBE_STAT_TYPE (*slot) : binding->type; @@ -2410,13 +2410,14 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, if (iter.using_p () && matching_fn_p (fn, decl)) { + gcc_checking_assert (!iter.hidden_p ()); /* If a function declaration in namespace scope or block scope has the same name and the same parameter-type- list (8.3.5) as a function introduced by a using-declaration, and the declarations do not declare the same function, the program is ill-formed. 
[namespace.udecl]/14 */ - if (tree match = duplicate_decls (decl, fn, is_friend)) + if (tree match = duplicate_decls (decl, fn, hiding)) return match; else /* FIXME: To preserve existing error behavior, we @@ -2468,7 +2469,7 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, variable, so long as they are `extern' declarations. */ if (!DECL_EXTERNAL (old) || !DECL_EXTERNAL (decl)) goto conflict; - else if (tree match = duplicate_decls (decl, old, false)) + else if (tree match = duplicate_decls (decl, old)) return match; else goto conflict; @@ -2989,12 +2990,12 @@ set_local_extern_decl_linkage (tree decl, bool shadowed) says. */ static tree -do_pushdecl (tree decl, bool is_friend) +do_pushdecl (tree decl, bool hiding) { if (decl == error_mark_node) return error_mark_node; - if (!DECL_TEMPLATE_PARM_P (decl) && current_function_decl && !is_friend) + if (!DECL_TEMPLATE_PARM_P (decl) && current_function_decl && !hiding) set_decl_context_in_fn (current_function_decl, decl); /* The binding level we will be pushing into. During local class @@ -3014,6 +3015,14 @@ do_pushdecl (tree decl, bool is_friend) tree *slot = NULL; /* Binding slot in namespace. */ tree old = NULL_TREE; + if (!hiding) + /* We should never unknownly push an anticipated decl. */ + gcc_checking_assert (!((TREE_CODE (decl) == TYPE_DECL + || TREE_CODE (decl) == FUNCTION_DECL + || TREE_CODE (decl) == TEMPLATE_DECL) + && DECL_LANG_SPECIFIC (decl) + && DECL_ANTICIPATED (decl))); + if (level->kind == sk_namespace) { /* We look in the decl's namespace for an existing @@ -3044,7 +3053,8 @@ do_pushdecl (tree decl, bool is_friend) for (ovl_iterator iter (old); iter; ++iter) if (iter.using_p ()) ; /* Ignore using decls here. 
*/ - else if (tree match = duplicate_decls (decl, *iter, is_friend)) + else if (tree match + = duplicate_decls (decl, *iter, hiding, iter.hidden_p ())) { if (match == error_mark_node) ; @@ -3052,7 +3062,7 @@ do_pushdecl (tree decl, bool is_friend) /* The IDENTIFIER will have the type referring to the now-smashed TYPE_DECL, because ...? Reset it. */ SET_IDENTIFIER_TYPE_VALUE (name, TREE_TYPE (match)); - else if (iter.hidden_p () && !DECL_HIDDEN_P (match)) + else if (iter.hidden_p () && !hiding) { /* Unhiding a previously hidden decl. */ tree head = iter.reveal_node (old); @@ -3088,7 +3098,7 @@ do_pushdecl (tree decl, bool is_friend) { check_default_args (decl); - if (is_friend) + if (hiding) { if (level->kind != sk_namespace) { @@ -3126,7 +3136,7 @@ do_pushdecl (tree decl, bool is_friend) old = MAYBE_STAT_DECL (*slot); } - old = update_binding (level, binding, slot, old, decl, is_friend); + old = update_binding (level, binding, slot, old, decl, hiding); if (old != decl) /* An existing decl matched, use it. */ @@ -3170,10 +3180,10 @@ do_pushdecl (tree decl, bool is_friend) we push it. */ tree -pushdecl (tree x, bool is_friend) +pushdecl (tree x, bool hiding) { bool subtime = timevar_cond_start (TV_NAME_LOOKUP); - tree ret = do_pushdecl (x, is_friend); + tree ret = do_pushdecl (x, hiding); timevar_cond_stop (TV_NAME_LOOKUP, subtime); return ret; } @@ -3780,7 +3790,7 @@ set_identifier_type_value_with_scope (tree id, tree decl, cp_binding_level *b) { tree *slot = find_namespace_slot (current_namespace, id, true); gcc_assert (decl); - update_binding (b, NULL, slot, MAYBE_STAT_DECL (*slot), decl, false); + update_binding (b, NULL, slot, MAYBE_STAT_DECL (*slot), decl); /* Store marker instead of real type. */ type = global_type_node; @@ -3836,12 +3846,13 @@ constructor_name_p (tree name, tree type) closer binding level than LEVEL. 
*/ static tree -do_pushdecl_with_scope (tree x, cp_binding_level *level, bool is_friend) +do_pushdecl_with_scope (tree x, cp_binding_level *level, bool hiding = false) { cp_binding_level *b; if (level->kind == sk_class) { + gcc_checking_assert (!hiding); b = class_binding_level; class_binding_level = level; pushdecl_class_level (x); @@ -3854,7 +3865,7 @@ do_pushdecl_with_scope (tree x, cp_binding_level *level, bool is_friend) current_function_decl = NULL_TREE; b = current_binding_level; current_binding_level = level; - x = pushdecl (x, is_friend); + x = pushdecl (x, hiding); current_binding_level = b; current_function_decl = function_decl; } @@ -3874,7 +3885,7 @@ pushdecl_outermost_localscope (tree x) n->kind != sk_function_parms; n = b->level_chain) b = n; - tree ret = b ? do_pushdecl_with_scope (x, b, false) : error_mark_node; + tree ret = b ? do_pushdecl_with_scope (x, b) : error_mark_node; timevar_cond_stop (TV_NAME_LOOKUP, subtime); return ret; @@ -5072,14 +5083,13 @@ do_namespace_alias (tree alias, tree name_space) if appropriate. */ tree -pushdecl_namespace_level (tree x, bool is_friend) +pushdecl_namespace_level (tree x, bool hiding) { cp_binding_level *b = current_binding_level; tree t; bool subtime = timevar_cond_start (TV_NAME_LOOKUP); - t = do_pushdecl_with_scope - (x, NAMESPACE_LEVEL (current_namespace), is_friend); + t = do_pushdecl_with_scope (x, NAMESPACE_LEVEL (current_namespace), hiding); /* Now, the type_shadowed stack may screw us. Munge it so it does what we want. */ @@ -7282,11 +7292,11 @@ finish_using_directive (tree target, tree attribs) /* Pushes X into the global namespace. 
*/ tree -pushdecl_top_level (tree x, bool is_friend) +pushdecl_top_level (tree x) { bool subtime = timevar_cond_start (TV_NAME_LOOKUP); do_push_to_top_level (); - x = pushdecl_namespace_level (x, is_friend); + x = pushdecl_namespace_level (x); do_pop_from_top_level (); timevar_cond_stop (TV_NAME_LOOKUP, subtime); return x; diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h index 82f4d51..7b46338 100644 --- a/gcc/cp/name-lookup.h +++ b/gcc/cp/name-lookup.h @@ -337,7 +337,7 @@ extern tree lookup_qualified_name (tree scope, const char *name, bool = true); extern bool is_local_extern (tree); extern bool pushdecl_class_level (tree); -extern tree pushdecl_namespace_level (tree, bool is_friend = false); +extern tree pushdecl_namespace_level (tree, bool hiding = false); extern bool push_class_level_binding (tree, tree); extern tree get_local_decls (); extern int function_parm_depth (void); @@ -363,9 +363,9 @@ extern void cp_emit_debug_info_for_using (tree, tree); extern void finish_nonmember_using_decl (tree scope, tree name); extern void finish_using_directive (tree target, tree attribs); -extern tree pushdecl (tree, bool is_friend = false); +extern tree pushdecl (tree, bool hiding = false); extern tree pushdecl_outermost_localscope (tree); -extern tree pushdecl_top_level (tree, bool is_friend = false); +extern tree pushdecl_top_level (tree); extern tree pushdecl_top_level_and_finish (tree, tree); extern tree pushtag (tree, tree, TAG_how = TAG_how::CURRENT_ONLY); extern int push_namespace (tree, bool make_inline = false); diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index a4530db..199fe65 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -1635,7 +1635,7 @@ register_specialization (tree spec, tree tmpl, tree args, bool is_friend, for the specialization, we want this to look as if there were no definition, and vice versa. 
*/ DECL_INITIAL (fn) = NULL_TREE; - duplicate_decls (spec, fn, is_friend); + duplicate_decls (spec, fn, /*hiding=*/is_friend); /* The call to duplicate_decls will have applied [temp.expl.spec]: @@ -1662,7 +1662,7 @@ register_specialization (tree spec, tree tmpl, tree args, bool is_friend, } else if (DECL_TEMPLATE_SPECIALIZATION (fn)) { - tree dd = duplicate_decls (spec, fn, is_friend); + tree dd = duplicate_decls (spec, fn, /*hiding=*/is_friend); if (dd == error_mark_node) /* We've already complained in duplicate_decls. */ return error_mark_node; @@ -1677,7 +1677,7 @@ register_specialization (tree spec, tree tmpl, tree args, bool is_friend, } } else if (fn) - return duplicate_decls (spec, fn, is_friend); + return duplicate_decls (spec, fn, /*hiding=*/is_friend); /* A specialization must be declared in the same namespace as the template it is specializing. */ @@ -6018,7 +6018,7 @@ push_template_decl (tree decl, bool is_friend) if (!ctx && !(is_friend && template_class_depth (current_class_type) > 0)) { - tmpl = pushdecl_namespace_level (tmpl, is_friend); + tmpl = pushdecl_namespace_level (tmpl, /*hiding=*/is_friend); if (tmpl == error_mark_node) return error_mark_node; @@ -11078,7 +11078,7 @@ tsubst_friend_function (tree decl, tree args) into the namespace of the template. */ ns = decl_namespace_context (new_friend); push_nested_namespace (ns); - old_decl = pushdecl_namespace_level (new_friend, /*is_friend=*/true); + old_decl = pushdecl_namespace_level (new_friend, /*hiding=*/true); pop_nested_namespace (ns); if (old_decl == error_mark_node) @@ -11323,7 +11323,7 @@ tsubst_friend_class (tree friend_tmpl, tree args) } /* Inject this template into the enclosing namspace scope. 
*/ - tmpl = pushdecl_namespace_level (tmpl, true); + tmpl = pushdecl_namespace_level (tmpl, /*hiding=*/true); } } -- cgit v1.1 From 67a5c215940f4b21bac1aa489ce1f2fb3d52a53a Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 26 Sep 2020 00:01:57 +0200 Subject: Fix gimple_clobber handling in ipa-modref 2020-09-25 Jan Hubicka * ipa-modref.c (analyze_stmt): Fix return value for gimple_clobber. --- gcc/ipa-modref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index aa6929f..44b844b 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -658,7 +658,7 @@ analyze_stmt (modref_summary *summary, gimple *stmt, bool ipa) { /* There is no need to record clobbers. */ if (gimple_clobber_p (stmt)) - return false; + return true; /* Analyze all loads and stores in STMT. */ walk_stmt_load_store_ops (stmt, summary, analyze_load, analyze_store); -- cgit v1.1 From f46f899b4fa41ec889540c4a663feaa814b73d24 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 26 Sep 2020 00:05:53 +0200 Subject: Disable ipa-modref with live patching 2020-09-26 Jan Hubicka * doc/invoke.texi: Add -fno-ipa-modref to flags disabled by -flive-patching. * opts.c (control_options_for_live_patching): Disable ipa-modref. --- gcc/doc/invoke.texi | 2 +- gcc/opts.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 2091e0c..226b0e1 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -10879,7 +10879,7 @@ callers are impacted, therefore need to be patched as well. 
@gccoptlist{-fwhole-program -fipa-pta -fipa-reference -fipa-ra @gol -fipa-icf -fipa-icf-functions -fipa-icf-variables @gol -fipa-bit-cp -fipa-vrp -fipa-pure-const -fipa-reference-addressable @gol --fipa-stack-alignment} +-fipa-stack-alignment -fipa-modref} @item inline-only-static diff --git a/gcc/opts.c b/gcc/opts.c index 3c4a0b5..3bda59a 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -792,6 +792,13 @@ control_options_for_live_patching (struct gcc_options *opts, else opts->x_flag_ipa_pure_const = 0; + if (opts_set->x_flag_ipa_modref && opts->x_flag_ipa_modref) + error_at (loc, + "%<-fipa-modref%> is incompatible with " + "%<-flive-patching=inline-only-static|inline-clone%>"); + else + opts->x_flag_ipa_modref = 0; + /* FIXME: disable unreachable code removal. */ /* discovery of functions/variables with no address taken. */ -- cgit v1.1 From cdd8f031c7edf0fcbf5f20bf5fbd1526a64461d5 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sat, 26 Sep 2020 00:16:25 +0000 Subject: Daily bump. --- gcc/ChangeLog | 85 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/cp/ChangeLog | 66 ++++++++++++++++++++++++++++++++++++++ gcc/objcp/ChangeLog | 5 +++ gcc/testsuite/ChangeLog | 42 ++++++++++++++++++++++++ 5 files changed, 199 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b8ececa..d8144b2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,88 @@ +2020-09-25 Jan Hubicka + + * doc/invoke.texi: Add -fno-ipa-modref to flags disabled by + -flive-patching. + * opts.c (control_options_for_live_patching): Disable ipa-modref. + +2020-09-25 Jan Hubicka + + * ipa-modref.c (analyze_stmt): Fix return value for gimple_clobber. + +2020-09-25 Kyrylo Tkachov + + * config/aarch64/aarch64-option-extensions.def (rng): Add + cpuinfo string. + +2020-09-25 Alex Coplan + + * config/arm/arm-cpus.in (neoverse-v1): Add FP16. 
+ +2020-09-25 Martin Liska + + PR gcov-profile/64636 + * value-prof.c (stream_out_histogram_value): Allow negative + values for HIST_TYPE_IOR. + +2020-09-25 Tom de Vries + + * config/nvptx/nvptx.c (nvptx_assemble_integer, nvptx_print_operand): + Use gcc_fallthrough (). + +2020-09-25 Richard Biener + + PR middle-end/96814 + * expr.c (store_constructor): Handle VECTOR_BOOLEAN_TYPE_P + CTORs correctly. + +2020-09-25 Richard Biener + + PR middle-end/97207 + * vec.h (auto_vec::operator=(auto_vec&&)): Implement. + +2020-09-25 Richard Sandiford + + * config/arm/arm-protos.h (arm_mve_mode_and_operands_type_check): + Delete. + * config/arm/arm.c (arm_coproc_mem_operand_wb): Use a scale factor + of 2 rather than 4 for 16-bit modes. + (arm_mve_mode_and_operands_type_check): Delete. + * config/arm/constraints.md (Uj): Allow writeback for Neon, + but continue to disallow it for MVE. + * config/arm/arm.md (*arm32_mov): Add !TARGET_HAVE_MVE. + * config/arm/vfp.md (*mov_load_vfp_hf16, *mov_store_vfp_hf16): Fold + back into... + (*mov_vfp_16): ...here but use Uj for the FPR memory + constraints. Use for base MVE too. + +2020-09-25 Richard Biener + + PR tree-optimization/97199 + * tree-if-conv.c (combine_blocks): Remove edges only + after looking at virtual PHI args. + +2020-09-25 Jakub Jelinek + + * omp-low.c (scan_omp_1_stmt): Don't call scan_omp_simd for + collapse > 1 loops as simt doesn't support collapsed loops yet. + * omp-expand.c (expand_omp_for_init_counts, expand_omp_for_init_vars): + Small tweaks to function comment. + (expand_omp_simd): Rewritten collapse > 1 support to only attempt + to vectorize the innermost loop and emit set of outer loops around it. + For non-composite simd with collapse > 1 without broken loop don't + even try to compute number of iterations first. Add support for + non-rectangular simd loops. + (expand_omp_for): Don't sorry_at on non-rectangular simd loops. + +2020-09-25 Martin Liska + + * cgraph.c (cgraph_edge::debug): New. 
+ * cgraph.h (cgraph_edge::debug): New. + +2020-09-25 Martin Liska + + * cgraph.c (cgraph_node::dump): Always print space at the end + of a message. Remove one extra space. + 2020-09-24 Alex Coplan * config/arm/arm-cpus.in (neoverse-n2): New. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index cfe4a2e..bfdd19d 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200925 +20200926 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index e411f34..04f63a4 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,69 @@ +2020-09-25 Nathan Sidwell + + * cp-tree.h (duplicate_decls): Replace 'is_friend' with 'hiding' + and add 'was_hidden'. + * name-lookup.h (pushdecl_namespace_level): Replace 'is_friend' + with 'hiding'. + (pushdecl): Likewise. + (pushdecl_top_level): Drop is_friend parm. + * decl.c (check_no_redeclaration_friend_default_args): Rename parm + olddecl_hidden_p. + (duplicate_decls): Replace 'is_friend' with 'hiding' + and 'was_hidden'. Do minimal adjustments in body. + (cxx_builtin_function): Pass 'hiding' to pushdecl. + * friend.c (do_friend): Pass 'hiding' to pushdecl. + * name-lookup.c (supplement_binding_1): Drop defaulted arg to + duplicate_decls. + (update_binding): Replace 'is_friend' with 'hiding'. Drop + defaulted arg to duplicate_decls. + (do_pushdecl): Replace 'is_friend' with 'hiding'. Assert no + surprise hiding. Adjust duplicate_decls calls to inform of old + decl's hiddenness. + (pushdecl): Replace 'is_friend' with 'hiding'. + (set_identifier_type_value_with_scope): Adjust update_binding + call. + (do_pushdecl_with_scope): Replace 'is_friend' with 'hiding'. + (pushdecl_outermost_localscope): Drop default arg to + do_pushdecl_with_scope. + (pushdecl_namespace_level): Replace 'is_friend' with 'hiding'. + (pushdecl_top_level): Drop is_friend parm. + * pt.c (register_specialization): Comment duplicate_decls call + args. + (push_template_decl): Comment pushdecl_namespace_level. + (tsubst_friend_function, tsubst_friend_class): Likewise.
+ +2020-09-25 Nathan Sidwell + + * name-lookup.h (enum tag_scope): Replace with ... + (enum class TAG_how): ... this. Add HIDDEN_FRIEND value. + (lookup_type_scope): Replace with ... + (lookup_elaborated_type): ... this. + (pushtag): Use TAG_how, not tag_scope. + * cp-tree.h (xref_tag): Parameter is TAG_how, not tag_scope. + * decl.c (lookup_and_check_tag): Likewise. Adjust. + (xref_tag_1, xref_tag): Likewise. adjust. + (start_enum): Adjust lookup_and_check_tag call. + * name-lookup.c (lookup_type_scope_1): Rename to ... + (lookup_elaborated_type_1) ... here. Use TAG_how, not tag_scope. + (lookup_type_scope): Rename to ... + (lookup_elaborated_type): ... here. Use TAG_how, not tag_scope. + (do_pushtag): Use TAG_how, not tag_scope. Adjust. + (pushtag): Likewise. + * parser.c (cp_parser_elaborated_type_specifier): Adjust. + (cp_parser_class_head): Likewise. + +2020-09-25 Nathan Sidwell + + * cp-tree.h (DECL_BUILTIN_P): New. + * decl.c (duplicate_decls): Use it. Do not treat omp-udr as a + builtin. + * name-lookup.c (anticipated_builtin): Use it. + (set_decl_context_in_fn): Function-scope OMP UDRs have function context. + (do_nonmember_using_decl): Use DECL_BUILTIN_P. + * parser.c (cp_parser_omp_declare_reduction): Function-scope OMP + UDRs have function context. Assert we never find a valid duplicate. + * pt.c (tsubst_expr): Function-scope OMP UDRs have function context. + 2020-09-24 Nathan Sidwell * cp-tree.h (duplicate_decls): Default is_friend to false. diff --git a/gcc/objcp/ChangeLog b/gcc/objcp/ChangeLog index 993c36d5..64a0deb 100644 --- a/gcc/objcp/ChangeLog +++ b/gcc/objcp/ChangeLog @@ -1,3 +1,8 @@ +2020-09-25 Nathan Sidwell + + * objcp-decl.c (objcp_start_struct): Use TAG_how not tag_scope. + (objcp_xref_tag): Likewise. 
+ 2020-09-24 Nathan Sidwell * objcp-decl.c (objcp_start_struct): Drop default args to diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0a190f8..df23a09 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,45 @@ +2020-09-25 Richard Biener + + PR middle-end/96814 + * gcc.target/i386/pr96814.c: New testcase. + +2020-09-25 Richard Sandiford + + * gcc.target/arm/mve/intrinsics/mve-vldstr16-no-writeback.c: Allow + the store to use GPRs instead of FPRs. Add scan-assembler-nots + for writeback. + * gcc.target/arm/armv8_1m-fp16-move-1.c: New test. + * gcc.target/arm/armv8_1m-fp32-move-1.c: Likewise. + * gcc.target/arm/armv8_1m-fp64-move-1.c: Likewise. + +2020-09-25 Christophe Lyon + + PR target/71233 + * gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c: Remove + declarations of vector, vector2, vector_res for float64x2 type. + * gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c: Likewise. + * gcc.target/aarch64/advsimd-intrinsics/vzip_half.c: Likewise. + +2020-09-25 Richard Biener + + PR testsuite/97204 + * gcc.target/i386/sse2-mmx-pinsrw.c: Fix. + +2020-09-25 Jakub Jelinek + + * gcc.dg/vect/vect-simd-17.c: New test. + +2020-09-25 Tom de Vries + + * gcc.dg/analyzer/pr93355-localealias.c: Require effective target + alloca. + +2020-09-25 Tom de Vries + + * lib/target-supports.exp (check_effective_target_ident_directive): New proc. + * c-c++-common/ident-1b.c: Require effective target ident_directive. + * c-c++-common/ident-2b.c: Same. + 2020-09-24 Nathan Sidwell * g++.dg/template/local-var1.C: New. 
-- cgit v1.1 From 29f5db8ef81fac4db8e66e5f06fdf1d469e8161c Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Fri, 25 Sep 2020 17:15:42 -0400 Subject: analyzer: fix ICEs treeifying offset_region [PR96646, PR96841] gcc/analyzer/ChangeLog: PR analyzer/96646 PR analyzer/96841 * region-model.cc (region_model::get_representative_path_var): When handling offset_region, wrap the MEM_REF's first argument in an ADDR_EXPR of pointer type, rather than simply using the tree for the parent region. Require the MEM_REF's second argument to be an integer constant. gcc/testsuite/ChangeLog: PR analyzer/96646 PR analyzer/96841 * gcc.dg/analyzer/pr96646.c: New test. * gcc.dg/analyzer/pr96841.c: New test. --- gcc/analyzer/region-model.cc | 7 +++++-- gcc/testsuite/gcc.dg/analyzer/pr96646.c | 24 ++++++++++++++++++++++++ gcc/testsuite/gcc.dg/analyzer/pr96841.c | 23 +++++++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr96646.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr96841.c (limited to 'gcc') diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index 981fb77..a88a295 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -2140,11 +2140,14 @@ region_model::get_representative_path_var (const region *reg, path_var offset_pv = get_representative_path_var (offset_reg->get_byte_offset (), visited); - if (!offset_pv) + if (!offset_pv || TREE_CODE (offset_pv.m_tree) != INTEGER_CST) return path_var (NULL_TREE, 0); + tree addr_parent = build1 (ADDR_EXPR, + build_pointer_type (reg->get_type ()), + parent_pv.m_tree); return path_var (build2 (MEM_REF, reg->get_type (), - parent_pv.m_tree, offset_pv.m_tree), + addr_parent, offset_pv.m_tree), parent_pv.m_stack_depth); } diff --git a/gcc/testsuite/gcc.dg/analyzer/pr96646.c b/gcc/testsuite/gcc.dg/analyzer/pr96646.c new file mode 100644 index 0000000..2ac5a03 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr96646.c @@ -0,0 +1,24 
@@ +/* { dg-additional-options "-O1" } */ + +struct zx { + struct zx *b4, *g0; +}; + +struct oo { + void *ph; + struct zx el; +}; + +inline void +k7 (struct zx *xj) +{ + xj->b4->g0 = 0; /* { dg-warning "dereference of NULL" } */ + xj->b4 = 0; +} + +void +n8 (struct oo *yx) +{ + k7 (&yx->el); + n8 (yx); +} diff --git a/gcc/testsuite/gcc.dg/analyzer/pr96841.c b/gcc/testsuite/gcc.dg/analyzer/pr96841.c new file mode 100644 index 0000000..d9d35f3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr96841.c @@ -0,0 +1,23 @@ +/* { dg-additional-options "-O1 -Wno-builtin-declaration-mismatch" } */ + +int +l8 (void); + +__SIZE_TYPE__ +malloc (__SIZE_TYPE__); + +void +th (int *); + +void +bv (__SIZE_TYPE__ ny) +{ + int ***mf; + + while (l8 ()) + { + *mf = 0; + (*mf)[ny] = (int *) malloc (sizeof (int)); + th ((*mf)[ny]); /* { dg-warning "leak" } */ + } +} -- cgit v1.1 From d4a906e7b51f3fc31f3328810f45ae4cf2e7bbc3 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Fri, 25 Sep 2020 14:31:46 -0400 Subject: analyzer: add test for placement new gcc/testsuite/ChangeLog: PR analyzer/94355 * g++.dg/analyzer/placement-new.C: New test. --- gcc/testsuite/g++.dg/analyzer/placement-new.C | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 gcc/testsuite/g++.dg/analyzer/placement-new.C (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/analyzer/placement-new.C b/gcc/testsuite/g++.dg/analyzer/placement-new.C new file mode 100644 index 0000000..8250f45 --- /dev/null +++ b/gcc/testsuite/g++.dg/analyzer/placement-new.C @@ -0,0 +1,26 @@ +#include + +/* Placement new. */ + +void test_1 (void) +{ + char buf[sizeof(int)]; + int *p = new(buf) int (42); +} + +/* Placement new[]. */ + +void test_2 (void) +{ + char buf[sizeof(int) * 10]; + int *p = new(buf) int[10]; +} + +/* Delete of placement new. 
*/ + +void test_3 (void) +{ + char buf[sizeof(int)]; + int *p = new(buf) int (42); + delete p; // { dg-warning "memory not on the heap" } +} -- cgit v1.1 From 5a90a18668fef8d51e5b3fe9f69123f53cbd8f25 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 26 Sep 2020 08:09:53 +0200 Subject: Add support for iterative dataflow to ipa-modref-tree.h Track if insert and merge operations changed anything in the summary. gcc/ChangeLog: 2020-09-26 Jan Hubicka * ipa-modref-tree.h (modref_ref_node::insert_access): Track if something changed. (modref_base_node::insert_ref): Likewise (and add a new optional argument) (modref_tree::insert): Likewise. (modref_tree::merge): Rewrite --- gcc/ipa-modref-tree.h | 192 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 124 insertions(+), 68 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-modref-tree.h b/gcc/ipa-modref-tree.h index caf5d34..abf3fc1 100644 --- a/gcc/ipa-modref-tree.h +++ b/gcc/ipa-modref-tree.h @@ -88,17 +88,18 @@ struct GTY((user)) modref_ref_node } /* Insert access with OFFSET and SIZE. - Collapse tree if it has more than MAX_ACCESSES entries. */ - void insert_access (modref_access_node a, size_t max_accesses) + Collapse tree if it has more than MAX_ACCESSES entries. + Return true if record was changed. */ + bool insert_access (modref_access_node a, size_t max_accesses) { /* If this base->ref pair has no access information, bail out. */ if (every_access) - return; + return false; /* Otherwise, insert a node for the ref of the access under the base. */ modref_access_node *access_node = search (a); if (access_node) - return; + return false; /* If this base->ref pair has too many accesses stored, we will clear all accesses and bail out. 
*/ @@ -109,9 +110,10 @@ struct GTY((user)) modref_ref_node fprintf (dump_file, "--param param=modref-max-accesses limit reached\n"); collapse (); - return; + return true; } vec_safe_push (accesses, a); + return true; } }; @@ -139,8 +141,11 @@ struct GTY((user)) modref_base_node return NULL; } - /* Insert REF; collapse tree if there are more than MAX_REFS. */ - modref_ref_node *insert_ref (T ref, size_t max_refs) + /* Insert REF; collapse tree if there are more than MAX_REFS. + Return inserted ref and if CHANGED is non-null set it to true if + something changed. */ + modref_ref_node *insert_ref (T ref, size_t max_refs, + bool *changed = NULL) { modref_ref_node *ref_node; @@ -153,6 +158,9 @@ struct GTY((user)) modref_base_node if (ref_node) return ref_node; + if (changed) + *changed = true; + /* Collapse the node if too full already. */ if (refs && refs->length () >= max_refs) { @@ -204,7 +212,11 @@ struct GTY((user)) modref_tree max_accesses (max_accesses), every_base (false) {} - modref_base_node *insert_base (T base) + /* Insert BASE; collapse tree if there are more than MAX_REFS. + Return inserted base and if CHANGED is non-null set it to true if + something changed. */ + + modref_base_node *insert_base (T base, bool *changed = NULL) { modref_base_node *base_node; @@ -217,6 +229,9 @@ struct GTY((user)) modref_tree if (base_node) return base_node; + if (changed) + *changed = true; + /* Collapse the node if too full already. */ if (bases && bases->length () >= max_bases) { @@ -232,43 +247,72 @@ struct GTY((user)) modref_tree return base_node; } - /* Insert memory access to the tree. */ - void insert (T base, T ref, modref_access_node a) + /* Insert memory access to the tree. + Return true if something changed. */ + bool insert (T base, T ref, modref_access_node a) { + if (every_base) + return false; + + bool changed = false; + /* No useful information tracked; collapse everything. 
*/ if (!base && !ref && !a.useful_p ()) { collapse (); - return; + return true; } - modref_base_node *base_node = insert_base (base); - if (!base_node) - return; - gcc_assert (search (base) != NULL); + modref_base_node *base_node = insert_base (base, &changed); + if (!base_node || base_node->every_ref) + return changed; + gcc_checking_assert (search (base) != NULL); + + /* No useful ref info tracked; collapse base. */ + if (!ref && !a.useful_p ()) + { + base_node->collapse (); + return true; + } - modref_ref_node *ref_node = base_node->insert_ref (ref, max_refs); + modref_ref_node *ref_node = base_node->insert_ref (ref, max_refs, + &changed); - /* No useful ref information and no useful base; collapse everyting. */ - if (!base && base_node->every_ref) + /* If we failed to insert ref, just see if there is a cleanup possible. */ + if (!ref_node) { - collapse (); - return; + /* No useful ref information and no useful base; collapse everyting. */ + if (!base && base_node->every_ref) + { + collapse (); + gcc_checking_assert (changed); + } + else if (changed) + cleanup (); } - if (ref_node) + else { - /* No useful ref and access; collapse ref. */ - if (!ref && !a.useful_p ()) - ref_node->collapse (); - else + if (ref_node->every_access) + return changed; + changed |= ref_node->insert_access (a, max_accesses); + /* See if we failed to add useful access. */ + if (ref_node->every_access) { - ref_node->insert_access (a, max_accesses); - /* If ref has collapses and there is no useful base; collapse - everything. */ - if (!base && !ref && ref_node->every_access) - collapse (); + /* Collapse everything if there is no useful base and ref. */ + if (!base && !ref) + { + collapse (); + gcc_checking_assert (changed); + } + /* Collapse base if there is no useful ref. */ + else if (!ref) + { + base_node->collapse (); + gcc_checking_assert (changed); + } } } + return changed; } /* Remove tree branches that are not useful (i.e. they will allways pass). 
*/ @@ -317,62 +361,74 @@ struct GTY((user)) modref_tree /* Merge OTHER into the tree. PARM_MAP, if non-NULL, maps parm indexes of callee to caller. -2 is used - to signalize that parameter is local and does not need to be tracked. */ - void merge (modref_tree *other, vec *parm_map) + to signalize that parameter is local and does not need to be tracked. + Return true if something has changed. */ + bool merge (modref_tree *other, vec *parm_map) { - if (!other) - return; + if (!other || every_base) + return false; if (other->every_base) { collapse (); - return; + return true; } + bool changed = false; size_t i, j, k; modref_base_node *base_node, *my_base_node; - modref_ref_node *ref_node, *my_ref_node; + modref_ref_node *ref_node; modref_access_node *access_node; - FOR_EACH_VEC_SAFE_ELT (other->bases, i, base_node) + bool release = false; + + /* For self-recursive functions we may end up merging summary into itself; + produce copy first so we do not modify summary under our own hands. */ + if (other == this) { - my_base_node = insert_base (base_node->base); - if (!my_base_node) - continue; + release = true; + other = modref_tree::create_ggc (max_bases, max_refs, max_accesses); + other->copy_from (this); + } + FOR_EACH_VEC_SAFE_ELT (other->bases, i, base_node) + { if (base_node->every_ref) { - my_base_node->collapse (); - continue; - } - - FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node) - { - my_ref_node = my_base_node->insert_ref (ref_node->ref, max_refs); - if (!my_ref_node) - continue; - - if (ref_node->every_access) + my_base_node = insert_base (base_node->base, &changed); + if (my_base_node && !my_base_node->every_ref) { - my_ref_node->collapse (); - continue; + my_base_node->collapse (); + cleanup (); + changed = true; } - FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node) - { - modref_access_node a = *access_node; - if (a.parm_index != -1 && parm_map) + } + else + FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node) + { + if (ref_node->every_access) 
+ { + modref_access_node a = {-1}; + changed |= insert (base_node->base, ref_node->ref, a); + } + else + FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node) { - if (a.parm_index >= (int)parm_map->length ()) - a.parm_index = -1; - else if ((*parm_map) [a.parm_index] == -2) - continue; - else - a.parm_index = (*parm_map) [a.parm_index]; + modref_access_node a = *access_node; + if (a.parm_index != -1 && parm_map) + { + if (a.parm_index >= (int)parm_map->length ()) + a.parm_index = -1; + else if ((*parm_map) [a.parm_index] == -2) + continue; + else + a.parm_index = (*parm_map) [a.parm_index]; + } + changed |= insert (base_node->base, ref_node->ref, a); } - my_ref_node->insert_access (a, max_accesses); - } - } + } } - if (parm_map) - cleanup (); + if (release) + ggc_delete (other); + return changed; } /* Copy OTHER to THIS. */ -- cgit v1.1 From b89e4559950c79d8933013305c19d7014a39cdb1 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 26 Sep 2020 08:12:44 +0200 Subject: Track arguments pointing to local or readonly memory in ipa-fnsummary This patch implements tracking whether an argument points to readonly memory. This is useful for ipa-modref as well as for inline heuristics. It is desirable to inline functions that dereference pointers to local variables in order to support SRA. We always used the opposite heuristic (guessing that the dereferences will be optimized out with 50% probability), but here we could increase the probability for cases where we can track that the argument is indeed local memory (or readonly, which is also good). * ipa-fnsummary.c (dump_ipa_call_summary): Dump points_to_local_or_readonly_memory flag. (analyze_function_body): Compute points_to_local_or_readonly_memory flag. (remap_edge_change_prob): Rename to ... (remap_edge_params): ... this one; update points_to_local_or_readonly_memory. (remap_edge_summaries): Update. (read_ipa_call_summary): Stream the new flag. (write_ipa_call_summary): Likewise. 
* ipa-predicate.h (struct inline_param_summary): Add points_to_local_or_readonly_memory. (inline_param_summary::equal_to): Update. (inline_param_summary::useless_p): Update. --- gcc/ipa-fnsummary.c | 43 +++++++++++++++++++++++++++++++++++-------- gcc/ipa-predicate.h | 10 +++++++--- 2 files changed, 42 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index cbcf0c4..4c1c1f9 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -980,6 +980,9 @@ dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node, else if (prob != REG_BR_PROB_BASE) fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i, prob * 100.0 / REG_BR_PROB_BASE); + if (es->param[i].points_to_local_or_readonly_memory) + fprintf (f, "%*s op%i points to local or readonly memory\n", + indent + 2, "", i); } if (!edge->inline_failed) { @@ -2671,6 +2674,9 @@ analyze_function_body (struct cgraph_node *node, bool early) int prob = param_change_prob (&fbi, stmt, i); gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE); es->param[i].change_prob = prob; + es->param[i].points_to_local_or_readonly_memory + = points_to_local_or_readonly_memory_p + (gimple_call_arg (stmt, i)); } } @@ -3781,15 +3787,17 @@ inline_update_callee_summaries (struct cgraph_node *node, int depth) ipa_call_summaries->get (e)->loop_depth += depth; } -/* Update change_prob of EDGE after INLINED_EDGE has been inlined. +/* Update change_prob and points_to_local_or_readonly_memory of EDGE after + INLINED_EDGE has been inlined. + When function A is inlined in B and A calls C with parameter that changes with probability PROB1 and C is known to be passthrough of argument if B that change with probability PROB2, the probability of change is now PROB1*PROB2. 
*/ static void -remap_edge_change_prob (struct cgraph_edge *inlined_edge, - struct cgraph_edge *edge) +remap_edge_params (struct cgraph_edge *inlined_edge, + struct cgraph_edge *edge) { if (ipa_node_params_sum) { @@ -3823,7 +3831,16 @@ remap_edge_change_prob (struct cgraph_edge *inlined_edge, prob = 1; es->param[i].change_prob = prob; + + if (inlined_es + ->param[id].points_to_local_or_readonly_memory) + es->param[i].points_to_local_or_readonly_memory = true; } + if (!es->param[i].points_to_local_or_readonly_memory + && jfunc->type == IPA_JF_CONST + && points_to_local_or_readonly_memory_p + (ipa_get_jf_constant (jfunc))) + es->param[i].points_to_local_or_readonly_memory = true; } } } @@ -3856,7 +3873,7 @@ remap_edge_summaries (struct cgraph_edge *inlined_edge, if (e->inline_failed) { class ipa_call_summary *es = ipa_call_summaries->get (e); - remap_edge_change_prob (inlined_edge, e); + remap_edge_params (inlined_edge, e); if (es->predicate) { @@ -3882,7 +3899,7 @@ remap_edge_summaries (struct cgraph_edge *inlined_edge, predicate p; next = e->next_callee; - remap_edge_change_prob (inlined_edge, e); + remap_edge_params (inlined_edge, e); if (es->predicate) { p = es->predicate->remap_after_inlining @@ -4208,12 +4225,19 @@ read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e, { es->param.safe_grow_cleared (length, true); for (i = 0; i < length; i++) - es->param[i].change_prob = streamer_read_uhwi (ib); + { + es->param[i].change_prob = streamer_read_uhwi (ib); + es->param[i].points_to_local_or_readonly_memory + = streamer_read_uhwi (ib); + } } else { for (i = 0; i < length; i++) - streamer_read_uhwi (ib); + { + streamer_read_uhwi (ib); + streamer_read_uhwi (ib); + } } } @@ -4438,7 +4462,10 @@ write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e) streamer_write_uhwi (ob, 0); streamer_write_uhwi (ob, es->param.length ()); for (i = 0; i < (int) es->param.length (); i++) - streamer_write_uhwi (ob, es->param[i].change_prob); + { + 
streamer_write_uhwi (ob, es->param[i].change_prob); + streamer_write_uhwi (ob, es->param[i].points_to_local_or_readonly_memory); + } } diff --git a/gcc/ipa-predicate.h b/gcc/ipa-predicate.h index 9b75ffc..05e3707 100644 --- a/gcc/ipa-predicate.h +++ b/gcc/ipa-predicate.h @@ -76,14 +76,18 @@ struct inline_param_summary parameters REG_BR_PROB_BASE. Value 0 is reserved for compile time invariants. */ - int change_prob; + short change_prob; + unsigned points_to_local_or_readonly_memory : 1; bool equal_to (const inline_param_summary &other) const { - return change_prob == other.change_prob; + return change_prob == other.change_prob + && points_to_local_or_readonly_memory + == other.points_to_local_or_readonly_memory; } bool useless_p (void) const { - return change_prob == REG_BR_PROB_BASE; + return change_prob == REG_BR_PROB_BASE + && !points_to_local_or_readonly_memory; } }; -- cgit v1.1 From f0ae0d512d86352aba976d01ac2929bf04a65c74 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 26 Sep 2020 08:13:52 +0200 Subject: Disable modref for ipa-pta-13.c * gcc.dg/ipa/ipa-pta-13.c: Disable modref. --- gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c b/gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c index 93dd871..e7bf6d4 100644 --- a/gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c +++ b/gcc/testsuite/gcc.dg/ipa/ipa-pta-13.c @@ -1,5 +1,5 @@ /* { dg-do link } */ -/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta2-details -fdump-tree-fre3 -fno-ipa-icf" } */ +/* { dg-options "-O2 -fipa-pta -fdump-ipa-pta2-details -fdump-tree-fre3 -fno-ipa-icf -fno-ipa-modref" } */ static int x, y; -- cgit v1.1 From a29bd4f59e9eebf52ac41f7d7a6fa83cf2aae09d Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sat, 26 Sep 2020 10:10:09 +0200 Subject: openmp: Improve #pragma omp simd vectorization As mentioned earlier, the vectorizer punts on vectorization of loops with non-constant steps. 
As for OpenMP loops it is by the language restriction always possible to compute the number of loop iterations before the loop, this change helps those cases by computing it and using an alternate IV that iterates from 0 to < niterations with step of 1 next to the normal IV which will be just linear in that. List of functions where we compared to current trunk vectorize some loops where we previously didn't (for c-c++-common only listing the C function names, both C and C++ are affected though): gcc/testsuite/gcc.dg/vect/vect-simd-17.c doit gcc/testsuite/gcc.dg/vect/vect-simd-18.c foo gcc/testsuite/gcc.dg/vect/vect-simd-19.c foo gcc/testsuite/gcc.dg/vect/vect-simd-20.c foo libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_f_simd_auto libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_f_simd_guided32 libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_f_simd_runtime libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_f_simd_static libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_f_simd_static32 libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_pf_simd_auto._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_pf_simd_guided32._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_pf_simd_runtime._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_pf_simd_static32._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_pf_simd_static._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-2.c f3_simd_normal libgomp/testsuite/libgomp.c-c++-common/for-2.c f5_simd_normal libgomp/testsuite/libgomp.c-c++-common/for-2.c f6_simd_normal libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_auto._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_ds128_auto._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_ds128_guided32._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_ds128_runtime._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_ds128_static32._omp_fn.0 
libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_ds128_static._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_guided32._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_runtime._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_static32._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_dpfs_static._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_ds_ds128_normal libgomp/testsuite/libgomp.c-c++-common/for-3.c f3_ds_normal libgomp/testsuite/libgomp.c-c++-common/for-4.c f3_taskloop_simd_normal._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_tpf_simd_auto._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_tpf_simd_guided32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_tpf_simd_runtime._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_tpf_simd_static32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_tpf_simd_static._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_t_simd_normal._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_auto._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_ds128_auto._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_ds128_guided32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_ds128_runtime._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_ds128_static32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_ds128_static._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_guided32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_runtime._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_static32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttdpfs_static._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttds_ds128_normal._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-5.c f3_ttds_normal._omp_fn.0 
libgomp/testsuite/libgomp.c-c++-common/for-5.c f5_t_simd_normal._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-5.c f6_t_simd_normal._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_auto._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_ds128_auto._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_ds128_guided32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_ds128_runtime._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_ds128_static32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_ds128_static._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_guided32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_runtime._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_static32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tdpfs_static._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tds_ds128_normal._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-6.c f3_tds_normal._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_auto._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_ds128_auto._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_ds128_guided32._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_ds128_runtime._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_ds128_static32._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_ds128_static._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_guided32._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_runtime._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_static32._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_dpfs_static._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_ds_ds128_normal libgomp/testsuite/libgomp.c-c++-common/for-14.c f3_ds_normal 
libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_auto._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_ds128_auto._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_ds128_guided32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_ds128_runtime._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_ds128_static32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_ds128_static._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_guided32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_runtime._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_static32._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tdpfs_static._omp_fn.1 libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tds_ds128_normal._omp_fn.0 libgomp/testsuite/libgomp.c-c++-common/for-15.c f3_tds_normal._omp_fn.0 2020-09-26 Jakub Jelinek * omp-expand.c (expand_omp_simd): Help vectorizer for the collapse == 1 and non-composite collapse > 1 case with non-constant innermost loop step by precomputing number of iterations before loop and using an alternate IV from 0 to number of iterations - 1 with step of 1. * gcc.dg/vect/vect-simd-17.c: Expect 11 or more vectorized loops. * gcc.dg/vect/vect-simd-18.c: New test. * gcc.dg/vect/vect-simd-19.c: New test. * gcc.dg/vect/vect-simd-20.c: New test. 
--- gcc/omp-expand.c | 110 +++++++++++++++++++++++++++++-- gcc/testsuite/gcc.dg/vect/vect-simd-17.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-simd-18.c | 40 +++++++++++ gcc/testsuite/gcc.dg/vect/vect-simd-19.c | 40 +++++++++++ gcc/testsuite/gcc.dg/vect/vect-simd-20.c | 43 ++++++++++++ 5 files changed, 230 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-simd-18.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-simd-19.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-simd-20.c (limited to 'gcc') diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index 9160022..99cb4f9 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -6452,6 +6452,56 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) } else expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); + tree altv = NULL_TREE, altn2 = NULL_TREE; + if (fd->collapse == 1 + && !broken_loop + && TREE_CODE (fd->loops[0].step) != INTEGER_CST) + { + /* The vectorizer currently punts on loops with non-constant steps + for the main IV (can't compute number of iterations and gives up + because of that). As for OpenMP loops it is always possible to + compute the number of iterations upfront, use an alternate IV + as the loop iterator: + altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0; + for (i = n1, altv = 0; altv < altn2; altv++, i += step) */ + altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v))); + expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv))); + tree itype = TREE_TYPE (fd->loop.v); + if (POINTER_TYPE_P (itype)) + itype = signed_type_for (itype); + t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, fd->loop.step), t); + t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2)); + t = fold_build2 (MINUS_EXPR, itype, t, + fold_convert (itype, fd->loop.v)); + if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, + fold_convert (itype, fd->loop.step))); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, + fold_convert (itype, fd->loop.step)); + t = fold_convert (TREE_TYPE (altv), t); + altn2 = create_tmp_var (TREE_TYPE (altv)); + expand_omp_build_assign (&gsi, altn2, t); + tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2); + t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE, + true, GSI_SAME_STMT); + t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2); + gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2, + build_zero_cst (TREE_TYPE (altv))); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + } + else if (fd->collapse > 1 + && !broken_loop + && !gimple_omp_for_combined_into_p (fd->for_stmt) + && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST) + { + altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v))); + altn2 = create_tmp_var (TREE_TYPE (altv)); + } if (cond_var) { if (POINTER_TYPE_P (type) @@ -6486,6 +6536,12 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) } else if (TREE_CODE (n2) != INTEGER_CST) expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type)); + if (altv) + { + t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv, + build_one_cst (TREE_TYPE (altv))); + expand_omp_build_assign (&gsi, altv, t); + } if (fd->collapse > 1) { @@ -6525,9 +6581,11 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) /* Emit the condition in L1_BB. 
*/ gsi = gsi_start_bb (l1_bb); - if (fd->collapse > 1 - && !gimple_omp_for_combined_into_p (fd->for_stmt) - && !broken_loop) + if (altv) + t = build2 (LT_EXPR, boolean_type_node, altv, altn2); + else if (fd->collapse > 1 + && !gimple_omp_for_combined_into_p (fd->for_stmt) + && !broken_loop) { i = fd->collapse - 1; tree itype = TREE_TYPE (fd->loops[i].v); @@ -6704,7 +6762,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t); if (fd->loops[i + 1].m2) { - if (i + 2 == fd->collapse && n2var) + if (i + 2 == fd->collapse && (n2var || altv)) { gcc_assert (n2v == NULL_TREE); n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v)); @@ -6761,6 +6819,50 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t); expand_omp_build_assign (&gsi, n2var, t); } + if (i + 2 == fd->collapse && altv) + { + /* The vectorizer currently punts on loops with non-constant + steps for the main IV (can't compute number of iterations + and gives up because of that). As for OpenMP loops it is + always possible to compute the number of iterations upfront, + use an alternate IV as the loop iterator. */ + expand_omp_build_assign (&gsi, altv, + build_zero_cst (TREE_TYPE (altv))); + tree itype = TREE_TYPE (fd->loops[i + 1].v); + if (POINTER_TYPE_P (itype)) + itype = signed_type_for (itype); + t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR + ? -1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, fd->loops[i + 1].step), t); + t = fold_build2 (PLUS_EXPR, itype, t, + fold_convert (itype, + fd->loops[i + 1].m2 + ? 
n2v : fd->loops[i + 1].n2)); + t = fold_build2 (MINUS_EXPR, itype, t, + fold_convert (itype, fd->loops[i + 1].v)); + tree step = fold_convert (itype, fd->loops[i + 1].step); + if (TYPE_UNSIGNED (itype) + && fd->loops[i + 1].cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + t = fold_convert (TREE_TYPE (altv), t); + expand_omp_build_assign (&gsi, altn2, t); + tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v), + fd->loops[i + 1].m2 + ? n2v : fd->loops[i + 1].n2); + t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE, + true, GSI_SAME_STMT); + t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node, + fd->loops[i + 1].v, t2); + gassign *g + = gimple_build_assign (altn2, COND_EXPR, t2, altn2, + build_zero_cst (TREE_TYPE (altv))); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + } n2v = nextn2v; make_edge (init_bb, last_bb, EDGE_FALLTHRU); diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-17.c b/gcc/testsuite/gcc.dg/vect/vect-simd-17.c index 9330aaa..951ba3a 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-simd-17.c +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-17.c @@ -1,6 +1,6 @@ /* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */ /* { dg-additional-options "-mavx" { target avx_runtime } } */ -/* { dg-final { scan-tree-dump "vectorized \(\[4-9]\|1\[0-2]\) loops" "vect" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump "vectorized 1\[1-2] loops" "vect" { target i?86-*-* x86_64-*-* } } } */ #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-18.c b/gcc/testsuite/gcc.dg/vect/vect-simd-18.c new file mode 100644 index 0000000..b25f5a5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-18.c @@ -0,0 +1,40 @@ +/* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { 
scan-tree-dump "vectorized 1 loops" "vect" { target i?86-*-* x86_64-*-* } } } */ + +#include "tree-vect.h" + +__attribute__((noipa)) int +foo (int s, int *p) +{ + int r = 0, l = 0, i; + #pragma omp simd reduction (+:r) linear(l) + for (i = 0; i < 10000; i += s) + { + p[l++] = i; + r += i * 3; + } + return r; +} + +int p[10000 / 78]; + +int +main () +{ + int i, r; + check_vect (); + r = foo (78, p); + for (i = 0; i < 10000 / 78; i++) + if (p[i] != 78 * i) + abort (); + if (r != (10000 / 78) * (10000 / 78 + 1) / 2 * 78 * 3) + abort (); + r = foo (87, p); + for (i = 0; i < 10000 / 87; i++) + if (p[i] != 87 * i) + abort (); + if (r != (10000 / 87) * (10000 / 87 + 1) / 2 * 87 * 3) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-19.c b/gcc/testsuite/gcc.dg/vect/vect-simd-19.c new file mode 100644 index 0000000..a71dfa6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-19.c @@ -0,0 +1,40 @@ +/* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target i?86-*-* x86_64-*-* } } } */ + +#include "tree-vect.h" + +__attribute__((noipa)) int +foo (int s, int m, int n, int *p) +{ + int r = 0, l = 0, i; + #pragma omp simd reduction (+:r) linear(l) + for (i = m; i < n; i += s) + { + p[l++] = i; + r += i * 3; + } + return r; +} + +int p[10000 / 78]; + +int +main () +{ + int i, r; + check_vect (); + r = foo (78, 0, 10000, p); + for (i = 0; i < 10000 / 78; i++) + if (p[i] != 78 * i) + abort (); + if (r != (10000 / 78) * (10000 / 78 + 1) / 2 * 78 * 3) + abort (); + r = foo (87, 0, 10000, p); + for (i = 0; i < 10000 / 87; i++) + if (p[i] != 87 * i) + abort (); + if (r != (10000 / 87) * (10000 / 87 + 1) / 2 * 87 * 3) + abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-20.c b/gcc/testsuite/gcc.dg/vect/vect-simd-20.c new file mode 100644 index 0000000..c85f05f --- /dev/null +++ 
b/gcc/testsuite/gcc.dg/vect/vect-simd-20.c @@ -0,0 +1,43 @@ +/* { dg-additional-options "-fopenmp-simd -fno-tree-vectorize" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target i?86-*-* x86_64-*-* } } } */ + +#include "tree-vect.h" + +__attribute__((noipa)) int +foo (int s, int m, int n, int *p) +{ + int r = 0, l = 0, i, j; + #pragma omp simd reduction (+:r) linear(l) collapse(2) + for (j = 0; j < 7; j++) + for (i = m; i < n; i += s) + { + p[l++] = i; + r += i * 3; + } + return r; +} + +int p[10000 / 78 * 7]; + +int +main () +{ + int i, j, r; + check_vect (); + r = foo (78, 0, 10000, p); + for (j = 0; j < 7; j++) + for (i = 0; i < 10000 / 78; i++) + if (p[j * (10000 / 78 + 1) + i] != 78 * i) + abort (); + if (r != (10000 / 78) * (10000 / 78 + 1) / 2 * 78 * 3 * 7) + abort (); + r = foo (87, 0, 10000, p); + for (j = 0; j < 7; j++) + for (i = 0; i < 10000 / 87; i++) + if (p[j * (10000 / 87 + 1) + i] != 87 * i) + abort (); + if (r != (10000 / 87) * (10000 / 87 + 1) / 2 * 87 * 3 * 7) + abort (); + return 0; +} -- cgit v1.1 From ada353b87909fd6cd37a30083b4fdcb76acbf5fe Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 26 Sep 2020 10:43:57 +0200 Subject: Implement iterative dataflow in mod-ref cc1plus stats are now: Alias oracle query stats: refs_may_alias_p: 62971744 disambiguations, 73160711 queries ref_maybe_used_by_call_p: 141176 disambiguations, 63867883 queries call_may_clobber_ref_p: 23573 disambiguations, 29322 queries nonoverlapping_component_refs_p: 0 disambiguations, 37720 queries nonoverlapping_refs_since_match_p: 19432 disambiguations, 55659 must overlaps, 75860 queries aliasing_component_refs_p: 54724 disambiguations, 753570 queries TBAA oracle: 24124230 disambiguations 56228428 queries 16058141 are in alias set 0 10338303 queries asked about the same object 125 queries asked about the same alias set 0 access volatile 3919230 are dependent in the DAG 1788399 are 
aritificially in conflict with void * Modref stats: modref use: 10408 disambiguations, 46993 queries modref clobber: 1418549 disambiguations, 1951251 queries 4898707 tbaa queries (2.510547 per modref query) 396878 base compares (0.203397 per modref query) PTA query stats: pt_solution_includes: 975364 disambiguations, 13604284 queries pt_solutions_intersect: 1026606 disambiguations, 13181198 queries So compared to https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554692.html we get 25% more use disambiguations and 91% more clobber disambiguations. Tramp3d is Alias oracle query stats: refs_may_alias_p: 2056905 disambiguations, 2317461 queries ref_maybe_used_by_call_p: 7137 disambiguations, 2093762 queries call_may_clobber_ref_p: 234 disambiguations, 234 queries nonoverlapping_component_refs_p: 0 disambiguations, 4313 queries nonoverlapping_refs_since_match_p: 329 disambiguations, 10200 must overlaps, 10616 queries aliasing_component_refs_p: 858 disambiguations, 34600 queries TBAA oracle: 894996 disambiguations 1695991 queries 138346 are in alias set 0 470668 queries asked about the same object 0 queries asked about the same alias set 0 access volatile 191666 are dependent in the DAG 315 are aritificially in conflict with void * Modref stats: modref use: 842 disambiguations, 2265 queries modref clobber: 14833 disambiguations, 28900 queries 34884 tbaa queries (1.207059 per modref query) 5041 base compares (0.174429 per modref query) PTA query stats: pt_solution_includes: 313372 disambiguations, 525724 queries pt_solutions_intersect: 130374 disambiguations, 415138 queries So about twice as many use and 40% more clobber disambiguations. Bootstrapped/regtested x86_64-linux, I plan to commit it later today after more testing. 2020-09-26 Jan Hubicka * ipa-inline-transform.c: Include ipa-modref-tree.h and ipa-modref.h. (inline_call): Call ipa_merge_modref_summary_after_inlining. * ipa-inline.c (ipa_inline): Do not free summaries. * ipa-modref.c (dump_records): Fix formatting. 
(merge_call_side_effects): Break out from ... (analyze_call): ... here; record recursive calls. (analyze_stmt): Add new parameter RECURSIVE_CALLS. (analyze_function): Do iterative dataflow on recursive calls. (compute_parm_map): New function. (ipa_merge_modref_summary_after_inlining): New function. (collapse_loads): New function. (modref_propagate_in_scc): Break out from ... (pass_ipa_modref::execute): ... here; Do iterative dataflow. * ipa-modref.h (ipa_merge_modref_summary_after_inlining): Declare. --- gcc/ipa-inline-transform.c | 3 + gcc/ipa-inline.c | 3 - gcc/ipa-modref.c | 571 +++++++++++++++++++++++++++++---------------- gcc/ipa-modref.h | 1 + 4 files changed, 373 insertions(+), 205 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-inline-transform.c b/gcc/ipa-inline-transform.c index 5e37e61..af2c285 100644 --- a/gcc/ipa-inline-transform.c +++ b/gcc/ipa-inline-transform.c @@ -48,6 +48,8 @@ along with GCC; see the file COPYING3. If not see #include "cfg.h" #include "basic-block.h" #include "ipa-utils.h" +#include "ipa-modref-tree.h" +#include "ipa-modref.h" int ncalls_inlined; int nfunctions_inlined; @@ -487,6 +489,7 @@ inline_call (struct cgraph_edge *e, bool update_original, gcc_assert (curr->callee->inlined_to == to); old_size = ipa_size_summaries->get (to)->size; + ipa_merge_modref_summary_after_inlining (e); ipa_merge_fn_summary_after_inlining (e); if (e->in_polymorphic_cdtor) mark_all_inlined_calls_cdtor (e->callee); diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c index c667de2..225a014 100644 --- a/gcc/ipa-inline.c +++ b/gcc/ipa-inline.c @@ -2770,9 +2770,6 @@ ipa_inline (void) } } - /* Free ipa-prop structures if they are no longer needed. 
*/ - ipa_free_all_structures_after_iinln (); - if (dump_enabled_p ()) dump_printf (MSG_NOTE, "\nInlined %i calls, eliminated %i functions\n\n", diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 44b844b..73a7900 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -175,7 +175,7 @@ dump_records (modref_records *tt, FILE *out) fprintf (out, " Ref %i: alias set %i\n", (int)j, r->ref); if (r->every_access) { - fprintf (out, " Every access\n"); + fprintf (out, " Every access\n"); continue; } size_t k; @@ -437,11 +437,70 @@ ignore_stores_p (tree caller, int flags) return false; } -/* Analyze function call STMT in function F. */ +/* Merge side effects of call STMT to function with CALLEE_SUMMARY + int CUR_SUMMARY. Return true if something changed. + If IGNORE_STORES is true, do not merge stores. */ + +bool +merge_call_side_effects (modref_summary *cur_summary, + gimple *stmt, modref_summary *callee_summary, + bool ignore_stores) +{ + auto_vec parm_map; + bool changed = false; + + parm_map.safe_grow (gimple_call_num_args (stmt)); + for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) + { + tree op = gimple_call_arg (stmt, i); + STRIP_NOPS (op); + if (TREE_CODE (op) == SSA_NAME + && SSA_NAME_IS_DEFAULT_DEF (op) + && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL) + { + int index = 0; + for (tree t = DECL_ARGUMENTS (current_function_decl); + t != SSA_NAME_VAR (op); t = DECL_CHAIN (t)) + { + if (!t) + { + index = -1; + break; + } + index++; + } + parm_map[i] = index; + } + else if (points_to_local_or_readonly_memory_p (op)) + parm_map[i] = -2; + else + parm_map[i] = -1; + } + + /* Merge with callee's summary. 
*/ + if (cur_summary->loads) + changed |= cur_summary->loads->merge (callee_summary->loads, &parm_map); + if (cur_summary->loads_lto) + changed |= cur_summary->loads_lto->merge (callee_summary->loads_lto, + &parm_map); + if (!ignore_stores) + { + if (cur_summary->stores) + changed |= cur_summary->stores->merge (callee_summary->stores, + &parm_map); + if (cur_summary->stores_lto) + changed |= cur_summary->stores_lto->merge (callee_summary->stores_lto, + &parm_map); + } + return changed; +} + +/* Analyze function call STMT in function F. + Remember recursive calls in RECURSIVE_CALLS. */ static bool analyze_call (modref_summary *cur_summary, - gimple *stmt) + gimple *stmt, vec *recursive_calls) { /* Check flags on the function call. In certain cases, analysis can be simplified. */ @@ -505,6 +564,7 @@ analyze_call (modref_summary *cur_summary, there's nothing to do. */ if (recursive_call_p (current_function_decl, callee)) { + recursive_calls->safe_push (stmt); if (dump_file) fprintf (dump_file, " - Skipping recursive call.\n"); return true; @@ -550,48 +610,7 @@ analyze_call (modref_summary *cur_summary, return false; } - auto_vec parm_map; - - parm_map.safe_grow (gimple_call_num_args (stmt)); - for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) - { - tree op = gimple_call_arg (stmt, i); - STRIP_NOPS (op); - if (TREE_CODE (op) == SSA_NAME - && SSA_NAME_IS_DEFAULT_DEF (op) - && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL) - { - int index = 0; - for (tree t = DECL_ARGUMENTS (current_function_decl); - t != SSA_NAME_VAR (op); t = DECL_CHAIN (t)) - { - if (!t) - { - index = -1; - break; - } - index++; - } - parm_map[i] = index; - } - else if (points_to_local_or_readonly_memory_p (op)) - parm_map[i] = -2; - else - parm_map[i] = -1; - } - - /* Merge with callee's summary. 
*/ - if (cur_summary->loads) - cur_summary->loads->merge (callee_summary->loads, &parm_map); - if (cur_summary->loads_lto) - cur_summary->loads_lto->merge (callee_summary->loads_lto, &parm_map); - if (!ignore_stores) - { - if (cur_summary->stores) - cur_summary->stores->merge (callee_summary->stores, &parm_map); - if (cur_summary->stores_lto) - cur_summary->stores_lto->merge (callee_summary->stores_lto, &parm_map); - } + merge_call_side_effects (cur_summary, stmt, callee_summary, ignore_stores); return true; } @@ -654,7 +673,8 @@ analyze_store (gimple *, tree, tree op, void *data) If IPA is true do not merge in side effects of calls. */ static bool -analyze_stmt (modref_summary *summary, gimple *stmt, bool ipa) +analyze_stmt (modref_summary *summary, gimple *stmt, bool ipa, + vec *recursive_calls) { /* There is no need to record clobbers. */ if (gimple_clobber_p (stmt)) @@ -677,7 +697,7 @@ analyze_stmt (modref_summary *summary, gimple *stmt, bool ipa) return false; case GIMPLE_CALL: if (!ipa) - return analyze_call (summary, stmt); + return analyze_call (summary, stmt, recursive_calls); return true; default: /* Nothing to do for other types of statements. */ @@ -750,6 +770,7 @@ analyze_function (function *f, bool ipa) } summary->finished = false; int ecf_flags = flags_from_decl_or_type (current_function_decl); + auto_vec recursive_calls; /* Analyze each statement in each basic block of the function. 
If the statement cannot be analyzed (for any reason), the entire function cannot @@ -760,7 +781,7 @@ analyze_function (function *f, bool ipa) gimple_stmt_iterator si; for (si = gsi_after_labels (bb); !gsi_end_p (si); gsi_next (&si)) { - if (!analyze_stmt (summary, gsi_stmt (si), ipa) + if (!analyze_stmt (summary, gsi_stmt (si), ipa, &recursive_calls) || !summary->useful_p (ecf_flags)) { cgraph_node *fnode = cgraph_node::get (current_function_decl); @@ -773,6 +794,34 @@ analyze_function (function *f, bool ipa) } } + /* In non-IPA mode we need to perform iterative dataflow on recursive calls. + This needs to be done after all other side effects are computed. */ + if (!ipa) + { + bool changed = true; + while (changed) + { + changed = false; + for (unsigned i = 0; i < recursive_calls.length (); i++) + { + changed |= merge_call_side_effects + (summary, recursive_calls[i], summary, + ignore_stores_p (current_function_decl, + gimple_call_flags + (recursive_calls[i]))); + if (!summary->useful_p (ecf_flags)) + { + cgraph_node *fnode = cgraph_node::get (current_function_decl); + summaries->remove (fnode); + if (dump_file) + fprintf (dump_file, + " - modref done with result: not tracked.\n"); + return; + } + } + } + } + if (!ipa) summary->finished = true; @@ -1276,71 +1325,176 @@ ignore_edge (struct cgraph_edge *e) & (ECF_CONST | ECF_NOVOPS)); } -/* Run the IPA pass. This will take a function's summaries and calls and - construct new summaries which represent a transitive closure. So that - summary of an analyzed function contains information about the loads and - stores that the function or any function that it calls does. */ +/* Compute parm_map for CALLEE_EDGE. 
*/ -unsigned int pass_ipa_modref::execute (function *) +static void +compute_parm_map (cgraph_edge *callee_edge, vec *parm_map) +{ + class ipa_edge_args *args; + if (ipa_node_params_sum + && !callee_edge->call_stmt_cannot_inline_p + && (args = IPA_EDGE_REF (callee_edge)) != NULL) + { + int i, count = ipa_get_cs_argument_count (args); + class ipa_node_params *caller_parms_info, *callee_pi; + class ipa_call_summary *es + = ipa_call_summaries->get (callee_edge); + cgraph_node *callee + = callee_edge->callee->function_or_virtual_thunk_symbol + (NULL, callee_edge->caller); + + caller_parms_info = IPA_NODE_REF (callee_edge->caller->inlined_to + ? callee_edge->caller->inlined_to + : callee_edge->caller); + callee_pi = IPA_NODE_REF (callee); + + (*parm_map).safe_grow (count); + + for (i = 0; i < count; i++) + { + if (es && es->param[i].points_to_local_or_readonly_memory) + { + (*parm_map)[i] = -2; + continue; + } + + struct ipa_jump_func *jf + = ipa_get_ith_jump_func (args, i); + if (jf) + { + tree cst = ipa_value_from_jfunc (caller_parms_info, + jf, + ipa_get_type + (callee_pi, i)); + if (cst && points_to_local_or_readonly_memory_p (cst)) + { + (*parm_map)[i] = -2; + continue; + } + } + if (jf && jf->type == IPA_JF_PASS_THROUGH) + { + (*parm_map)[i] + = ipa_get_jf_pass_through_formal_id (jf); + continue; + } + if (jf && jf->type == IPA_JF_ANCESTOR) + (*parm_map)[i] = ipa_get_jf_ancestor_formal_id (jf); + else + (*parm_map)[i] = -1; + } + if (dump_file) + { + fprintf (dump_file, " Parm map: "); + for (i = 0; i < count; i++) + fprintf (dump_file, " %i", (*parm_map)[i]); + fprintf (dump_file, "\n"); + } + } +} + +/* Call EDGE was inlined; merge summary from callee to the caller. 
*/ + +void +ipa_merge_modref_summary_after_inlining (cgraph_edge *edge) { if (!summaries) - return 0; + return; - struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, - symtab->cgraph_count); - int order_pos; - order_pos = ipa_reduced_postorder (order, true, ignore_edge); - int i; + struct cgraph_node *to = (edge->caller->inlined_to + ? edge->caller->inlined_to : edge->caller); + class modref_summary *to_info = summaries->get (to); - /* Iterate over all strongly connected components in post-order. */ - for (i = 0; i < order_pos; i++) + if (!to_info) + return; + + class modref_summary *callee_info = summaries->get (edge->callee); + int flags = flags_from_decl_or_type (edge->callee->decl); + + if (!callee_info) { - bool its_hopeless = false; - modref_records *loads = NULL; - modref_records *stores = NULL; - modref_records_lto *loads_lto = NULL; - modref_records_lto *stores_lto = NULL; + if (ignore_stores_p (edge->callee->decl, flags)) + { + if (to_info->loads) + to_info->loads->collapse (); + if (to_info->loads_lto) + to_info->loads_lto->collapse (); + } + else + { + summaries->remove (to); + summaries->remove (edge->callee); + return; + } + } + else + { + auto_vec parm_map; + + compute_parm_map (edge, &parm_map); + + if (to_info->loads) + to_info->loads->merge (callee_info->loads, &parm_map); + if (to_info->stores) + to_info->stores->merge (callee_info->stores, &parm_map); + if (to_info->loads_lto) + to_info->loads_lto->merge (callee_info->loads_lto, &parm_map); + if (to_info->stores_lto) + to_info->stores_lto->merge (callee_info->stores_lto, &parm_map); + } + if (!to_info->useful_p (flags)) + summaries->remove (to); + summaries->remove (edge->callee); + return; +} - /* Get the component's representative. That's just any node in the - component from which we can traverse the entire component. */ - struct cgraph_node *component_node = order[i]; - cgraph_node *first = NULL; +/* Collapse loads and return true if something changed. 
*/ - if (dump_file) - fprintf (dump_file, "Start of SCC component\n"); +bool +collapse_loads (modref_summary *cur_summary) +{ + bool changed = false; + + if (cur_summary->loads && !cur_summary->loads->every_base) + { + cur_summary->loads->collapse (); + changed = true; + } + if (cur_summary->loads_lto + && !cur_summary->loads_lto->every_base) + { + cur_summary->loads_lto->collapse (); + changed = true; + } + return changed; +} - /* Walk the component. CUR is the current node of the component that's - being processed. */ - for (struct cgraph_node *cur = component_node; cur && !its_hopeless; +/* Perform iterative dataflow on SCC component starting in COMPONENT_NODE. */ + +static void +modref_propagate_in_scc (cgraph_node *component_node) +{ + bool changed = true; + int iteration = 0; + + while (changed) + { + changed = false; + for (struct cgraph_node *cur = component_node; cur; cur = ((struct ipa_dfs_info *) cur->aux)->next_cycle) { - /* Merge in summaries from CUR. */ - modref_summary *cur_summary = summaries->get (cur); - - if (dump_file) - fprintf (dump_file, " Processing %s\n", - cur->dump_name ()); + cgraph_node *node = cur->inlined_to ? cur->inlined_to : cur; + modref_summary *cur_summary = summaries->get (node); - /* We don't know anything about CUR, hence we cannot tell anything - about the entire component. */ if (!cur_summary) - { - if (dump_file) - fprintf (dump_file, " No summary\n"); - its_hopeless = true; - break; - } + continue; + + if (dump_file) + fprintf (dump_file, " Processing %s%s%s\n", + cur->dump_name (), + TREE_READONLY (cur->decl) ? " (const)" : "", + DECL_PURE_P (cur->decl) ? " (pure)" : ""); - /* Summaries are all going to be same, pick first ones and merge - everything in. 
*/ - if (!first) - { - first = cur; - loads = cur_summary->loads; - stores = cur_summary->stores; - loads_lto = cur_summary->loads_lto; - stores_lto = cur_summary->stores_lto; - } for (cgraph_edge *e = cur->indirect_calls; e; e = e->next_callee) { if (e->indirect_info->ecf_flags & (ECF_CONST | ECF_NOVOPS)) @@ -1350,20 +1504,22 @@ unsigned int pass_ipa_modref::execute (function *) if (dump_file) fprintf (dump_file, " Indirect call: " "collapsing loads\n"); - if (loads) - loads->collapse (); - if (loads_lto) - loads_lto->collapse (); + changed |= collapse_loads (cur_summary); } else { if (dump_file) fprintf (dump_file, " Indirect call: giving up\n"); - its_hopeless = true; + summaries->remove (node); + changed = true; + cur_summary = NULL; + break; } } - /* Walk every function that CUR calls and merge its summary. */ + if (!cur_summary) + continue; + for (cgraph_edge *callee_edge = cur->callees; callee_edge; callee_edge = callee_edge->next_callee) { @@ -1371,42 +1527,26 @@ unsigned int pass_ipa_modref::execute (function *) modref_summary *callee_summary; struct cgraph_node *callee; - if (flags & (ECF_CONST | ECF_NOVOPS)) + if (flags & (ECF_CONST | ECF_NOVOPS) + || !callee_edge->inline_failed) continue; - if (dump_file) - fprintf (dump_file, " Call to %s\n", - callee_edge->callee->dump_name ()); - - /* We can not safely optimize based on summary of callee if it - does not always bind to current def: it is possible that - memory load was optimized out earlier which may not happen in - the interposed variant. */ - if (!callee_edge->binds_to_current_def_p ()) - { - if (loads) - loads->collapse (); - if (loads_lto) - loads_lto->collapse (); - if (dump_file) - fprintf (dump_file, " May not bind local;" - " collapsing loads\n"); - } - /* Get the callee and its summary. */ enum availability avail; callee = callee_edge->callee->function_or_virtual_thunk_symbol (&avail, cur); - /* See if we can derive something from ECF flags. 
Be careful on - not skipping calls within the SCC component: we must merge - all their summaries. - If we switch to iterative dataflow that may be necessary - for future improvements this may go away. */ - if (callee->aux - && ((struct ipa_dfs_info *)cur->aux)->scc_no - == ((struct ipa_dfs_info *)callee->aux)->scc_no) - flags = 0; + /* It is not necessary to re-process calls outside of the + SCC component. */ + if (iteration > 0 + && (!callee->aux + || ((struct ipa_dfs_info *)cur->aux)->scc_no + != ((struct ipa_dfs_info *)callee->aux)->scc_no)) + continue; + + if (dump_file) + fprintf (dump_file, " Call to %s\n", + callee_edge->callee->dump_name ()); bool ignore_stores = ignore_stores_p (cur->decl, flags); @@ -1418,101 +1558,128 @@ unsigned int pass_ipa_modref::execute (function *) { if (!ignore_stores) { - its_hopeless = true; if (dump_file && avail <= AVAIL_INTERPOSABLE) fprintf (dump_file, " Call target interposable" " or not available\n"); else if (dump_file) fprintf (dump_file, " No call target summary\n"); + + summaries->remove (node); + changed = true; break; } else { - if (loads) - loads->collapse (); - if (loads_lto) - loads_lto->collapse (); if (dump_file && avail <= AVAIL_INTERPOSABLE) fprintf (dump_file, " Call target interposable" - "or not available; collapsing loads\n"); + " or not available; collapsing loads\n"); else if (dump_file) fprintf (dump_file, " No call target summary;" " collapsing loads\n"); + + changed |= collapse_loads (cur_summary); continue; } } + /* We can not safely optimize based on summary of callee if it + does not always bind to current def: it is possible that + memory load was optimized out earlier which may not happen in + the interposed variant. */ + if (!callee_edge->binds_to_current_def_p ()) + { + changed |= collapse_loads (cur_summary); + if (dump_file) + fprintf (dump_file, " May not bind local;" + " collapsing loads\n"); + } + + auto_vec parm_map; - /* TODO: compute parm_map. 
*/ + + compute_parm_map (callee_edge, &parm_map); /* Merge in callee's information. */ - if (callee_summary->loads - && callee_summary->loads != loads) - loads->merge (callee_summary->loads, &parm_map); - if (callee_summary->stores - && callee_summary->stores != stores) - stores->merge (callee_summary->stores, &parm_map); - if (callee_summary->loads_lto - && callee_summary->loads_lto != loads_lto) - loads_lto->merge (callee_summary->loads_lto, &parm_map); - if (callee_summary->stores_lto - && callee_summary->stores_lto != stores_lto) - stores_lto->merge (callee_summary->stores_lto, &parm_map); + if (callee_summary->loads) + changed |= cur_summary->loads->merge + (callee_summary->loads, &parm_map); + if (callee_summary->stores) + changed |= cur_summary->stores->merge + (callee_summary->stores, &parm_map); + if (callee_summary->loads_lto) + changed |= cur_summary->loads_lto->merge + (callee_summary->loads_lto, &parm_map); + if (callee_summary->stores_lto) + changed |= cur_summary->stores_lto->merge + (callee_summary->stores_lto, &parm_map); + if (dump_file && changed) + cur_summary->dump (dump_file); } } - - /* At this time, ipa_loads and ipa_stores contain information - about all loads and stores done by any of the component's nodes and - all functions that any of the nodes calls. We will now propagate - this information to all nodes in the component. Therefore, we will - walk the component one more time to do it. 
*/ - for (struct cgraph_node *cur = component_node; cur; + iteration++; + } + for (struct cgraph_node *cur = component_node; cur; + cur = ((struct ipa_dfs_info *) cur->aux)->next_cycle) + { + modref_summary *cur_summary = summaries->get (cur); + if (cur_summary) + cur_summary->finished = true; + } + if (dump_file) + { + fprintf (dump_file, + "Propagation finished in %i iterations\n", iteration); + for (struct cgraph_node *cur = component_node; cur; cur = ((struct ipa_dfs_info *) cur->aux)->next_cycle) - { - modref_summary *cur_summary = summaries->get (cur); - if (!cur_summary) - { - /* The function doesn't have a summary. We must have noticed - that during the first pass and the hopeless flag must - therefore be set. Skip the function. */ - gcc_assert (its_hopeless); - } - else if (its_hopeless) - { - if (dump_file) - fprintf (dump_file, "Cleared modref info for %s\n", - cur->dump_name ()); - summaries->remove (cur); - } - else - { - if (cur == first) - ; - else - { - if (loads) - cur_summary->loads->merge (loads, NULL); - if (stores) - cur_summary->stores->merge (stores, NULL); - if (loads_lto) - cur_summary->loads_lto->merge (loads_lto, NULL); - if (stores_lto) - cur_summary->stores_lto->merge (stores_lto, NULL); - } - cur_summary->finished = true; - if (dump_file) - { - fprintf (dump_file, "Propagated modref for %s%s%s\n", - cur->dump_name (), - TREE_READONLY (cur->decl) ? " (const)" : "", - DECL_PURE_P (cur->decl) ? " (pure)" : ""); - cur_summary->dump (dump_file); - } - } - } + if (!cur->inlined_to) + { + modref_summary *cur_summary = summaries->get (cur); + + fprintf (dump_file, "Propagated modref for %s%s%s\n", + cur->dump_name (), + TREE_READONLY (cur->decl) ? " (const)" : "", + DECL_PURE_P (cur->decl) ? " (pure)" : ""); + if (cur_summary) + cur_summary->dump (dump_file); + else + fprintf (dump_file, " Not tracked\n"); + } + } +} + +/* Run the IPA pass. 
This will take a function's summaries and calls and + construct new summaries which represent a transitive closure. So that + summary of an analyzed function contains information about the loads and + stores that the function or any function that it calls does. */ + +unsigned int +pass_ipa_modref::execute (function *) +{ + if (!summaries) + return 0; + + struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, + symtab->cgraph_count); + int order_pos; + order_pos = ipa_reduced_postorder (order, true, ignore_edge); + int i; + + /* Iterate over all strongly connected components in post-order. */ + for (i = 0; i < order_pos; i++) + { + /* Get the component's representative. That's just any node in the + component from which we can traverse the entire component. */ + struct cgraph_node *component_node = order[i]; + + if (dump_file) + fprintf (dump_file, "\n\nStart of SCC component\n"); + + modref_propagate_in_scc (component_node); } ((modref_summaries *)summaries)->ipa = false; ipa_free_postorder_info (); + /* Free ipa-prop structures if they are no longer needed. */ + ipa_free_all_structures_after_iinln (); return 0; } diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h index 152e715..b6621b4 100644 --- a/gcc/ipa-modref.h +++ b/gcc/ipa-modref.h @@ -47,5 +47,6 @@ struct GTY(()) modref_summary modref_summary *get_modref_function_summary (cgraph_node *func); void ipa_modref_c_finalize (); +void ipa_merge_modref_summary_after_inlining (cgraph_edge *e); #endif -- cgit v1.1 From a8d2d89de2ef6b87166acc81589bdbb622917705 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 26 Sep 2020 10:44:53 +0200 Subject: Add modref testcases gcc/testsuite/ * gcc.dg/lto/modref-1_0.c: New test. * gcc.dg/lto/modref-1_1.c: New test. * gcc.dg/tree-ssa/modref-2.c: New test. 
--- gcc/testsuite/gcc.dg/lto/modref-1_0.c | 14 ++++++++++++++ gcc/testsuite/gcc.dg/lto/modref-1_1.c | 13 +++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/modref-2.c | 26 ++++++++++++++++++++++++++ 3 files changed, 53 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/lto/modref-1_0.c create mode 100644 gcc/testsuite/gcc.dg/lto/modref-1_1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/modref-2.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/lto/modref-1_0.c b/gcc/testsuite/gcc.dg/lto/modref-1_0.c new file mode 100644 index 0000000..8fcb9ec --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/modref-1_0.c @@ -0,0 +1,14 @@ +/* { dg-lto-do run } */ +/* { dg-lto-options {"-O2 -flto-partition=max -flto"} } */ +extern void recursive (int *a, int *b, int *c, int level); +int +main() +{ + int x = 123, y=124, z=125; + recursive (&x,&y,&z,1); + if (y) + __builtin_abort (); + if (!__builtin_constant_p (z)) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/lto/modref-1_1.c b/gcc/testsuite/gcc.dg/lto/modref-1_1.c new file mode 100644 index 0000000..c7c0eae --- /dev/null +++ b/gcc/testsuite/gcc.dg/lto/modref-1_1.c @@ -0,0 +1,13 @@ +short aa; +void +__attribute__ ((noinline, noclone)) +recursive (int *a, int *b, int *c, int level) +{ + if (level && c) + { + recursive (b,a,c,0); + aa++; + } + else + *a=0; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-2.c b/gcc/testsuite/gcc.dg/tree-ssa/modref-2.c new file mode 100644 index 0000000..9999d37 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-2.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-options "-O2" } */ +short aa; +void +__attribute__ ((noinline, noclone)) +recursive (int *a, int *b, int *c, int level) +{ + if (level && c) + { + recursive (b,a,c,0); + aa++; + } + else + *a=0; +} +int +main() +{ + int x = 123, y=124, z=125; + recursive (&x,&y,&z,1); + if (y) + __builtin_abort (); + if (!__builtin_constant_p (z)) + __builtin_abort (); + return 0; +} -- cgit v1.1 From 
5b26b3b3f5c75a86a5a3e851866247ac7fcb6c8b Mon Sep 17 00:00:00 2001 From: Paul Thomas Date: Sat, 26 Sep 2020 12:32:35 +0100 Subject: Correct overwrite of alloc_comp_result_2.f90 in fix of PR96495. 2020-26-09 Paul Thomas gcc/testsuite/ PR fortran/96495 * gfortran.dg/alloc_comp_result_2.f90 : Restore original. * gfortran.dg/alloc_comp_result_3.f90 : New test. --- gcc/testsuite/gfortran.dg/alloc_comp_result_2.f90 | 94 ++++++----------------- gcc/testsuite/gfortran.dg/alloc_comp_result_3.f90 | 75 ++++++++++++++++++ 2 files changed, 98 insertions(+), 71 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/alloc_comp_result_3.f90 (limited to 'gcc') diff --git a/gcc/testsuite/gfortran.dg/alloc_comp_result_2.f90 b/gcc/testsuite/gfortran.dg/alloc_comp_result_2.f90 index 6b09187..2e907e3 100644 --- a/gcc/testsuite/gfortran.dg/alloc_comp_result_2.f90 +++ b/gcc/testsuite/gfortran.dg/alloc_comp_result_2.f90 @@ -1,75 +1,27 @@ -! { dg-do run } +! Tests the fix for PR40440, in which gfortran tried to deallocate +! the allocatable components of the actual argument of CALL SUB ! -! Test the fix for PR96495 - segfaults at runtime at locations below. +! Contributed by Juergen Reuter +! Reduced testcase from Tobias Burnus ! -! Contributed by Paul Luckner -! -module foo_m - implicit none - - type foo - integer, allocatable :: j(:) - end type - - interface operator(.unary.) - module procedure neg_foo - end interface - - interface operator(.binary.) - module procedure foo_sub_foo - end interface - - interface operator(.binaryElemental.) - module procedure foo_add_foo - end interface - + type t + integer, allocatable :: A(:) + end type t + type (t) :: arg + arg = t ([1,2,3]) + call sub (func (arg)) contains - - elemental function foo_add_foo(f, g) result(h) - !! an example for an elemental binary operator - type(foo), intent(in) :: f, g - type(foo) :: h - - allocate (h%j(size(f%j)), source = f%j+g%j) - end function - - elemental function foo_sub_foo(f, g) result(h) - !! 
an example for an elemental binary operator - type(foo), intent(in) :: f, g - type(foo) :: h - - allocate (h%j(size(f%j)), source = f%j-3*g%j) - end function - - pure function neg_foo(f) result(g) - !! an example for a unary operator - type(foo), intent(in) :: f - type(foo) :: g - - allocate (g%j(size(f%j)), source = -f%j) - end function - -end module - -program main_tmp - - use foo_m - - implicit none - - type(foo) f, g(2) - - allocate (f%j(3)) - f%j = [2, 3, 4] - - g = f - if (any (g(2)%j .ne. [2, 3, 4])) stop 1 - - g = g .binaryElemental. (f .binary. f) ! threw "Segmentation fault" - if (any (g(2)%j .ne. [-2,-3,-4])) stop 2 - - g = g .binaryElemental. ( .unary. f) ! threw "Segmentation fault" - if (any (g(2)%j .ne. [-4,-6,-8])) stop 3 - -end program \ No newline at end of file + function func (a) + type(t), pointer :: func + type(t), target :: a + integer, save :: i = 0 + if (i /= 0) STOP 1! multiple calls would cause this abort + i = i + 1 + func => a + end function func + subroutine sub (a) + type(t), intent(IN), target :: a + if (any (a%A .ne. [1,2,3])) STOP 2 + end subroutine sub +end diff --git a/gcc/testsuite/gfortran.dg/alloc_comp_result_3.f90 b/gcc/testsuite/gfortran.dg/alloc_comp_result_3.f90 new file mode 100644 index 0000000..8c4c982 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/alloc_comp_result_3.f90 @@ -0,0 +1,75 @@ +! { dg-do run } +! +! Test the fix for PR96495 - segfaults at runtime at locations below. +! +! Contributed by Paul Luckner +! +module foo_m + + implicit none + + type foo + integer, allocatable :: j(:) + end type + + interface operator(.unary.) + module procedure neg_foo + end interface + + interface operator(.binary.) + module procedure foo_sub_foo + end interface + + interface operator(.binaryElemental.) + module procedure foo_add_foo + end interface + +contains + + elemental function foo_add_foo(f, g) result(h) + !! 
an example for an elemental binary operator + type(foo), intent(in) :: f, g + type(foo) :: h + + allocate (h%j(size(f%j)), source = f%j+g%j) + end function + + elemental function foo_sub_foo(f, g) result(h) + !! an example for an elemental binary operator + type(foo), intent(in) :: f, g + type(foo) :: h + + allocate (h%j(size(f%j)), source = f%j-3*g%j) + end function + + pure function neg_foo(f) result(g) + !! an example for a unary operator + type(foo), intent(in) :: f + type(foo) :: g + + allocate (g%j(size(f%j)), source = -f%j) + end function + +end module + +program main_tmp + + use foo_m + + implicit none + + type(foo) f, g(2) + + allocate (f%j(3)) + f%j = [2, 3, 4] + + g = f + if (any (g(2)%j .ne. [2, 3, 4])) stop 1 + + g = g .binaryElemental. (f .binary. f) ! threw "Segmentation fault" + if (any (g(2)%j .ne. [-2,-3,-4])) stop 2 + + g = g .binaryElemental. ( .unary. f) ! threw "Segmentation fault" + if (any (g(2)%j .ne. [-4,-6,-8])) stop 3 + +end program -- cgit v1.1 From e721d1137fb3f0323d31b767bc64c772086ff868 Mon Sep 17 00:00:00 2001 From: David Edelsohn Date: Wed, 23 Sep 2020 16:52:15 -0400 Subject: aix: collect2 visibility The code that collect2 generates, compiles and links into applications and shared libraries to initialize constructors and register DWARF tables is built with the compiler options used to invoke the linker. If the compiler options change the visibility from default, the library initialization routines will not be visible and this can prevent initialization. This patch checks if the command line sets visibility and then adds GCC pragmas to the initialization code generated by collect2 if necessary to define the visibility on global, exported functions as default. gcc/ChangeLog: 2020-09-26 David Edelsohn Clement Chigot * collect2.c (visibility_flag): New. (main): Detect -fvisibility. (write_c_file_stat): Push and pop default visibility. 
--- gcc/collect2.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/collect2.c b/gcc/collect2.c index f8a5ce4..6d074a7 100644 --- a/gcc/collect2.c +++ b/gcc/collect2.c @@ -184,7 +184,8 @@ static int strip_flag; /* true if -s */ static int export_flag; /* true if -bE */ static int aix64_flag; /* true if -b64 */ static int aixrtl_flag; /* true if -brtl */ -static int aixlazy_flag; /* true if -blazy */ +static int aixlazy_flag; /* true if -blazy */ +static int visibility_flag; /* true if -fvisibility */ #endif enum lto_mode_d { @@ -1239,6 +1240,11 @@ main (int argc, char **argv) *c_ptr++ = xstrdup (q); } } +#ifdef COLLECT_EXPORT_LIST + /* Detect any invocation with -fvisibility. */ + if (strncmp (q, "-fvisibility", 12) == 0) + visibility_flag = 1; +#endif } obstack_free (&temporary_obstack, temporary_firstobj); *c_ptr++ = "-fno-profile-arcs"; @@ -2131,6 +2137,11 @@ write_c_file_stat (FILE *stream, const char *name ATTRIBUTE_UNUSED) fprintf (stream, "\t}\n"); } +#ifdef COLLECT_EXPORT_LIST + /* Set visibility of initializers to default. */ + if (visibility_flag) + fprintf (stream, "#pragma GCC visibility push(default)\n"); +#endif fprintf (stream, "void %s() {\n", initname); if (constructors.number > 0 || frames) { @@ -2163,11 +2174,24 @@ write_c_file_stat (FILE *stream, const char *name ATTRIBUTE_UNUSED) destructors.number + frames); } fprintf (stream, "}\n"); +#ifdef COLLECT_EXPORT_LIST + if (visibility_flag) + fprintf (stream, "#pragma GCC visibility pop\n"); +#endif if (shared_obj) { +#ifdef COLLECT_EXPORT_LIST + /* Set visibility of initializers to default. 
*/ + if (visibility_flag) + fprintf (stream, "#pragma GCC visibility push(default)\n"); +#endif COLLECT_SHARED_INIT_FUNC (stream, initname); COLLECT_SHARED_FINI_FUNC (stream, fininame); +#ifdef COLLECT_EXPORT_LIST + if (visibility_flag) + fprintf (stream, "#pragma GCC visibility pop\n"); +#endif } } -- cgit v1.1 From 3991912e260d68f0da8d3711b5258c3a3009dc4c Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 26 Sep 2020 18:40:50 +0200 Subject: Fix handling of clobbers in ipa-modref.c * ipa-modref.c (analyze_stmt): Do not skip clobbers in early pass. * ipa-pure-const.c (analyze_stmt): Update comment. --- gcc/ipa-modref.c | 11 +++++++---- gcc/ipa-pure-const.c | 2 ++ 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 73a7900..728c6c1 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -676,13 +676,16 @@ static bool analyze_stmt (modref_summary *summary, gimple *stmt, bool ipa, vec *recursive_calls) { - /* There is no need to record clobbers. */ - if (gimple_clobber_p (stmt)) + /* In general we can not ignore clobbers because they are barriers for code + motion, however after inlining it is safe to do because local optimization + passes do not consider clobbers from other functions. + Similar logic is in ipa-pure-const. */ + if ((ipa || cfun->after_inlining) && gimple_clobber_p (stmt)) return true; + /* Analyze all loads and stores in STMT. */ walk_stmt_load_store_ops (stmt, summary, analyze_load, analyze_store); - /* or call analyze_load_ipa, analyze_store_ipa */ switch (gimple_code (stmt)) { @@ -705,7 +708,7 @@ analyze_stmt (modref_summary *summary, gimple *stmt, bool ipa, } } -/* Analyze function F. IPA indicates whether we're running in tree mode (false) +/* Analyze function F. IPA indicates whether we're running in local mode (false) or the IPA mode (true). 
*/ static void diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c index bdbccd0..1af3206 100644 --- a/gcc/ipa-pure-const.c +++ b/gcc/ipa-pure-const.c @@ -742,6 +742,8 @@ check_stmt (gimple_stmt_iterator *gsip, funct_state local, bool ipa) /* Do consider clobber as side effects before IPA, so we rather inline C++ destructors and keep clobber semantics than eliminate them. + Similar logic is in ipa-modref. + TODO: We may get smarter during early optimizations on these and let functions containing only clobbers to be optimized more. This is a common case of C++ destructors. */ -- cgit v1.1 From 91dd4a3864110704c921ab8467f568ff42c38e5c Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sun, 27 Sep 2020 00:16:24 +0000 Subject: Daily bump. --- gcc/ChangeLog | 62 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 10 ++++++++ gcc/testsuite/ChangeLog | 35 ++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d8144b2..a81090e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,65 @@ +2020-09-26 Jan Hubicka + + * ipa-modref.c (analyze_stmt): Do not skip clobbers in early pass. + * ipa-pure-const.c (analyze_stmt): Update comment. + +2020-09-26 David Edelsohn + Clement Chigot + + * collect2.c (visibility_flag): New. + (main): Detect -fvisibility. + (write_c_file_stat): Push and pop default visibility. + +2020-09-26 Jan Hubicka + + * ipa-inline-transform.c: Include ipa-modref-tree.h and ipa-modref.h. + (inline_call): Call ipa_merge_modref_summary_after_inlining. + * ipa-inline.c (ipa_inline): Do not free summaries. + * ipa-modref.c (dump_records): Fix formating. + (merge_call_side_effects): Break out from ... + (analyze_call): ... here; record recursive calls. + (analyze_stmt): Add new parameter RECURSIVE_CALLS. + (analyze_function): Do iterative dataflow on recursive calls. + (compute_parm_map): New function. 
+ (ipa_merge_modref_summary_after_inlining): New function. + (collapse_loads): New function. + (modref_propagate_in_scc): Break out from ... + (pass_ipa_modref::execute): ... here; Do iterative dataflow. + * ipa-modref.h (ipa_merge_modref_summary_after_inlining): Declare. + +2020-09-26 Jakub Jelinek + + * omp-expand.c (expand_omp_simd): Help vectorizer for the collapse == 1 + and non-composite collapse > 1 case with non-constant innermost loop + step by precomputing number of iterations before loop and using an + alternate IV from 0 to number of iterations - 1 with step of 1. + +2020-09-26 Jan Hubicka + + * ipa-fnsummary.c (dump_ipa_call_summary): Dump + points_to_local_or_readonly_memory flag. + (analyze_function_body): Compute points_to_local_or_readonly_memory + flag. + (remap_edge_change_prob): Rename to ... + (remap_edge_params): ... this one; update + points_to_local_or_readonly_memory. + (remap_edge_summaries): Update. + (read_ipa_call_summary): Stream the new flag. + (write_ipa_call_summary): Likewise. + * ipa-predicate.h (struct inline_param_summary): Add + points_to_local_or_readonly_memory. + (inline_param_summary::equal_to): Update. + (inline_param_summary::useless_p): Update. + +2020-09-26 Jan Hubicka + + * ipa-modref-tree.h (modref_ref_node::insert_access): Track if something + changed. + (modref_base_node::insert_ref): Likewise (and add a new optional + argument) + (modref_tree::insert): Likewise. 
+ (modref_tree::merge): Rewrite + 2020-09-25 Jan Hubicka * doc/invoke.texi: Add -fno-ipa-modref to flags disabled by diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index bfdd19d..51daa72 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200926 +20200927 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 2047917..0d04604 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,13 @@ +2020-09-26 David Malcolm + + PR analyzer/96646 + PR analyzer/96841 + * region-model.cc (region_model::get_representative_path_var): + When handling offset_region, wrap the MEM_REF's first argument in + an ADDR_EXPR of pointer type, rather than simply using the tree + for the parent region. Require the MEM_REF's second argument to + be an integer constant. + 2020-09-24 David Malcolm * analyzer.h (struct rejected_constraint): New decl. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index df23a09..1e67696 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,38 @@ +2020-09-26 Paul Thomas + + PR fortran/96495 + * gfortran.dg/alloc_comp_result_2.f90 : Restore original. + * gfortran.dg/alloc_comp_result_3.f90 : New test. + +2020-09-26 Jan Hubicka + + * gcc.dg/lto/modref-1_0.c: New test. + * gcc.dg/lto/modref-1_1.c: New test. + * gcc.dg/tree-ssa/modref-2.c: New test. + +2020-09-26 Jakub Jelinek + + * gcc.dg/vect/vect-simd-17.c: Expect 11 or more vectorized loops. + * gcc.dg/vect/vect-simd-18.c: New test. + * gcc.dg/vect/vect-simd-19.c: New test. + * gcc.dg/vect/vect-simd-20.c: New test. + +2020-09-26 Jan Hubicka + + * gcc.dg/ipa/ipa-pta-13.c: Disable modref. + +2020-09-26 David Malcolm + + PR analyzer/94355 + * g++.dg/analyzer/placement-new.C: New test. + +2020-09-26 David Malcolm + + PR analyzer/96646 + PR analyzer/96841 + * gcc.dg/analyzer/pr96646.c: New test. + * gcc.dg/analyzer/pr96841.c: New test. 
+ 2020-09-25 Richard Biener PR middle-end/96814 -- cgit v1.1 From 683e55facfd5c1d348796119424d7b3f9e889ffe Mon Sep 17 00:00:00 2001 From: Xionghu Luo Date: Sun, 27 Sep 2020 00:27:32 -0500 Subject: IFN: Implement IFN_VEC_SET for ARRAY_REF with VIEW_CONVERT_EXPR This patch enables transformation from ARRAY_REF(VIEW_CONVERT_EXPR) to VEC_SET internal function in gimple-isel pass if target supports vec_set with variable index by checking can_vec_set_var_idx_p. gcc/ChangeLog: 2020-09-27 Xionghu Luo * gimple-isel.cc (gimple_expand_vec_set_expr): New function. (gimple_expand_vec_cond_exprs): Rename to ... (gimple_expand_vec_exprs): ... this and call gimple_expand_vec_set_expr. * internal-fn.c (vec_set_direct): New define. (expand_vec_set_optab_fn): New function. (direct_vec_set_optab_supported_p): New define. * internal-fn.def (VEC_SET): New DEF_INTERNAL_OPTAB_FN. * optabs.c (can_vec_set_var_idx_p): New function. * optabs.h (can_vec_set_var_idx_p): New declaration. --- gcc/gimple-isel.cc | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++-- gcc/internal-fn.c | 41 +++++++++++++++++++++++++++++ gcc/internal-fn.def | 2 ++ gcc/optabs.c | 21 +++++++++++++++ gcc/optabs.h | 4 +++ 5 files changed, 141 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc index 9792263..b64e31f 100644 --- a/gcc/gimple-isel.cc +++ b/gcc/gimple-isel.cc @@ -35,6 +35,74 @@ along with GCC; see the file COPYING3. If not see #include "tree-cfg.h" #include "bitmap.h" #include "tree-ssa-dce.h" +#include "memmodel.h" +#include "optabs.h" + +/* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to + internal function based on vector type of selected expansion. 
+ i.e.: + VIEW_CONVERT_EXPR(u)[_1] = = i_4(D); + => + _7 = u; + _8 = .VEC_SET (_7, i_4(D), _1); + u = _8; */ + +static gimple * +gimple_expand_vec_set_expr (gimple_stmt_iterator *gsi) +{ + enum tree_code code; + gcall *new_stmt = NULL; + gassign *ass_stmt = NULL; + + /* Only consider code == GIMPLE_ASSIGN. */ + gassign *stmt = dyn_cast (gsi_stmt (*gsi)); + if (!stmt) + return NULL; + + tree lhs = gimple_assign_lhs (stmt); + code = TREE_CODE (lhs); + if (code != ARRAY_REF) + return NULL; + + tree val = gimple_assign_rhs1 (stmt); + tree op0 = TREE_OPERAND (lhs, 0); + if (TREE_CODE (op0) == VIEW_CONVERT_EXPR && DECL_P (TREE_OPERAND (op0, 0)) + && VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (op0, 0))) + && TYPE_MODE (TREE_TYPE (lhs)) + == TYPE_MODE (TREE_TYPE (TREE_TYPE (TREE_OPERAND (op0, 0))))) + { + tree pos = TREE_OPERAND (lhs, 1); + tree view_op0 = TREE_OPERAND (op0, 0); + machine_mode outermode = TYPE_MODE (TREE_TYPE (view_op0)); + if (auto_var_in_fn_p (view_op0, cfun->decl) + && !TREE_ADDRESSABLE (view_op0) && can_vec_set_var_idx_p (outermode)) + { + location_t loc = gimple_location (stmt); + tree var_src = make_ssa_name (TREE_TYPE (view_op0)); + tree var_dst = make_ssa_name (TREE_TYPE (view_op0)); + + ass_stmt = gimple_build_assign (var_src, view_op0); + gimple_set_vuse (ass_stmt, gimple_vuse (stmt)); + gimple_set_location (ass_stmt, loc); + gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT); + + new_stmt + = gimple_build_call_internal (IFN_VEC_SET, 3, var_src, val, pos); + gimple_call_set_lhs (new_stmt, var_dst); + gimple_set_location (new_stmt, loc); + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + + ass_stmt = gimple_build_assign (view_op0, var_dst); + gimple_set_location (ass_stmt, loc); + gsi_insert_before (gsi, ass_stmt, GSI_SAME_STMT); + + gimple_move_vops (ass_stmt, stmt); + gsi_remove (gsi, true); + } + } + + return ass_stmt; +} /* Expand all VEC_COND_EXPR gimple assignments into calls to internal function based on type of selected expansion. 
*/ @@ -165,7 +233,7 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi, VEC_COND_EXPR assignments. */ static unsigned int -gimple_expand_vec_cond_exprs (void) +gimple_expand_vec_exprs (void) { gimple_stmt_iterator gsi; basic_block bb; @@ -178,12 +246,15 @@ gimple_expand_vec_cond_exprs (void) { gimple *g = gimple_expand_vec_cond_expr (&gsi, &vec_cond_ssa_name_uses); + if (g != NULL) { tree lhs = gimple_assign_lhs (gsi_stmt (gsi)); gimple_set_lhs (g, lhs); gsi_replace (&gsi, g, false); } + + gimple_expand_vec_set_expr (&gsi); } } @@ -226,7 +297,7 @@ public: virtual unsigned int execute (function *) { - return gimple_expand_vec_cond_exprs (); + return gimple_expand_vec_exprs (); } }; // class pass_gimple_isel diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 6cf2687..8ea3195 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -115,6 +115,7 @@ init_internal_fns () #define vec_condeq_direct { 0, 0, false } #define scatter_store_direct { 3, 1, false } #define len_store_direct { 3, 3, false } +#define vec_set_direct { 3, 3, false } #define unary_direct { 0, 0, true } #define binary_direct { 0, 0, true } #define ternary_direct { 0, 0, true } @@ -2658,6 +2659,45 @@ expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab) #define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn +/* Expand VEC_SET internal functions. 
*/ + +static void +expand_vec_set_optab_fn (internal_fn, gcall *stmt, convert_optab optab) +{ + tree lhs = gimple_call_lhs (stmt); + tree op0 = gimple_call_arg (stmt, 0); + tree op1 = gimple_call_arg (stmt, 1); + tree op2 = gimple_call_arg (stmt, 2); + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + rtx src = expand_normal (op0); + + machine_mode outermode = TYPE_MODE (TREE_TYPE (op0)); + scalar_mode innermode = GET_MODE_INNER (outermode); + + rtx value = expand_normal (op1); + rtx pos = expand_normal (op2); + + class expand_operand ops[3]; + enum insn_code icode = optab_handler (optab, outermode); + + if (icode != CODE_FOR_nothing) + { + rtx temp = gen_reg_rtx (outermode); + emit_move_insn (temp, src); + + create_fixed_operand (&ops[0], temp); + create_input_operand (&ops[1], value, innermode); + create_convert_operand_from (&ops[2], pos, TYPE_MODE (TREE_TYPE (op2)), + true); + if (maybe_expand_insn (icode, 3, ops)) + { + emit_move_insn (target, temp); + return; + } + } + gcc_unreachable (); +} + static void expand_ABNORMAL_DISPATCHER (internal_fn, gcall *) { @@ -3253,6 +3293,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, #define direct_fold_left_optab_supported_p direct_optab_supported_p #define direct_mask_fold_left_optab_supported_p direct_optab_supported_p #define direct_check_ptrs_optab_supported_p direct_optab_supported_p +#define direct_vec_set_optab_supported_p direct_optab_supported_p /* Return the optab used by internal function FN. 
*/ diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 13e6082..e6cfe1b 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -145,6 +145,8 @@ DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu) DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq) DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask) +DEF_INTERNAL_OPTAB_FN (VEC_SET, 0, vec_set, vec_set) + DEF_INTERNAL_OPTAB_FN (LEN_STORE, 0, len_store, len_store) DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) diff --git a/gcc/optabs.c b/gcc/optabs.c index 184827f..8e844028 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -3841,6 +3841,27 @@ can_vcond_compare_p (enum rtx_code code, machine_mode value_mode, && insn_operand_matches (icode, 3, test); } +/* Return whether the backend can emit vector set instructions for inserting + element into vector at variable index position. */ + +bool +can_vec_set_var_idx_p (machine_mode vec_mode) +{ + if (!VECTOR_MODE_P (vec_mode)) + return false; + + machine_mode inner_mode = GET_MODE_INNER (vec_mode); + rtx reg1 = alloca_raw_REG (vec_mode, LAST_VIRTUAL_REGISTER + 1); + rtx reg2 = alloca_raw_REG (inner_mode, LAST_VIRTUAL_REGISTER + 2); + rtx reg3 = alloca_raw_REG (VOIDmode, LAST_VIRTUAL_REGISTER + 3); + + enum insn_code icode = optab_handler (vec_set_optab, vec_mode); + + return icode != CODE_FOR_nothing && insn_operand_matches (icode, 0, reg1) + && insn_operand_matches (icode, 1, reg2) + && insn_operand_matches (icode, 2, reg3); +} + /* This function is called when we are going to emit a compare instruction that compares the values found in X and Y, using the rtl operator COMPARISON. diff --git a/gcc/optabs.h b/gcc/optabs.h index 7c2ec25..0b14700 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -249,6 +249,10 @@ extern int can_compare_p (enum rtx_code, machine_mode, VALUE_MODE. 
*/ extern bool can_vcond_compare_p (enum rtx_code, machine_mode, machine_mode); +/* Return whether the backend can emit vector set instructions for inserting + element into vector at variable index position. */ +extern bool can_vec_set_var_idx_p (machine_mode); + extern rtx prepare_operand (enum insn_code, rtx, int, machine_mode, machine_mode, int); /* Emit a pair of rtl insns to compare two rtx's and to jump -- cgit v1.1 From e5a76af3a2f3324efc60b4b2778ffb29d5c377bc Mon Sep 17 00:00:00 2001 From: Mark Eggleston Date: Thu, 11 Jun 2020 14:33:51 +0100 Subject: Fortran : ICE in build_field PR95614 Local identifiers can not be the same as a module name. Original patch by Steve Kargl resulted in name clashes between common block names and local identifiers. A local identifier can be the same as a global identifier if that identifier represents a common block. The patch was modified to allow global identifiers that represent a common block. 2020-09-27 Steven G. Kargl Mark Eggleston gcc/fortran/ PR fortran/95614 * decl.c (gfc_get_common): Use gfc_match_common_name instead of match_common_name. * decl.c (gfc_bind_idents): Use gfc_match_common_name instead of match_common_name. * match.c : Rename match_common_name to gfc_match_common_name. * match.c (gfc_match_common): Use gfc_match_common_name instead of match_common_name. * match.h : Rename match_common_name to gfc_match_common_name. * resolve.c (resolve_common_vars): Check each symbol in a common block has a global symbol. If there is a global symbol issue an error if the symbol type is known as is not a common block name. 2020-09-27 Mark Eggleston gcc/testsuite/ PR fortran/95614 * gfortran.dg/pr95614_1.f90: New test. * gfortran.dg/pr95614_2.f90: New test. 
--- gcc/fortran/decl.c | 4 ++-- gcc/fortran/match.c | 5 +++-- gcc/fortran/match.h | 6 ++---- gcc/fortran/resolve.c | 7 +++++++ gcc/testsuite/gfortran.dg/pr95614_1.f90 | 6 ++++++ gcc/testsuite/gfortran.dg/pr95614_2.f90 | 6 ++++++ 6 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/pr95614_1.f90 create mode 100644 gcc/testsuite/gfortran.dg/pr95614_2.f90 (limited to 'gcc') diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c index 326e6f5..9bfaa60 100644 --- a/gcc/fortran/decl.c +++ b/gcc/fortran/decl.c @@ -6007,7 +6007,7 @@ get_bind_c_idents (void) found_id = MATCH_YES; gfc_get_ha_symbol (name, &tmp_sym); } - else if (match_common_name (name) == MATCH_YES) + else if (gfc_match_common_name (name) == MATCH_YES) { found_id = MATCH_YES; com_block = gfc_get_common (name, 0); @@ -6052,7 +6052,7 @@ get_bind_c_idents (void) found_id = MATCH_YES; gfc_get_ha_symbol (name, &tmp_sym); } - else if (match_common_name (name) == MATCH_YES) + else if (gfc_match_common_name (name) == MATCH_YES) { found_id = MATCH_YES; com_block = gfc_get_common (name, 0); diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c index cb09c5f..bee73e7 100644 --- a/gcc/fortran/match.c +++ b/gcc/fortran/match.c @@ -5166,7 +5166,8 @@ gfc_get_common (const char *name, int from_module) /* Match a common block name. */ -match match_common_name (char *name) +match +gfc_match_common_name (char *name) { match m; @@ -5218,7 +5219,7 @@ gfc_match_common (void) for (;;) { - m = match_common_name (name); + m = gfc_match_common_name (name); if (m == MATCH_ERROR) goto cleanup; diff --git a/gcc/fortran/match.h b/gcc/fortran/match.h index 7bf70d7..4ccb596 100644 --- a/gcc/fortran/match.h +++ b/gcc/fortran/match.h @@ -103,11 +103,9 @@ match gfc_match_call (void); /* We want to use this function to check for a common-block-name that can exist in a bind statement, so removed the "static" - declaration of the function in match.c. + declaration of the function in match.c. 
*/ - TODO: should probably rename this now that it'll be globally seen to - gfc_match_common_name. */ -match match_common_name (char *name); +match gfc_match_common_name (char *name); match gfc_match_common (void); match gfc_match_block_data (void); diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c index f4ce49f..3c767a7 100644 --- a/gcc/fortran/resolve.c +++ b/gcc/fortran/resolve.c @@ -936,9 +936,16 @@ static void resolve_common_vars (gfc_common_head *common_block, bool named_common) { gfc_symbol *csym = common_block->head; + gfc_gsymbol *gsym; for (; csym; csym = csym->common_next) { + gsym = gfc_find_gsymbol (gfc_gsym_root, csym->name); + if (gsym && gsym->type != GSYM_UNKNOWN && gsym->type != GSYM_COMMON) + gfc_error_now ("Global entity %qs at %L cannot appear in a " + "COMMON block at %L", gsym->name, + &gsym->where, &csym->common_block->where); + /* gfc_add_in_common may have been called before, but the reported errors have been ignored to continue parsing. We do the checks again here. */ diff --git a/gcc/testsuite/gfortran.dg/pr95614_1.f90 b/gcc/testsuite/gfortran.dg/pr95614_1.f90 new file mode 100644 index 0000000..f835143 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr95614_1.f90 @@ -0,0 +1,6 @@ +! { dg-do compile } + +module m ! { dg-error ".1." } + common m ! { dg-error "cannot appear in a COMMON" } +end + diff --git a/gcc/testsuite/gfortran.dg/pr95614_2.f90 b/gcc/testsuite/gfortran.dg/pr95614_2.f90 new file mode 100644 index 0000000..9d69a50 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr95614_2.f90 @@ -0,0 +1,6 @@ +! { dg-do compile } + +module m ! { dg-error ".1." } + common /xc/ m ! 
{ dg-error "cannot appear in a COMMON" } +end + -- cgit v1.1 From a4b31d5807f2bc67c8999b3d53369cf2a5c6e1ec Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sun, 27 Sep 2020 23:18:26 +0200 Subject: optabs: Don't reuse target for multi-word expansions if it overlaps operand(s) [PR97073] The following testcase is miscompiled on i686-linux, because we try to expand a double-word bitwise logic operation with op0 being a (mem:DI u) and target (mem:DI u+4), i.e. partial overlap, and thus end up with: movl 4(%esp), %eax andl u, %eax movl %eax, u+4 ! movl u+4, %eax optimized out andl 8(%esp), %eax movl %eax, u+8 rather than with the desired: movl 4(%esp), %edx movl 8(%esp), %eax andl u, %edx andl u+4, %eax movl %eax, u+8 movl %edx, u+4 because the store of the first word to target overwrites the second word of the operand. expand_binop for this (and several similar places) already check for target == op0 or target == op1, this patch just adds reg_overlap_mentioned_p calls next to it. Pedantically, at least for some of these it might be sufficient to force a different target if there is overlap but target is not rtx_equal_p to the operand (e.g. in this bitwise logical case, but e.g. not in the shift cases where there is reordering), though that would go against the preexisting target == op? checks and the rationale that REG_EQUAL notes in that case isn't correct. 2020-09-27 Jakub Jelinek PR middle-end/97073 * optabs.c (expand_binop, expand_absneg_bit, expand_unop, expand_copysign_bit): Check reg_overlap_mentioned_p between target and operand(s) and if it returns true, force a pseudo as target. * gcc.c-torture/execute/pr97073.c: New test. 
--- gcc/optabs.c | 14 +++++++++++++- gcc/testsuite/gcc.c-torture/execute/pr97073.c | 21 +++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr97073.c (limited to 'gcc') diff --git a/gcc/optabs.c b/gcc/optabs.c index 8e844028..8ad7f4b 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -1395,6 +1395,8 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1, if (target == 0 || target == op0 || target == op1 + || reg_overlap_mentioned_p (target, op0) + || reg_overlap_mentioned_p (target, op1) || !valid_multiword_target_p (target)) target = gen_reg_rtx (int_mode); @@ -1475,6 +1477,8 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1, if (target == 0 || target == op0 || target == op1 + || reg_overlap_mentioned_p (target, op0) + || reg_overlap_mentioned_p (target, op1) || !valid_multiword_target_p (target)) target = gen_reg_rtx (int_mode); @@ -1533,6 +1537,8 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1, || target == op0 || target == op1 || !REG_P (target) + || reg_overlap_mentioned_p (target, op0) + || reg_overlap_mentioned_p (target, op1) || !valid_multiword_target_p (target)) target = gen_reg_rtx (int_mode); @@ -2670,6 +2676,7 @@ expand_absneg_bit (enum rtx_code code, scalar_float_mode mode, if (target == 0 || target == op0 + || reg_overlap_mentioned_p (target, op0) || (nwords > 1 && !valid_multiword_target_p (target))) target = gen_reg_rtx (mode); @@ -2951,7 +2958,10 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target, int i; rtx_insn *insns; - if (target == 0 || target == op0 || !valid_multiword_target_p (target)) + if (target == 0 + || target == op0 + || reg_overlap_mentioned_p (target, op0) + || !valid_multiword_target_p (target)) target = gen_reg_rtx (int_mode); start_sequence (); @@ -3472,6 +3482,8 @@ expand_copysign_bit (scalar_float_mode mode, rtx op0, rtx op1, rtx target, if (target == 0 || target == op0 || target == 
op1 + || reg_overlap_mentioned_p (target, op0) + || reg_overlap_mentioned_p (target, op1) || (nwords > 1 && !valid_multiword_target_p (target))) target = gen_reg_rtx (mode); diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97073.c b/gcc/testsuite/gcc.c-torture/execute/pr97073.c new file mode 100644 index 0000000..1955e6b --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr97073.c @@ -0,0 +1,21 @@ +/* PR middle-end/97073 */ +/* { dg-additional-options "-mno-stv" { target i?86-*-* x86_64-*-* } } */ + +typedef unsigned long long L; +union U { L i; struct T { unsigned k; L l; } j; } u; + +__attribute__((noinline,noclone)) void +foo (L x) +{ + u.j.l = u.i & x; +} + +int +main () +{ + u.i = 5; + foo (-1ULL); + if (u.j.l != 5) + __builtin_abort (); + return 0; +} -- cgit v1.1 From e24817aa7a1c6d12039b486ab5ea9b5ee0a46cd4 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sun, 27 Sep 2020 23:44:15 +0200 Subject: Fix handling of stores in modref_summary::useful_p * ipa-modref.c (modref_summary::useful_p): Fix testing of stores. --- gcc/ipa-modref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 728c6c1..6225552 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -135,7 +135,7 @@ modref_summary::useful_p (int ecf_flags) return true; if (ecf_flags & ECF_PURE) return false; - return stores && !loads->every_base; + return stores && !stores->every_base; } /* Dump A to OUT. */ -- cgit v1.1 From 4383c595ce5cc6ef6bcb45d2c9caf43002afbc4f Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Mon, 28 Sep 2020 00:16:21 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 24 ++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/fortran/ChangeLog | 17 +++++++++++++++++ gcc/testsuite/ChangeLog | 12 ++++++++++++ 4 files changed, 54 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a81090e..64556c0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,27 @@ +2020-09-27 Jan Hubicka + + * ipa-modref.c (modref_summary::useful_p): Fix testing of stores. + +2020-09-27 Jakub Jelinek + + PR middle-end/97073 + * optabs.c (expand_binop, expand_absneg_bit, expand_unop, + expand_copysign_bit): Check reg_overlap_mentioned_p between target + and operand(s) and if it returns true, force a pseudo as target. + +2020-09-27 Xionghu Luo + + * gimple-isel.cc (gimple_expand_vec_set_expr): New function. + (gimple_expand_vec_cond_exprs): Rename to ... + (gimple_expand_vec_exprs): ... this and call + gimple_expand_vec_set_expr. + * internal-fn.c (vec_set_direct): New define. + (expand_vec_set_optab_fn): New function. + (direct_vec_set_optab_supported_p): New define. + * internal-fn.def (VEC_SET): New DEF_INTERNAL_OPTAB_FN. + * optabs.c (can_vec_set_var_idx_p): New function. + * optabs.h (can_vec_set_var_idx_p): New declaration. + 2020-09-26 Jan Hubicka * ipa-modref.c (analyze_stmt): Do not skip clobbers in early pass. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 51daa72..0c0687f 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200927 +20200928 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 19f2e6a..e3bb9fb 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,20 @@ +2020-09-27 Steven G. Kargl + Mark Eggleston + + PR fortran/95614 + * decl.c (gfc_get_common): Use gfc_match_common_name instead + of match_common_name. + * decl.c (gfc_bind_idents): Use gfc_match_common_name instead + of match_common_name. + * match.c : Rename match_common_name to gfc_match_common_name. 
+ * match.c (gfc_match_common): Use gfc_match_common_name instead + of match_common_name. + * match.h : Rename match_common_name to gfc_match_common_name. + * resolve.c (resolve_common_vars): Check each symbol in a + common block has a global symbol. If there is a global symbol + issue an error if the symbol type is known as is not a common + block name. + 2020-09-24 Paul Thomas PR fortran/96495 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 1e67696..8c3db46 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,15 @@ +2020-09-27 Jakub Jelinek + + PR middle-end/97073 + * gcc.c-torture/execute/pr97073.c: New test. + +2020-09-27 Steven G. Kargl + Mark Eggleston + + PR fortran/95614 + * gfortran.dg/pr95614_1.f90: New test. + * gfortran.dg/pr95614_2.f90: New test. + 2020-09-26 Paul Thomas PR fortran/96495 -- cgit v1.1 From dabef758bfe23d847cb1e2de2d0c0a67a53dc56a Mon Sep 17 00:00:00 2001 From: Eric Botcazou Date: Mon, 28 Sep 2020 08:39:25 +0200 Subject: Revert recent changes to lower_try_finally_dup_block This reverts the recent changes made to lower_try_finally_dup_block and aimed at tweaking the source location info for __builtin_stack_restore. gcc/ChangeLog: * tree-eh.c (lower_try_finally_dup_block): Revert latest change. 
--- gcc/tree-eh.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-eh.c b/gcc/tree-eh.c index 1376b82..c3314bb 100644 --- a/gcc/tree-eh.c +++ b/gcc/tree-eh.c @@ -899,26 +899,18 @@ lower_try_finally_dup_block (gimple_seq seq, struct leh_state *outer_state, gtry *region = NULL; gimple_seq new_seq; gimple_stmt_iterator gsi; - location_t last_loc = UNKNOWN_LOCATION; new_seq = copy_gimple_seq_and_replace_locals (seq); - for (gsi = gsi_last (new_seq); !gsi_end_p (gsi); gsi_prev (&gsi)) + for (gsi = gsi_start (new_seq); !gsi_end_p (gsi); gsi_next (&gsi)) { gimple *stmt = gsi_stmt (gsi); - /* We duplicate __builtin_stack_restore at -O0 in the hope of eliminating - it on the EH paths. When it is not eliminated, give it the next - location in the sequence or make it transparent in the debug info. */ - if (gimple_call_builtin_p (stmt, BUILT_IN_STACK_RESTORE)) - gimple_set_location (stmt, last_loc); - else if (LOCATION_LOCUS (gimple_location (stmt)) == UNKNOWN_LOCATION) + if (LOCATION_LOCUS (gimple_location (stmt)) == UNKNOWN_LOCATION) { tree block = gimple_block (stmt); gimple_set_location (stmt, loc); gimple_set_block (stmt, block); } - else - last_loc = gimple_location (stmt); } if (outer_state->tf) -- cgit v1.1 From 88795e14ae7143bd8039af68d2d876ae34568799 Mon Sep 17 00:00:00 2001 From: Eric Botcazou Date: Mon, 28 Sep 2020 09:00:46 +0200 Subject: Fix bogus alignment warning on address clause The compiler gives a bogus alignment warning on an address clause and a discriminated record type with variable size. gcc/ada/ChangeLog: * gcc-interface/decl.c (maybe_saturate_size): Add ALIGN parameter and round down the result to ALIGN. (gnat_to_gnu_entity): Adjust calls to maybe_saturate_size. gcc/testsuite/ChangeLog: * gnat.dg/addr16.adb: New test. * gnat.dg/addr16_pkg.ads: New helper. 
--- gcc/ada/gcc-interface/decl.c | 28 ++++++++++++++++++++-------- gcc/testsuite/gnat.dg/addr16.adb | 14 ++++++++++++++ gcc/testsuite/gnat.dg/addr16_pkg.ads | 9 +++++++++ 3 files changed, 43 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gnat.dg/addr16.adb create mode 100644 gcc/testsuite/gnat.dg/addr16_pkg.ads (limited to 'gcc') diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c index c9c2a95..cd0a50b 100644 --- a/gcc/ada/gcc-interface/decl.c +++ b/gcc/ada/gcc-interface/decl.c @@ -232,7 +232,7 @@ static tree build_position_list (tree, bool, tree, tree, unsigned int, tree); static vec build_subst_list (Entity_Id, Entity_Id, bool); static vec build_variant_list (tree, Node_Id, vec, vec); -static tree maybe_saturate_size (tree); +static tree maybe_saturate_size (tree, unsigned int align); static tree validate_size (Uint, tree, Entity_Id, enum tree_code, bool, bool, const char *, const char *); static void set_rm_size (Uint, tree, Entity_Id); @@ -4425,7 +4425,12 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) /* If the size is self-referential, annotate the maximum value after saturating it, if need be, to avoid a No_Uint value. 
*/ if (CONTAINS_PLACEHOLDER_P (gnu_size)) - gnu_size = maybe_saturate_size (max_size (gnu_size, true)); + { + const unsigned int align + = UI_To_Int (Alignment (gnat_entity)) * BITS_PER_UNIT; + gnu_size + = maybe_saturate_size (max_size (gnu_size, true), align); + } /* If we are just annotating types and the type is tagged, the tag and the parent components are not generated by the front-end so @@ -4461,7 +4466,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition) gnu_size = size_binop (PLUS_EXPR, gnu_size, offset); } - gnu_size = maybe_saturate_size (round_up (gnu_size, align)); + gnu_size + = maybe_saturate_size (round_up (gnu_size, align), align); Set_Esize (gnat_entity, annotate_value (gnu_size)); /* Tagged types are Strict_Alignment so RM_Size = Esize. */ @@ -8946,15 +8952,21 @@ build_variant_list (tree gnu_qual_union_type, Node_Id gnat_variant_part, } /* If SIZE has overflowed, return the maximum valid size, which is the upper - bound of the signed sizetype in bits; otherwise return SIZE unmodified. */ + bound of the signed sizetype in bits, rounded down to ALIGN. Otherwise + return SIZE unmodified. 
*/ static tree -maybe_saturate_size (tree size) +maybe_saturate_size (tree size, unsigned int align) { if (TREE_CODE (size) == INTEGER_CST && TREE_OVERFLOW (size)) - size = size_binop (MULT_EXPR, - fold_convert (bitsizetype, TYPE_MAX_VALUE (ssizetype)), - build_int_cst (bitsizetype, BITS_PER_UNIT)); + { + size + = size_binop (MULT_EXPR, + fold_convert (bitsizetype, TYPE_MAX_VALUE (ssizetype)), + build_int_cst (bitsizetype, BITS_PER_UNIT)); + size = round_down (size, align); + } + return size; } diff --git a/gcc/testsuite/gnat.dg/addr16.adb b/gcc/testsuite/gnat.dg/addr16.adb new file mode 100644 index 0000000..8f09da0 --- /dev/null +++ b/gcc/testsuite/gnat.dg/addr16.adb @@ -0,0 +1,14 @@ +-- { dg-do compile } + +with Addr16_Pkg; use Addr16_Pkg; + +procedure Addr16 (R : Rec) is + + pragma Unsuppress (Alignment_Check); + + B : Integer; + for B'Address use R.A'Address; + +begin + null; +end; diff --git a/gcc/testsuite/gnat.dg/addr16_pkg.ads b/gcc/testsuite/gnat.dg/addr16_pkg.ads new file mode 100644 index 0000000..9a1b9e3 --- /dev/null +++ b/gcc/testsuite/gnat.dg/addr16_pkg.ads @@ -0,0 +1,9 @@ +package Addr16_Pkg is + + type Arr is array (Positive range <>) of Long_Long_Integer; + + type Rec (D : Positive) is record + A : Arr (1 .. D); + end record; + +end Addr16_Pkg; -- cgit v1.1 From cc997e0cc4fcc9ae8e389757d94a0ae69d34cd50 Mon Sep 17 00:00:00 2001 From: Eric Botcazou Date: Mon, 28 Sep 2020 09:09:41 +0200 Subject: Add missing end location information In some cases we would fail to put the end location information on the outermost BIND_EXPR of a function, which is problematic when there is a dynamic stack allocation. gcc/ada/ChangeLog: * gcc-interface/trans.c (Subprogram_Body_to_gnu): Set the end locus of body and declaration earlier. 
--- gcc/ada/gcc-interface/trans.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c index 3491451..f03d591 100644 --- a/gcc/ada/gcc-interface/trans.c +++ b/gcc/ada/gcc-interface/trans.c @@ -4017,6 +4017,11 @@ Subprogram_Body_to_gnu (Node_Id gnat_node) gnat_poplevel (); gnu_result = end_stmt_group (); + /* Attempt setting the end_locus of our GCC body tree, typically a BIND_EXPR, + then the end_locus of our GCC subprogram declaration tree. */ + set_end_locus_from_node (gnu_result, gnat_node); + set_end_locus_from_node (gnu_subprog_decl, gnat_node); + /* If we populated the parameter attributes cache, we need to make sure that the cached expressions are evaluated on all the possible paths leading to their uses. So we force their evaluation on entry of the function. */ @@ -4111,12 +4116,6 @@ Subprogram_Body_to_gnu (Node_Id gnat_node) gnu_return_label_stack->pop (); - /* Attempt setting the end_locus of our GCC body tree, typically a - BIND_EXPR or STATEMENT_LIST, then the end_locus of our GCC subprogram - declaration tree. */ - set_end_locus_from_node (gnu_result, gnat_node); - set_end_locus_from_node (gnu_subprog_decl, gnat_node); - /* On SEH targets, install an exception handler around the main entry point to catch unhandled exceptions. */ if (DECL_NAME (gnu_subprog_decl) == main_identifier_node -- cgit v1.1 From 53673d763f9e0c655d4239042bb69993a8d950b5 Mon Sep 17 00:00:00 2001 From: Mark Eggleston Date: Mon, 28 Sep 2020 11:01:40 +0100 Subject: Revert "Fortran : ICE in build_field PR95614" This reverts commit e5a76af3a2f3324efc60b4b2778ffb29d5c377bc. 
--- gcc/fortran/decl.c | 4 ++-- gcc/fortran/match.c | 5 ++--- gcc/fortran/match.h | 6 ++++-- gcc/fortran/resolve.c | 7 ------- gcc/testsuite/gfortran.dg/pr95614_1.f90 | 6 ------ gcc/testsuite/gfortran.dg/pr95614_2.f90 | 6 ------ 6 files changed, 8 insertions(+), 26 deletions(-) delete mode 100644 gcc/testsuite/gfortran.dg/pr95614_1.f90 delete mode 100644 gcc/testsuite/gfortran.dg/pr95614_2.f90 (limited to 'gcc') diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c index 9bfaa60..326e6f5 100644 --- a/gcc/fortran/decl.c +++ b/gcc/fortran/decl.c @@ -6007,7 +6007,7 @@ get_bind_c_idents (void) found_id = MATCH_YES; gfc_get_ha_symbol (name, &tmp_sym); } - else if (gfc_match_common_name (name) == MATCH_YES) + else if (match_common_name (name) == MATCH_YES) { found_id = MATCH_YES; com_block = gfc_get_common (name, 0); @@ -6052,7 +6052,7 @@ get_bind_c_idents (void) found_id = MATCH_YES; gfc_get_ha_symbol (name, &tmp_sym); } - else if (gfc_match_common_name (name) == MATCH_YES) + else if (match_common_name (name) == MATCH_YES) { found_id = MATCH_YES; com_block = gfc_get_common (name, 0); diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c index bee73e7..cb09c5f 100644 --- a/gcc/fortran/match.c +++ b/gcc/fortran/match.c @@ -5166,8 +5166,7 @@ gfc_get_common (const char *name, int from_module) /* Match a common block name. */ -match -gfc_match_common_name (char *name) +match match_common_name (char *name) { match m; @@ -5219,7 +5218,7 @@ gfc_match_common (void) for (;;) { - m = gfc_match_common_name (name); + m = match_common_name (name); if (m == MATCH_ERROR) goto cleanup; diff --git a/gcc/fortran/match.h b/gcc/fortran/match.h index 4ccb596..7bf70d7 100644 --- a/gcc/fortran/match.h +++ b/gcc/fortran/match.h @@ -103,9 +103,11 @@ match gfc_match_call (void); /* We want to use this function to check for a common-block-name that can exist in a bind statement, so removed the "static" - declaration of the function in match.c. */ + declaration of the function in match.c. 
-match gfc_match_common_name (char *name); + TODO: should probably rename this now that it'll be globally seen to + gfc_match_common_name. */ +match match_common_name (char *name); match gfc_match_common (void); match gfc_match_block_data (void); diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c index 3c767a7..f4ce49f 100644 --- a/gcc/fortran/resolve.c +++ b/gcc/fortran/resolve.c @@ -936,16 +936,9 @@ static void resolve_common_vars (gfc_common_head *common_block, bool named_common) { gfc_symbol *csym = common_block->head; - gfc_gsymbol *gsym; for (; csym; csym = csym->common_next) { - gsym = gfc_find_gsymbol (gfc_gsym_root, csym->name); - if (gsym && gsym->type != GSYM_UNKNOWN && gsym->type != GSYM_COMMON) - gfc_error_now ("Global entity %qs at %L cannot appear in a " - "COMMON block at %L", gsym->name, - &gsym->where, &csym->common_block->where); - /* gfc_add_in_common may have been called before, but the reported errors have been ignored to continue parsing. We do the checks again here. */ diff --git a/gcc/testsuite/gfortran.dg/pr95614_1.f90 b/gcc/testsuite/gfortran.dg/pr95614_1.f90 deleted file mode 100644 index f835143..0000000 --- a/gcc/testsuite/gfortran.dg/pr95614_1.f90 +++ /dev/null @@ -1,6 +0,0 @@ -! { dg-do compile } - -module m ! { dg-error ".1." } - common m ! { dg-error "cannot appear in a COMMON" } -end - diff --git a/gcc/testsuite/gfortran.dg/pr95614_2.f90 b/gcc/testsuite/gfortran.dg/pr95614_2.f90 deleted file mode 100644 index 9d69a50..0000000 --- a/gcc/testsuite/gfortran.dg/pr95614_2.f90 +++ /dev/null @@ -1,6 +0,0 @@ -! { dg-do compile } - -module m ! { dg-error ".1." } - common /xc/ m ! 
{ dg-error "cannot appear in a COMMON" } -end - -- cgit v1.1 From 92f0d3d03a78a8aabe62e4c1e1b300b01516732f Mon Sep 17 00:00:00 2001 From: Andrea Corallo Date: Mon, 21 Sep 2020 13:52:45 +0100 Subject: aarch64: Do not alter force_reg returned rtx expanding pauth builtins 2020-09-21 Andrea Corallo * config/aarch64/aarch64-builtins.c (aarch64_general_expand_builtin): Do not alter value on a force_reg returned rtx. --- gcc/config/aarch64/aarch64-builtins.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 1cfb5c0..732a4dc 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -2091,20 +2091,13 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, arg0 = CALL_EXPR_ARG (exp, 0); op0 = force_reg (Pmode, expand_normal (arg0)); - if (!target) - target = gen_reg_rtx (Pmode); - else - target = force_reg (Pmode, target); - - emit_move_insn (target, op0); - if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI) { rtx lr = gen_rtx_REG (Pmode, R30_REGNUM); icode = CODE_FOR_xpaclri; emit_move_insn (lr, op0); emit_insn (GEN_FCN (icode) ()); - emit_move_insn (target, lr); + return lr; } else { @@ -2134,11 +2127,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, emit_move_insn (x17_reg, op0); emit_move_insn (x16_reg, op1); emit_insn (GEN_FCN (icode) ()); - emit_move_insn (target, x17_reg); + return x17_reg; } - return target; - case AARCH64_JSCVT: { expand_operand ops[2]; -- cgit v1.1 From 5c609842d13a4c9c6be1a10f7980a74d27daeb85 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Thu, 25 Jul 2019 16:49:36 +0800 Subject: Enable GCC support for AMX-TILE,AMX-INT8,AMX-BF16. 
AMX-TILE:ldtilecfg/sttilecfg/tileloadd/tileloaddt1/tilezero/tilerelease AMX-INT8:tdpbssd/tdpbsud/tdpbusd/tdpbuud AMX-BF16:tdpbf16ps gcc/ChangeLog * common/config/i386/i386-common.c (OPTION_MASK_ISA2_AMX_TILE_SET, OPTION_MASK_ISA2_AMX_INT8_SET, OPTION_MASK_ISA2_AMX_BF16_SET, OPTION_MASK_ISA2_AMX_TILE_UNSET, OPTION_MASK_ISA2_AMX_INT8_UNSET, OPTION_MASK_ISA2_AMX_BF16_UNSET, OPTION_MASK_ISA2_XSAVE_UNSET): New macros. (ix86_handle_option): Handle -mamx-tile, -mamx-int8, -mamx-bf16. * common/config/i386/i386-cpuinfo.h (processor_features): Add FEATURE_AMX_TILE, FEATURE_AMX_INT8, FEATURE_AMX_BF16. * common/config/i386/cpuinfo.h (XSTATE_TILECFG, XSTATE_TILEDATA, XCR_AMX_ENABLED_MASK): New macros. (get_available_features): Enable AMX features only if their states are supported by OSXSAVE. * common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for amx-tile, amx-int8, amx-bf16. * config.gcc: Add amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h to extra headers. * config/i386/amxbf16intrin.h: New file. * config/i386/amxint8intrin.h: Ditto. * config/i386/amxtileintrin.h: Ditto. * config/i386/cpuid.h (bit_AMX_BF16, bit_AMX_TILE, bit_AMX_INT8): New macros. * config/i386/i386-c.c (ix86_target_macros_internal): Define __AMX_TILE__, __AMX_INT8__, __AMX_BF16__. * config/i386/i386-options.c (ix86_target_string): Add -mamx-tile, -mamx-int8, -mamx-bf16. (ix86_option_override_internal): Handle AMX-TILE, AMX-INT8, AMX-BF16. * config/i386/i386.h (TARGET_AMX_TILE, TARGET_AMX_TILE_P, TARGET_AMX_INT8, TARGET_AMX_INT8_P, TARGET_AMX_BF16_P, PTA_AMX_TILE, PTA_AMX_INT8, PTA_AMX_BF16): New macros. * config/i386/i386.opt: Add -mamx-tile, -mamx-int8, -mamx-bf16. * config/i386/immintrin.h: Include amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h. * doc/invoke.texi: Document -mamx-tile, -mamx-int8, -mamx-bf16. * doc/extend.texi: Document amx-tile, amx-int8, amx-bf16. * doc/sourcebuild.texi (Effective-Target Keywords, Other hardware attributes): Document amx_int8, amx_tile, amx_bf16. 
gcc/testsuite/ChangeLog * lib/target-supports.exp (check_effective_target_amx_tile, check_effective_target_amx_int8, check_effective_target_amx_bf16): New proc. * g++.dg/other/i386-2.C: Add -mamx-tile, -mamx-int8, -mamx-bf16. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/sse-12.c: Ditto. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * gcc.target/i386/amx-check.h: New header file. * gcc.target/i386/amxbf16-asmatt-1.c: New test. * gcc.target/i386/amxint8-asmatt-1.c: New test. * gcc.target/i386/amxtile-asmatt-1.c: Ditto. * gcc.target/i386/amxbf16-asmintel-1.c: Ditto. * gcc.target/i386/amxint8-asmintel-1.c: Ditto. * gcc.target/i386/amxtile-asmintel-1.c: Ditto. * gcc.target/i386/amxbf16-dpbf16ps-2.c: Ditto. * gcc.target/i386/amxint8-dpbssd-2.c: Ditto. * gcc.target/i386/amxint8-dpbsud-2.c: Ditto. * gcc.target/i386/amxint8-dpbusd-2.c: Ditto. * gcc.target/i386/amxint8-dpbuud-2.c: Ditto. * gcc.target/i386/amxtile-2.c: Ditto. 
--- gcc/common/config/i386/cpuinfo.h | 16 ++ gcc/common/config/i386/i386-common.c | 50 ++++++ gcc/common/config/i386/i386-cpuinfo.h | 3 + gcc/common/config/i386/i386-isas.h | 3 + gcc/config.gcc | 6 +- gcc/config/i386/amxbf16intrin.h | 29 ++++ gcc/config/i386/amxint8intrin.h | 38 +++++ gcc/config/i386/amxtileintrin.h | 75 +++++++++ gcc/config/i386/cpuid.h | 3 + gcc/config/i386/i386-c.c | 7 + gcc/config/i386/i386-options.c | 20 ++- gcc/config/i386/i386.h | 12 +- gcc/config/i386/i386.opt | 14 +- gcc/config/i386/immintrin.h | 6 + gcc/doc/extend.texi | 15 ++ gcc/doc/invoke.texi | 10 ++ gcc/doc/sourcebuild.texi | 9 + gcc/testsuite/g++.dg/other/i386-2.C | 3 +- gcc/testsuite/g++.dg/other/i386-3.C | 3 +- gcc/testsuite/gcc.target/i386/amx-check.h | 185 +++++++++++++++++++++ gcc/testsuite/gcc.target/i386/amxbf16-asmatt-1.c | 13 ++ gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c | 9 + gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c | 83 +++++++++ gcc/testsuite/gcc.target/i386/amxint8-asmatt-1.c | 19 +++ gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c | 15 ++ gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c | 62 +++++++ gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c | 61 +++++++ gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c | 61 +++++++ gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c | 61 +++++++ gcc/testsuite/gcc.target/i386/amxtile-2.c | 47 ++++++ gcc/testsuite/gcc.target/i386/amxtile-asmatt-1.c | 30 ++++ gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c | 24 +++ gcc/testsuite/gcc.target/i386/funcspec-56.inc | 6 + gcc/testsuite/gcc.target/i386/sse-12.c | 2 +- gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- gcc/testsuite/gcc.target/i386/sse-22.c | 5 +- gcc/testsuite/gcc.target/i386/sse-23.c | 3 +- gcc/testsuite/lib/target-supports.exp | 33 ++++ 39 files changed, 1032 insertions(+), 13 deletions(-) create mode 100644 gcc/config/i386/amxbf16intrin.h create mode 100644 gcc/config/i386/amxint8intrin.h create mode 100644 
gcc/config/i386/amxtileintrin.h create mode 100644 gcc/testsuite/gcc.target/i386/amx-check.h create mode 100644 gcc/testsuite/gcc.target/i386/amxbf16-asmatt-1.c create mode 100644 gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c create mode 100644 gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c create mode 100644 gcc/testsuite/gcc.target/i386/amxint8-asmatt-1.c create mode 100644 gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c create mode 100644 gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c create mode 100644 gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-2.c create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-asmatt-1.c create mode 100644 gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c (limited to 'gcc') diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index 12237e2..c96455c 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -509,15 +509,20 @@ get_available_features (struct __processor_model *cpu_model, #define XSTATE_OPMASK 0x20 #define XSTATE_ZMM 0x40 #define XSTATE_HI_ZMM 0x80 +#define XSTATE_TILECFG 0x20000 +#define XSTATE_TILEDATA 0x40000 #define XCR_AVX_ENABLED_MASK \ (XSTATE_SSE | XSTATE_YMM) #define XCR_AVX512F_ENABLED_MASK \ (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) +#define XCR_AMX_ENABLED_MASK \ + (XSTATE_TILECFG | XSTATE_TILEDATA) /* Check if AVX and AVX512 are usable. 
*/ int avx_usable = 0; int avx512_usable = 0; + int amx_usable = 0; if ((ecx & bit_OSXSAVE)) { /* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and @@ -533,6 +538,8 @@ get_available_features (struct __processor_model *cpu_model, avx512_usable = ((xcrlow & XCR_AVX512F_ENABLED_MASK) == XCR_AVX512F_ENABLED_MASK); } + amx_usable = ((xcrlow & XCR_AMX_ENABLED_MASK) + == XCR_AMX_ENABLED_MASK); } #define set_feature(f) \ @@ -651,6 +658,15 @@ get_available_features (struct __processor_model *cpu_model, set_feature (FEATURE_PCONFIG); if (edx & bit_IBT) set_feature (FEATURE_IBT); + if (amx_usable) + { + if (edx & bit_AMX_TILE) + set_feature (FEATURE_AMX_TILE); + if (edx & bit_AMX_INT8) + set_feature (FEATURE_AMX_INT8); + if (edx & bit_AMX_BF16) + set_feature (FEATURE_AMX_BF16); + } if (avx512_usable) { if (ebx & bit_AVX512F) diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 6e34095..1014214 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -101,6 +101,9 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_XSAVE_SET) #define OPTION_MASK_ISA_CLWB_SET OPTION_MASK_ISA_CLWB #define OPTION_MASK_ISA2_AVX512VP2INTERSECT_SET OPTION_MASK_ISA2_AVX512VP2INTERSECT +#define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE +#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8 +#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2. */ @@ -194,6 +197,7 @@ along with GCC; see the file COPYING3. 
If not see (OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_XSAVEOPT_UNSET \ | OPTION_MASK_ISA_XSAVES_UNSET | OPTION_MASK_ISA_XSAVEC_UNSET \ | OPTION_MASK_ISA_AVX_UNSET) +#define OPTION_MASK_ISA2_XSAVE_UNSET OPTION_MASK_ISA2_AMX_TILE_UNSET #define OPTION_MASK_ISA_XSAVEOPT_UNSET OPTION_MASK_ISA_XSAVEOPT #define OPTION_MASK_ISA_AVX2_UNSET \ (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET) @@ -247,6 +251,9 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE #define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET OPTION_MASK_ISA2_AVX512VP2INTERSECT #define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK +#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE +#define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8 +#define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same as -mno-sse4.1. */ @@ -931,6 +938,47 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mamx_tile: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_TILE_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_TILE_SET; + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XSAVE_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_TILE_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_TILE_UNSET; + } + return true; + + case OPT_mamx_int8: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_INT8_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_INT8_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_INT8_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_INT8_UNSET; + } + return true; + + case OPT_mamx_bf16: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_BF16_SET; + opts->x_ix86_isa_flags2_explicit |= 
OPTION_MASK_ISA2_AMX_BF16_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AMX_BF16_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AMX_BF16_UNSET; + } + return true; + case OPT_mfma: if (value) { @@ -1265,6 +1313,8 @@ ix86_handle_option (struct gcc_options *opts, { opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_XSAVE_UNSET; opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_XSAVE_UNSET; + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_XSAVE_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_XSAVE_UNSET; } return true; diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index 84ca97e..5b94b1f 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -216,6 +216,9 @@ enum processor_features FEATURE_XSAVEC, FEATURE_XSAVEOPT, FEATURE_XSAVES, + FEATURE_AMX_TILE, + FEATURE_AMX_INT8, + FEATURE_AMX_BF16, CPU_FEATURE_MAX }; diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h index 08c9dbe..3c830ea 100644 --- a/gcc/common/config/i386/i386-isas.h +++ b/gcc/common/config/i386/i386-isas.h @@ -160,4 +160,7 @@ ISA_NAMES_TABLE_START ISA_NAMES_TABLE_ENTRY("xsaveopt", FEATURE_XSAVEOPT, P_NONE, "-mxsaveopt") ISA_NAMES_TABLE_ENTRY("xsaves", FEATURE_XSAVES, P_NONE, "-mxsaves") + ISA_NAMES_TABLE_ENTRY("amx-tile", FEATURE_AMX_TILE, P_NONE, "-mamx-tile") + ISA_NAMES_TABLE_ENTRY("amx-int8", FEATURE_AMX_INT8, P_NONE, "-mamx-int8") + ISA_NAMES_TABLE_ENTRY("amx-bf16", FEATURE_AMX_BF16, P_NONE, "-mamx-bf16") ISA_NAMES_TABLE_END diff --git a/gcc/config.gcc b/gcc/config.gcc index 845f10e..2d0cfde 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -412,7 +412,8 @@ i[34567]86-*-*) waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h avx512bf16intrin.h enqcmdintrin.h serializeintrin.h avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h - tsxldtrkintrin.h" + tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h + amxbf16intrin.h" ;; x86_64-*-*) 
cpu_type=i386 @@ -447,7 +448,8 @@ x86_64-*-*) waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h avx512bf16intrin.h enqcmdintrin.h serializeintrin.h avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h - tsxldtrkintrin.h" + tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h + amxbf16intrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/amxbf16intrin.h b/gcc/config/i386/amxbf16intrin.h new file mode 100644 index 0000000..b162096 --- /dev/null +++ b/gcc/config/i386/amxbf16intrin.h @@ -0,0 +1,29 @@ +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." +#endif + +#ifndef _AMXBF16INTRIN_H_INCLUDED +#define _AMXBF16INTRIN_H_INCLUDED + +#if !defined(__AMX_BF16__) +#pragma GCC push_options +#pragma GCC target("amx-bf16") +#define __DISABLE_AMX_BF16__ +#endif /* __AMX_BF16__ */ + +#if defined(__x86_64__) && defined(__AMX_BF16__) +#define _tile_dpbf16ps_internal(dst,src1,src2) \ + __asm__ volatile\ + ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) + +#define _tile_dpbf16ps(dst,src1,src2) \ + _tile_dpbf16ps_internal (dst, src1, src2) + +#endif + +#ifdef __DISABLE_AMX_BF16__ +#undef __DISABLE_AMX_BF16__ +#pragma GCC pop_options +#endif /* __DISABLE_AMX_BF16__ */ + +#endif /* _AMXBF16INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/amxint8intrin.h b/gcc/config/i386/amxint8intrin.h new file mode 100644 index 0000000..11adc1f --- /dev/null +++ b/gcc/config/i386/amxint8intrin.h @@ -0,0 +1,38 @@ +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." 
+#endif + +#ifndef _AMXINT8INTRIN_H_INCLUDED +#define _AMXINT8INTRIN_H_INCLUDED + +#if !defined(__AMX_INT8__) +#pragma GCC push_options +#pragma GCC target("amx-int8") +#define __DISABLE_AMX_INT8__ +#endif /* __AMX_INT8__ */ + +#if defined(__x86_64__) && defined(__AMX_INT8__) +#define _tile_int8_dp_internal(name,dst,src1,src2) \ + __asm__ volatile \ + ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) + +#define _tile_dpbssd(dst,src1,src2) \ + _tile_int8_dp_internal (tdpbssd, dst, src1, src2) + +#define _tile_dpbsud(dst,src1,src2) \ + _tile_int8_dp_internal (tdpbsud, dst, src1, src2) + +#define _tile_dpbusd(dst,src1,src2) \ + _tile_int8_dp_internal (tdpbusd, dst, src1, src2) + +#define _tile_dpbuud(dst,src1,src2) \ + _tile_int8_dp_internal (tdpbuud, dst, src1, src2) + +#endif + +#ifdef __DISABLE_AMX_INT8__ +#undef __DISABLE_AMX_INT8__ +#pragma GCC pop_options +#endif /* __DISABLE_AMX_INT8__ */ + +#endif /* _AMXINT8INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h new file mode 100644 index 0000000..e78e5c0 --- /dev/null +++ b/gcc/config/i386/amxtileintrin.h @@ -0,0 +1,75 @@ +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." 
+#endif + +#ifndef _AMXTILEINTRIN_H_INCLUDED +#define _AMXTILEINTRIN_H_INCLUDED + +#if !defined(__AMX_TILE__) +#pragma GCC push_options +#pragma GCC target("amx-tile") +#define __DISABLE_AMX_TILE__ +#endif /* __AMX_TILE__ */ + +#if defined(__x86_64__) && defined(__AMX_TILE__) +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tile_loadconfig (const void *__config) +{ + __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config))); +} + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tile_storeconfig (void *__config) +{ + __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config))); +} + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tile_release (void) +{ + __asm__ volatile ("tilerelease" ::); +} + +#define _tile_loadd(dst,base,stride) \ + _tile_loadd_internal (dst, base, stride) + +#define _tile_loadd_internal(dst,base,stride) \ + __asm__ volatile \ + ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" \ + :: "r" ((const void*) base), "r" ((long) stride)) + +#define _tile_stream_loadd(dst,base,stride) \ + _tile_stream_loadd_internal (dst, base, stride) + +#define _tile_stream_loadd_internal(dst,base,stride) \ + __asm__ volatile \ + ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" \ + :: "r" ((const void*) base), "r" ((long) stride)) + +#define _tile_stored(dst,base,stride) \ + _tile_stored_internal (dst, base, stride) + +#define _tile_stored_internal(src,base,stride) \ + __asm__ volatile \ + ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" \ + :: "r" ((void*) base), "r" ((long) stride) \ + : "memory") + +#define _tile_zero(dst) \ + _tile_zero_internal (dst) + +#define _tile_zero_internal(dst) \ + __asm__ volatile \ + ("tilezero\t%%tmm"#dst ::) + +#endif + +#ifdef __DISABLE_AMX_TILE__ +#undef __DISABLE_AMX_TILE__ +#pragma GCC pop_options +#endif /* 
__DISABLE_AMX_TILE__ */ + +#endif /* _AMXTILEINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index bca61d62..4598434 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -127,6 +127,9 @@ #define bit_PCONFIG (1 << 18) #define bit_SERIALIZE (1 << 14) #define bit_TSXLDTRK (1 << 16) +#define bit_AMX_BF16 (1 << 22) +#define bit_AMX_TILE (1 << 24) +#define bit_AMX_INT8 (1 << 25) /* XFEATURE_ENABLED_MASK register bits (%eax == 0xd, %ecx == 0) */ #define bit_BNDREGS (1 << 3) diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 3553a37..9da682a 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -588,6 +588,13 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__ENQCMD__"); if (isa_flag2 & OPTION_MASK_ISA2_TSXLDTRK) def_or_undef (parse_in, "__TSXLDTRK__"); + if (isa_flag2 & OPTION_MASK_ISA2_AMX_TILE) + def_or_undef (parse_in, "__AMX_TILE__"); + if (isa_flag2 & OPTION_MASK_ISA2_AMX_INT8) + def_or_undef (parse_in, "__AMX_INT8__"); + if (isa_flag2 & OPTION_MASK_ISA2_AMX_BF16) + def_or_undef (parse_in, "__AMX_BF16__"); + if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index 2fabd20..597de53 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -209,7 +209,10 @@ static struct ix86_target_opts isa2_opts[] = { "-mavx512bf16", OPTION_MASK_ISA2_AVX512BF16 }, { "-menqcmd", OPTION_MASK_ISA2_ENQCMD }, { "-mserialize", OPTION_MASK_ISA2_SERIALIZE }, - { "-mtsxldtrk", OPTION_MASK_ISA2_TSXLDTRK } + { "-mtsxldtrk", OPTION_MASK_ISA2_TSXLDTRK }, + { "-mamx-tile", OPTION_MASK_ISA2_AMX_TILE }, + { "-mamx-int8", OPTION_MASK_ISA2_AMX_INT8 }, + { "-mamx-bf16", OPTION_MASK_ISA2_AMX_BF16 } }; static struct ix86_target_opts isa_opts[] = { @@ -1033,6 +1036,9 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("enqcmd", 
OPT_menqcmd), IX86_ATTR_ISA ("serialize", OPT_mserialize), IX86_ATTR_ISA ("tsxldtrk", OPT_mtsxldtrk), + IX86_ATTR_ISA ("amx-tile", OPT_mamx_tile), + IX86_ATTR_ISA ("amx-int8", OPT_mamx_int8), + IX86_ATTR_ISA ("amx-bf16", OPT_mamx_bf16), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), @@ -2258,6 +2264,18 @@ ix86_option_override_internal (bool main_args_p, && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_AVX512BF16)) opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX512BF16; + if (((processor_alias_table[i].flags & PTA_AMX_TILE) != 0) + && !(opts->x_ix86_isa_flags2_explicit + & OPTION_MASK_ISA2_AMX_TILE)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_TILE; + if (((processor_alias_table[i].flags & PTA_AMX_INT8) != 0) + && !(opts->x_ix86_isa_flags2_explicit + & OPTION_MASK_ISA2_AMX_INT8)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_INT8; + if (((processor_alias_table[i].flags & PTA_AMX_BF16) != 0) + && !(opts->x_ix86_isa_flags2_explicit + & OPTION_MASK_ISA2_AMX_BF16)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_BF16; if (((processor_alias_table[i].flags & PTA_MOVDIRI) != 0) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVDIRI)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVDIRI; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 92b7475..a449653 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -203,6 +203,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #define TARGET_SERIALIZE_P(x) TARGET_ISA2_SERIALIZE_P(x) #define TARGET_TSXLDTRK TARGET_ISA2_TSXLDTRK #define TARGET_TSXLDTRK_P(x) TARGET_ISA2_TSXLDTRK_P(x) +#define TARGET_AMX_TILE TARGET_ISA2_AMX_TILE +#define TARGET_AMX_TILE_P(x) TARGET_ISA2_AMX_TILE(x) +#define TARGET_AMX_INT8 TARGET_ISA2_AMX_INT8 +#define TARGET_AMX_INT8_P(x) TARGET_ISA2_AMX_INT8(x) +#define TARGET_AMX_BF16 TARGET_ISA2_AMX_BF16 +#define TARGET_AMX_BF16_P(x) TARGET_ISA2_AMX_BF16(x) #define TARGET_LP64 TARGET_ABI_64 #define TARGET_LP64_P(x) TARGET_ABI_64_P(x) @@ -2466,6 +2472,9 @@ const wide_int_bitmask PTA_ENQCMD (0, HOST_WIDE_INT_1U << 15); const wide_int_bitmask PTA_CLDEMOTE (0, HOST_WIDE_INT_1U << 16); const wide_int_bitmask PTA_SERIALIZE (0, HOST_WIDE_INT_1U << 17); const wide_int_bitmask PTA_TSXLDTRK (0, HOST_WIDE_INT_1U << 18); +const wide_int_bitmask PTA_AMX_TILE(0, HOST_WIDE_INT_1U << 19); +const wide_int_bitmask PTA_AMX_INT8(0, HOST_WIDE_INT_1U << 20); +const wide_int_bitmask PTA_AMX_BF16(0, HOST_WIDE_INT_1U << 21); const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR; @@ -2499,7 +2508,8 @@ const wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT; const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE - | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK; + | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE + | PTA_AMX_INT8 | PTA_AMX_BF16; const wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE; const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index c9f7195..9389dc2 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1114,4 +1114,16 @@ Support SERIALIZE built-in 
functions and code generation. mtsxldtrk Target Report Mask(ISA2_TSXLDTRK) Var(ix86_isa_flags2) Save -Support TSXLDTRK built-in functions and code generation. \ No newline at end of file +Support TSXLDTRK built-in functions and code generation. + +mamx-tile +Target Report Mask(ISA2_AMX_TILE) Var(ix86_isa_flags2) Save +Support AMX-TILE built-in functions and code generation. + +mamx-int8 +Target Report Mask(ISA2_AMX_INT8) Var(ix86_isa_flags2) Save +Support AMX-INT8 built-in functions and code generation. + +mamx-bf16 +Target Report Mask(ISA2_AMX_BF16) Var(ix86_isa_flags2) Save +Support AMX-BF16 built-in functions and code generation. diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index b660d0d..6d25f44 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -144,6 +144,12 @@ #include +#include + +#include + +#include + #include #include diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 7f14a28..c9f7299 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -6623,6 +6623,21 @@ Enable/disable the generation of the XSAVEOPT instructions. @cindex @code{target("xsaves")} function attribute, x86 Enable/disable the generation of the XSAVES instructions. +@item amx-tile +@itemx no-amx-tile +@cindex @code{target("amx-tile")} function attribute, x86 +Enable/disable the generation of the AMX-TILE instructions. + +@item amx-int8 +@itemx no-amx-int8 +@cindex @code{target("amx-int8")} function attribute, x86 +Enable/disable the generation of the AMX-INT8 instructions. + +@item amx-bf16 +@itemx no-amx-bf16 +@cindex @code{target("amx-bf16")} function attribute, x86 +Enable/disable the generation of the AMX-BF16 instructions. + @item cld @itemx no-cld @cindex @code{target("cld")} function attribute, x86 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 226b0e1..3e087b6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1362,6 +1362,7 @@ See RS/6000 and PowerPC Options. 
-mvpclmulqdq -mavx512bitalg -mmovdiri -mmovdir64b -mavx512vpopcntdq @gol -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol +-mamx-tile -mamx-int8 -mamx-bf16@gol -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol @@ -30205,6 +30206,15 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @need 200 @itemx -mserialize @opindex mserialize +@need 200 +@itemx -mamx-tile +@opindex mamx-tile +@need 200 +@itemx -mamx-int8 +@opindex mamx-int8 +@need 200 +@itemx -mamx-bf16 +@opindex mamx-bf16 These switches enable the use of instructions in the MMX, SSE, SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 65b2e55..b625f1e 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2249,6 +2249,15 @@ Target supports the execution of @code{avx512f} instructions. @item avx512vp2intersect Target supports the execution of @code{avx512vp2intersect} instructions. +@item amx_tile +Target supports the execution of @code{amx-tile} instructions. + +@item amx_int8 +Target supports the execution of @code{amx-int8} instructions. + +@item amx_bf16 +Target supports the execution of @code{amx-bf16} instructions. + @item cell_hw Test system can execute AltiVec and Cell PPU instructions. 
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index 04d5fec..449f30d 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,11 +1,12 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h avx512bitalgintrin.h, avx512vp2intersectintrin.h, tsxldtrkintrin.h, + amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h, avx512vp2intersectvlintrin.h and mm_malloc.h.h are usable with -O -pedantic-errors. 
*/ diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index f40172e..29e9891 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,11 +1,12 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h avx512bitalgintrin.h, avx512vp2intersectintrin.h, tsxldtrkintrin.h, + amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h, avx512vp2intersectvlintrin.h and mm_malloc.h are usable with -O 
-fkeep-inline-functions. */ diff --git a/gcc/testsuite/gcc.target/i386/amx-check.h b/gcc/testsuite/gcc.target/i386/amx-check.h new file mode 100644 index 0000000..03616ff --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amx-check.h @@ -0,0 +1,185 @@ +#ifndef AMX_CHECK_H_INCLUDED +#define AMX_CHECK_H_INCLUDED + +#include +#include +#include +#ifdef DEBUG +#include +#endif +#include "cpuid.h" + +/* TODO: The tmm emulation is temporary for current + AMX implementation with no tmm regclass, should + be changed in the future. */ +typedef struct __tile_config +{ + uint8_t palette_id; + uint8_t start_row; + uint8_t reserved_0[14]; + uint16_t colsb[8]; /* Colum size of each tmm register in bytes */ + uint16_t reserved_1[8]; + uint8_t rows[8]; /* Row size of each tmm reg in bytes */ + uint8_t reserved_2[8]; +} __tilecfg; + +typedef union __union_tile_config +{ + __tilecfg s; + uint8_t a[64]; +} __tilecfg_u; + +typedef struct __tile +{ + /* Max size of tile register */ + uint8_t buf[1024]; + int rows; + int colsb; +} __tile; + +/* Maxium col/row size in bytes */ +#define MAX_ROWS 16 +#define MAX_COLS 64 + +/* Stride (colum width in byte) used for tileload/store */ +#define _STRIDE 64 + +/* Initialize tile config by setting all tmm size to 16x64 */ +void init_tile_config (__tilecfg_u *dst) +{ + int i; + + dst->s.palette_id = 1; + dst->s.start_row = 0; + + for (i = 0; i < 14; i++) + dst->s.reserved_0[i] = 0; + + for (i = 0; i < 8; i++) + { + dst->s.colsb[i] = _STRIDE; + dst->s.rows[i] = 16; + dst->s.reserved_1[i] = 0; + dst->s.reserved_2[i] = 0; + } + + _tile_loadconfig (dst->a); +} + +/* Init __tile variable that going to be store to register + w/o extra buffer. If buffer exists, it should be the same + size matrix as corresponding tmm register. 
+ Should execute init_tile_config first */ +void init_tile_src (const int tmm_num, __tile *src, uint8_t *buffer) +{ + int rows, colsb, i, j; + __tilecfg_u tmp; + + _tile_storeconfig (tmp.a); + + src->rows = rows = tmp.s.rows[tmm_num]; + src->colsb = colsb = tmp.s.colsb[tmm_num]; + + for (i = 0; i < rows; i++) + for (j = 0; j < colsb; j++) + { + if(buffer) + src->buf[i * colsb + j] = buffer[i * colsb + j]; + else + src->buf[i * colsb + j] = (i + 11 * j) % 256; + } + +} + +/* Init __tile src and corresponding tmm register */ +#define init_tile_reg_and_src(tmm_num, src) \ +{ \ + init_tile_src (tmm_num, &src, NULL); \ + _tile_loadd (tmm_num, src.buf, _STRIDE); \ +} + +#define init_tile_reg_and_src_with_buffer(tmm_num, src, buffer) \ +{ \ + init_tile_src (tmm_num, &src, buffer); \ + _tile_loadd (tmm_num, src.buf, _STRIDE); \ +} + +/* Zero __tile src. It should be init first. */ +void zero_tile_src (__tile *src) +{ + int i, j; + + for (i = 0; i < src->rows; i++) + for (j = 0; j < src->colsb; j++) + src->buf[i * src->colsb + j] = 0; +} + +/* Compare tile config value with __tilecfg_u dst */ +int check_tile_config (__tilecfg_u *src, __tilecfg_u *dst) +{ + size_t size = sizeof(__tilecfg); + uint8_t *pa_src = (uint8_t *) src->a; + uint8_t *pa_dst = (uint8_t *) dst->a; + + for (int i = 0; i < size; i++) + if (pa_src[i] != pa_dst[i]) + return 0; + + return 1; +} + +/* Compare tile register value with __tile variable */ +int check_tile_register (__tile* ref, __tile* target) +{ + /* Tile register should be stored from tmm to + memory and compare with emulation results. 
*/ + int rows = target->rows; + int colsb = target->colsb; + int i, j; + + for (i = 0; i < rows; i++) + for (j = 0; j < colsb; j++) + if (ref->buf[i * colsb + j] != target->buf[i * colsb + j]) + return 0; + + return 1; +} + +#ifndef DO_TEST +#define DO_TEST do_test +static void test_amx (void); +__attribute__ ((noinline)) +static void +do_test (void) +{ + test_amx (); +} +#endif + +int +main () +{ + /* Check cpu support for AMX */ + if (__builtin_cpu_supports ("amx-tile") +#ifdef AMX_INT8 + && __builtin_cpu_supports ("amx-int8") +#endif +#ifdef AMX_BF16 + && __builtin_cpu_supports ("amx-bf16") +#endif + ) + { + DO_TEST (); +#ifdef DEBUG + printf ("PASSED\n"); +#endif + } +#ifdef DEBUG + else + printf ("SKIPPED\n"); +#endif + + return 0; +} + +#endif diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxbf16-asmatt-1.c new file mode 100644 index 0000000..a5e5bdd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxbf16-asmatt-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mamx-bf16" } */ +/* { dg-final { scan-assembler "tdpbf16ps\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */ +#include + +#define TMM1 1 +#define TMM2 2 +#define TMM3 3 + +void TEST () +{ + _tile_dpbf16ps (TMM1, TMM2, TMM3); +} diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c new file mode 100644 index 0000000..c2d6074 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxbf16-asmintel-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mamx-bf16 -masm=intel" } */ +/* { dg-final { scan-assembler "tdpbf16ps\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ +#include + +void TEST () +{ + _tile_dpbf16ps (1, 2, 3); +} diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c new file mode 100644 index 0000000..c819113 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c @@ -0,0 +1,83 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-options "-O2 -mamx-tile -mamx-bf16" } */ +#include + +#define AMX_BF16 +#define DO_TEST test_amx_bf16_dpbf16ps +void test_amx_bf16_dpbf16ps (); +#include "amx-check.h" + +/* Transformation functions between bf16/float */ +static uint16_t make_bf16 (float f) +{ + uint32_t u = (uint32_t)f; + u = (u >> 16) & 0xffff; + return (uint16_t)u; +} + +static float make_f32 (uint16_t bf) +{ + uint32_t u = (uint32_t)(bf << 16); + return (float)u; +} + +/* Init tile buffer with bf16 pairs */ +void init_bf16_max_tile_buffer (uint8_t *buf) +{ + int i, j; + uint16_t *ptr = (uint16_t *)buf; + + for(i = 0; i < 16; i++) + for(j = 0; j < 32; j++) + { + float f = 16.1f * i + 3.4f * j; + ptr[i * 32 + j] = make_bf16(f); + } +} + +void calc_matrix_dpbf16ps (__tile *dst, __tile *src1, __tile *src2) +{ + uint16_t *src1_buf = (uint16_t *)src1->buf; + uint16_t *src2_buf = (uint16_t *)src2->buf; + float *dst_buf = (float *)dst->buf; + + int M = src1->rows; + int N = src1->colsb / 4; + int K = src2->colsb / 4; + int i, j, k, t; + + for (i = 0; i < M; i++) + for (j = 0; j < N; j++) + for (k = 0; k < K; k++) + for (t = 0; t < 2; t+=2) + { + dst_buf[i * N + k] += + (make_f32(src1_buf[i * 4 * N + 4 * j + t]) * + make_f32(src2_buf[j * 4 * K + 4 * k + t])) + + (make_f32(src1_buf[i * 4 * N + 4 * j + t + 1]) * + make_f32(src1_buf[i * 4 * N + 4 * j + t + 1])); + } + +} + +void test_amx_bf16_dpbf16ps () +{ + __tilecfg_u cfg; + __tile dst, dst_ref, src1, src2; + uint8_t tmp_dst_buf[1024]; + 
+ init_bf16_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf); + init_tile_reg_and_src_with_buffer (2, dst, tmp_dst_buf); + init_tile_reg_and_src_with_buffer (3, dst, tmp_dst_buf); + + calc_matrix_dpbf16ps (&dst, &src1, &src2); + + _tile_dpbf16ps (1, 2, 3); + _tile_stored (1, dst_ref.buf, _STRIDE); + + if (!check_tile_register (&dst_ref, &dst)) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/amxint8-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxint8-asmatt-1.c new file mode 100644 index 0000000..1842c23 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxint8-asmatt-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mamx-int8" } */ +/* { dg-final { scan-assembler "tdpbssd\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */ +/* { dg-final { scan-assembler "tdpbsud\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } * +/* { dg-final { scan-assembler "tdpbusd\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */ +/* { dg-final { scan-assembler "tdpbuud\[ \\t]+\[^\n\]*%tmm3+\[^\n\]*%tmm2+\[^\n\]*%tmm1" } } */ +#include + +#define TMM1 1 +#define TMM2 2 +#define TMM3 3 + +void TEST () +{ + _tile_dpbssd (TMM1, TMM2, TMM3); + _tile_dpbsud (TMM1, TMM2, TMM3); + _tile_dpbusd (TMM1, TMM2, TMM3); + _tile_dpbuud (TMM1, TMM2, TMM3); +} diff --git a/gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c new file mode 100644 index 0000000..bcfbb3f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxint8-asmintel-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mamx-int8 -masm=intel" } */ +/* { dg-final { scan-assembler "tdpbssd\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ +/* { dg-final { scan-assembler "tdpbsud\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } * +/* { dg-final { scan-assembler "tdpbusd\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ +/* { dg-final { scan-assembler "tdpbuud\[ \\t]+\[^\n\]*%tmm1+\[^\n\]*%tmm2+\[^\n\]*%tmm3" } } */ +#include + +void TEST () +{ + _tile_dpbssd (1, 2, 3); + _tile_dpbsud (1, 2, 3); + _tile_dpbusd (1, 2, 3); + _tile_dpbuud (1, 2, 3); +} diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c new file mode 100644 index 0000000..62d31ce --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c @@ -0,0 +1,62 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-options "-O2 -mamx-tile -mamx-int8" } */ +#include + +#define AMX_INT8 +#define DO_TEST test_amx_int8_dpbssd +void test_amx_int8_dpbssd (); +#include "amx-check.h" + +/* Init tile buffer with int32 value*/ +void init_i32_max_tile_buffer (uint8_t *buf) +{ + int i, j; + int *ptr = (int *)buf; + for (i = 0; i < 16; i++) + for (j = 0; j < 16; j++) + ptr[i * 16 + j] = 2 * i - (16 - j); +} + +void calc_matrix_dpbssd (__tile *dst, __tile *src1, __tile *src2) +{ + int8_t *src1_buf = (int8_t *)src1->buf; + int8_t *src2_buf = (int8_t *)src2->buf; + int *dst_buf = (int *)dst->buf; + + int M = src1->rows; + int N = src1->colsb / 4; + int K = src2->colsb / 4; + int i, j, k, t; + + for (i = 0; i < M; i++) + for (j = 0; j < N; j++) + for (k = 0; k < K; k++) + for (t = 0; t < 4; t++) + { + dst_buf[i * N + k] += + ((int) src1_buf[i * 4 * N + 4 * j + t]) * + ((int) src2_buf[j * 4 * K + 4 * k + t]); + } +} + +void test_amx_int8_dpbssd () +{ + __tilecfg_u cfg; + __tile dst, dst_ref, src1, src2; + uint8_t tmp_dst_buf[1024]; + + init_i32_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + 
init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf); + init_tile_reg_and_src (2, src1); + init_tile_reg_and_src (3, src2); + + calc_matrix_dpbssd (&dst, &src1, &src2); + + _tile_dpbssd (1, 2, 3); + _tile_stored (1, dst_ref.buf, _STRIDE); + + if (!check_tile_register (&dst_ref, &dst)) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c new file mode 100644 index 0000000..5007ee9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c @@ -0,0 +1,61 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-options "-O2 -mamx-tile -mamx-int8" } */ +#include + +#define AMX_INT8 +#define DO_TEST test_amx_int8_dpbsud +void test_amx_int8_dpbsud (); +#include "amx-check.h" + +/* Init tile buffer with int32 value*/ +void init_i32_max_tile_buffer (uint8_t *buf) +{ + int i, j; + int *ptr = (int *)buf; + for (i = 0; i < 16; i++) + for (j = 0; j < 16; j++) + ptr[i * 16 + j] = 2 * i - (16 - j); +} + +void calc_matrix_dpbsud (__tile *dst, __tile *src1, __tile *src2) +{ + int8_t *src1_buf = (int8_t *)src1->buf; + uint8_t *src2_buf = (uint8_t *)src2->buf; + int *dst_buf = (int *)dst->buf; + + int M = src1->rows; + int N = src1->colsb / 4; + int K = src2->colsb / 4; + int i, j, k, t; + + for (i = 0; i < M; i++) + for (j = 0; j < N; j++) + for (k = 0; k < K; k++) + for (t = 0; t < 4; t++) + { + dst_buf[i * N + k] += + ((int) src1_buf[i * 4 * N + 4 * j + t]) * + ((unsigned) src2_buf[j * 4 * K + 4 * k + t]); + } +} + +void test_amx_int8_dpbsud () +{ + __tilecfg_u cfg; + __tile dst, dst_ref, src1, src2; + uint8_t tmp_dst_buf[1024]; + + init_i32_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf); + init_tile_reg_and_src (2, src1); + init_tile_reg_and_src (3, src2); + + calc_matrix_dpbsud (&dst, &src1, &src2); + _tile_dpbsud (1, 2, 3); + _tile_stored (1, dst_ref.buf, _STRIDE); + + if (!check_tile_register (&dst_ref, &dst)) + abort(); +} 
diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c new file mode 100644 index 0000000..17888e2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c @@ -0,0 +1,61 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-options "-O2 -mamx-tile -mamx-int8" } */ +#include + +#define AMX_INT8 +#define DO_TEST test_amx_int8_dpbusd +void test_amx_int8_dpbusd (); +#include "amx-check.h" + +/* Init tile buffer with int32 value*/ +void init_i32_max_tile_buffer (uint8_t *buf) +{ + int i, j; + int *ptr = (int *)buf; + for (i = 0; i < 16; i++) + for (j = 0; j < 16; j++) + ptr[i * 16 + j] = 2 * i - (16 - j); +} + +void calc_matrix_dpbusd (__tile *dst, __tile *src1, __tile *src2) +{ + uint8_t *src1_buf = (uint8_t *)src1->buf; + int8_t *src2_buf = (int8_t *)src2->buf; + int *dst_buf = (int *)dst->buf; + + int M = src1->rows; + int N = src1->colsb / 4; + int K = src2->colsb / 4; + int i, j, k, t; + + for (i = 0; i < M; i++) + for (j = 0; j < N; j++) + for (k = 0; k < K; k++) + for (t = 0; t < 4; t++) + { + dst_buf[i * N + k] += + ((unsigned) src1_buf[i * 4 * N + 4 * j + t]) * + ((int) src2_buf[j * 4 * K + 4 * k + t]); + } +} + +void test_amx_int8_dpbusd () +{ + __tilecfg_u cfg; + __tile dst, dst_ref, src1, src2; + uint8_t tmp_dst_buf[1024]; + + init_i32_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf); + init_tile_reg_and_src (2, src1); + init_tile_reg_and_src (3, src2); + + calc_matrix_dpbusd (&dst, &src1, &src2); + _tile_dpbusd (1, 2, 3); + _tile_stored (1, dst_ref.buf, _STRIDE); + + if (!check_tile_register (&dst_ref, &dst)) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c new file mode 100644 index 0000000..c39666c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c @@ -0,0 +1,61 @@ +/* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mamx-tile -mamx-int8" } */ +#include + +#define AMX_INT8 +#define DO_TEST test_amx_int8_dpbuud +void test_amx_int8_dpbuud (); +#include "amx-check.h" + +/* Init tile buffer with int32 value*/ +void init_i32_max_tile_buffer (uint8_t *buf) +{ + int i, j; + int *ptr = (int *)buf; + for (i = 0; i < 16; i++) + for (j = 0; j < 16; j++) + ptr[i * 16 + j] = 2 * i - (16 - j); +} + +void calc_matrix_dpbuud (__tile *dst, __tile *src1, __tile *src2) +{ + uint8_t *src1_buf = (uint8_t *)src1->buf; + uint8_t *src2_buf = (uint8_t *)src2->buf; + int *dst_buf = (int *)dst->buf; + + int M = src1->rows; + int N = src1->colsb / 4; + int K = src2->colsb / 4; + int i, j, k, t; + + for (i = 0; i < M; i++) + for (j = 0; j < N; j++) + for (k = 0; k < K; k++) + for (t = 0; t < 4; t++) + { + dst_buf[i * N + k] += + ((unsigned) src1_buf[i * 4 * N + 4 * j + t]) * + ((unsigned) src2_buf[j * 4 * K + 4 * k + t]); + } +} + +void test_amx_int8_dpbuud () +{ + __tilecfg_u cfg; + __tile dst, dst_ref, src1, src2; + uint8_t tmp_dst_buf[1024]; + + init_i32_max_tile_buffer (tmp_dst_buf); + + init_tile_config (&cfg); + init_tile_reg_and_src_with_buffer (1, dst, tmp_dst_buf); + init_tile_reg_and_src (2, src1); + init_tile_reg_and_src (3, src2); + + calc_matrix_dpbuud (&dst, &src1, &src2); + _tile_dpbuud (1, 2, 3); + _tile_stored (1, dst_ref.buf, _STRIDE); + + if (!check_tile_register (&dst_ref, &dst)) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/amxtile-2.c b/gcc/testsuite/gcc.target/i386/amxtile-2.c new file mode 100644 index 0000000..cef84f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxtile-2.c @@ -0,0 +1,47 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-options "-O2 -mamx-tile " } */ +#include + +#define DO_TEST test_amx_tile +void test_amx_tile (); +#include "amx-check.h" + +void test_amx_tile () +{ + __tilecfg_u cfg_src, cfg_dst; + __tile reg_src1, reg_src2, reg_ref; + + /* check tile config load & store. 
*/ + init_tile_config (&cfg_src); + _tile_storeconfig (cfg_dst.a); + + if (!check_tile_config (&cfg_src, &cfg_dst)) + abort (); + + /* check tile register load & store. */ + init_tile_reg_and_src (1, reg_src1); + _tile_stored (1, reg_ref.buf, _STRIDE); + if (!check_tile_register (&reg_ref, &reg_src1)) + abort (); + + /* check tile stream load instruction */ + init_tile_src (2, &reg_src2, NULL); + _tile_stream_loadd (2, reg_src2.buf, _STRIDE); + _tile_stored (2, reg_ref.buf, _STRIDE); + if (!check_tile_register (&reg_ref, &reg_src2)) + abort (); + + /* check tile register zeroing */ + zero_tile_src (&reg_src2); + _tile_zero (2); + _tile_stored (2, reg_ref.buf, _STRIDE); + if (!check_tile_register (&reg_ref, &reg_src2)) + abort (); + + /* check tile cfg zeroing */ + memset (cfg_dst.a, 0, sizeof(__tilecfg)); + _tile_release (); + _tile_storeconfig (cfg_src.a); + if (!check_tile_config (&cfg_src, &cfg_dst)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/amxtile-asmatt-1.c b/gcc/testsuite/gcc.target/i386/amxtile-asmatt-1.c new file mode 100644 index 0000000..ceb5fa4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxtile-asmatt-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mamx-tile " } */ +/* { dg-final { scan-assembler "ldtilecfg\[ \\t]+\(\[^\)\n\]*\)" } } */ +/* { dg-final { scan-assembler "sttilecfg\[ \\t]+\(\[^\)\n\]*\)" } } */ +/* { dg-final { scan-assembler "tilerelease" } } */ +/* { dg-final { scan-assembler "tileloadd\[ \\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "tileloaddt1\[ \\t]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)+\[^\n\]*%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "tilestored\[ \\t]+\[^\n\]*%tmm\[0-9\]+\[^\n\]*\\(%\[a-z0-9]*\,%\[a-z0-9\]*\,\[124\]\\)" } } */ +/* { dg-final { scan-assembler "tilezero\[ \\t]+\[^\n\]*%tmm\[0-9\]" } } */ +#include + +extern int a[]; +extern const void* base; +extern const int stride; + +#define TMM0 0 +#define TMM1 1 +#define TMM2 2 +#define TMM3 3 + +void TEST () +{ + _tile_loadconfig (a); + _tile_storeconfig (a); + _tile_release (); + _tile_loadd (TMM3, base, stride); + _tile_stream_loadd (TMM2, base, stride); + _tile_stored (TMM1, base, stride); + _tile_zero (TMM0); +} diff --git a/gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c b/gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c new file mode 100644 index 0000000..88ef612 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/amxtile-asmintel-1.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mamx-tile -masm=intel " } */ +/* { dg-final { scan-assembler "ldtilecfg\[ \\t]" } } */ +/* { dg-final { scan-assembler "sttilecfg\[ \\t]" } } */ +/* { dg-final { scan-assembler "tilerelease" } } */ +/* { dg-final { scan-assembler "tileloadd\[ \\t]%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "tileloaddt1\[ \\t]%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "tilestored\[ \\t]\[^\n\]+\[^\n\]*%tmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "tilezero\[ \\t]+\[^\n\]*%tmm\[0-9\]" } } */ +#include + +extern int a[]; +extern const void* base; +extern const int stride; +void TEST () +{ + _tile_loadconfig (a); + _tile_storeconfig (a); + _tile_release (); + _tile_loadd (5, base, stride); + _tile_stream_loadd (4, base, stride); + _tile_stored (3, base, stride); + _tile_zero (2); +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index 94ffbb6..8e669f1 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -71,6 +71,9 @@ extern void test_tsxldtrk (void) __attribute__((__target__("tsxldtrk"))); extern void test_enqcmd (void) __attribute__((__target__("enqcmd"))); extern void test_avx512bf16 (void) __attribute__((__target__("avx512bf16"))); extern void test_avx512vp2intersect (void) __attribute__((__target__("avx512vp2intersect"))); +extern void test_amx_tile (void) __attribute__((__target__("amx-tile"))); +extern void test_amx_int8 (void) __attribute__((__target__("amx-int8"))); +extern void test_amx_bf16 (void) __attribute__((__target__("amx-bf16"))); extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps"))); @@ -143,6 +146,9 @@ extern void test_no_tsxldtrk (void) __attribute__((__target__("no-tsxldtrk"))); extern void test_no_enqcmd (void) __attribute__((__target__("no-enqcmd"))); extern void test_no_avx512bf16 
(void) __attribute__((__target__("no-avx512bf16"))); extern void test_no_avx512vp2intersect (void) __attribute__((__target__("no-avx512vp2intersect"))); +extern void test_no_amx_tile (void) __attribute__((__target__("no-amx-tile"))); +extern void test_no_amx_int8 (void) __attribute__((__target__("no-amx-int8"))); +extern void test_no_amx_bf16 (void) __attribute__((__target__("no-amx-bf16"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index b1690d7..61146b2b 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h gfniintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero 
-mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 3a640470..4d6c9b3 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index edaa2aa..837b51c 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 
+1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 7364b2f..fc75669f 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -11,6 +11,7 @@ tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, tsxldtrkintrin.h, avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h, avx512bitalgintrin.h, avx512vp2intersectintrin.h, + amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h, avx512vp2intersectvlintrin.h and mm_malloc.h that reference the proper builtin functions. 
Defining away "extern" and "__inline" results in all of them being @@ -102,7 +103,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16") #endif /* Following intrinsics require immediate arguments. They @@ -219,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16") #endif #include test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index eaadebe..9ca7c5d 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -10,6 +10,7 @@ tbmintrin.h, 
lwpintrin.h, popcntintrin.h, fmaintrin.h, tsxtrkintrin.h, avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h, avx512bitalgintrin.h, avx512vp2intersectintrin.h, + amxtileintrin.h, amxint8intrin.h, amxbf16intrin.h, avx512vp2intersectvlintrin.h and mm_malloc.h that reference the proper builtin functions. Defining away "extern" and "__inline" results in all of them being @@ -697,6 +698,6 @@ #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16") #include diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 0a00972..8314e44 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8956,6 +8956,39 @@ proc check_effective_target_avx512vaes { } { } "-mvaes" ] } +# Return 1 if amx-tile instructions can be compiled. 
+proc check_effective_target_amx_tile { } { + return [check_no_compiler_messages amx_tile object { + void + foo () + { + __asm__ volatile ("tilerelease" ::); + } + } "-mamx-tile" ] +} + +# Return 1 if amx-int8 instructions can be compiled. +proc check_effective_target_amx_int8 { } { + return [check_no_compiler_messages amx_int8 object { + void + foo () + { + __asm__ volatile ("tdpbssd\t%%tmm1, %%tmm2, %%tmm3" ::); + } + } "-mamx-int8" ] +} + +# Return 1 if amx-bf16 instructions can be compiled. +proc check_effective_target_amx_bf16 { } { + return [check_no_compiler_messages amx_bf16 object { + void + foo () + { + __asm__ volatile ("tdpbf16ps\t%%tmm1, %%tmm2, %%tmm3" ::); + } + } "-mamx-bf16" ] +} + # Return 1 if vpclmulqdq instructions can be compiled. proc check_effective_target_vpclmulqdq { } { return [check_no_compiler_messages vpclmulqdq object { -- cgit v1.1 From 4d09cc91574bfd4f4d49f7f45e54af4fe0a35e9a Mon Sep 17 00:00:00 2001 From: "Paul A. Clarke" Date: Wed, 23 Sep 2020 14:10:19 -0500 Subject: rs6000: Support _mm_insert_epi{8,32,64} Add compatibility implementations for SSE4.1 intrinsics _mm_insert_epi8, _mm_insert_epi32, _mm_insert_epi64. 2020-09-23 Paul A. Clarke gcc/ * config/rs6000/smmintrin.h (_mm_insert_epi8): New. (_mm_insert_epi32): New. (_mm_insert_epi64): New. 
--- gcc/config/rs6000/smmintrin.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index d78ddba..4c0fc86 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -42,6 +42,36 @@ #include #include +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_insert_epi8 (__m128i const __A, int const __D, int const __N) +{ + __v16qi result = (__v16qi)__A; + + result [__N & 0xf] = __D; + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_insert_epi32 (__m128i const __A, int const __D, int const __N) +{ + __v4si result = (__v4si)__A; + + result [__N & 3] = __D; + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_insert_epi64 (__m128i const __A, long long const __D, int const __N) +{ + __v2di result = (__v2di)__A; + + result [__N & 1] = __D; + + return (__m128i) result; +} + extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi8 (__m128i __X, const int __N) { -- cgit v1.1 From e189d5c931c5d94fd7487a2d1df9454d0b124eeb Mon Sep 17 00:00:00 2001 From: "Paul A. Clarke" Date: Wed, 23 Sep 2020 16:48:21 -0500 Subject: rs6000: Add tests for _mm_insert_epi{8,32,64} Copied from gcc.target/i386. 2020-09-23 Paul A. Clarke gcc/testsuite/ChangeLog: * gcc.target/powerpc/sse4_1-pinsrb.c: New test. * gcc.target/powerpc/sse4_1-pinsrd.c: New test. * gcc.target/powerpc/sse4_1-pinsrq.c: New test. 
--- gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrb.c | 110 +++++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrd.c | 73 +++++++++++++++ gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrq.c | 67 ++++++++++++++ 3 files changed, 250 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrb.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrd.c create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrq.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrb.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrb.c new file mode 100644 index 0000000..4fa5e83 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrb.c @@ -0,0 +1,110 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ +/* { dg-require-effective-target p8vector_hw } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include +#include + +#define msk0 0x00 +#define msk1 0x01 +#define msk2 0x02 +#define msk3 0x03 +#define msk4 0x04 +#define msk5 0x05 +#define msk6 0x06 +#define msk7 0x07 +#define msk8 0x08 +#define msk9 0x09 +#define mskA 0x0A +#define mskB 0x0B +#define mskC 0x0C +#define mskD 0x0D +#define mskE 0x0E +#define mskF 0x0F + +static void +TEST (void) +{ + union + { + __m128i x; + unsigned int i[4]; + unsigned char c[16]; + } res [16], val, tmp; + int masks[16]; + unsigned char ins[4] = { 3, 4, 5, 6 }; + int i; + + val.i[0] = 0x35251505; + val.i[1] = 0x75655545; + val.i[2] = 0xB5A59585; + val.i[3] = 0xF5E5D5C5; + + /* Check pinsrb imm8, r32, xmm. 
*/ + res[0].x = _mm_insert_epi8 (val.x, ins[0], msk0); + res[1].x = _mm_insert_epi8 (val.x, ins[0], msk1); + res[2].x = _mm_insert_epi8 (val.x, ins[0], msk2); + res[3].x = _mm_insert_epi8 (val.x, ins[0], msk3); + res[4].x = _mm_insert_epi8 (val.x, ins[0], msk4); + res[5].x = _mm_insert_epi8 (val.x, ins[0], msk5); + res[6].x = _mm_insert_epi8 (val.x, ins[0], msk6); + res[7].x = _mm_insert_epi8 (val.x, ins[0], msk7); + res[8].x = _mm_insert_epi8 (val.x, ins[0], msk8); + res[9].x = _mm_insert_epi8 (val.x, ins[0], msk9); + res[10].x = _mm_insert_epi8 (val.x, ins[0], mskA); + res[11].x = _mm_insert_epi8 (val.x, ins[0], mskB); + res[12].x = _mm_insert_epi8 (val.x, ins[0], mskC); + res[13].x = _mm_insert_epi8 (val.x, ins[0], mskD); + res[14].x = _mm_insert_epi8 (val.x, ins[0], mskE); + res[15].x = _mm_insert_epi8 (val.x, ins[0], mskF); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + masks[4] = msk4; + masks[5] = msk5; + masks[6] = msk6; + masks[7] = msk7; + masks[8] = msk8; + masks[9] = msk9; + masks[10] = mskA; + masks[11] = mskB; + masks[12] = mskC; + masks[13] = mskD; + masks[14] = mskE; + masks[15] = mskF; + + for (i = 0; i < 16; i++) + { + tmp.x = val.x; + tmp.c[masks[i]] = ins[0]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } + + /* Check pinsrb imm8, m8, xmm. 
*/ + for (i = 0; i < 16; i++) + { + res[i].x = _mm_insert_epi8 (val.x, ins[i % 4], msk0); + masks[i] = msk0; + } + + for (i = 0; i < 16; i++) + { + tmp.x = val.x; + tmp.c[masks[i]] = ins[i % 4]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrd.c new file mode 100644 index 0000000..0bec936 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrd.c @@ -0,0 +1,73 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ +/* { dg-require-effective-target p8vector_hw } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include +#include + +#define msk0 0x00 +#define msk1 0x01 +#define msk2 0x02 +#define msk3 0x03 + +static void +TEST (void) +{ + union + { + __m128i x; + unsigned int i[4]; + } res [4], val, tmp; + static unsigned int ins[4] = { 3, 4, 5, 6 }; + int masks[4]; + int i; + + val.i[0] = 55; + val.i[1] = 55; + val.i[2] = 55; + val.i[3] = 55; + + /* Check pinsrd imm8, r32, xmm. */ + res[0].x = _mm_insert_epi32 (val.x, ins[0], msk0); + res[1].x = _mm_insert_epi32 (val.x, ins[0], msk1); + res[2].x = _mm_insert_epi32 (val.x, ins[0], msk2); + res[3].x = _mm_insert_epi32 (val.x, ins[0], msk3); + + masks[0] = msk0; + masks[1] = msk1; + masks[2] = msk2; + masks[3] = msk3; + + for (i = 0; i < 4; i++) + { + tmp.x = val.x; + tmp.i[masks[i]] = ins[0]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } + + /* Check pinsrd imm8, m32, xmm. 
*/ + for (i = 0; i < 4; i++) + { + res[i].x = _mm_insert_epi32 (val.x, ins[i], msk0); + masks[i] = msk0; + } + + for (i = 0; i < 4; i++) + { + tmp.x = val.x; + tmp.i[masks[i]] = ins[i]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrq.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrq.c new file mode 100644 index 0000000..395c20e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-pinsrq.c @@ -0,0 +1,67 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */ +/* { dg-require-effective-target p8vector_hw } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include +#include + +#define msk0 0x00 +#define msk1 0x01 + +static void +__attribute__((noinline)) +TEST (void) +{ + union + { + __m128i x; + unsigned long long ll[2]; + } res [4], val, tmp; + int masks[4]; + static unsigned long long ins[2] = + { 0xAABBAABBAABBAABBLL, 0xCCDDCCDDCCDDCCDDLL }; + int i; + + val.ll[0] = 0x0807060504030201LL; + val.ll[1] = 0x100F0E0D0C0B0A09LL; + + /* Check pinsrq imm8, r64, xmm. */ + res[0].x = _mm_insert_epi64 (val.x, ins[0], msk0); + res[1].x = _mm_insert_epi64 (val.x, ins[0], msk1); + + masks[0] = msk0; + masks[1] = msk1; + + for (i = 0; i < 2; i++) + { + tmp.x = val.x; + tmp.ll[masks[i]] = ins[0]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } + + /* Check pinsrq imm8, m64, xmm. 
*/ + for (i = 0; i < 2; i++) + { + res[i].x = _mm_insert_epi64 (val.x, ins[i], msk0); + masks[i] = msk0; + } + + for (i = 0; i < 2; i++) + { + tmp.x = val.x; + tmp.ll[masks[i]] = ins[i]; + if (memcmp (&tmp, &res[i], sizeof (tmp))) + abort (); + } +} -- cgit v1.1 From 2a10a2c0689db280ee3a94164504b7196b8370f4 Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Mon, 28 Sep 2020 18:08:05 +0200 Subject: OpenMP: Handle cpp_implicit_alias in declare-target discovery (PR96390) gcc/ChangeLog: PR middle-end/96390 * omp-offload.c (omp_discover_declare_target_tgt_fn_r): Handle alias nodes. libgomp/ChangeLog: PR middle-end/96390 * testsuite/libgomp.c++/pr96390.C: New test. * testsuite/libgomp.c-c++-common/pr96390.c: New test. --- gcc/omp-offload.c | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c index 32c2485..a89275b 100644 --- a/gcc/omp-offload.c +++ b/gcc/omp-offload.c @@ -196,21 +196,53 @@ omp_declare_target_var_p (tree decl) static tree omp_discover_declare_target_tgt_fn_r (tree *tp, int *walk_subtrees, void *data) { - if (TREE_CODE (*tp) == FUNCTION_DECL - && !omp_declare_target_fn_p (*tp) - && !lookup_attribute ("omp declare target host", DECL_ATTRIBUTES (*tp))) + if (TREE_CODE (*tp) == FUNCTION_DECL) { + tree decl = *tp; tree id = get_identifier ("omp declare target"); - if (!DECL_EXTERNAL (*tp) && DECL_SAVED_TREE (*tp)) - ((vec *) data)->safe_push (*tp); - DECL_ATTRIBUTES (*tp) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (*tp)); symtab_node *node = symtab_node::get (*tp); if (node != NULL) { + while (node->alias_target) + { + if (!omp_declare_target_fn_p (node->decl) + && !lookup_attribute ("omp declare target host", + DECL_ATTRIBUTES (node->decl))) + { + node->offloadable = 1; + DECL_ATTRIBUTES (node->decl) + = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl)); + } + node = symtab_node::get (node->alias_target); + } + symtab_node *new_node = 
node->ultimate_alias_target (); + decl = new_node->decl; + while (node != new_node) + { + if (!omp_declare_target_fn_p (node->decl) + && !lookup_attribute ("omp declare target host", + DECL_ATTRIBUTES (node->decl))) + { + node->offloadable = 1; + DECL_ATTRIBUTES (node->decl) + = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl)); + } + gcc_assert (node->alias && node->analyzed); + node = node->get_alias_target (); + } node->offloadable = 1; if (ENABLE_OFFLOADING) g->have_offload = true; } + if (omp_declare_target_fn_p (decl) + || lookup_attribute ("omp declare target host", + DECL_ATTRIBUTES (decl))) + return NULL_TREE; + + if (!DECL_EXTERNAL (decl) && DECL_SAVED_TREE (decl)) + ((vec *) data)->safe_push (decl); + DECL_ATTRIBUTES (decl) = tree_cons (id, NULL_TREE, + DECL_ATTRIBUTES (decl)); } else if (TYPE_P (*tp)) *walk_subtrees = 0; -- cgit v1.1 From 77a23a825c2fcdac2a832998c228fd9d4ef99dd1 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Fri, 11 Sep 2020 10:15:12 +0200 Subject: irange_allocator class This is the irange storage class. It is used to allocate the minimum amount of storage needed for a given irange. Storage is automatically freed at destruction of the storage class. It is meant for long term storage, as opposed to int_range_max which is meant for intermediate temporary results on the stack. The general gist is: irange_allocator alloc; // Allocate an irange of 5 sub-ranges. irange *p = alloc.allocate (5); // Allocate an irange of 3 sub-ranges. irange *q = alloc.allocate (3); // Allocate an irange with as many sub-ranges as are currently // used in "some_other_range". irange *r = alloc.allocate (some_other_range); gcc/ChangeLog: * value-range.h (class irange): Add irange_allocator friend. (class irange_allocator): New. 
--- gcc/value-range.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'gcc') diff --git a/gcc/value-range.h b/gcc/value-range.h index 8497791..c875e71 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -43,6 +43,7 @@ enum value_range_kind class irange { + friend class irange_allocator; public: // In-place setters. void set (tree, tree, value_range_kind = VR_RANGE); @@ -619,4 +620,68 @@ vrp_val_min (const_tree type) return NULL_TREE; } +// This is the irange storage class. It is used to allocate the +// minimum amount of storage needed for a given irange. Storage is +// automatically freed at destruction of the storage class. +// +// It is meant for long term storage, as opposed to int_range_max +// which is meant for intermediate temporary results on the stack. +// +// The newly allocated irange is initialized to the empty set +// (undefined_p() is true). + +class irange_allocator +{ +public: + irange_allocator (); + ~irange_allocator (); + // Return a new range with NUM_PAIRS. + irange *allocate (unsigned num_pairs); + // Return a copy of SRC with the minimum amount of sub-ranges needed + // to represent it. + irange *allocate (const irange &src); +private: + DISABLE_COPY_AND_ASSIGN (irange_allocator); + struct obstack m_obstack; +}; + +inline +irange_allocator::irange_allocator () +{ + obstack_init (&m_obstack); +} + +inline +irange_allocator::~irange_allocator () +{ + obstack_free (&m_obstack, NULL); +} + +// Return a new range with NUM_PAIRS. + +inline irange * +irange_allocator::allocate (unsigned num_pairs) +{ + // Never allocate 0 pairs. + // Don't allocate 1 either, or we get legacy value_range's. 
+ if (num_pairs < 2) + num_pairs = 2; + + struct newir { + irange range; + tree mem[1]; + }; + size_t nbytes = (sizeof (newir) + sizeof (tree) * 2 * (num_pairs - 1)); + struct newir *r = (newir *) obstack_alloc (&m_obstack, nbytes); + return new (r) irange (r->mem, num_pairs); +} + +inline irange * +irange_allocator::allocate (const irange &src) +{ + irange *r = allocate (src.num_pairs ()); + *r = src; + return r; +} + #endif // GCC_VALUE_RANGE_H -- cgit v1.1 From 9042b6605c259e55378a1f0eefc24b43363bb55b Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Sat, 26 Sep 2020 12:11:51 -0700 Subject: net: add hurd build tag Patch from Svante Signell. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/257857 --- gcc/go/gofrontend/MERGE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index daa0d2d..314ffd2 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -2357468ae9b071de0e2ebe6574d78572967b7183 +9e55baf44ab63ba06af0b57038e7b3aab8216222 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. -- cgit v1.1 From 53d28fd4e16f163a9bd0c323f432914804f1348a Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Fri, 18 Sep 2020 17:15:50 -0400 Subject: analyzer: fix sm_state_map::print In 10fc42a8396072912e9d9d940fba25950b3fdfc5 I converted state_t from unsigned to const state *, but missed this comparison against 0. gcc/analyzer/ChangeLog: * program-state.cc (sm_state_map::print): Update check for m_global_state being the start state. 
--- gcc/analyzer/program-state.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/analyzer/program-state.cc b/gcc/analyzer/program-state.cc index 78b87d5..5bb8907 100644 --- a/gcc/analyzer/program-state.cc +++ b/gcc/analyzer/program-state.cc @@ -160,7 +160,7 @@ sm_state_map::print (const region_model *model, bool first = true; if (!multiline) pp_string (pp, "{"); - if (m_global_state != 0) + if (m_global_state != m_sm.get_start_state ()) { if (multiline) pp_string (pp, " "); -- cgit v1.1 From 01eabbeadb645959d5dcb0f00f41c3565a8f54f1 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Mon, 28 Sep 2020 15:42:31 -0400 Subject: analyzer: fix ICE on non-pointer longjmp [PR97233] gcc/analyzer/ChangeLog: PR analyzer/97233 * analyzer.cc (is_longjmp_call_p): Require the initial argument to be a pointer. * engine.cc (exploded_node::on_longjmp): Likewise. gcc/testsuite/ChangeLog: PR analyzer/97233 * gcc.dg/analyzer/pr97233.c: New test. --- gcc/analyzer/analyzer.cc | 5 ++++- gcc/analyzer/engine.cc | 1 + gcc/testsuite/gcc.dg/analyzer/pr97233.c | 8 ++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr97233.c (limited to 'gcc') diff --git a/gcc/analyzer/analyzer.cc b/gcc/analyzer/analyzer.cc index 82d4878..c792dc3 100644 --- a/gcc/analyzer/analyzer.cc +++ b/gcc/analyzer/analyzer.cc @@ -218,7 +218,10 @@ is_longjmp_call_p (const gcall *call) { if (is_special_named_call_p (call, "longjmp", 2) || is_special_named_call_p (call, "siglongjmp", 2)) - return true; + /* exploded_node::on_longjmp requires a pointer for the initial + argument. 
*/ + if (POINTER_TYPE_P (TREE_TYPE (gimple_call_arg (call, 0)))) + return true; return false; } diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index aa43e4c..84eaa84 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -1277,6 +1277,7 @@ exploded_node::on_longjmp (exploded_graph &eg, region_model_context *ctxt) const { tree buf_ptr = gimple_call_arg (longjmp_call, 0); + gcc_assert (POINTER_TYPE_P (TREE_TYPE (buf_ptr))); region_model *new_region_model = new_state->m_region_model; const svalue *buf_ptr_sval = new_region_model->get_rvalue (buf_ptr, ctxt); diff --git a/gcc/testsuite/gcc.dg/analyzer/pr97233.c b/gcc/testsuite/gcc.dg/analyzer/pr97233.c new file mode 100644 index 0000000..86930aa --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/pr97233.c @@ -0,0 +1,8 @@ +void +longjmp (__SIZE_TYPE__, int); + +void +e7 (__SIZE_TYPE__ gr) +{ + longjmp (gr, 1); +} -- cgit v1.1 From 20d16d61dd22a9bfb66d5c4a383d193037e8f16d Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Mon, 28 Sep 2020 16:53:53 -0400 Subject: analyzer: remove unused field I added this field (and the struct itself) in the rewrite of region and value-handling (808f4dfeb3a95f50f15e71148e5c1067f90a126d), but the field was never used. Found by cppcheck. gcc/analyzer/ChangeLog: * diagnostic-manager.cc (null_assignment_sm_context::m_visitor): Remove unused field. 
--- gcc/analyzer/diagnostic-manager.cc | 1 - 1 file changed, 1 deletion(-) (limited to 'gcc') diff --git a/gcc/analyzer/diagnostic-manager.cc b/gcc/analyzer/diagnostic-manager.cc index 13dd3da..cb95a95 100644 --- a/gcc/analyzer/diagnostic-manager.cc +++ b/gcc/analyzer/diagnostic-manager.cc @@ -931,7 +931,6 @@ struct null_assignment_sm_context : public sm_context const program_state *m_new_state; const gimple *m_stmt; const program_point *m_point; - state_change_visitor *m_visitor; checker_path *m_emission_path; const extrinsic_state &m_ext_state; }; -- cgit v1.1 From c0ed6afef7897f32dc199da9a5430664fcbb61bb Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Mon, 28 Sep 2020 16:59:15 -0400 Subject: analyzer: add some missing FINAL OVERRIDEs Spotted by cppcheck. gcc/analyzer/ChangeLog: * region-model.h (binop_svalue::dyn_cast_binop_svalue): Remove redundant "virtual". Add FINAL OVERRIDE. (widening_svalue::dyn_cast_widening_svalue): Add FINAL OVERRIDE. (compound_svalue::dyn_cast_compound_svalue): Likewise. (conjured_svalue::dyn_cast_conjured_svalue): Likewise. 
--- gcc/analyzer/region-model.h | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/region-model.h b/gcc/analyzer/region-model.h index a61aff2..cfeac8d 100644 --- a/gcc/analyzer/region-model.h +++ b/gcc/analyzer/region-model.h @@ -806,7 +806,10 @@ public: } enum svalue_kind get_kind () const FINAL OVERRIDE { return SK_BINOP; } - virtual const binop_svalue *dyn_cast_binop_svalue () const { return this; } + const binop_svalue *dyn_cast_binop_svalue () const FINAL OVERRIDE + { + return this; + } void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE; void accept (visitor *v) const FINAL OVERRIDE; @@ -1067,7 +1070,10 @@ public: } enum svalue_kind get_kind () const FINAL OVERRIDE { return SK_WIDENING; } - const widening_svalue *dyn_cast_widening_svalue () const { return this; } + const widening_svalue *dyn_cast_widening_svalue () const FINAL OVERRIDE + { + return this; + } void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE; void accept (visitor *v) const FINAL OVERRIDE; @@ -1158,7 +1164,10 @@ public: compound_svalue (tree type, const binding_map &map); enum svalue_kind get_kind () const FINAL OVERRIDE { return SK_COMPOUND; } - const compound_svalue *dyn_cast_compound_svalue () const { return this; } + const compound_svalue *dyn_cast_compound_svalue () const FINAL OVERRIDE + { + return this; + } void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE; void accept (visitor *v) const FINAL OVERRIDE; @@ -1263,7 +1272,10 @@ public: } enum svalue_kind get_kind () const FINAL OVERRIDE { return SK_CONJURED; } - const conjured_svalue *dyn_cast_conjured_svalue () const { return this; } + const conjured_svalue *dyn_cast_conjured_svalue () const FINAL OVERRIDE + { + return this; + } void dump_to_pp (pretty_printer *pp, bool simple) const FINAL OVERRIDE; void accept (visitor *v) const FINAL OVERRIDE; -- cgit v1.1 From e84761c6f32fa6e9b7dccc4245231a7ff4d7da61 Mon Sep 
17 00:00:00 2001 From: GCC Administrator Date: Tue, 29 Sep 2020 00:16:30 +0000 Subject: Daily bump. --- gcc/ChangeLog | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/ada/ChangeLog | 11 ++++++++ gcc/analyzer/ChangeLog | 25 ++++++++++++++++++ gcc/fortran/ChangeLog | 20 +++++++++++++++ gcc/testsuite/ChangeLog | 53 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 177 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 64556c0..a2c395b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,70 @@ +2020-09-28 Aldy Hernandez + + * value-range.h (class irange): Add irange_allocator friend. + (class irange_allocator): New. + +2020-09-28 Tobias Burnus + + PR middle-end/96390 + * omp-offload.c (omp_discover_declare_target_tgt_fn_r): Handle + alias nodes. + +2020-09-28 Paul A. Clarke + + * config/rs6000/smmintrin.h (_mm_insert_epi8): New. + (_mm_insert_epi32): New. + (_mm_insert_epi64): New. + +2020-09-28 liuhongt + + * common/config/i386/i386-common.c (OPTION_MASK_ISA2_AMX_TILE_SET, + OPTION_MASK_ISA2_AMX_INT8_SET, OPTION_MASK_ISA2_AMX_BF16_SET, + OPTION_MASK_ISA2_AMX_TILE_UNSET, OPTION_MASK_ISA2_AMX_INT8_UNSET, + OPTION_MASK_ISA2_AMX_BF16_UNSET, OPTION_MASK_ISA2_XSAVE_UNSET): + New macros. + (ix86_handle_option): Handle -mamx-tile, -mamx-int8, -mamx-bf16. + * common/config/i386/i386-cpuinfo.h (processor_types): Add + FEATURE_AMX_TILE, FEATURE_AMX_INT8, FEATURE_AMX_BF16. + * common/config/i386/cpuinfo.h (XSTATE_TILECFG, + XSTATE_TILEDATA, XCR_AMX_ENABLED_MASK): New macro. + (get_available_features): Enable AMX features only if + their states are supported by OSXSAVE. + * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY + for amx-tile, amx-int8, amx-bf16. + * config.gcc: Add amxtileintrin.h, amxint8intrin.h, + amxbf16intrin.h to extra headers. + * config/i386/amxbf16intrin.h: New file. + * config/i386/amxint8intrin.h: Ditto. + * config/i386/amxtileintrin.h: Ditto. 
+ * config/i386/cpuid.h (bit_AMX_BF16, bit_AMX_TILE, bit_AMX_INT8): + New macro. + * config/i386/i386-c.c (ix86_target_macros_internal): Define + __AMX_TILE__, __AMX_INT8__, __AMX_BF16__. + * config/i386/i386-options.c (ix86_target_string): Add + -mamx-tile, -mamx-int8, -mamx-bf16. + (ix86_option_override_internal): Handle AMX-TILE, + AMX-INT8, AMX-BF16. + * config/i386/i386.h (TARGET_AMX_TILE, TARGET_AMX_TILE_P, + TARGET_AMX_INT8, TARGET_AMX_INT8_P, TARGET_AMX_BF16_P, + PTA_AMX_TILE, PTA_AMX_INT8, PTA_AMX_BF16): New macros. + * config/i386/i386.opt: Add -mamx-tile, -mamx-int8, -mamx-bf16. + * config/i386/immintrin.h: Include amxtileintrin.h, + amxint8intrin.h, amxbf16intrin.h. + * doc/invoke.texi: Document -mamx-tile, -mamx-int8, -mamx-bf16. + * doc/extend.texi: Document amx-tile, amx-int8, amx-bf16. + * doc/sourcebuild.texi (Effective-Target Keywords, Other + hardware attributes): Document amx_int8, amx_tile, amx_bf16. + +2020-09-28 Andrea Corallo + + * config/aarch64/aarch64-builtins.c + (aarch64_general_expand_builtin): Do not alter value on a + force_reg returned rtx. + +2020-09-28 Eric Botcazou + + * tree-eh.c (lower_try_finally_dup_block): Revert latest change. + 2020-09-27 Jan Hubicka * ipa-modref.c (modref_summary::useful_p): Fix testing of stores. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 0c0687f..91db4fb 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200928 +20200929 diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index 280b834..d6e7b4c 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,14 @@ +2020-09-28 Eric Botcazou + + * gcc-interface/trans.c (Subprogram_Body_to_gnu): Set the end locus + of body and declaration earlier. + +2020-09-28 Eric Botcazou + + * gcc-interface/decl.c (maybe_saturate_size): Add ALIGN parameter + and round down the result to ALIGN. + (gnat_to_gnu_entity): Adjust calls to maybe_saturate_size. 
+ 2020-09-14 Jakub Jelinek * gcc-interface/trans.c (gigi): Adjust build_optimization_node diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 0d04604..458fdae 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,28 @@ +2020-09-28 David Malcolm + + * region-model.h (binop_svalue::dyn_cast_binop_svalue): Remove + redundant "virtual". Add FINAL OVERRIDE. + (widening_svalue::dyn_cast_widening_svalue): Add FINAL OVERRIDE. + (compound_svalue::dyn_cast_compound_svalue): Likewise. + (conjured_svalue::dyn_cast_conjured_svalue): Likewise. + +2020-09-28 David Malcolm + + * diagnostic-manager.cc (null_assignment_sm_context::m_visitor): + Remove unused field. + +2020-09-28 David Malcolm + + PR analyzer/97233 + * analyzer.cc (is_longjmp_call_p): Require the initial argument + to be a pointer. + * engine.cc (exploded_node::on_longjmp): Likewise. + +2020-09-28 David Malcolm + + * program-state.cc (sm_state_map::print): Update check + for m_global_state being the start state. + 2020-09-26 David Malcolm PR analyzer/96646 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index e3bb9fb..faa7897 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,23 @@ +2020-09-28 Mark Eggleston + + Revert: + 2020-09-27 Steven G. Kargl + Mark Eggleston + + PR fortran/95614 + * decl.c (gfc_get_common): Use gfc_match_common_name instead + of match_common_name. + * decl.c (gfc_bind_idents): Use gfc_match_common_name instead + of match_common_name. + * match.c : Rename match_common_name to gfc_match_common_name. + * match.c (gfc_match_common): Use gfc_match_common_name instead + of match_common_name. + * match.h : Rename match_common_name to gfc_match_common_name. + * resolve.c (resolve_common_vars): Check each symbol in a + common block has a global symbol. If there is a global symbol + issue an error if the symbol type is known as is not a common + block name. + 2020-09-27 Steven G. 
Kargl Mark Eggleston diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8c3db46..dc8ccbb 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,56 @@ +2020-09-28 David Malcolm + + PR analyzer/97233 + * gcc.dg/analyzer/pr97233.c: New test. + +2020-09-28 Paul A. Clarke + + * gcc.target/powerpc/sse4_1-pinsrb.c: New test. + * gcc.target/powerpc/sse4_1-pinsrd.c: New test. + * gcc.target/powerpc/sse4_1-pinsrq.c: New test. + +2020-09-28 liuhongt + + * lib/target-supports.exp (check_effective_target_amx_tile, + check_effective_target_amx_int8, + check_effective_target_amx_bf16): New proc. + * g++.dg/other/i386-2.C: Add -mamx-tile, -mamx-int8, -mamx-bf16. + * g++.dg/other/i386-3.C: Ditto. + * gcc.target/i386/sse-12.c: Ditto. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-14.c: Ditto. + * gcc.target/i386/sse-22.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/funcspec-56.inc: Add new target attribute. + * gcc.target/i386/amx-check.h: New header file. + * gcc.target/i386/amxbf16-asmatt-1.c: New test. + * gcc.target/i386/amxint8-asmatt-1.c: New test. + * gcc.target/i386/amxtile-asmatt-1.c: Ditto. + * gcc.target/i386/amxbf16-asmintel-1.c: Ditto. + * gcc.target/i386/amxint8-asmintel-1.c: Ditto. + * gcc.target/i386/amxtile-asmintel-1.c: Ditto. + * gcc.target/i386/amxbf16-dpbf16ps-2.c: Ditto. + * gcc.target/i386/amxint8-dpbssd-2.c: Ditto. + * gcc.target/i386/amxint8-dpbsud-2.c: Ditto. + * gcc.target/i386/amxint8-dpbusd-2.c: Ditto. + * gcc.target/i386/amxint8-dpbuud-2.c: Ditto. + * gcc.target/i386/amxtile-2.c: Ditto. + +2020-09-28 Mark Eggleston + + Revert: + 2020-09-28 Steven G. Kargl + Mark Eggleston + + PR fortran/95614 + * gfortran.dg/pr95614_1.f90: New test. + * gfortran.dg/pr95614_2.f90: New test. + +2020-09-28 Eric Botcazou + + * gnat.dg/addr16.adb: New test. + * gnat.dg/addr16_pkg.ads: New helper. 
+ 2020-09-27 Jakub Jelinek PR middle-end/97073 -- cgit v1.1 From 82693c4421d8a372e538ff1bd385f9ac4a34ce6f Mon Sep 17 00:00:00 2001 From: David Edelsohn Date: Mon, 28 Sep 2020 20:39:36 -0400 Subject: testsuite: Skip symver1 on AIX. symver1.c only is valid on ELF targets. Add AIX to the skip list. gcc/testsuite/ChangeLog 2020-09-28 David Edelsohn * gcc.dg/ipa/symver1.c: Skip on AIX. --- gcc/testsuite/gcc.dg/ipa/symver1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/ipa/symver1.c b/gcc/testsuite/gcc.dg/ipa/symver1.c index fca5220..2cd0258 100644 --- a/gcc/testsuite/gcc.dg/ipa/symver1.c +++ b/gcc/testsuite/gcc.dg/ipa/symver1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-skip-if "only works for ELF targets" { *-*-darwin* } } */ +/* { dg-skip-if "only works for ELF targets" { *-*-darwin* *-*-aix* } } */ __attribute__ ((__symver__ ("foo@VER_2"))) __attribute__ ((__symver__ ("foo@VER_3"))) -- cgit v1.1 From 37ffe56c01e4a9e80a3b3c4f5beb86d80a0663db Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Thu, 24 Sep 2020 13:34:58 +0200 Subject: Revert "switch lowering: limit number of cluster attemps" This reverts commit c6df6039e9180c580945266302ec14047d358364. --- gcc/doc/invoke.texi | 4 --- gcc/params.opt | 4 --- gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 50 --------------------------------- gcc/tree-switch-conversion.c | 17 ----------- 4 files changed, 75 deletions(-) delete mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr96979.C (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 3e087b6..9f956b5 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -13492,10 +13492,6 @@ The smallest number of different values for which it is best to use a jump-table instead of a tree of conditional branches. If the value is 0, use the default for the machine. -@item max-switch-clustering-attempts -The maximum number of clustering attempts used -in bit-test and jump-table switch expansion. 
- @item jump-table-max-growth-ratio-for-size The maximum code size growth ratio when expanding into a jump table (in percent). The parameter is used when diff --git a/gcc/params.opt b/gcc/params.opt index 5bc7e16..a232161 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -82,10 +82,6 @@ The maximum length of a constant string for a builtin string cmp call eligible f Common Joined UInteger Var(param_case_values_threshold) Param Optimization The smallest number of different values for which it is best to use a jump-table instead of a tree of conditional branches, if 0, use the default for the machine. --param=max-switch-clustering-attempts= -Common Joined UInteger Var(param_max_switch_clustering_attempts) Param Optimization Init(10000) -The maximum number of clustering attempts used in bit-test and jump-table switch expansion. - -param=comdat-sharing-probability= Common Joined UInteger Var(param_comdat_sharing_probability) Init(20) Param Optimization Probability that COMDAT function will be shared with different compilation unit. 
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C deleted file mode 100644 index 85c703a..0000000 --- a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C +++ /dev/null @@ -1,50 +0,0 @@ -/* PR tree-optimization/96979 */ -/* { dg-do compile } */ -/* { dg-options "-std=c++17 -O2 -fdump-tree-switchlower1" } */ - -using u64 = unsigned long long; - -constexpr inline u64 -foo (const char *str) noexcept -{ - u64 value = 0xcbf29ce484222325ULL; - for (u64 i = 0; str[i]; i++) - value = (value ^ u64(str[i])) * 0x100000001b3ULL; - return value; -} - -struct V -{ - enum W - { -#define A(n) n, -#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9) -#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9) -#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9) -#define E D(foo1) D(foo2) D(foo3) - E - last - }; - - constexpr static W - bar (const u64 h) noexcept - { - switch (h) - { -#undef A -#define F(n) #n -#define A(n) case foo (F(n)): return n; - E - } - return last; - } -}; - -int -baz (const char *s) -{ - const u64 h = foo (s); - return V::bar (h); -} - -/* { dg-final { scan-tree-dump-times ";; Bail out: --param=max-switch-clustering-attempts reached" 2 "switchlower1" } } */ diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c index e6a2c7a..186411f 100644 --- a/gcc/tree-switch-conversion.c +++ b/gcc/tree-switch-conversion.c @@ -1183,7 +1183,6 @@ jump_table_cluster::find_jump_tables (vec &clusters) min.quick_push (min_cluster_item (0, 0, 0)); - HOST_WIDE_INT attempts = 0; for (unsigned i = 1; i <= l; i++) { /* Set minimal # of clusters with i-th item to infinite. 
*/ @@ -1195,14 +1194,6 @@ jump_table_cluster::find_jump_tables (vec &clusters) if (i - j < case_values_threshold ()) s += i - j; - if (attempts++ == param_max_switch_clustering_attempts) - { - if (dump_file) - fprintf (dump_file, ";; Bail out: " - "--param=max-switch-clustering-attempts reached\n"); - return clusters.copy (); - } - /* Prefer clusters with smaller number of numbers covered. */ if ((min[j].m_count + 1 < min[i].m_count || (min[j].m_count + 1 == min[i].m_count @@ -1317,7 +1308,6 @@ bit_test_cluster::find_bit_tests (vec &clusters) min.quick_push (min_cluster_item (0, 0, 0)); - HOST_WIDE_INT attempts = 0; for (unsigned i = 1; i <= l; i++) { /* Set minimal # of clusters with i-th item to infinite. */ @@ -1325,13 +1315,6 @@ bit_test_cluster::find_bit_tests (vec &clusters) for (unsigned j = 0; j < i; j++) { - if (attempts++ == param_max_switch_clustering_attempts) - { - if (dump_file) - fprintf (dump_file, ";; Bail out: " - "--param=max-switch-clustering-attempts reached\n"); - return clusters.copy (); - } if (min[j].m_count + 1 < min[i].m_count && can_be_handled (clusters, j, i - 1)) min[i] = min_cluster_item (min[j].m_count + 1, j, INT_MAX); -- cgit v1.1 From e46858e4eeee45d35ca4a7df1996186fe884879b Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Thu, 24 Sep 2020 13:34:13 +0200 Subject: switch conversion: make a rapid speed up gcc/ChangeLog: PR tree-optimization/96979 * tree-switch-conversion.c (jump_table_cluster::can_be_handled): Make a fast bail out. (bit_test_cluster::can_be_handled): Likewise here. * tree-switch-conversion.h (get_range): Use wi::to_wide instead of a folding. gcc/testsuite/ChangeLog: PR tree-optimization/96979 * g++.dg/tree-ssa/pr96979.C: New test. 
--- gcc/testsuite/g++.dg/tree-ssa/pr96979.C | 48 +++++++++++++++++++++++++++++++++ gcc/tree-switch-conversion.c | 37 ++++++++++++++++++------- gcc/tree-switch-conversion.h | 7 +++-- 3 files changed, 79 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr96979.C (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr96979.C b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C new file mode 100644 index 0000000..ec0f57a --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr96979.C @@ -0,0 +1,48 @@ +/* PR tree-optimization/96979 */ +/* { dg-do compile } */ +/* { dg-options "-std=c++17 -O2" } */ + +using u64 = unsigned long long; + +constexpr inline u64 +foo (const char *str) noexcept +{ + u64 value = 0xcbf29ce484222325ULL; + for (u64 i = 0; str[i]; i++) + value = (value ^ u64(str[i])) * 0x100000001b3ULL; + return value; +} + +struct V +{ + enum W + { +#define A(n) n, +#define B(n) A(n##0) A(n##1) A(n##2) A(n##3) A(n##4) A(n##5) A(n##6) A(n##7) A(n##8) A(n##9) +#define C(n) B(n##0) B(n##1) B(n##2) B(n##3) B(n##4) B(n##5) B(n##6) B(n##7) B(n##8) B(n##9) +#define D(n) C(n##0) C(n##1) C(n##2) C(n##3) C(n##4) C(n##5) C(n##6) C(n##7) C(n##8) C(n##9) +#define E D(foo1) D(foo2) D(foo3) + E + last + }; + + constexpr static W + bar (const u64 h) noexcept + { + switch (h) + { +#undef A +#define F(n) #n +#define A(n) case foo (F(n)): return n; + E + } + return last; + } +}; + +int +baz (const char *s) +{ + const u64 h = foo (s); + return V::bar (h); +} diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c index 186411f..03a1fe6 100644 --- a/gcc/tree-switch-conversion.c +++ b/gcc/tree-switch-conversion.c @@ -1268,6 +1268,18 @@ jump_table_cluster::can_be_handled (const vec &clusters, if (range == 0) return false; + if (range > HOST_WIDE_INT_M1U / 100) + return false; + + unsigned HOST_WIDE_INT lhs = 100 * range; + if (lhs < range) + return false; + + /* First make quick guess as each cluster + can add at maximum 2 to the 
comparison_count. */ + if (lhs > 2 * max_ratio * (end - start + 1)) + return false; + unsigned HOST_WIDE_INT comparison_count = 0; for (unsigned i = start; i <= end; i++) { @@ -1275,10 +1287,6 @@ jump_table_cluster::can_be_handled (const vec &clusters, comparison_count += sc->m_range_p ? 2 : 1; } - unsigned HOST_WIDE_INT lhs = 100 * range; - if (lhs < range) - return false; - return lhs <= max_ratio * comparison_count; } @@ -1364,12 +1372,12 @@ bit_test_cluster::can_be_handled (unsigned HOST_WIDE_INT range, { /* Check overflow. */ if (range == 0) - return 0; + return false; if (range >= GET_MODE_BITSIZE (word_mode)) return false; - return uniq <= 3; + return uniq <= m_max_case_bit_tests; } /* Return true when cluster starting at START and ending at END (inclusive) @@ -1379,6 +1387,7 @@ bool bit_test_cluster::can_be_handled (const vec &clusters, unsigned start, unsigned end) { + auto_vec dest_bbs; /* For algorithm correctness, bit test for a single case must return true. We bail out in is_beneficial if it's called just for a single case. */ @@ -1387,15 +1396,25 @@ bit_test_cluster::can_be_handled (const vec &clusters, unsigned HOST_WIDE_INT range = get_range (clusters[start]->get_low (), clusters[end]->get_high ()); - auto_bitmap dest_bbs; + + /* Make a guess first. */ + if (!can_be_handled (range, m_max_case_bit_tests)) + return false; for (unsigned i = start; i <= end; i++) { simple_cluster *sc = static_cast (clusters[i]); - bitmap_set_bit (dest_bbs, sc->m_case_bb->index); + /* m_max_case_bit_tests is very small integer, thus the operation + is constant. 
*/ + if (!dest_bbs.contains (sc->m_case_bb->index)) + { + if (dest_bbs.length () >= m_max_case_bit_tests) + return false; + dest_bbs.quick_push (sc->m_case_bb->index); + } } - return can_be_handled (range, bitmap_count_bits (dest_bbs)); + return true; } /* Return true when COUNT of cases of UNIQ labels is beneficial for bit test diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h index 9ebcf10..dbfd9ee 100644 --- a/gcc/tree-switch-conversion.h +++ b/gcc/tree-switch-conversion.h @@ -84,11 +84,10 @@ public: then return 0. */ static unsigned HOST_WIDE_INT get_range (tree low, tree high) { - tree r = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low); - if (!tree_fits_uhwi_p (r)) + wide_int w = wi::to_wide (high) - wi::to_wide (low); + if (wi::neg_p (w, TYPE_SIGN (TREE_TYPE (low))) || !wi::fits_uhwi_p (w)) return 0; - - return tree_to_uhwi (r) + 1; + return w.to_uhwi () + 1; } /* Case label. */ -- cgit v1.1 From f322701e2465cac017d45a93fb336aceeee96630 Mon Sep 17 00:00:00 2001 From: Alex Coplan Date: Mon, 28 Sep 2020 17:58:34 +0100 Subject: aarch64: Fix ordering of aarch64-cores.def This patch moves the entry for Neoverse N2 (an Armv8.5-A CPU) after Saphira (an Armv8.4-A CPU) to preserve the overall ordering in the file. Committing as obvious. gcc/ChangeLog: * config/aarch64/aarch64-cores.def: Move neoverse-n2 after saphira. * config/aarch64/aarch64-tune.md: Regenerate. 
--- gcc/config/aarch64/aarch64-cores.def | 6 +++--- gcc/config/aarch64/aarch64-tune.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 469ee99..5223481 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -136,12 +136,12 @@ AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_ AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversen1, 0x41, 0xd40, -1) AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversen1, 0x41, 0xd40, -1) -/* Armv8.5-A Architecture Processors. */ -AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoversen1, 0x41, 0xd49, -1) - /* Qualcomm ('Q') cores. */ AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) +/* Armv8.5-A Architecture Processors. */ +AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoversen1, 0x41, 0xd49, -1) + /* ARMv8-A big.LITTLE implementations. 
*/ AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 3cf69ce..bb5d8da 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoversen2,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) -- cgit v1.1 From 30784833af015a9a74f5315d54f9dc08e1bbdf94 Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Fri, 25 Sep 2020 10:57:16 +0800 Subject: RISC-V: Define __riscv_cmodel_medany for PIC mode. 
- According to the conclusion in the RISC-V C API document, we decided to deprecate the __riscv_cmodel_pic macro - __riscv_cmodel_pic is deprecated and will be removed in the next GCC release. [1] https://github.com/riscv/riscv-c-api-doc/pull/11 gcc/ChangeLog: * config/riscv/riscv-c.c (riscv_cpu_cpp_builtins): Define __riscv_cmodel_medany when PIC mode. gcc/testsuite/ChangeLog: * gcc.target/riscv/predef-3.c: Update testcase. * gcc.target/riscv/predef-6.c: Ditto. --- gcc/config/riscv/riscv-c.c | 9 ++++++--- gcc/testsuite/gcc.target/riscv/predef-3.c | 6 +++--- gcc/testsuite/gcc.target/riscv/predef-6.c | 6 +++--- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/riscv-c.c b/gcc/config/riscv/riscv-c.c index 735f2f2..c600badb 100644 --- a/gcc/config/riscv/riscv-c.c +++ b/gcc/config/riscv/riscv-c.c @@ -90,12 +90,15 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile) builtin_define ("__riscv_cmodel_medlow"); break; + case CM_PIC: + /* __riscv_cmodel_pic is deprecated, and will removed in next GCC release. + see https://github.com/riscv/riscv-c-api-doc/pull/11 */ + builtin_define ("__riscv_cmodel_pic"); + /* FALLTHROUGH.
*/ + case CM_MEDANY: builtin_define ("__riscv_cmodel_medany"); break; - case CM_PIC: - builtin_define ("__riscv_cmodel_pic"); - break; } } diff --git a/gcc/testsuite/gcc.target/riscv/predef-3.c b/gcc/testsuite/gcc.target/riscv/predef-3.c index 6f4f2e2..d7c9793 100644 --- a/gcc/testsuite/gcc.target/riscv/predef-3.c +++ b/gcc/testsuite/gcc.target/riscv/predef-3.c @@ -55,11 +55,11 @@ int main () { #if defined(__riscv_cmodel_medlow) #error "__riscv_cmodel_medlow" #endif -#if defined(__riscv_cmodel_medany) -#error "__riscv_cmodel_medlow" +#if !defined(__riscv_cmodel_medany) +#error "__riscv_cmodel_medany" #endif #if !defined(__riscv_cmodel_pic) -#error "__riscv_cmodel_medlow" +#error "__riscv_cmodel_pic" #endif return 0; diff --git a/gcc/testsuite/gcc.target/riscv/predef-6.c b/gcc/testsuite/gcc.target/riscv/predef-6.c index ee4e02b..7530f95 100644 --- a/gcc/testsuite/gcc.target/riscv/predef-6.c +++ b/gcc/testsuite/gcc.target/riscv/predef-6.c @@ -55,11 +55,11 @@ int main () { #if defined(__riscv_cmodel_medlow) #error "__riscv_cmodel_medlow" #endif -#if defined(__riscv_cmodel_medany) -#error "__riscv_cmodel_medlow" +#if !defined(__riscv_cmodel_medany) +#error "__riscv_cmodel_medany" #endif #if !defined(__riscv_cmodel_pic) -#error "__riscv_cmodel_medlow" +#error "__riscv_cmodel_medpic" #endif return 0; -- cgit v1.1 From 95e10b8aa1066dbd5c433e613652674b0636fcd1 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Tue, 29 Sep 2020 11:41:26 +0100 Subject: arm: Add new vector mode macros The AArch32 port now has three vector extensions: iwMMXt, Neon and MVE. We already have some named expanders that are shared by all three, and soon we'll need more. One way of handling this would be to use define_mode_iterators that specify the condition for each mode. For example, (V16QI "TARGET_NEON || TARGET_HAVE_MVE") (V8QI "TARGET_NEON || TARGET_REALLY_IWMMXT") ... (V2SF "TARGET_NEON && flag_unsafe_math_optimizations") etc.
However, we'll need several mode iterators, and it would be repetitive to specify the mode condition every time. This patch therefore introduces per-mode macros that say whether we can perform general arithmetic on the mode. Initially there are two sets of macros: ARM_HAVE_NEON_<MODE>_ARITH, true if Neon can handle general arithmetic on <MODE>; and ARM_HAVE_<MODE>_ARITH, true if any vector extension can handle general arithmetic on <MODE>. The macro definitions themselves are undeniably ugly, but hopefully they're justified by the simplifications they allow. The patch converts the addition patterns to use this scheme. Previously there were three copies of the V8HF and V4HF addition patterns for Neon: (1) *add<mode>3_neon, which provided plus:VnHF even without TARGET_NEON_FP16INST. This was probably harmless since all the named patterns had an appropriate guard, but it is possible that something could have tried to generate the plus directly, such as by using a REG_EQUAL note to generate a new pattern. (2) addv8hf3_neon and addv4hf3, which had the correct TARGET_NEON_FP16INST target condition, but unnecessarily required flag_unsafe_math_optimizations. Unlike VnSF operations, VnHF operations do not force flush to zero. (3) add<mode>3_fp16, which provided plus:VnHF with the correct conditions (TARGET_NEON_FP16INST, with no flag_unsafe_math_optimizations test). The patch in essence renames add<mode>3_fp16 to *add<VnHF>3_neon (part of *add<mode>3_neon) and removes the other two patterns. gcc/ * config/arm/arm.h (ARM_HAVE_NEON_V8QI_ARITH, ARM_HAVE_NEON_V4HI_ARITH) (ARM_HAVE_NEON_V2SI_ARITH, ARM_HAVE_NEON_V16QI_ARITH): New macros. (ARM_HAVE_NEON_V8HI_ARITH, ARM_HAVE_NEON_V4SI_ARITH): Likewise. (ARM_HAVE_NEON_V2DI_ARITH, ARM_HAVE_NEON_V4HF_ARITH): Likewise. (ARM_HAVE_NEON_V8HF_ARITH, ARM_HAVE_NEON_V2SF_ARITH): Likewise.
(ARM_HAVE_NEON_V4SF_ARITH, ARM_HAVE_V8QI_ARITH, ARM_HAVE_V4HI_ARITH) (ARM_HAVE_V2SI_ARITH, ARM_HAVE_V16QI_ARITH, ARM_HAVE_V8HI_ARITH) (ARM_HAVE_V4SI_ARITH, ARM_HAVE_V2DI_ARITH, ARM_HAVE_V4HF_ARITH) (ARM_HAVE_V2SF_ARITH, ARM_HAVE_V8HF_ARITH, ARM_HAVE_V4SF_ARITH): Likewise. * config/arm/iterators.md (VNIM, VNINOTM): Delete. * config/arm/vec-common.md (add3, addv8hf3) (add3): Replace with... (add3): ...this new expander. * config/arm/neon.md (*add3_neon): Use the new ARM_HAVE_NEON__ARITH macros as the C condition. (addv8hf3_neon, addv4hf3, add3_fp16): Delete in favor of the above. (neon_vadd): Use gen_add3 instead of gen_add3_fp16. gcc/testsuite/ * gcc.target/arm/armv8_2-fp16-arith-2.c: Expect FP16 vectorization even without -ffast-math. --- gcc/config/arm/arm.h | 41 +++++++++++++++++++ gcc/config/arm/iterators.md | 8 ---- gcc/config/arm/neon.md | 47 +--------------------- gcc/config/arm/vec-common.md | 42 +++---------------- .../gcc.target/arm/armv8_2-fp16-arith-2.c | 20 +++++---- 5 files changed, 61 insertions(+), 97 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index f4d3676..4a63d33 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1110,6 +1110,47 @@ extern const int arm_arch_cde_coproc_bits[]; #define VALID_MVE_STRUCT_MODE(MODE) \ ((MODE) == TImode || (MODE) == OImode || (MODE) == XImode) +/* The conditions under which vector modes are supported for general + arithmetic using Neon. */ + +#define ARM_HAVE_NEON_V8QI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V4HI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V2SI_ARITH TARGET_NEON + +#define ARM_HAVE_NEON_V16QI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V8HI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V4SI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V2DI_ARITH TARGET_NEON + +/* HF operations have their own flush-to-zero control (FPSCR.FZ16). 
*/ +#define ARM_HAVE_NEON_V4HF_ARITH TARGET_NEON_FP16INST +#define ARM_HAVE_NEON_V8HF_ARITH TARGET_NEON_FP16INST + +/* SF operations always flush to zero, regardless of FPSCR.FZ, so we can + only use them for general arithmetic when -funsafe-math-optimizations + is in effect. */ +#define ARM_HAVE_NEON_V2SF_ARITH \ + (TARGET_NEON && flag_unsafe_math_optimizations) +#define ARM_HAVE_NEON_V4SF_ARITH ARM_HAVE_NEON_V2SF_ARITH + +/* The conditions under which vector modes are supported for general + arithmetic by any vector extension. */ + +#define ARM_HAVE_V8QI_ARITH (ARM_HAVE_NEON_V8QI_ARITH || TARGET_REALLY_IWMMXT) +#define ARM_HAVE_V4HI_ARITH (ARM_HAVE_NEON_V4HI_ARITH || TARGET_REALLY_IWMMXT) +#define ARM_HAVE_V2SI_ARITH (ARM_HAVE_NEON_V2SI_ARITH || TARGET_REALLY_IWMMXT) + +#define ARM_HAVE_V16QI_ARITH (ARM_HAVE_NEON_V16QI_ARITH || TARGET_HAVE_MVE) +#define ARM_HAVE_V8HI_ARITH (ARM_HAVE_NEON_V8HI_ARITH || TARGET_HAVE_MVE) +#define ARM_HAVE_V4SI_ARITH (ARM_HAVE_NEON_V4SI_ARITH || TARGET_HAVE_MVE) +#define ARM_HAVE_V2DI_ARITH ARM_HAVE_NEON_V2DI_ARITH + +#define ARM_HAVE_V4HF_ARITH ARM_HAVE_NEON_V4HF_ARITH +#define ARM_HAVE_V2SF_ARITH ARM_HAVE_NEON_V2SF_ARITH + +#define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || TARGET_HAVE_MVE_FLOAT) +#define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || TARGET_HAVE_MVE_FLOAT) + /* The register numbers in sequence, for passing to arm_gen_load_multiple. */ extern int arm_regs_in_sequence[]; diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 0bc9eba..c70e3bc 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -66,14 +66,6 @@ ;; Integer and float modes supported by Neon and IWMMXT. (define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF]) -;; Integer and float modes supported by Neon, IWMMXT and MVE, used by -;; arithmetic epxand patterns. 
-(define_mode_iterator VNIM [V16QI V8HI V4SI V4SF]) - -;; Integer and float modes supported by Neon and IWMMXT but not MVE, used by -;; arithmetic epxand patterns. -(define_mode_iterator VNINOTM [V2SI V4HI V8QI V2SF V2DI]) - ;; Integer and float modes supported by Neon, IWMMXT and MVE. (define_mode_iterator VNIM1 [V16QI V8HI V4SI V4SF V2DI]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 3e7b51d..96bf277 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -501,7 +501,7 @@ [(set (match_operand:VDQ 0 "s_register_operand" "=w") (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] - "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON__ARITH" "vadd.\t%0, %1, %2" [(set (attr "type") (if_then_else (match_test "") @@ -509,49 +509,6 @@ (const_string "neon_add")))] ) -;; As with SFmode, full support for HFmode vector arithmetic is only available -;; when flag-unsafe-math-optimizations is enabled. - -;; Add pattern with modes V8HF and V4HF is split into separate patterns to add -;; support for standard pattern addv8hf3 in MVE. Following pattern is called -;; from "addv8hf3" standard pattern inside vec-common.md file. 
- -(define_insn "addv8hf3_neon" - [(set - (match_operand:V8HF 0 "s_register_operand" "=w") - (plus:V8HF - (match_operand:V8HF 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" - "vadd.f16\t%0, %1, %2" - [(set_attr "type" "neon_fp_addsub_s_q")] -) - -(define_insn "addv4hf3" - [(set - (match_operand:V4HF 0 "s_register_operand" "=w") - (plus:V4HF - (match_operand:V4HF 1 "s_register_operand" "w") - (match_operand:V4HF 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" - "vadd.f16\t%0, %1, %2" - [(set_attr "type" "neon_fp_addsub_s_q")] -) - -(define_insn "add3_fp16" - [(set - (match_operand:VH 0 "s_register_operand" "=w") - (plus:VH - (match_operand:VH 1 "s_register_operand" "w") - (match_operand:VH 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST" - "vadd.\t%0, %1, %2" - [(set (attr "type") - (if_then_else (match_test "") - (const_string "neon_fp_addsub_s") - (const_string "neon_add")))] -) - (define_insn "*sub3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") @@ -1837,7 +1794,7 @@ (match_operand:VH 2 "s_register_operand")] "TARGET_NEON_FP16INST" { - emit_insn (gen_add3_fp16 (operands[0], operands[1], operands[2])); + emit_insn (gen_add3 (operands[0], operands[1], operands[2])); DONE; }) diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index b7e3619..c3c86c4 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -81,43 +81,11 @@ ;; patterns separately for Neon, IWMMXT and MVE. 
(define_expand "add3" - [(set (match_operand:VNIM 0 "s_register_operand") - (plus:VNIM (match_operand:VNIM 1 "s_register_operand") - (match_operand:VNIM 2 "s_register_operand")))] - "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) - || (TARGET_HAVE_MVE && VALID_MVE_SI_MODE(mode)) - || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(mode))" -{ -}) - -;; Vector arithmetic. Expanders are blank, then unnamed insns implement -;; patterns separately for Neon and MVE. - -(define_expand "addv8hf3" - [(set (match_operand:V8HF 0 "s_register_operand") - (plus:V8HF (match_operand:V8HF 1 "s_register_operand") - (match_operand:V8HF 2 "s_register_operand")))] - "(TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(V8HFmode)) - || (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations)" -{ - if (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations) - emit_insn (gen_addv8hf3_neon (operands[0], operands[1], operands[2])); -}) - -;; Vector arithmetic. Expanders are blank, then unnamed insns implement -;; patterns separately for Neon and IWMMXT. - -(define_expand "add3" - [(set (match_operand:VNINOTM 0 "s_register_operand") - (plus:VNINOTM (match_operand:VNINOTM 1 "s_register_operand") - (match_operand:VNINOTM 2 "s_register_operand")))] - "(TARGET_NEON && ((mode != V2SFmode && mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))" -{ -}) + [(set (match_operand:VDQ 0 "s_register_operand") + (plus:VDQ (match_operand:VDQ 1 "s_register_operand") + (match_operand:VDQ 2 "s_register_operand")))] + "ARM_HAVE__ARITH" +) ;; Vector arithmetic. Expanders are blank, then unnamed insns implement ;; patterns separately for IWMMXT and Neon. 
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c index 24d0528..81bad22 100644 --- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c +++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c @@ -89,17 +89,23 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t) /* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } } */ /* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } } */ /* { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 2 } } */ + +/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vadd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vsub\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vsub\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ + +/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */ +/* { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ -/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ -/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ /* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ /* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } } */ - /* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } } */ -/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, #0} 2 } } 
*/ - -/* { dg-final { scan-assembler-not {vabs\.f16} } } */ /* { dg-final { scan-assembler-not {vadd\.f32} } } */ /* { dg-final { scan-assembler-not {vsub\.f32} } } */ -- cgit v1.1 From 29aef377d814bd342dd5a306f99e0d614623ce0e Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 29 Sep 2020 14:38:06 +0200 Subject: tree-optimization/97238 - fix typo causing ICE This fixes a typo causing a NULL dereference. 2020-09-29 Richard Biener PR tree-optimization/97238 * tree-ssa-reassoc.c (ovce_extract_ops): Fix typo. * gcc.dg/pr97238.c: New testcase. --- gcc/testsuite/gcc.dg/pr97238.c | 12 ++++++++++++ gcc/tree-ssa-reassoc.c | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/pr97238.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/pr97238.c b/gcc/testsuite/gcc.dg/pr97238.c new file mode 100644 index 0000000..746e93a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr97238.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O -Wno-psabi -w" } */ + +typedef int __attribute__ ((__vector_size__ (8))) V; +int b, c, e; +V d; + +V +foo (void) +{ + return (b || e) | c > d | ((b || e) | c > d); +} diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c index facc794..a2ca171 100644 --- a/gcc/tree-ssa-reassoc.c +++ b/gcc/tree-ssa-reassoc.c @@ -3910,7 +3910,7 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type, return ERROR_MARK; gassign *assign = dyn_cast (SSA_NAME_DEF_STMT (cond)); - if (stmt == NULL + if (assign == NULL || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison) return ERROR_MARK; -- cgit v1.1 From d68f4d2ecb8ed6781e4e535d2abc498b1674d68a Mon Sep 17 00:00:00 2001 From: Hongyu Wang Date: Mon, 28 Sep 2020 22:22:28 +0000 Subject: Add missing FSF copyright notes for x86 intrinsic headers. gcc/ChangeLog: PR target/97231 * config/i386/amxbf16intrin.h: Add FSF copyright notes. * config/i386/amxint8intrin.h: Ditto. * config/i386/amxtileintrin.h: Ditto. 
* config/i386/avx512vp2intersectintrin.h: Ditto. * config/i386/avx512vp2intersectvlintrin.h: Ditto. * config/i386/pconfigintrin.h: Ditto. * config/i386/tsxldtrkintrin.h: Ditto. * config/i386/wbnoinvdintrin.h: Ditto. --- gcc/config/i386/amxbf16intrin.h | 23 +++++++++++++++++++++++ gcc/config/i386/amxint8intrin.h | 23 +++++++++++++++++++++++ gcc/config/i386/amxtileintrin.h | 23 +++++++++++++++++++++++ gcc/config/i386/avx512vp2intersectintrin.h | 23 +++++++++++++++++++++++ gcc/config/i386/avx512vp2intersectvlintrin.h | 23 +++++++++++++++++++++++ gcc/config/i386/pconfigintrin.h | 23 +++++++++++++++++++++++ gcc/config/i386/tsxldtrkintrin.h | 23 +++++++++++++++++++++++ gcc/config/i386/wbnoinvdintrin.h | 23 +++++++++++++++++++++++ 8 files changed, 184 insertions(+) (limited to 'gcc') diff --git a/gcc/config/i386/amxbf16intrin.h b/gcc/config/i386/amxbf16intrin.h index b162096..77cc395 100644 --- a/gcc/config/i386/amxbf16intrin.h +++ b/gcc/config/i386/amxbf16intrin.h @@ -1,3 +1,26 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + #if !defined _IMMINTRIN_H_INCLUDED #error "Never use directly; include instead." #endif diff --git a/gcc/config/i386/amxint8intrin.h b/gcc/config/i386/amxint8intrin.h index 11adc1f..f4e410b 100644 --- a/gcc/config/i386/amxint8intrin.h +++ b/gcc/config/i386/amxint8intrin.h @@ -1,3 +1,26 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + #if !defined _IMMINTRIN_H_INCLUDED #error "Never use directly; include instead." #endif diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h index e78e5c0..41fb9a5 100644 --- a/gcc/config/i386/amxtileintrin.h +++ b/gcc/config/i386/amxtileintrin.h @@ -1,3 +1,26 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + #if !defined _IMMINTRIN_H_INCLUDED #error "Never use directly; include instead." #endif diff --git a/gcc/config/i386/avx512vp2intersectintrin.h b/gcc/config/i386/avx512vp2intersectintrin.h index 60cb52c..f368d83 100644 --- a/gcc/config/i386/avx512vp2intersectintrin.h +++ b/gcc/config/i386/avx512vp2intersectintrin.h @@ -1,3 +1,26 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + #if !defined _IMMINTRIN_H_INCLUDED #error "Never use directly; include instead." #endif diff --git a/gcc/config/i386/avx512vp2intersectvlintrin.h b/gcc/config/i386/avx512vp2intersectvlintrin.h index 26eee36..f657840 100644 --- a/gcc/config/i386/avx512vp2intersectvlintrin.h +++ b/gcc/config/i386/avx512vp2intersectvlintrin.h @@ -1,3 +1,26 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + #if !defined _IMMINTRIN_H_INCLUDED #error "Never use directly; include instead." #endif diff --git a/gcc/config/i386/pconfigintrin.h b/gcc/config/i386/pconfigintrin.h index d2a3261..709119e 100644 --- a/gcc/config/i386/pconfigintrin.h +++ b/gcc/config/i386/pconfigintrin.h @@ -1,3 +1,26 @@ +/* Copyright (C) 2018-2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + #ifndef _IMMINTRIN_H_INCLUDED #error "Never use directly; include instead." #endif diff --git a/gcc/config/i386/tsxldtrkintrin.h b/gcc/config/i386/tsxldtrkintrin.h index 08b76a9..176d232 100644 --- a/gcc/config/i386/tsxldtrkintrin.h +++ b/gcc/config/i386/tsxldtrkintrin.h @@ -1,3 +1,26 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + #if !defined _IMMINTRIN_H_INCLUDED #error "Never use directly; include instead." 
#endif diff --git a/gcc/config/i386/wbnoinvdintrin.h b/gcc/config/i386/wbnoinvdintrin.h index 5393698..94e58e9 100644 --- a/gcc/config/i386/wbnoinvdintrin.h +++ b/gcc/config/i386/wbnoinvdintrin.h @@ -1,3 +1,26 @@ +/* Copyright (C) 2018-2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + #ifndef _IMMINTRIN_H_INCLUDED #error "Never use directly; include instead." #endif -- cgit v1.1 From b1570930df659ff3ea6691f9ba8b84c8a189d85d Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 29 Sep 2020 12:01:26 +0200 Subject: move permute optimization to optimize-slp This moves optimizing permutes of SLP reductions to vect_optimize_slp, eliding the global slp_loads array. 2020-09-29 Richard Biener * tree-vect-slp.c (vect_analyze_slp): Move SLP reduction re-arrangement and SLP graph load gathering... (vect_optimize_slp): ... here. * tree-vectorizer.h (vec_info::slp_loads): Remove. 
--- gcc/tree-vect-slp.c | 19 ++++++++++--------- gcc/tree-vectorizer.h | 1 - 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index c44fd39..8de2480 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2486,8 +2486,15 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) vect_free_slp_tree ((*it).second); delete bst_map; + return opt_result::success (); +} + +void +vect_optimize_slp (vec_info *vinfo) +{ /* Optimize permutations in SLP reductions. */ slp_instance instance; + unsigned i; FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance) { slp_tree node = SLP_INSTANCE_TREE (instance); @@ -2500,20 +2507,14 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) } /* Gather all loads in the SLP graph. */ + auto_vec<slp_tree> slp_loads; hash_set<slp_tree> visited; FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance) - vect_gather_slp_loads (vinfo->slp_loads, SLP_INSTANCE_TREE (instance), + vect_gather_slp_loads (slp_loads, SLP_INSTANCE_TREE (instance), visited); - return opt_result::success (); -} - -void -vect_optimize_slp (vec_info *vinfo) -{ slp_tree node; - unsigned i; - FOR_EACH_VEC_ELT (vinfo->slp_loads, i, node) + FOR_EACH_VEC_ELT (slp_loads, i, node) { if (!SLP_TREE_LOAD_PERMUTATION (node).exists ()) continue; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index b7fa6bc..e62f1cc 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -359,7 +359,6 @@ public: /* The SLP graph. */ auto_vec<slp_instance> slp_instances; - auto_vec<slp_tree> slp_loads; /* Maps base addresses to an innermost_loop_behavior that gives the maximum known alignment for that base.
*/ -- cgit v1.1 From 39a27bb01aa223ce89946f0a4de6b60c4c0b03d2 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 29 Sep 2020 15:02:47 +0200 Subject: tree-optimization/97241 - fix ICE in reduction vectorization The following moves an ad-hoc attempt at discovering the SLP node for a stmt to the place where we can find it in lock-step when we find the stmt itself. 2020-09-29 Richard Biener PR tree-optimization/97241 * tree-vect-loop.c (vectorizable_reduction): Move finding the SLP node for the reduction stmt to a better place. * gcc.dg/vect/pr97241.c: New testcase. --- gcc/testsuite/gcc.dg/vect/pr97241.c | 19 +++++++++++++++++++ gcc/tree-vect-loop.c | 17 +++++------------ 2 files changed, 24 insertions(+), 12 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr97241.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/pr97241.c b/gcc/testsuite/gcc.dg/vect/pr97241.c new file mode 100644 index 0000000..d4be8f6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97241.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 --param max-loop-header-insns=2" } */ + +short int *ev; +int l4; + +short int +a7 (void) +{ + short int uo = ev[0], ie = uo; + + for (int kp = 0; kp < l4; kp += 4) + { + uo += ev[kp + 1]; + ie += ev[kp]; + } + + return uo + ie; +} diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index f1d6bdd..ce5d95d 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -6357,12 +6357,14 @@ vectorizable_reduction (loop_vec_info loop_vinfo, gphi *reduc_def_phi = as_a (phi_info->stmt); /* Verify following REDUC_IDX from the latch def leads us back to the PHI - and compute the reduction chain length. */ + and compute the reduction chain length. Discover the real + reduction operation stmt on the way (stmt_info and slp_for_stmt_info). 
*/ tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, loop_latch_edge (loop)); unsigned reduc_chain_length = 0; bool only_slp_reduc_chain = true; stmt_info = NULL; + slp_tree slp_for_stmt_info = slp_node ? slp_node_instance->root : NULL; while (reduc_def != PHI_RESULT (reduc_def_phi)) { stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); @@ -6405,6 +6407,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, stmt_info = vdef; reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef)); reduc_chain_length++; + if (!stmt_info && slp_node) + slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; } /* PHIs should not participate in patterns. */ gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); @@ -6491,17 +6495,6 @@ vectorizable_reduction (loop_vec_info loop_vinfo, The last use is the reduction variable. In case of nested cycle this assumption is not true: we use reduc_index to record the index of the reduction variable. */ - /* ??? To get at invariant/constant uses on the SLP node we have to - get to it here, slp_node is still the reduction PHI. */ - slp_tree slp_for_stmt_info = NULL; - if (slp_node) - { - slp_for_stmt_info = slp_node_instance->root; - /* And then there's reduction chain with a conversion ... */ - if (SLP_TREE_REPRESENTATIVE (slp_for_stmt_info) != stmt_info) - slp_for_stmt_info = SLP_TREE_CHILDREN (slp_for_stmt_info)[0]; - gcc_assert (SLP_TREE_REPRESENTATIVE (slp_for_stmt_info) == stmt_info); - } slp_tree *slp_op = XALLOCAVEC (slp_tree, op_type); /* We need to skip an extra operand for COND_EXPRs with embedded comparison. */ -- cgit v1.1 From cc61827b55e7735a831e52f7ef093c409ddd607a Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Tue, 29 Sep 2020 07:38:48 -0700 Subject: c++: Identifier type value should not update binding This simplification removes some unneeded behaviour in set_identifier_type_value_with_scope, which was updating the namespace binding. 
And causing update_binding to have to deal with meeting two implicit typedefs. But the typedef is already there, and there's no other way to have two such typedef's collide (we'll already have dealt with that in lookup_elaborated_type). So, let's kill this crufty code. gcc/cp/ * name-lookup.c (update_binding): We never meet two implicit typedefs. (do_pushdecl): Adjust set_identifier_type_value_with_scope calls. (set_identifier_type_value_with_scope): Do not update binding in the namespace-case. Assert it is already there. --- gcc/cp/name-lookup.c | 50 ++++++++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index 184e9c8..f195e81 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -2365,33 +2365,24 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, if (old == error_mark_node) old = NULL_TREE; - if (TREE_CODE (decl) == TYPE_DECL && DECL_ARTIFICIAL (decl)) + if (DECL_IMPLICIT_TYPEDEF_P (decl)) { - tree other = to_type; - - if (old && TREE_CODE (old) == TYPE_DECL && DECL_ARTIFICIAL (old)) - other = old; - - /* Pushing an artificial typedef. See if this matches either - the type slot or the old value slot. */ - if (!other) - ; - else if (same_type_p (TREE_TYPE (other), TREE_TYPE (decl))) - /* Two artificial decls to same type. Do nothing. */ - return other; - else - goto conflict; + /* Pushing an artificial decl. We should not find another + artificial decl here already -- lookup_elaborated_type will + have already found it. */ + gcc_checking_assert (!to_type + && !(old && DECL_IMPLICIT_TYPEDEF_P (old))); if (old) { /* Slide decl into the type slot, keep old unaltered */ to_type = decl; to_val = old; - goto done; } + goto done; } - if (old && TREE_CODE (old) == TYPE_DECL && DECL_ARTIFICIAL (old)) + if (old && DECL_IMPLICIT_TYPEDEF_P (old)) { /* Slide old into the type slot. 
*/ to_type = old; @@ -3122,7 +3113,7 @@ do_pushdecl (tree decl, bool hiding) if (TREE_CODE (decl) == NAMESPACE_DECL) /* A local namespace alias. */ - set_identifier_type_value (name, NULL_TREE); + set_identifier_type_value_with_scope (name, NULL_TREE, level); if (!binding) binding = create_local_binding (level, name); @@ -3150,10 +3141,7 @@ do_pushdecl (tree decl, bool hiding) if (TYPE_NAME (type) != decl) set_underlying_type (decl); - if (!ns) - set_identifier_type_value_with_scope (name, decl, level); - else - SET_IDENTIFIER_TYPE_VALUE (name, global_type_node); + set_identifier_type_value_with_scope (name, decl, level); } /* If this is a locally defined typedef in a function that @@ -3768,8 +3756,9 @@ identifier_type_value (tree id) } /* Push a definition of struct, union or enum tag named ID. into - binding_level B. DECL is a TYPE_DECL for the type. We assume that - the tag ID is not already defined. */ + binding_level B. DECL is a TYPE_DECL for the type. DECL has + already been pushed into its binding level. This is bookkeeping to + find it easily. */ static void set_identifier_type_value_with_scope (tree id, tree decl, cp_binding_level *b) @@ -3781,20 +3770,25 @@ set_identifier_type_value_with_scope (tree id, tree decl, cp_binding_level *b) /* Shadow the marker, not the real thing, so that the marker gets restored later. */ tree old_type_value = REAL_IDENTIFIER_TYPE_VALUE (id); - b->type_shadowed - = tree_cons (id, old_type_value, b->type_shadowed); + b->type_shadowed = tree_cons (id, old_type_value, b->type_shadowed); type = decl ? 
TREE_TYPE (decl) : NULL_TREE; TREE_TYPE (b->type_shadowed) = type; } else { - tree *slot = find_namespace_slot (current_namespace, id, true); gcc_assert (decl); - update_binding (b, NULL, slot, MAYBE_STAT_DECL (*slot), decl); + if (CHECKING_P) + { + tree *slot = find_namespace_slot (current_namespace, id); + gcc_checking_assert (slot + && (decl == MAYBE_STAT_TYPE (*slot) + || decl == MAYBE_STAT_DECL (*slot))); + } /* Store marker instead of real type. */ type = global_type_node; } + SET_IDENTIFIER_TYPE_VALUE (id, type); } -- cgit v1.1 From 74b5b8dec467f922f6c40a320859054081c7bb09 Mon Sep 17 00:00:00 2001 From: David Edelsohn Date: Sun, 27 Sep 2020 11:47:25 -0400 Subject: testsuite: Prevent spellcheck-inttypes failures on AIX. AIX stdio.h implicitly includes sys/types.h, which implicitly includes inttypes.h. With a recent AIX header fixincludes change to unilaterally define STDC Macros, the GCC testsuite uses of inttypes now fails. This patch explicitly defines the _STD_TYPES_T macro when the test is run on AIX so that the inttypes.h header behaves as the testcase requires. gcc/testsuite/ChangeLog: 2020-09-29 David Edelsohn * g++.dg/spellcheck-inttypes.C: Define _STD_TYPES_T on AIX. * gcc.dg/spellcheck-inttypes.c: Same. --- gcc/testsuite/g++.dg/spellcheck-inttypes.C | 3 +++ gcc/testsuite/gcc.dg/spellcheck-inttypes.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/spellcheck-inttypes.C b/gcc/testsuite/g++.dg/spellcheck-inttypes.C index 84bfc12..fea3096 100644 --- a/gcc/testsuite/g++.dg/spellcheck-inttypes.C +++ b/gcc/testsuite/g++.dg/spellcheck-inttypes.C @@ -1,4 +1,7 @@ /* { dg-options "-std=c++11" } */ +#ifdef _AIX +#define _STD_TYPES_T +#endif #include #include /* Missing . 
*/ diff --git a/gcc/testsuite/gcc.dg/spellcheck-inttypes.c b/gcc/testsuite/gcc.dg/spellcheck-inttypes.c index 1146a7c..611d7f0 100644 --- a/gcc/testsuite/gcc.dg/spellcheck-inttypes.c +++ b/gcc/testsuite/gcc.dg/spellcheck-inttypes.c @@ -1,7 +1,7 @@ /* { dg-options "-std=c99" } */ /* Prevent AIX from implicitly including inttypes.h. */ #ifdef _AIX -#define _H_INTTYPES_TYPE_TS +#define _STD_TYPES_T #endif #include #include -- cgit v1.1 From adcf8a11c772e7a0c64d4ae3eb19a520566f32b9 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Tue, 29 Sep 2020 09:38:34 -0700 Subject: c++: Name lookup simplifications Here are a few cleanups, prior to landing the hidden decl changes. 1) Clear cxx_binding flags in the allocator, not at each user of the allocator. 2) Refactor update_binding. The logic was getting too convoluted. 3) Set friendliness and anticipatedness before pushing a template decl (not after). gcc/cp/ * name-lookup.c (create_local_binding): Do not clear INHERITED_VALUE_BINDING_P here. (name_lookup::process_binding): Move done hidden-decl triage to ... (name_lookup::search_namespace_only): ... here, its only caller. (cxx_binding_make): Clear flags here. (push_binding): Not here. (pop_local_binding): RAII. (update_binding): Refactor. (do_pushdecl): Assert we're never revealing a local binding. (do_pushdecl_with_scope): Directly call do_pushdecl. (get_class_binding): Do not clear LOCAL_BINDING_P here. * pt.c (push_template_decl): Set friend & anticipated before pushing. 
--- gcc/cp/name-lookup.c | 167 +++++++++++++++++++++++++++++---------------------- gcc/cp/pt.c | 8 +-- 2 files changed, 98 insertions(+), 77 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index f195e81..89f1a4c 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -77,7 +77,6 @@ create_local_binding (cp_binding_level *level, tree name) { cxx_binding *binding = cxx_binding_make (NULL, NULL); - INHERITED_VALUE_BINDING_P (binding) = false; LOCAL_BINDING_P (binding) = true; binding->scope = level; binding->previous = IDENTIFIER_BINDING (name); @@ -480,22 +479,17 @@ name_lookup::add_type (tree new_type) } /* Process a found binding containing NEW_VAL and NEW_TYPE. Returns - true if we actually found something noteworthy. */ + true if we actually found something noteworthy. Hiddenness has + already been handled in the caller. */ bool name_lookup::process_binding (tree new_val, tree new_type) { /* Did we really see a type? */ if (new_type - && ((want & LOOK_want::TYPE_NAMESPACE) == LOOK_want::NAMESPACE - || (!bool (want & LOOK_want::HIDDEN_FRIEND) - && DECL_LANG_SPECIFIC (new_type) - && DECL_ANTICIPATED (new_type)))) + && (want & LOOK_want::TYPE_NAMESPACE) == LOOK_want::NAMESPACE) new_type = NULL_TREE; - if (new_val && !bool (want & LOOK_want::HIDDEN_FRIEND)) - new_val = ovl_skip_hidden (new_val); - /* Do we really see a value? 
*/ if (new_val) switch (TREE_CODE (new_val)) @@ -544,8 +538,25 @@ name_lookup::search_namespace_only (tree scope) bool found = false; if (tree *binding = find_namespace_slot (scope, name)) - found |= process_binding (MAYBE_STAT_DECL (*binding), - MAYBE_STAT_TYPE (*binding)); + { + tree value = *binding, type = NULL_TREE; + + if (STAT_HACK_P (value)) + { + type = STAT_TYPE (value); + value = STAT_DECL (value); + + if (!bool (want & LOOK_want::HIDDEN_FRIEND) + && DECL_LANG_SPECIFIC (type) + && DECL_ANTICIPATED (type)) + type = NULL_TREE; + } + + if (!bool (want & LOOK_want::HIDDEN_FRIEND)) + value = ovl_skip_hidden (value); + + found |= process_binding (value, type); + } return found; } @@ -1954,15 +1965,17 @@ cxx_binding_init (cxx_binding *binding, tree value, tree type) static cxx_binding * cxx_binding_make (tree value, tree type) { - cxx_binding *binding; - if (free_bindings) - { - binding = free_bindings; - free_bindings = binding->previous; - } + cxx_binding *binding = free_bindings; + + if (binding) + free_bindings = binding->previous; else binding = ggc_alloc (); + /* Clear flags by default. */ + LOCAL_BINDING_P (binding) = false; + INHERITED_VALUE_BINDING_P (binding) = false; + cxx_binding_init (binding, value, type); return binding; @@ -2009,7 +2022,6 @@ push_binding (tree id, tree decl, cp_binding_level* level) /* Now, fill in the binding information. */ binding->previous = IDENTIFIER_BINDING (id); - INHERITED_VALUE_BINDING_P (binding) = 0; LOCAL_BINDING_P (binding) = (level != class_binding_level); /* And put it on the front of the list of bindings for ID. */ @@ -2022,8 +2034,6 @@ push_binding (tree id, tree decl, cp_binding_level* level) void pop_local_binding (tree id, tree decl) { - cxx_binding *binding; - if (id == NULL_TREE) /* It's easiest to write the loops that call this function without checking whether or not the entities involved have names. 
We @@ -2031,7 +2041,7 @@ pop_local_binding (tree id, tree decl) return; /* Get the innermost binding for ID. */ - binding = IDENTIFIER_BINDING (id); + cxx_binding *binding = IDENTIFIER_BINDING (id); /* The name should be bound. */ gcc_assert (binding != NULL); @@ -2356,9 +2366,16 @@ static tree update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, tree old, tree decl, bool hiding = false) { + tree old_type = NULL_TREE; + + if (!slot) + old_type = binding->type; + else if (STAT_HACK_P (*slot)) + old_type = STAT_TYPE (*slot); + tree to_val = decl; - tree old_type = slot ? MAYBE_STAT_TYPE (*slot) : binding->type; tree to_type = old_type; + bool local_overload = false; gcc_assert (level->kind == sk_namespace ? !binding : level->kind != sk_class && !slot); @@ -2375,16 +2392,20 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, if (old) { - /* Slide decl into the type slot, keep old unaltered */ + /* Put DECL into the type slot. */ + gcc_checking_assert (!to_type); to_type = decl; to_val = old; } + goto done; } if (old && DECL_IMPLICIT_TYPEDEF_P (old)) { - /* Slide old into the type slot. */ + /* OLD is an implicit typedef. Move it to to_type. */ + gcc_checking_assert (!to_type); + to_type = old; old = NULL_TREE; } @@ -2428,60 +2449,66 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, warning (OPT_Wshadow, "%q#D hides constructor for %q#D", decl, to_type); + local_overload = old && level->kind != sk_namespace; to_val = ovl_insert (decl, old); } - else if (!old) - ; - else if (TREE_CODE (old) != TREE_CODE (decl)) - /* Different kinds of decls conflict. */ - goto conflict; - else if (TREE_CODE (old) == TYPE_DECL) - { - if (same_type_p (TREE_TYPE (old), TREE_TYPE (decl))) - /* Two type decls to the same type. Do nothing. */ - return old; - else - goto conflict; - } - else if (TREE_CODE (old) == NAMESPACE_DECL) + else if (old) { - /* Two maybe-aliased namespaces. 
If they're to the same target - namespace, that's ok. */ - if (ORIGINAL_NAMESPACE (old) != ORIGINAL_NAMESPACE (decl)) + if (TREE_CODE (old) != TREE_CODE (decl)) + /* Different kinds of decls conflict. */ goto conflict; - - /* The new one must be an alias at this point. */ - gcc_assert (DECL_NAMESPACE_ALIAS (decl)); - return old; - } - else if (TREE_CODE (old) == VAR_DECL) - { - /* There can be two block-scope declarations of the same - variable, so long as they are `extern' declarations. */ - if (!DECL_EXTERNAL (old) || !DECL_EXTERNAL (decl)) - goto conflict; - else if (tree match = duplicate_decls (decl, old)) - return match; + else if (TREE_CODE (old) == TYPE_DECL) + { + if (same_type_p (TREE_TYPE (old), TREE_TYPE (decl))) + { + /* Two type decls to the same type. Do nothing. */ + gcc_checking_assert (!hiding); + return old; + } + else + goto conflict; + } + else if (TREE_CODE (old) == NAMESPACE_DECL) + { + /* Two maybe-aliased namespaces. If they're to the same target + namespace, that's ok. */ + if (ORIGINAL_NAMESPACE (old) != ORIGINAL_NAMESPACE (decl)) + goto conflict; + + /* The new one must be an alias at this point. */ + gcc_assert (DECL_NAMESPACE_ALIAS (decl) && !hiding); + return old; + } + else if (TREE_CODE (old) == VAR_DECL) + { + /* There can be two block-scope declarations of the same + variable, so long as they are `extern' declarations. */ + // FIXME: This is DECL_LOCAL_DECL_P type stuff. 
+ if (!DECL_EXTERNAL (old) || !DECL_EXTERNAL (decl)) + goto conflict; + else if (tree match = duplicate_decls (decl, old)) + return match; + else + goto conflict; + } else - goto conflict; - } - else - { - conflict: - diagnose_name_conflict (decl, old); - to_val = NULL_TREE; + { + conflict: + diagnose_name_conflict (decl, old); + to_val = NULL_TREE; + } } done: if (to_val) { - if (level->kind == sk_namespace || to_type == decl || to_val == decl) - add_decl_to_level (level, decl); - else + if (local_overload) { gcc_checking_assert (binding->value && OVL_P (binding->value)); update_local_overload (binding, to_val); } + else + add_decl_to_level (level, decl); if (slot) { @@ -3059,12 +3086,8 @@ do_pushdecl (tree decl, bool hiding) tree head = iter.reveal_node (old); if (head != old) { - if (!ns) - { - update_local_overload (binding, head); - binding->value = head; - } - else if (STAT_HACK_P (*slot)) + gcc_checking_assert (ns); + if (STAT_HACK_P (*slot)) STAT_DECL (*slot) = head; else *slot = head; @@ -3859,7 +3882,7 @@ do_pushdecl_with_scope (tree x, cp_binding_level *level, bool hiding = false) current_function_decl = NULL_TREE; b = current_binding_level; current_binding_level = level; - x = pushdecl (x, hiding); + x = do_pushdecl (x, hiding); current_binding_level = b; current_function_decl = function_decl; } @@ -4398,8 +4421,6 @@ get_class_binding (tree name, cp_binding_level *scope) value_binding, type_binding, scope); - /* This is a class-scope binding, not a block-scope binding. 
*/ - LOCAL_BINDING_P (binding) = 0; set_inherited_value_binding_p (binding, value_binding, class_type); } else diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 199fe65..a096337 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -6018,16 +6018,16 @@ push_template_decl (tree decl, bool is_friend) if (!ctx && !(is_friend && template_class_depth (current_class_type) > 0)) { - tmpl = pushdecl_namespace_level (tmpl, /*hiding=*/is_friend); - if (tmpl == error_mark_node) - return error_mark_node; - /* Hide template friend classes that haven't been declared yet. */ if (is_friend && TREE_CODE (decl) == TYPE_DECL) { DECL_ANTICIPATED (tmpl) = 1; DECL_FRIEND_P (tmpl) = 1; } + + tmpl = pushdecl_namespace_level (tmpl, /*hiding=*/is_friend); + if (tmpl == error_mark_node) + return error_mark_node; } } else -- cgit v1.1 From dec881f85abbddc6e37630b6e61ce621cea6acd7 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Tue, 29 Sep 2020 11:40:46 -0700 Subject: x86: Replace <enqcmdntrin.h> with <enqcmdintrin.h> Fix 2 typos in config/i386/enqcmdintrin.h by replacing <enqcmdntrin.h> with <enqcmdintrin.h>: [hjl@gnu-cfl-2 x86-gcc]$ echo "#include <enqcmdintrin.h>" | gcc -S -o /dev/null -x c - In file included from <stdin>:1: /usr/lib/gcc/x86_64-redhat-linux/10/include/enqcmdintrin.h:25:3: error: #error "Never use <enqcmdntrin.h> directly; include instead." 25 | # error "Never use <enqcmdntrin.h> directly; include instead." | ^~~~~ [hjl@gnu-cfl-2 x86-gcc]$ and _ENQCMDNTRIN_H_INCLUDED with _ENQCMDINTRIN_H_INCLUDED. gcc/ PR target/97247 * config/i386/enqcmdintrin.h: Replace <enqcmdntrin.h> with <enqcmdintrin.h>. Replace _ENQCMDNTRIN_H_INCLUDED with _ENQCMDINTRIN_H_INCLUDED. --- gcc/config/i386/enqcmdintrin.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/config/i386/enqcmdintrin.h b/gcc/config/i386/enqcmdintrin.h index 4b2efcb..721dfb2 100644 --- a/gcc/config/i386/enqcmdintrin.h +++ b/gcc/config/i386/enqcmdintrin.h @@ -22,11 +22,11 @@ . */ #if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <enqcmdntrin.h> directly; include instead." +# error "Never use <enqcmdintrin.h> directly; include instead."
#endif -#ifndef _ENQCMDNTRIN_H_INCLUDED -#define _ENQCMDNTRIN_H_INCLUDED +#ifndef _ENQCMDINTRIN_H_INCLUDED +#define _ENQCMDINTRIN_H_INCLUDED #ifndef __ENQCMD__ #pragma GCC push_options @@ -52,4 +52,4 @@ _enqcmds (void * __P, const void * __Q) #undef __DISABLE_ENQCMD__ #pragma GCC pop_options #endif /* __DISABLE_ENQCMD__ */ -#endif /* _ENQCMDNTRIN_H_INCLUDED. */ +#endif /* _ENQCMDINTRIN_H_INCLUDED. */ -- cgit v1.1 From 7cbfe0894dea4128805595dce3f23d0530b33a3c Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Tue, 29 Sep 2020 12:03:27 -0700 Subject: c++: Hiddenness is a property of the symbol table This patch moves the handling of decl-hiddenness entirely into the name lookup machinery, where it belongs. We need a few new flags, because pressing the existing OVL_HIDDEN_P into play for non-function decls doesn't work well. For a local binding we only need one marker, as there cannot be both a hidden implicit typedef and a hidden function. That's not true for namespace-scope, where they could both be hidden. The name-lookup machinery maintains the existing decl_hidden and co flags, and asserts have been sprinkled around to make sure they are consistent. The next series of patches will remove those old markers. (we'll need to keep one, as there are some special restrictions on redeclaring friend functions with in-class definitions or default args.) gcc/cp/ * cp-tree.h (ovl_insert): Change final parm to hidden-or-using indicator. * name-lookup.h (HIDDEN_TYPE_BINDING_P): New. (struct cxx_binding): Add type_is_hidden flag. * tree.c (ovl_insert): Change using_p parm to using_or_hidden, adjust. (ovl_skip_hidden): Assert we never see a naked hidden decl. * decl.c (xref_tag_1): Delete unhiding friend from here (moved to lookup_elaborated_type_1). * name-lookup.c (STAT_TYPE_HIDDEN_P, STAT_DECL_HIDDEN_P): New. (name_lookup::search_namespace_only): Check new hidden markers. (cxx_binding_make): Clear HIDDEN_TYPE_BINDING_P. (update_binding): Update new hidden markers. 
(lookup_name_1): Check HIDDEN_TYPE_BINDING_P and simplify friend ignoring. (lookup_elaborated_type_1): Use new hidden markers. Reveal the decl here. --- gcc/cp/cp-tree.h | 2 +- gcc/cp/decl.c | 19 +--- gcc/cp/name-lookup.c | 291 +++++++++++++++++++++++++++++++++------------------ gcc/cp/name-lookup.h | 7 ++ gcc/cp/tree.c | 30 +++--- 5 files changed, 213 insertions(+), 136 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index b7f5b6b..a25934e 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -7371,7 +7371,7 @@ inline tree ovl_first (tree) ATTRIBUTE_PURE; extern tree ovl_make (tree fn, tree next = NULL_TREE); extern tree ovl_insert (tree fn, tree maybe_ovl, - bool using_p = false); + int using_or_hidden = 0); extern tree ovl_skip_hidden (tree) ATTRIBUTE_PURE; extern void lookup_mark (tree lookup, bool val); extern tree lookup_add (tree fns, tree lookup); diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index c00b996..617b96e 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -15089,22 +15089,9 @@ xref_tag_1 (enum tag_types tag_code, tree name, return error_mark_node; } - if (how != TAG_how::HIDDEN_FRIEND && TYPE_HIDDEN_P (t)) - { - /* This is no longer an invisible friend. Make it - visible. */ - tree decl = TYPE_NAME (t); - - DECL_ANTICIPATED (decl) = false; - DECL_FRIEND_P (decl) = false; - - if (TYPE_TEMPLATE_INFO (t)) - { - tree tmpl = TYPE_TI_TEMPLATE (t); - DECL_ANTICIPATED (tmpl) = false; - DECL_FRIEND_P (tmpl) = false; - } - } + gcc_checking_assert (how == TAG_how::HIDDEN_FRIEND + || !(DECL_LANG_SPECIFIC (TYPE_NAME (t)) + && DECL_ANTICIPATED (TYPE_NAME (t)))); } return t; diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index 89f1a4c..bc60d343 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -55,6 +55,15 @@ static name_hint suggest_alternatives_for_1 (location_t location, tree name, #define MAYBE_STAT_DECL(N) (STAT_HACK_P (N) ? STAT_DECL (N) : N) #define MAYBE_STAT_TYPE(N) (STAT_HACK_P (N) ? 
STAT_TYPE (N) : NULL_TREE) +/* For regular (maybe) overloaded functions, we have OVL_HIDDEN_P. + But we also need to indicate hiddenness on implicit type decls + (injected friend classes), and (coming soon) decls injected from + block-scope externs. It is too awkward to press the existing + overload marking for that. If we have a hidden non-function, we + always create a STAT_HACK, and use these two markers as needed. */ +#define STAT_TYPE_HIDDEN_P(N) OVL_HIDDEN_P (N) +#define STAT_DECL_HIDDEN_P(N) OVL_DEDUP_P (N) + /* Create a STAT_HACK node with DECL as the value binding and TYPE as the type binding. */ @@ -545,14 +554,18 @@ name_lookup::search_namespace_only (tree scope) { type = STAT_TYPE (value); value = STAT_DECL (value); - - if (!bool (want & LOOK_want::HIDDEN_FRIEND) - && DECL_LANG_SPECIFIC (type) - && DECL_ANTICIPATED (type)) - type = NULL_TREE; + + if (!bool (want & LOOK_want::HIDDEN_FRIEND)) + { + if (STAT_TYPE_HIDDEN_P (*binding)) + type = NULL_TREE; + if (STAT_DECL_HIDDEN_P (*binding)) + value = NULL_TREE; + else + value = ovl_skip_hidden (value); + } } - - if (!bool (want & LOOK_want::HIDDEN_FRIEND)) + else if (!bool (want & LOOK_want::HIDDEN_FRIEND)) value = ovl_skip_hidden (value); found |= process_binding (value, type); @@ -1975,6 +1988,7 @@ cxx_binding_make (tree value, tree type) /* Clear flags by default. */ LOCAL_BINDING_P (binding) = false; INHERITED_VALUE_BINDING_P (binding) = false; + HIDDEN_TYPE_BINDING_P (binding) = false; cxx_binding_init (binding, value, type); @@ -2046,13 +2060,15 @@ pop_local_binding (tree id, tree decl) /* The name should be bound. */ gcc_assert (binding != NULL); - /* The DECL will be either the ordinary binding or the type - binding for this identifier. Remove that binding. */ + /* The DECL will be either the ordinary binding or the type binding + for this identifier. Remove that binding. We don't have to + clear HIDDEN_TYPE_BINDING_P, as the whole binding will be going + away. 
*/ if (binding->value == decl) binding->value = NULL_TREE; else { - gcc_assert (binding->type == decl); + gcc_checking_assert (binding->type == decl); binding->type = NULL_TREE; } @@ -2367,11 +2383,22 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, tree old, tree decl, bool hiding = false) { tree old_type = NULL_TREE; + bool hide_type = false; + bool hide_value = false; if (!slot) - old_type = binding->type; + { + old_type = binding->type; + hide_type = HIDDEN_TYPE_BINDING_P (binding); + if (!old_type) + hide_value = hide_type, hide_type = false; + } else if (STAT_HACK_P (*slot)) + { old_type = STAT_TYPE (*slot); + hide_type = STAT_TYPE_HIDDEN_P (*slot); + hide_value = STAT_DECL_HIDDEN_P (*slot); + } tree to_val = decl; tree to_type = old_type; @@ -2394,9 +2421,12 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, { /* Put DECL into the type slot. */ gcc_checking_assert (!to_type); + hide_type = hiding; to_type = decl; to_val = old; } + else + hide_value = hiding; goto done; } @@ -2407,7 +2437,9 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, gcc_checking_assert (!to_type); to_type = old; + hide_type = hide_value; old = NULL_TREE; + hide_value = false; } if (DECL_DECLARES_FUNCTION_P (decl)) @@ -2450,7 +2482,7 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, decl, to_type); local_overload = old && level->kind != sk_namespace; - to_val = ovl_insert (decl, old); + to_val = ovl_insert (decl, old, -int (hiding)); } else if (old) { @@ -2483,11 +2515,13 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, { /* There can be two block-scope declarations of the same variable, so long as they are `extern' declarations. */ - // FIXME: This is DECL_LOCAL_DECL_P type stuff. 
if (!DECL_EXTERNAL (old) || !DECL_EXTERNAL (decl)) goto conflict; else if (tree match = duplicate_decls (decl, old)) - return match; + { + gcc_checking_assert (!hide_value && !hiding); + return match; + } else goto conflict; } @@ -2498,6 +2532,8 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, to_val = NULL_TREE; } } + else if (hiding) + hide_value = true; done: if (to_val) @@ -2516,16 +2552,26 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot, { STAT_TYPE (*slot) = to_type; STAT_DECL (*slot) = to_val; + STAT_TYPE_HIDDEN_P (*slot) = hide_type; + STAT_DECL_HIDDEN_P (*slot) = hide_value; + } + else if (to_type || hide_value) + { + *slot = stat_hack (to_val, to_type); + STAT_TYPE_HIDDEN_P (*slot) = hide_type; + STAT_DECL_HIDDEN_P (*slot) = hide_value; } - else if (to_type) - *slot = stat_hack (to_val, to_type); else - *slot = to_val; + { + gcc_checking_assert (!hide_type); + *slot = to_val; + } } else { binding->type = to_type; binding->value = to_val; + HIDDEN_TYPE_BINDING_P (binding) = hide_type || hide_value; } } @@ -6489,86 +6535,37 @@ lookup_name_1 (tree name, LOOK_where where, LOOK_want want) for (cxx_binding *iter = nullptr; (iter = outer_binding (name, iter, bool (where & LOOK_where::CLASS)));) { - tree binding; - /* Skip entities we don't want. */ if (!bool (where & (LOCAL_BINDING_P (iter) ? LOOK_where::BLOCK : LOOK_where::CLASS))) continue; /* If this is the kind of thing we're looking for, we're done. 
*/ - if (iter->value - && (bool (want & LOOK_want::HIDDEN_LAMBDA) - || !is_lambda_ignored_entity (iter->value)) - && qualify_lookup (iter->value, want)) - binding = iter->value; - else if (bool (want & LOOK_want::TYPE) - && qualify_lookup (iter->type, want)) - binding = iter->type; - else - binding = NULL_TREE; - - if (binding) + if (iter->value) { - if (TREE_CODE (binding) == TYPE_DECL && DECL_HIDDEN_P (binding)) + tree binding = NULL_TREE; + + if (!(!iter->type && HIDDEN_TYPE_BINDING_P (iter)) + && (bool (want & LOOK_want::HIDDEN_LAMBDA) + || !is_lambda_ignored_entity (iter->value)) + && qualify_lookup (iter->value, want)) + binding = iter->value; + else if (bool (want & LOOK_want::TYPE) + && !HIDDEN_TYPE_BINDING_P (iter) + && iter->type) + binding = iter->type; + + if (binding) { - /* A non namespace-scope binding can only be hidden in the - presence of a local class, due to friend declarations. - - In particular, consider: - - struct C; - void f() { - struct A { - friend struct B; - friend struct C; - void g() { - B* b; // error: B is hidden - C* c; // OK, finds ::C - } - }; - B *b; // error: B is hidden - C *c; // OK, finds ::C - struct B {}; - B *bb; // OK - } - - The standard says that "B" is a local class in "f" - (but not nested within "A") -- but that name lookup - for "B" does not find this declaration until it is - declared directly with "f". - - In particular: - - [class.friend] - - If a friend declaration appears in a local class and - the name specified is an unqualified name, a prior - declaration is looked up without considering scopes - that are outside the innermost enclosing non-class - scope. For a friend function declaration, if there is - no prior declaration, the program is ill-formed. 
For a - friend class declaration, if there is no prior - declaration, the class that is specified belongs to the - innermost enclosing non-class scope, but if it is - subsequently referenced, its name is not found by name - lookup until a matching declaration is provided in the - innermost enclosing nonclass scope. - - So just keep looking for a non-hidden binding. - */ - gcc_assert (TREE_CODE (binding) == TYPE_DECL); - continue; + /* The saved lookups for an operator record 'nothing + found' as error_mark_node. We need to stop the search + here, but not return the error mark node. */ + if (binding == error_mark_node) + binding = NULL_TREE; + + val = binding; + goto found; } - - /* The saved lookups for an operator record 'nothing - found' as error_mark_node. We need to stop the search - here, but not return the error mark node. */ - if (binding == error_mark_node) - binding = NULL_TREE; - - val = binding; - goto found; } } @@ -6649,17 +6646,55 @@ lookup_elaborated_type_1 (tree name, TAG_how how) typedef struct C {} C; correctly. 
*/ + tree found = NULL_TREE; + bool reveal = false; if (tree type = iter->type) - if (qualify_lookup (type, LOOK_want::TYPE) - && (how != TAG_how::CURRENT_ONLY - || LOCAL_BINDING_P (iter) - || DECL_CONTEXT (type) == iter->scope->this_entity)) - return type; - - if (qualify_lookup (iter->value, LOOK_want::TYPE) - && (how != TAG_how::CURRENT_ONLY - || !INHERITED_VALUE_BINDING_P (iter))) - return iter->value; + { + if (qualify_lookup (type, LOOK_want::TYPE) + && (how != TAG_how::CURRENT_ONLY + || LOCAL_BINDING_P (iter) + || DECL_CONTEXT (type) == iter->scope->this_entity)) + { + found = type; + if (how != TAG_how::HIDDEN_FRIEND) + reveal = HIDDEN_TYPE_BINDING_P (iter); + } + } + else + { + if (qualify_lookup (iter->value, LOOK_want::TYPE) + && (how != TAG_how::CURRENT_ONLY + || !INHERITED_VALUE_BINDING_P (iter))) + { + found = iter->value; + if (how != TAG_how::HIDDEN_FRIEND) + reveal = !iter->type && HIDDEN_TYPE_BINDING_P (iter); + } + } + + if (found) + { + if (reveal) + { + /* It is no longer a hidden binding. */ + HIDDEN_TYPE_BINDING_P (iter) = false; + + /* Unanticipate the decl itself. */ + DECL_ANTICIPATED (found) = false; + DECL_FRIEND_P (found) = false; + + gcc_checking_assert (TREE_CODE (found) != TEMPLATE_DECL); + + if (tree ti = TYPE_TEMPLATE_INFO (TREE_TYPE (found))) + { + tree tmpl = TI_TEMPLATE (ti); + DECL_ANTICIPATED (tmpl) = false; + DECL_FRIEND_P (tmpl) = false; + } + } + + return found; + } } /* Now check if we can look in namespace scope. */ @@ -6675,13 +6710,63 @@ lookup_elaborated_type_1 (tree name, TAG_how how) if (tree *slot = find_namespace_slot (ns, name)) { /* If this is the kind of thing we're looking for, we're done. 
*/ + tree found = NULL_TREE; + bool reveal = false; + if (tree type = MAYBE_STAT_TYPE (*slot)) - if (qualify_lookup (type, LOOK_want::TYPE)) - return type; + { + found = type; + if (how != TAG_how::HIDDEN_FRIEND) + { + reveal = STAT_TYPE_HIDDEN_P (*slot); + STAT_TYPE_HIDDEN_P (*slot) = false; + } + } + else if (tree decl = MAYBE_STAT_DECL (*slot)) + { + if (qualify_lookup (decl, LOOK_want::TYPE)) + { + found = decl; + + if (how != TAG_how::HIDDEN_FRIEND && STAT_HACK_P (*slot)) + { + reveal = STAT_DECL_HIDDEN_P (*slot); + if (reveal) + { + if (STAT_TYPE (*slot)) + STAT_DECL_HIDDEN_P (*slot) = false; + else + /* There is no type, just remove the stat + hack. */ + *slot = decl; + } + } + } + } + + if (found) + { + if (reveal) + { + /* Reveal the previously hidden thing. */ + DECL_ANTICIPATED (found) = false; + DECL_FRIEND_P (found) = false; + + if (TREE_CODE (found) == TEMPLATE_DECL) + { + DECL_ANTICIPATED (DECL_TEMPLATE_RESULT (found)) = false; + DECL_FRIEND_P (DECL_TEMPLATE_RESULT (found)) = false; + } + else if (tree ti = TYPE_TEMPLATE_INFO (TREE_TYPE (found))) + { + tree tmpl = TI_TEMPLATE (ti); + DECL_ANTICIPATED (tmpl) = false; + DECL_FRIEND_P (tmpl) = false; + } + } - if (tree decl = MAYBE_STAT_DECL (*slot)) - if (qualify_lookup (decl, LOOK_want::TYPE)) - return decl; + return found; + } } return NULL_TREE; diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h index 7b46338..01643fb 100644 --- a/gcc/cp/name-lookup.h +++ b/gcc/cp/name-lookup.h @@ -58,6 +58,12 @@ struct cp_binding_level; currently being defined. */ #define INHERITED_VALUE_BINDING_P(NODE) ((NODE)->value_is_inherited) +/* The IMPLICIT_TYPEDEF is hidden from ordinary name lookup (it was + injected via a local class's friend decl). The typdef may be in the + VALUE or the TYPE slot. We do not get the situation where the + value and type slots are both filled and both hidden. 
*/ +#define HIDDEN_TYPE_BINDING_P(NODE) ((NODE)->type_is_hidden) + /* Datatype that represents binding established by a declaration between a name and a C++ entity. */ struct GTY(()) cxx_binding { @@ -72,6 +78,7 @@ struct GTY(()) cxx_binding { bool value_is_inherited : 1; bool is_local : 1; + bool type_is_hidden : 1; }; /* Datatype used to temporarily save C++ bindings (for implicit diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c index e860660..0b80d8e 100644 --- a/gcc/cp/tree.c +++ b/gcc/cp/tree.c @@ -2237,13 +2237,13 @@ ovl_make (tree fn, tree next) return result; } -/* Add FN to the (potentially NULL) overload set OVL. USING_P is - true, if FN is via a using declaration. We also pay attention to - DECL_HIDDEN. We keep the hidden decls first, but remaining ones - are unordered. */ +/* Add FN to the (potentially NULL) overload set OVL. USING_OR_HIDDEN + is > 0, if FN is via a using declaration. USING_OR_HIDDEN is < 0, + if FN is hidden. (A decl cannot be both using and hidden.) We + keep the hidden decls first, but remaining ones are unordered. 
*/ tree -ovl_insert (tree fn, tree maybe_ovl, bool using_p) +ovl_insert (tree fn, tree maybe_ovl, int using_or_hidden) { tree result = maybe_ovl; tree insert_after = NULL_TREE; @@ -2257,13 +2257,15 @@ ovl_insert (tree fn, tree maybe_ovl, bool using_p) insert_after = maybe_ovl; } - bool hidden_p = DECL_HIDDEN_P (fn); - if (maybe_ovl || using_p || hidden_p || TREE_CODE (fn) == TEMPLATE_DECL) + if (maybe_ovl || using_or_hidden || TREE_CODE (fn) == TEMPLATE_DECL) { maybe_ovl = ovl_make (fn, maybe_ovl); - if (hidden_p) + + gcc_checking_assert ((using_or_hidden < 0) == DECL_HIDDEN_P (fn)); + + if (using_or_hidden < 0) OVL_HIDDEN_P (maybe_ovl) = true; - if (using_p) + if (using_or_hidden > 0) OVL_DEDUP_P (maybe_ovl) = OVL_USING_P (maybe_ovl) = true; } else @@ -2290,13 +2292,9 @@ ovl_skip_hidden (tree ovl) ovl = OVL_CHAIN (ovl)) gcc_checking_assert (DECL_HIDDEN_P (OVL_FUNCTION (ovl))); - if (ovl && TREE_CODE (ovl) != OVERLOAD && DECL_HIDDEN_P (ovl)) - { - /* Any hidden functions should have been wrapped in an - overload, but injected friend classes will not. */ - gcc_checking_assert (!DECL_DECLARES_FUNCTION_P (ovl)); - ovl = NULL_TREE; - } + /* We should not see a naked hidden decl. */ + gcc_checking_assert (!(ovl && TREE_CODE (ovl) != OVERLOAD + && DECL_HIDDEN_P (ovl))); return ovl; } -- cgit v1.1 From 9b4b1ed50f1e0f252a86851456b58bb2e142c495 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 29 Sep 2020 11:25:13 -0400 Subject: analyzer: silence -Wsign-compare warnings gcc/analyzer/ChangeLog: * constraint-manager.cc (constraint_manager::add_constraint_internal): Whitespace fixes. Silence -Wsign-compare warning. * engine.cc (maybe_process_run_of_before_supernode_enodes): Silence -Wsign-compare warning. 
--- gcc/analyzer/constraint-manager.cc | 6 +++--- gcc/analyzer/engine.cc | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/analyzer/constraint-manager.cc b/gcc/analyzer/constraint-manager.cc index 5cd2c9e..603b228 100644 --- a/gcc/analyzer/constraint-manager.cc +++ b/gcc/analyzer/constraint-manager.cc @@ -1014,10 +1014,10 @@ constraint_manager::add_unknown_constraint (equiv_class_id lhs_ec_id, void constraint_manager::add_constraint_internal (equiv_class_id lhs_id, - enum constraint_op c_op, - equiv_class_id rhs_id) + enum constraint_op c_op, + equiv_class_id rhs_id) { - if (m_constraints.length () >= param_analyzer_max_constraints) + if (m_constraints.length () >= (unsigned)param_analyzer_max_constraints) return; constraint new_c (lhs_id, c_op, rhs_id); diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 84eaa84..c15d119 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -2629,7 +2629,7 @@ maybe_process_run_of_before_supernode_enodes (exploded_node *enode) } got_merger: gcc_assert (it->m_merger_idx >= 0); - gcc_assert (it->m_merger_idx < merged_states.length ()); + gcc_assert ((unsigned)it->m_merger_idx < merged_states.length ()); } /* Create merger nodes. */ -- cgit v1.1 From f836f3bc8f76ef3e3ad21762590302ad11abc9f8 Mon Sep 17 00:00:00 2001 From: Przemyslaw Wirkus Date: Tue, 29 Sep 2020 22:13:05 +0100 Subject: aarch64: add support for Cortex-X1 This adds support for the Arm Cortex-X1 CPU in AArch64 GCC. For more information about this processor, see [0]. [0] : https://www.arm.com/products/cortex-x gcc/ChangeLog: * config/aarch64/aarch64-cores.def: Add Cortex-X1 Arm core. * config/aarch64/aarch64-tune.md: Regenerate. * doc/invoke.texi: Add -mtune=cortex-x1 docs. 
--- gcc/config/aarch64/aarch64-cores.def | 1 + gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/doc/invoke.texi | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 5223481..e6833bf 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -105,6 +105,7 @@ AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) +AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index bb5d8da..fc3e0a5 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; 
-*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 9f956b5..200ecf7 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -17548,7 +17548,7 @@ performance of the code. Permissible values for this option are: @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53}, @samp{cortex-a75.cortex-a55}, @samp{cortex-a76.cortex-a55}, -@samp{cortex-r82}, @samp{native}. +@samp{cortex-r82}, @samp{cortex-x1}, @samp{native}. 
The values @samp{cortex-a57.cortex-a53}, @samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35}, @samp{cortex-a73.cortex-a53}, -- cgit v1.1 From 0eef5eea2b42d892df52b655e55458f27ac3fb81 Mon Sep 17 00:00:00 2001 From: Przemyslaw Wirkus Date: Tue, 29 Sep 2020 22:22:44 +0100 Subject: arm: add support for Cortex-X1 This adds support for the Arm Cortex-X1 CPU. For more information about this processor, see [0]. [0] : https://www.arm.com/products/cortex-x gcc/ChangeLog: * config/arm/arm-cpus.in: Add Cortex-X1 core. * config/arm/arm-tables.opt: Regenerate. * config/arm/arm-tune.md: Regenerate. * doc/invoke.texi: Update docs. --- gcc/config/arm/arm-cpus.in | 11 +++++++++++ gcc/config/arm/arm-tables.opt | 3 +++ gcc/config/arm/arm-tune.md | 8 ++++---- gcc/doc/invoke.texi | 2 +- 4 files changed, 19 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index bf460dd..47a343d 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -1447,6 +1447,17 @@ begin cpu cortex-a77 part d0d end cpu cortex-a77 +begin cpu cortex-x1 + cname cortexx1 + tune for cortex-a57 + tune flags LDSCHED + architecture armv8.2-a+fp16+dotprod + option crypto add FP_ARMv8 CRYPTO + costs cortex_a57 + vendor 41 + part d44 +end cpu cortex-x1 + begin cpu neoverse-n1 cname neoversen1 alias !ares diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index b572063..dac8818 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -241,6 +241,9 @@ EnumValue Enum(processor_type) String(cortex-a77) Value( TARGET_CPU_cortexa77) EnumValue +Enum(processor_type) String(cortex-x1) Value( TARGET_CPU_cortexx1) + +EnumValue Enum(processor_type) String(neoverse-n1) Value( TARGET_CPU_neoversen1) EnumValue diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 2377037..86a117d 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -45,8 +45,8 @@ 
cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35, cortexa73cortexa53,cortexa55,cortexa75, cortexa76,cortexa76ae,cortexa77, - neoversen1,neoversen2,cortexa75cortexa55, - cortexa76cortexa55,neoversev1,cortexm23, - cortexm33,cortexm35p,cortexm55, - cortexr52" + cortexx1,neoversen1,neoversen2, + cortexa75cortexa55,cortexa76cortexa55,neoversev1, + cortexm23,cortexm33,cortexm35p, + cortexm55,cortexr52" (const (symbol_ref "((enum attr_tune) arm_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 200ecf7..274c17e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -19379,7 +19379,7 @@ Permissible names are: @samp{arm7tdmi}, @samp{arm7tdmi-s}, @samp{arm710t}, @samp{cortex-r5}, @samp{cortex-r7}, @samp{cortex-r8}, @samp{cortex-r52}, @samp{cortex-m0}, @samp{cortex-m0plus}, @samp{cortex-m1}, @samp{cortex-m3}, @samp{cortex-m4}, @samp{cortex-m7}, @samp{cortex-m23}, @samp{cortex-m33}, -@samp{cortex-m35p}, @samp{cortex-m55}, +@samp{cortex-m35p}, @samp{cortex-m55}, @samp{cortex-x1}, @samp{cortex-m1.small-multiply}, @samp{cortex-m0.small-multiply}, @samp{cortex-m0plus.small-multiply}, @samp{exynos-m1}, @samp{marvell-pj4}, @samp{neoverse-n1}, @samp{neoverse-n2}, @samp{neoverse-v1}, @samp{xscale}, -- cgit v1.1 From 6649df18f98d5baf89b56a09b816b5eeb5f67bcb Mon Sep 17 00:00:00 2001 From: Jim Wilson Date: Mon, 28 Sep 2020 17:13:40 -0700 Subject: Fix GCC 10+ build failure with zstd version 1.2.0 or older. Extends the configure check for zstd.h to also verify the zstd version, since gcc requires features that only exist in 1.3.0 and newer. Without this patch we get a build error for lto-compress.c when using an old zstd version. gcc/ PR bootstrap/97183 * configure.ac (gcc_cv_header_zstd_h): Check ZSTD_VERISON_NUMBER. * configure: Regenerated. 
--- gcc/configure | 11 ++++++++--- gcc/configure.ac | 7 ++++++- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/configure b/gcc/configure index 33a3e34..b05a371 100755 --- a/gcc/configure +++ b/gcc/configure @@ -10022,9 +10022,14 @@ $as_echo_n "checking for zstd.h... " >&6; } if ${gcc_cv_header_zstd_h+:} false; then : $as_echo_n "(cached) " >&6 else - cat confdefs.h - <<_ACEOF >conftest.$ac_ext + # We require version 1.3.0 or later. This is the first version that has +# ZSTD_getFrameContentSize. +cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include +#if ZSTD_VERSION_NUMBER < 10300 +#error "need zstd 1.3.0 or better" +#endif int main () { @@ -19013,7 +19018,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19016 "configure" +#line 19021 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -19119,7 +19124,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19122 "configure" +#line 19127 "configure" #include "confdefs.h" #if HAVE_DLFCN_H diff --git a/gcc/configure.ac b/gcc/configure.ac index 975f6d9..f561216 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -1376,8 +1376,13 @@ LDFLAGS="$LDFLAGS $ZSTD_LDFLAGS" AC_MSG_CHECKING(for zstd.h) AC_CACHE_VAL(gcc_cv_header_zstd_h, +# We require version 1.3.0 or later. This is the first version that has +# ZSTD_getFrameContentSize. 
[AC_COMPILE_IFELSE([AC_LANG_PROGRAM( -[[#include ]])], +[[#include +#if ZSTD_VERSION_NUMBER < 10300 +#error "need zstd 1.3.0 or better" +#endif]])], [gcc_cv_header_zstd_h=yes], [gcc_cv_header_zstd_h=no])]) AC_MSG_RESULT($gcc_cv_header_zstd_h) -- cgit v1.1 From d60d63a00bb50ba6896939705c589578177b404d Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 29 Sep 2020 15:55:33 -0400 Subject: analyzer: fix signal-handler registration location [PR95188] PR analyzer/95188 reports that diagnostics from -Wanalyzer-unsafe-call-within-signal-handler use the wrong source location when reporting the signal-handler registration event in the diagnostic_path. The diagnostics erroneously use the location of the first stmt in the basic block containing the call to "signal", rather than that of the call itself. Fixed thusly. gcc/analyzer/ChangeLog: PR analyzer/95188 * engine.cc (stmt_requires_new_enode_p): Split enodes before "signal" calls. gcc/testsuite/ChangeLog: PR analyzer/95188 * gcc.dg/analyzer/signal-registration-loc.c: New test. --- gcc/analyzer/engine.cc | 22 +++++++++++++++------ .../gcc.dg/analyzer/signal-registration-loc.c | 23 ++++++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/signal-registration-loc.c (limited to 'gcc') diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index c15d119..0e79254 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -2677,13 +2677,23 @@ static bool stmt_requires_new_enode_p (const gimple *stmt, const gimple *prev_stmt) { - /* Stop consolidating at calls to - "__analyzer_dump_exploded_nodes", so they always appear at the - start of an exploded_node. */ if (const gcall *call = dyn_cast (stmt)) - if (is_special_named_call_p (call, "__analyzer_dump_exploded_nodes", - 1)) - return true; + { + /* Stop consolidating at calls to + "__analyzer_dump_exploded_nodes", so they always appear at the + start of an exploded_node. 
*/ + if (is_special_named_call_p (call, "__analyzer_dump_exploded_nodes", + 1)) + return true; + + /* sm-signal.cc injects an additional custom eedge at "signal" calls + from the registration enode to the handler enode, separate from the + regular next state, which defeats the "detect state change" logic + in process_node. Work around this via special-casing, to ensure + we split the enode immediately before any "signal" call. */ + if (is_special_named_call_p (call, "signal", 2)) + return true; + } /* If we had a PREV_STMT with an unknown location, and this stmt has a known location, then if a state change happens here, it diff --git a/gcc/testsuite/gcc.dg/analyzer/signal-registration-loc.c b/gcc/testsuite/gcc.dg/analyzer/signal-registration-loc.c new file mode 100644 index 0000000..4bac126 --- /dev/null +++ b/gcc/testsuite/gcc.dg/analyzer/signal-registration-loc.c @@ -0,0 +1,23 @@ +/* Ensure we use the correct location when reporting where the + signal handler was registered (PR analyzer/95188). */ + +/* { dg-require-effective-target signal } */ + +#include +#include + +int g; +extern int foo (void); + +static void +handler (int n) +{ + fprintf (stderr, "got here: %i\n", g); /* { dg-warning "call to 'fprintf' from within signal handler" } */ +} + +int main (int argc, char *argv[]) +{ + g = foo (); /* { dg-bogus "registering" } */ + signal (SIGSEGV, handler); /* { dg-message "registering 'handler' as signal handler" } */ + return 0; +} -- cgit v1.1 From 01852cc865c9c53fa3ba6627c1b7abd2446f48c1 Mon Sep 17 00:00:00 2001 From: David Edelsohn Date: Sun, 9 Aug 2020 14:26:44 -0400 Subject: testsuite: Remove unnecessary DWARF2 xfails on AIX A number of DWARF2 testsuite xfails no longer trigger on AIX. This patch removes the unnecessary XFAIL decorations that cause extraneous notices that clutter the testsuite output. gcc/testsuite/ChangeLog: 2020-09-29 David Edelsohn * g++.dg/debug/dwarf2/align-1.C: Remove AIX XFAIL. * g++.dg/debug/dwarf2/align-2.C: Same. 
* g++.dg/debug/dwarf2/align-3.C: Same. * g++.dg/debug/dwarf2/align-4.C: Same. * g++.dg/debug/dwarf2/align-5.C: Same. * g++.dg/debug/dwarf2/align-6.C: Same. * g++.dg/debug/dwarf2/defaulted-member-function-1.C: Same. * g++.dg/debug/dwarf2/defaulted-member-function-2.C: Same. * g++.dg/debug/dwarf2/defaulted-member-function-3.C: Same. * g++.dg/debug/dwarf2/inline-var-1.C: Same. * g++.dg/debug/dwarf2/inline-var-2.C: Same. * g++.dg/debug/dwarf2/inline-var-3.C: Same. * g++.dg/debug/dwarf2/noreturn-function.C: Same. * g++.dg/debug/dwarf2/ptrdmem-1.C: Same. * g++.dg/debug/dwarf2/ref-2.C: Same. * g++.dg/debug/dwarf2/ref-3.C: Same. * g++.dg/debug/dwarf2/ref-4.C: Same. * g++.dg/debug/dwarf2/refqual-1.C: Same. * g++.dg/debug/dwarf2/refqual-2.C: Same. * gcc.dg/debug/dwarf2/align-1.c: Same. * gcc.dg/debug/dwarf2/align-2.c: Same. * gcc.dg/debug/dwarf2/align-3.c: Same. * gcc.dg/debug/dwarf2/align-4.c: Same. * gcc.dg/debug/dwarf2/align-5.c: Same. * gcc.dg/debug/dwarf2/align-6.c: Same. * gcc.dg/debug/dwarf2/align-as-1.c: Same. * gcc.dg/debug/dwarf2/dwarf2-macro.c: Same. * gcc.dg/debug/dwarf2/dwarf2-macro2.c: Same. * gcc.dg/debug/dwarf2/lang-c89.c: Same. * gcc.dg/debug/dwarf2/noreturn-function-attribute.c: Same. * gcc.dg/debug/dwarf2/noreturn-function-keyword.c: Same. * gcc.dg/debug/dwarf2/pr71855.c: Same. * gcc.dg/debug/dwarf2/inline5.c: Add XFAIL on AIX. 
--- gcc/testsuite/g++.dg/debug/dwarf2/align-1.C | 2 +- gcc/testsuite/g++.dg/debug/dwarf2/align-2.C | 2 +- gcc/testsuite/g++.dg/debug/dwarf2/align-3.C | 2 +- gcc/testsuite/g++.dg/debug/dwarf2/align-4.C | 2 +- gcc/testsuite/g++.dg/debug/dwarf2/align-5.C | 2 +- gcc/testsuite/g++.dg/debug/dwarf2/align-6.C | 2 +- .../g++.dg/debug/dwarf2/defaulted-member-function-1.C | 2 +- .../g++.dg/debug/dwarf2/defaulted-member-function-2.C | 2 +- .../g++.dg/debug/dwarf2/defaulted-member-function-3.C | 2 +- gcc/testsuite/g++.dg/debug/dwarf2/inline-var-1.C | 10 +++++----- gcc/testsuite/g++.dg/debug/dwarf2/inline-var-2.C | 2 +- gcc/testsuite/g++.dg/debug/dwarf2/inline-var-3.C | 12 ++++++------ gcc/testsuite/g++.dg/debug/dwarf2/noreturn-function.C | 2 +- gcc/testsuite/g++.dg/debug/dwarf2/ptrdmem-1.C | 4 ++-- gcc/testsuite/g++.dg/debug/dwarf2/ref-2.C | 4 ++-- gcc/testsuite/g++.dg/debug/dwarf2/ref-3.C | 10 +++++----- gcc/testsuite/g++.dg/debug/dwarf2/ref-4.C | 6 +++--- gcc/testsuite/g++.dg/debug/dwarf2/refqual-1.C | 4 ++-- gcc/testsuite/g++.dg/debug/dwarf2/refqual-2.C | 4 ++-- gcc/testsuite/gcc.dg/debug/dwarf2/align-1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/align-2.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/align-3.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/align-4.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/align-5.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/align-6.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/align-as-1.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2-macro.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2-macro2.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/inline5.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/lang-c89.c | 2 +- .../gcc.dg/debug/dwarf2/noreturn-function-attribute.c | 2 +- .../gcc.dg/debug/dwarf2/noreturn-function-keyword.c | 2 +- gcc/testsuite/gcc.dg/debug/dwarf2/pr71855.c | 2 +- 33 files changed, 52 insertions(+), 52 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/align-1.C b/gcc/testsuite/g++.dg/debug/dwarf2/align-1.C index 
fec0fec..c87a998 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/align-1.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/align-1.C @@ -1,5 +1,5 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-times " DW_AT_alignment" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 1 } } int __attribute__((__aligned__(64))) i; diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/align-2.C b/gcc/testsuite/g++.dg/debug/dwarf2/align-2.C index c5b74db..3b0f462 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/align-2.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/align-2.C @@ -1,6 +1,6 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-times " DW_AT_alignment" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 1 } } typedef int __attribute__((__aligned__(64))) i_t; i_t i; diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/align-3.C b/gcc/testsuite/g++.dg/debug/dwarf2/align-3.C index eceee07..32cb390 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/align-3.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/align-3.C @@ -1,6 +1,6 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-times " DW_AT_alignment" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 1 } } typedef int int_t; typedef int_t __attribute__((__aligned__(64))) i_t; diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/align-4.C b/gcc/testsuite/g++.dg/debug/dwarf2/align-4.C index 851a58c..a2905d0 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/align-4.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/align-4.C @@ -1,6 +1,6 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-times " DW_AT_alignment" 2 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 2 } } struct tt { int 
__attribute__((__aligned__(64))) i; diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/align-5.C b/gcc/testsuite/g++.dg/debug/dwarf2/align-5.C index e74146ce..5e883b5 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/align-5.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/align-5.C @@ -1,6 +1,6 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-times " DW_AT_alignment" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 1 } } struct tt { int i; diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/align-6.C b/gcc/testsuite/g++.dg/debug/dwarf2/align-6.C index ff118e4..3384ce7 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/align-6.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/align-6.C @@ -1,6 +1,6 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-times " DW_AT_alignment" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 1 } } struct tt { int i; diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-1.C b/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-1.C index e798b49..6127dcd 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-1.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-1.C @@ -1,6 +1,6 @@ // { dg-do compile } // { dg-options "-O -std=c++11 -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-times "0x1\[ \t\]\[^\n\]* DW_AT_defaulted" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times "0x1\[ \t\]\[^\n\]* DW_AT_defaulted" 1 } } struct Foo { diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-2.C b/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-2.C index 5b56949..73c37b0 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-2.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-2.C @@ -1,6 +1,6 @@ // { dg-do compile } // { 
dg-options "-O -std=c++11 -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-times "0x2\[ \t\]\[^\n\]* DW_AT_defaulted" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times "0x2\[ \t\]\[^\n\]* DW_AT_defaulted" 1 } } struct Foo { diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-3.C b/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-3.C index 190fe50..5537edd 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-3.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-3.C @@ -1,6 +1,6 @@ // { dg-do compile } // { dg-options "-O -std=c++11 -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-not " DW_AT_defaulted" { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-not " DW_AT_defaulted" } } struct Foo { diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-1.C b/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-1.C index 9a88e28..85f74a9 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-1.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-1.C @@ -1,11 +1,11 @@ // { dg-do compile { target c++17 } } // { dg-options "-O -gdwarf-2 -dA -gno-strict-dwarf -fno-eliminate-unused-debug-symbols" } // { dg-require-weak "" } -// { dg-final { scan-assembler-times "0x3\[^\n\r]* DW_AT_inline" 6 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times "0x1\[^\n\r]* DW_AT_inline" 2 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times " DW_AT_declaration" 6 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times " DW_AT_specification" 6 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times " DW_AT_\[^\n\r]*linkage_name" 7 { xfail *-*-aix* } } } +// { dg-final { scan-assembler-times "0x3\[^\n\r]* DW_AT_inline" 6 } } +// { dg-final { scan-assembler-times "0x1\[^\n\r]* DW_AT_inline" 2 } } +// { dg-final { scan-assembler-times " DW_AT_declaration" 6 } } +// { dg-final { scan-assembler-times " DW_AT_specification" 6 } } +// { dg-final { 
scan-assembler-times " DW_AT_\[^\n\r]*linkage_name" 7 } } inline int a; struct S diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-2.C b/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-2.C index cdb2696..63052d6 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-2.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-2.C @@ -1,7 +1,7 @@ // { dg-do compile { target c++17 } } // { dg-options "-O -gdwarf-5 -dA -gno-strict-dwarf" } // { dg-require-weak "" } -// { dg-final { scan-assembler-not "DW_TAG_member" { xfail *-*-aix* } } } +// { dg-final { scan-assembler-not "DW_TAG_member" } } inline int a; struct S diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-3.C b/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-3.C index 52ed5b6..7225258 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-3.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/inline-var-3.C @@ -2,12 +2,12 @@ // { dg-do compile { target c++17 } } // { dg-options "-O -gdwarf-5 -dA -gno-strict-dwarf -fno-eliminate-unused-debug-symbols" } // { dg-require-weak "" } -// { dg-final { scan-assembler-times " DW_AT_inline \\(0x3\\)" 2 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times "0x3\[^\n\r]* DW_AT_inline" 4 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times "0x1\[^\n\r]* DW_AT_inline" 2 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times " DW_AT_declaration" 6 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times " DW_AT_specification" 6 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times " DW_AT_\[^\n\r]*linkage_name" 7 { xfail *-*-aix* } } } +// { dg-final { scan-assembler-times " DW_AT_inline \\(0x3\\)" 2 } } +// { dg-final { scan-assembler-times "0x3\[^\n\r]* DW_AT_inline" 4 } } +// { dg-final { scan-assembler-times "0x1\[^\n\r]* DW_AT_inline" 2 } } +// { dg-final { scan-assembler-times " DW_AT_declaration" 6 } } +// { dg-final { scan-assembler-times " DW_AT_specification" 6 } } +// { dg-final { scan-assembler-times " DW_AT_\[^\n\r]*linkage_name" 
7 } } inline int a; struct S diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/noreturn-function.C b/gcc/testsuite/g++.dg/debug/dwarf2/noreturn-function.C index 722ee13..73a0af4 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/noreturn-function.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/noreturn-function.C @@ -1,7 +1,7 @@ // { dg-do compile } // { dg-options "-O -std=c++11 -g -dA -gno-strict-dwarf" } // Expect DW_AT_noreturn once in .debug_info and once in .debug_abbrev -// { dg-final { scan-assembler-times "DW_AT_noreturn" 2 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times "DW_AT_noreturn" 2 } } class Foo { diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/ptrdmem-1.C b/gcc/testsuite/g++.dg/debug/dwarf2/ptrdmem-1.C index bebf7fb..79c7baa 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/ptrdmem-1.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/ptrdmem-1.C @@ -1,7 +1,7 @@ // { dg-do compile } // { dg-options "-O -std=c++11 -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_ptr_to_member_type" 1 { xfail { powerpc-ibm-aix* } } } } -// { dg-final { scan-assembler-times " DW_AT_use_location" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_ptr_to_member_type" 1 } } +// { dg-final { scan-assembler-times " DW_AT_use_location" 1 } } // { dg-final { scan-assembler-not " DW_AT_reference" } } struct S; diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/ref-2.C b/gcc/testsuite/g++.dg/debug/dwarf2/ref-2.C index 43939c9..dd1f457 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/ref-2.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/ref-2.C @@ -1,7 +1,7 @@ // { dg-do compile { target c++11 } } // { dg-options "-g -gno-strict-dwarf -dA" } -// { dg-final { scan-assembler-times " DW_AT_reference" 1 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times " DW_AT_rvalue_reference" 1 { xfail *-*-aix* } } } +// { dg-final { scan-assembler-times " DW_AT_reference" 1 } } +// { dg-final { 
scan-assembler-times " DW_AT_rvalue_reference" 1 } } struct S { diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/ref-3.C b/gcc/testsuite/g++.dg/debug/dwarf2/ref-3.C index d7932d8..6294d02 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/ref-3.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/ref-3.C @@ -1,10 +1,10 @@ // { dg-do compile { target c++11 } } // { dg-options "-g -gno-strict-dwarf -dA" } -// { dg-final { scan-assembler-times " DW_AT_reference" 5 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times " DW_AT_rvalue_reference" 5 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_subroutine_type" 6 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_ptr_to_member_type" 7 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times " DW_AT_use_location" 1 { xfail *-*-aix* } } } +// { dg-final { scan-assembler-times " DW_AT_reference" 5 } } +// { dg-final { scan-assembler-times " DW_AT_rvalue_reference" 5 } } +// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_subroutine_type" 6 } } +// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_ptr_to_member_type" 7 } } +// { dg-final { scan-assembler-times " DW_AT_use_location" 1 } } struct S { diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/ref-4.C b/gcc/testsuite/g++.dg/debug/dwarf2/ref-4.C index 00cf81f..b1fe0e4 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/ref-4.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/ref-4.C @@ -1,8 +1,8 @@ // { dg-do compile { target c++11 } } // { dg-options "-g -gno-strict-dwarf -dA" } -// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_typedef" 2 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_ptr_to_member_type" 2 { xfail *-*-aix* } } } -// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_subroutine_type" 1 { xfail *-*-aix* } } } +// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_typedef" 2 } } +// { dg-final { 
scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_ptr_to_member_type" 2 } } +// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_subroutine_type" 1 } } struct A { void foo (); int a; }; typedef void (A::*PMF) (); diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/refqual-1.C b/gcc/testsuite/g++.dg/debug/dwarf2/refqual-1.C index aba02b0..046e401 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/refqual-1.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/refqual-1.C @@ -1,7 +1,7 @@ // { dg-do compile } // { dg-options "-O -std=c++11 -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_ptr_to_member_type" 1 { xfail { powerpc-ibm-aix* } } } } -// { dg-final { scan-assembler-times " DW_AT_reference" 2 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_ptr_to_member_type" 1 } } +// { dg-final { scan-assembler-times " DW_AT_reference" 2 } } // { dg-final { scan-assembler-not " DW_AT_use_location" } } /* It is not clear what if anything we should output for DW_AT_use_location in a pointer to member function, so we don't diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/refqual-2.C b/gcc/testsuite/g++.dg/debug/dwarf2/refqual-2.C index 3cd424e..f77af63 100644 --- a/gcc/testsuite/g++.dg/debug/dwarf2/refqual-2.C +++ b/gcc/testsuite/g++.dg/debug/dwarf2/refqual-2.C @@ -1,7 +1,7 @@ // { dg-do compile } // { dg-options "-O -std=c++11 -g -dA -gno-strict-dwarf" } -// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_ptr_to_member_type" 1 { xfail { powerpc-ibm-aix* } } } } -// { dg-final { scan-assembler-times " DW_AT_rvalue_reference" 2 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times "DIE \\(\[^\n\]*\\) DW_TAG_ptr_to_member_type" 1 } } +// { dg-final { scan-assembler-times " DW_AT_rvalue_reference" 2 } } // { dg-final { scan-assembler-not " DW_AT_use_location" } } /* It is not clear what if anything we should output for DW_AT_use_location in a pointer to member 
function, so we don't diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/align-1.c b/gcc/testsuite/gcc.dg/debug/dwarf2/align-1.c index a004042..8802bd1 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/align-1.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/align-1.c @@ -1,6 +1,6 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } // { dg-additional-options "-fno-common" { target hppa*-*-hpux* } } -// { dg-final { scan-assembler-times " DW_AT_alignment" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 1 } } int __attribute__((__aligned__(64))) i; diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/align-2.c b/gcc/testsuite/gcc.dg/debug/dwarf2/align-2.c index 439a7da..0de960e 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/align-2.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/align-2.c @@ -1,7 +1,7 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } // { dg-additional-options "-fno-common" { target hppa*-*-hpux* } } -// { dg-final { scan-assembler-times " DW_AT_alignment" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 1 } } typedef int __attribute__((__aligned__(64))) i_t; i_t i; diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/align-3.c b/gcc/testsuite/gcc.dg/debug/dwarf2/align-3.c index 01c19cd..94db588 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/align-3.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/align-3.c @@ -1,7 +1,7 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } // { dg-additional-options "-fno-common" { target hppa*-*-hpux* } } -// { dg-final { scan-assembler-times " DW_AT_alignment" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 1 } } typedef int int_t; typedef int_t __attribute__((__aligned__(64))) i_t; diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/align-4.c b/gcc/testsuite/gcc.dg/debug/dwarf2/align-4.c index 8418274..bc3c516 100644 --- 
a/gcc/testsuite/gcc.dg/debug/dwarf2/align-4.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/align-4.c @@ -1,7 +1,7 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } // { dg-additional-options "-fno-common" { target hppa*-*-hpux* } } -// { dg-final { scan-assembler-times " DW_AT_alignment" 2 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 2 } } struct tt { int __attribute__((__aligned__(64))) i; diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/align-5.c b/gcc/testsuite/gcc.dg/debug/dwarf2/align-5.c index 322ac50..86cd3a9 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/align-5.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/align-5.c @@ -1,7 +1,7 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } // { dg-additional-options "-fno-common" { target hppa*-*-hpux* } } -// { dg-final { scan-assembler-times " DW_AT_alignment" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 1 } } struct tt { int i; diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/align-6.c b/gcc/testsuite/gcc.dg/debug/dwarf2/align-6.c index 784f213..bb1895f 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/align-6.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/align-6.c @@ -1,7 +1,7 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } // { dg-additional-options "-fno-common" { target hppa*-*-hpux* } } -// { dg-final { scan-assembler-times " DW_AT_alignment" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 1 } } struct tt { int i; diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/align-as-1.c b/gcc/testsuite/gcc.dg/debug/dwarf2/align-as-1.c index 5ef02c3..0b23580 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/align-as-1.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/align-as-1.c @@ -1,6 +1,6 @@ // { dg-do compile } // { dg-options "-O -g -dA -gno-strict-dwarf" } // { dg-additional-options "-fno-common" { target hppa*-*-hpux* } } -// { dg-final { 
scan-assembler-times " DW_AT_alignment" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times " DW_AT_alignment" 1 } } int _Alignas(64) i; diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2-macro.c b/gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2-macro.c index 24b598e..fd61296 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2-macro.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2-macro.c @@ -1,7 +1,7 @@ /* Test to make sure the mcaro info includes a start file command for the main source */ /* { dg-do compile } */ /* { dg-options "-g3 -gdwarf -dA -fverbose-asm" } */ -/* { dg-final { scan-assembler "Start new file" { xfail { powerpc-ibm-aix* } } } } */ +/* { dg-final { scan-assembler "Start new file" } } */ #define ADD(x) (M + x) diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2-macro2.c b/gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2-macro2.c index 5204342..3dfa290 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2-macro2.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/dwarf2-macro2.c @@ -1,7 +1,7 @@ /* Test to make sure the macro info includes the predefined macros with line number 0. */ /* { dg-do compile } */ /* { dg-options "-g3 -gdwarf -dA -fverbose-asm" } */ -/* { dg-final { scan-assembler "At line number 0" { xfail { powerpc-ibm-aix* } } } } */ +/* { dg-final { scan-assembler "At line number 0" } } */ #define FOO 1 int i; diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/inline5.c b/gcc/testsuite/gcc.dg/debug/dwarf2/inline5.c index 7587a28..bd34f0d 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/inline5.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/inline5.c @@ -9,7 +9,7 @@ /* We do not know which is output first so look for both invalid abstract origins on the lexical blocks (knowing that the abstract instance has no attribute following the DW_TAG_lexical_block. 
*/ -/* { dg-final { scan-assembler-not "\\(DIE \\(0x(\[0-9a-f\]*)\\) DW_TAG_lexical_block\\)\[^#/!@;\\|\]*\[#/!@;\\|\]+ +\[^(\].*DW_TAG_lexical_block\\)\[^#/!@;\\|x\]*x\\1\[^#/!@;\\|\]*\[#/!@;\\|\] +DW_AT_abstract_origin" { xfail { *-*-solaris2.* && { ! gas } } } } } */ +/* { dg-final { scan-assembler-not "\\(DIE \\(0x(\[0-9a-f\]*)\\) DW_TAG_lexical_block\\)\[^#/!@;\\|\]*\[#/!@;\\|\]+ +\[^(\].*DW_TAG_lexical_block\\)\[^#/!@;\\|x\]*x\\1\[^#/!@;\\|\]*\[#/!@;\\|\] +DW_AT_abstract_origin" { xfail { { *-*-aix* || *-*-solaris2.* } && { ! gas } } } } } */ /* { dg-final { scan-assembler-not "DW_TAG_lexical_block\\)\[^#/!@;\\|x\]*x(\[0-9a-f\]*)\[^#/!@;\\|\]*\[#/!@;\\|\]+ +DW_AT_abstract_origin.*\\(DIE \\(0x\\1\\) DW_TAG_lexical_block\\)\[^#/!@;\\|\]*\[#/!@;\\|\]+ +DW_AT" } } */ int foo (int i) diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/lang-c89.c b/gcc/testsuite/gcc.dg/debug/dwarf2/lang-c89.c index b6b4a89..6292cf8 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/lang-c89.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/lang-c89.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -std=c89 -g -dA" } */ /* DW_LANG_C89 = 0x0001 */ -/* { dg-final { scan-assembler "0x1.*DW_AT_language" { xfail { powerpc-ibm-aix* } } } } */ +/* { dg-final { scan-assembler "0x1.*DW_AT_language" } } */ int version; diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/noreturn-function-attribute.c b/gcc/testsuite/gcc.dg/debug/dwarf2/noreturn-function-attribute.c index bc2cfa5..7c8924a6 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/noreturn-function-attribute.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/noreturn-function-attribute.c @@ -1,7 +1,7 @@ // { dg-do compile } // { dg-options "-O -std=c99 -g -dA -gno-strict-dwarf" } // Expect DW_AT_noreturn once in .debug_info and once in .debug_abbrev -// { dg-final { scan-assembler-times "DW_AT_noreturn" 2 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times "DW_AT_noreturn" 2 } } void __attribute__ ((noreturn)) baz (void) diff --git 
a/gcc/testsuite/gcc.dg/debug/dwarf2/noreturn-function-keyword.c b/gcc/testsuite/gcc.dg/debug/dwarf2/noreturn-function-keyword.c index 0105e6c..ced96d1 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/noreturn-function-keyword.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/noreturn-function-keyword.c @@ -1,7 +1,7 @@ // { dg-do compile } // { dg-options "-O -std=c11 -g -dA -gno-strict-dwarf" } // Expect DW_AT_noreturn once in .debug_info and once in .debug_abbrev -// { dg-final { scan-assembler-times "DW_AT_noreturn" 2 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times "DW_AT_noreturn" 2 } } _Noreturn void exit (int); diff --git a/gcc/testsuite/gcc.dg/debug/dwarf2/pr71855.c b/gcc/testsuite/gcc.dg/debug/dwarf2/pr71855.c index 3842dbc..4fd8b74 100644 --- a/gcc/testsuite/gcc.dg/debug/dwarf2/pr71855.c +++ b/gcc/testsuite/gcc.dg/debug/dwarf2/pr71855.c @@ -8,4 +8,4 @@ foo (const char *format, ...) { } -// { dg-final { scan-assembler-times "DIE.*DW_TAG_unspecified_parameters" 1 { xfail { powerpc-ibm-aix* } } } } +// { dg-final { scan-assembler-times "DIE.*DW_TAG_unspecified_parameters" 1 } } -- cgit v1.1 From 969baf03acd8124345617cea125b148568c7370a Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Thu, 24 Sep 2020 14:30:50 -0400 Subject: c++: Implement -Wrange-loop-construct [PR94695] This new warning can be used to prevent expensive copies inside range-based for-loops, for instance: struct S { char arr[128]; }; void fn () { S arr[5]; for (const auto x : arr) { } } where auto deduces to S and then we copy the big S in every iteration. Using "const auto &x" would not incur such a copy. 
With this patch the compiler will warn: q.C:4:19: warning: loop variable 'x' creates a copy from type 'const S' [-Wrange-loop-construct] 4 | for (const auto x : arr) { } | ^ q.C:4:19: note: use reference type 'const S&' to prevent copying 4 | for (const auto x : arr) { } | ^ | & As per Clang, this warning is suppressed for trivially copyable types whose size does not exceed 64B. The tricky part of the patch was how to figure out if using a reference would have prevented a copy. To that point, I'm using the new function called ref_conv_binds_directly_p. This warning is enabled by -Wall. Further warnings of similar nature should follow soon. gcc/c-family/ChangeLog: PR c++/94695 * c.opt (Wrange-loop-construct): New option. gcc/cp/ChangeLog: PR c++/94695 * call.c (ref_conv_binds_directly_p): New function. * cp-tree.h (ref_conv_binds_directly_p): Declare. * parser.c (warn_for_range_copy): New function. (cp_convert_range_for): Call it. gcc/ChangeLog: PR c++/94695 * doc/invoke.texi: Document -Wrange-loop-construct. gcc/testsuite/ChangeLog: PR c++/94695 * g++.dg/warn/Wrange-loop-construct.C: New test. --- gcc/c-family/c.opt | 4 + gcc/cp/call.c | 22 +++ gcc/cp/cp-tree.h | 1 + gcc/cp/parser.c | 68 ++++++- gcc/doc/invoke.texi | 21 ++- gcc/testsuite/g++.dg/warn/Wrange-loop-construct.C | 207 ++++++++++++++++++++++ 6 files changed, 318 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.dg/warn/Wrange-loop-construct.C (limited to 'gcc') diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 7761eef..bbf7da8 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -800,6 +800,10 @@ Wpacked-not-aligned C ObjC C++ ObjC++ Var(warn_packed_not_aligned) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) Warn when fields in a struct with the packed attribute are misaligned. +Wrange-loop-construct +C++ ObjC++ Var(warn_range_loop_construct) Warning LangEnabledBy(C++ ObjC++,Wall) +Warn when a range-based for-loop is creating unnecessary copies. 
+ Wredundant-tags C++ ObjC++ Var(warn_redundant_tags) Warning Warn when a class or enumerated type is referenced using a redundant class-key. diff --git a/gcc/cp/call.c b/gcc/cp/call.c index 5606389..1e5fffe 100644 --- a/gcc/cp/call.c +++ b/gcc/cp/call.c @@ -8429,6 +8429,28 @@ conv_binds_ref_to_prvalue (conversion *c) return false; } +/* True iff converting EXPR to a reference type TYPE does not involve + creating a temporary. */ + +bool +ref_conv_binds_directly_p (tree type, tree expr) +{ + gcc_assert (TYPE_REF_P (type)); + + /* Get the high-water mark for the CONVERSION_OBSTACK. */ + void *p = conversion_obstack_alloc (0); + + conversion *conv = implicit_conversion (type, TREE_TYPE (expr), expr, + /*c_cast_p=*/false, + LOOKUP_IMPLICIT, tf_none); + bool ret = conv && !conv->bad_p && !conv_binds_ref_to_prvalue (conv); + + /* Free all the conversions we allocated. */ + obstack_free (&conversion_obstack, p); + + return ret; +} + /* Call the trivial destructor for INSTANCE, which can be either an lvalue of class type or a pointer to class type. If NO_PTR_DEREF is true and INSTANCE has pointer type, clobber the pointer rather than what it points diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index a25934e..42d0d76 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6225,6 +6225,7 @@ extern bool sufficient_parms_p (const_tree); extern tree type_decays_to (tree); extern tree extract_call_expr (tree); extern tree build_trivial_dtor_call (tree, bool = false); +extern bool ref_conv_binds_directly_p (tree, tree); extern tree build_user_type_conversion (tree, tree, int, tsubst_flags_t); extern tree build_new_function_call (tree, vec **, diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 8905833..cb44227 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -12646,6 +12646,64 @@ do_range_for_auto_deduction (tree decl, tree range_expr) } } +/* Warns when the loop variable should be changed to a reference type to + avoid unnecessary copying. 
I.e., from + + for (const auto x : range) + + where range returns a reference, to + + for (const auto &x : range) + + if this version doesn't make a copy. DECL is the RANGE_DECL; EXPR is the + *__for_begin expression. + This function is never called when processing_template_decl is on. */ + +static void +warn_for_range_copy (tree decl, tree expr) +{ + if (!warn_range_loop_construct + || decl == error_mark_node) + return; + + location_t loc = DECL_SOURCE_LOCATION (decl); + tree type = TREE_TYPE (decl); + + if (from_macro_expansion_at (loc)) + return; + + if (TYPE_REF_P (type)) + { + /* TODO: Implement reference warnings. */ + return; + } + else if (!CP_TYPE_CONST_P (type)) + return; + + /* Since small trivially copyable types are cheap to copy, we suppress the + warning for them. 64B is a common size of a cache line. */ + if (TREE_CODE (TYPE_SIZE_UNIT (type)) != INTEGER_CST + || (tree_to_uhwi (TYPE_SIZE_UNIT (type)) <= 64 + && trivially_copyable_p (type))) + return; + + tree rtype = cp_build_reference_type (type, /*rval*/false); + /* If we could initialize the reference directly, it wouldn't involve any + copies. */ + if (!ref_conv_binds_directly_p (rtype, expr)) + return; + + auto_diagnostic_group d; + if (warning_at (loc, OPT_Wrange_loop_construct, + "loop variable %qD creates a copy from type %qT", + decl, type)) + { + gcc_rich_location richloc (loc); + richloc.add_fixit_insert_before ("&"); + inform (&richloc, "use reference type to prevent copying"); + } +} + /* Converts a range-based for-statement into a normal for-statement, as per the definition. 
@@ -12656,7 +12714,7 @@ do_range_for_auto_deduction (tree decl, tree range_expr) { auto &&__range = RANGE_EXPR; - for (auto __begin = BEGIN_EXPR, end = END_EXPR; + for (auto __begin = BEGIN_EXPR, __end = END_EXPR; __begin != __end; ++__begin) { @@ -12756,14 +12814,16 @@ cp_convert_range_for (tree statement, tree range_decl, tree range_expr, cp_maybe_mangle_decomp (range_decl, decomp_first_name, decomp_cnt); /* The declaration is initialized with *__begin inside the loop body. */ - cp_finish_decl (range_decl, - build_x_indirect_ref (input_location, begin, RO_UNARY_STAR, - tf_warning_or_error), + tree deref_begin = build_x_indirect_ref (input_location, begin, RO_UNARY_STAR, + tf_warning_or_error); + cp_finish_decl (range_decl, deref_begin, /*is_constant_init*/false, NULL_TREE, LOOKUP_ONLYCONVERTING); if (VAR_P (range_decl) && DECL_DECOMPOSITION_P (range_decl)) cp_finish_decomp (range_decl, decomp_first_name, decomp_cnt); + warn_for_range_copy (range_decl, deref_begin); + return statement; } diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 274c17e..9a49033 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -245,7 +245,7 @@ in the following sections. -Wmultiple-inheritance -Wnamespaces -Wnarrowing @gol -Wnoexcept -Wnoexcept-type -Wnon-virtual-dtor @gol -Wpessimizing-move -Wno-placement-new -Wplacement-new=@var{n} @gol --Wredundant-move -Wredundant-tags @gol +-Wrange-loop-construct -Wredundant-move -Wredundant-tags @gol -Wreorder -Wregister @gol -Wstrict-null-sentinel -Wno-subobject-linkage -Wtemplates @gol -Wno-non-template-friend -Wold-style-cast @gol @@ -3605,6 +3605,24 @@ treats the return value as if it were designated by an rvalue. This warning is enabled by @option{-Wextra}. +@item -Wrange-loop-construct @r{(C++ and Objective-C++ only)} +@opindex Wrange-loop-construct +@opindex Wno-range-loop-construct +This warning warns when a C++ range-based for-loop is creating an unnecessary +copy. 
This can happen when the range declaration is not a reference, but +probably should be. For example: + +@smallexample +struct S @{ char arr[128]; @}; +void fn () @{ + S arr[5]; + for (const auto x : arr) @{ @dots{} @} +@} +@end smallexample + +It does not warn when the type being copied is a trivially-copyable type whose +size is at most 64 bytes. This warning is enabled by @option{-Wall}. + @item -Wredundant-tags @r{(C++ and Objective-C++ only)} @opindex Wredundant-tags @opindex Wno-redundant-tags @@ -5274,6 +5292,7 @@ Options} and @ref{Objective-C and Objective-C++ Dialect Options}. -Wparentheses @gol -Wpessimizing-move @r{(only for C++)} @gol -Wpointer-sign @gol +-Wrange-loop-construct @r{(only for C++)} @gol -Wreorder @gol -Wrestrict @gol -Wreturn-type @gol diff --git a/gcc/testsuite/g++.dg/warn/Wrange-loop-construct.C b/gcc/testsuite/g++.dg/warn/Wrange-loop-construct.C new file mode 100644 index 0000000..3caf00d --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/Wrange-loop-construct.C @@ -0,0 +1,207 @@ +// PR c++/94695 +// { dg-do compile { target c++11 } } +// { dg-options "-Wrange-loop-construct" } + +#include + +struct Small { + char arr[64]; +}; + +struct Big_aggr { + char arr[65]; +}; + +struct Big_triv_copy { + char arr[65]; + Big_triv_copy() { } +}; + +struct Big { + char arr[65]; + Big () = default; + Big(const Big&); +}; + +struct Foo { }; +struct Bar { + char arr[100]; + Bar(Foo); + Bar(int); + operator int(); +}; + +template +struct It { + T operator*(); + It operator++(); + bool operator!=(const It); +}; + +template +struct Cont { + using I = It; + I begin(); + I end(); +}; + +#define TEST \ + void fn_macro() \ + { \ + Cont cont_bar_ref; \ + for (const Bar x : cont_bar_ref) { (void) x; } \ + } + +TEST + +Cont& foo (); +Cont foo2 (); + +void +fn1 () +{ + for (const auto x : foo () ) { (void) x; } // { dg-warning "creates a copy" } + for (const auto x : foo2 () ) { (void) x; } // { dg-warning "creates a copy" } + + Small s{}; + Small sa[5] = { }; + for 
(const auto x : sa) { (void) x; } + for (const auto x : { s, s, s }) { (void) x; } + + Big_aggr b{}; + Big_aggr ba[5] = { }; + for (const auto x : ba) { (void) x; } // { dg-warning "creates a copy" } + for (const auto x : { b, b, b }) { (void) x; } // { dg-warning "creates a copy" } + + Big_triv_copy bt{}; + Big_triv_copy bta[5]; + for (const auto x : bta) { (void) x; } // { dg-warning "creates a copy" } + for (const auto x : { bt, bt, bt }) { (void) x; } // { dg-warning "creates a copy" } + + Big b2; + Big ba2[5]; + for (const auto x : ba2) { (void) x; } // { dg-warning "creates a copy" } + for (const auto x : { b2, b2, b2 }) { (void) x; } // { dg-warning "creates a copy" } +} + +void +fn2 () +{ + Cont cont_int; + for (const auto x : cont_int) { (void) x; } + for (const int x : cont_int) { (void) x; } + for (int x : cont_int) { (void) x; } + for (const auto &x : cont_int) { (void) x; } + for (double x : cont_int) { (void) x; } + for (const double x : cont_int) { (void) x; } + for (const Bar x : cont_int) { (void) x; } + for (Bar x : cont_int) { (void) x; } +} + +void +fn3 () +{ + Cont cont_int_ref; + for (const int x : cont_int_ref) { (void) x; } + for (int x : cont_int_ref) { (void) x; } + for (const double x : cont_int_ref) { (void) x; } + for (double x : cont_int_ref) { (void) x; } + for (const Bar x : cont_int_ref) { (void) x; } + for (Bar x : cont_int_ref) { (void) x; } +} + +void +fn4 () +{ + Cont cont_bar; + for (const Bar x : cont_bar) { (void) x; } + for (Bar x : cont_bar) { (void) x; } + for (const int x : cont_bar) { (void) x; } + for (int x : cont_bar) { (void) x; } +} + +void +fn5 () +{ + Cont cont_bar_ref; + for (const Bar x : cont_bar_ref) { (void) x; } // { dg-warning "creates a copy" } + for (Bar x : cont_bar_ref) { (void) x; } + for (const int x : cont_bar_ref) { (void) x; } + for (int x : cont_bar_ref) { (void) x; } +} + +void +fn6 () +{ + Cont cont_foo; + for (const Bar x : cont_foo) { (void) x; } + for (Bar x : cont_foo) { (void) x; } +} + 
+void +fn7 () +{ + Cont cont_foo_ref; + for (const Bar x : cont_foo_ref) { (void) x; } + for (Bar x : cont_foo_ref) { (void) x; } +} + +void +fn8 () +{ + double arr[2]; + for (const double x : arr) { (void) x; } + for (double x : arr) { (void) x; } + for (const int x : arr) { (void) x; } + for (int x : arr) { (void) x; } + for (const Bar x : arr) { (void) x; } + for (Bar x : arr) { (void) x; } +} + +void +fn9 () +{ + Foo foo[2]; + for (const Foo x : foo) { (void) x; } + for (Foo x : foo) { (void) x; } + for (const Bar x : foo) { (void) x; } + for (Bar x : foo) { (void) x; } +} + +void +fn10 () +{ + Bar bar[2] = { 1, 2 }; + for (const Bar x : bar) { (void) x; } // { dg-warning "creates a copy" } + for (Bar x : bar) { (void) x; } + for (const int x : bar) { (void) x; } + for (int x : bar) { (void) x; } +} + +template +void +fn11 () +{ + Cont cont_bar; + for (const Bar x : cont_bar) { (void) x; } + + Cont cont_bar_ref; + for (const Bar x : cont_bar_ref) { (void) x; } // { dg-warning "creates a copy" } + + Cont cont_dep; + for (const T x : cont_dep) { (void) x; } +} + +template +void +fn12 () +{ + for (const auto x : { T{} }) { (void) x; } // { dg-warning "creates a copy" } +} + +void +invoke () +{ + fn11 (); + fn12 (); +} -- cgit v1.1 From 873f8c1e6df94a9dcbfbe69f06538e3e45ba151d Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Tue, 29 Sep 2020 17:10:54 -0600 Subject: Correct and improve -Wnonnull for calls to functions with VLA arguments (PR middle-end/97188). Resolves: PR middle-end/97188 - ICE passing a null VLA to a function expecting at least one element gcc/ChangeLog: PR middle-end/97188 * calls.c (maybe_warn_rdwr_sizes): Simplify warning messages. Correct handling of VLA arguments. gcc/testsuite/ChangeLog: PR middle-end/97188 * gcc.dg/Wstringop-overflow-23.c: Adjust text of expected warnings. * gcc.dg/Wnonnull-4.c: New test. 
--- gcc/calls.c | 115 ++++++++++++------ gcc/testsuite/gcc.dg/Wnonnull-4.c | 173 +++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/Wstringop-overflow-23.c | 12 +- 3 files changed, 261 insertions(+), 39 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/Wnonnull-4.c (limited to 'gcc') diff --git a/gcc/calls.c b/gcc/calls.c index 0e5c696..ed43638 100644 --- a/gcc/calls.c +++ b/gcc/calls.c @@ -17,6 +17,7 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . */ +#define INCLUDE_STRING #include "config.h" #include "system.h" #include "coretypes.h" @@ -1924,7 +1925,10 @@ static void maybe_warn_rdwr_sizes (rdwr_map *rwm, tree fndecl, tree fntype, tree exp) { auto_diagnostic_group adg; - bool warned = false; + + /* Set if a warning has been issued for any argument (used to decide + whether to emit an informational note at the end). */ + bool any_warned = false; /* A string describing the attributes that the warnings issued by this function apply to. Used to print one informational note per function @@ -1974,27 +1978,60 @@ maybe_warn_rdwr_sizes (rdwr_map *rwm, tree fndecl, tree fntype, tree exp) else access_size = rwm->get (sizidx)->size; - bool warned = false; + /* Format the value or range to avoid an explosion of messages. */ + char sizstr[80]; + tree sizrng[2] = { size_zero_node, build_all_ones_cst (sizetype) }; + if (get_size_range (access_size, sizrng, true)) + { + const char *s0 = print_generic_expr_to_str (sizrng[0]); + if (tree_int_cst_equal (sizrng[0], sizrng[1])) + { + gcc_checking_assert (strlen (s0) < sizeof sizstr); + strcpy (sizstr, s0); + } + else + { + const char *s1 = print_generic_expr_to_str (sizrng[1]); + gcc_checking_assert (strlen (s0) + strlen (s1) + < sizeof sizstr - 4); + sprintf (sizstr, "[%s, %s]", s0, s1); + } + } + else + *sizstr = '\0'; + + /* Set if a warning has been issued for the current argument. 
*/ + bool arg_warned = false; location_t loc = EXPR_LOCATION (exp); tree ptr = access.second.ptr; - tree sizrng[2] = { size_zero_node, build_all_ones_cst (sizetype) }; - if (get_size_range (access_size, sizrng, true) + if (*sizstr && tree_int_cst_sgn (sizrng[0]) < 0 && tree_int_cst_sgn (sizrng[1]) < 0) { /* Warn about negative sizes. */ - if (tree_int_cst_equal (sizrng[0], sizrng[1])) - warned = warning_at (loc, OPT_Wstringop_overflow_, - "%Kargument %i value %E is negative", - exp, sizidx + 1, access_size); + if (access.second.internal_p) + { + const std::string argtypestr + = access.second.array_as_string (ptrtype); + + arg_warned = warning_at (loc, OPT_Wstringop_overflow_, + "%Kbound argument %i value %s is " + "negative for a variable length array " + "argument %i of type %s", + exp, sizidx + 1, sizstr, + ptridx + 1, argtypestr.c_str ()); + } else - warned = warning_at (loc, OPT_Wstringop_overflow_, - "%Kargument %i range [%E, %E] is negative", - exp, sizidx + 1, sizrng[0], sizrng[1]); - if (warned) + arg_warned = warning_at (loc, OPT_Wstringop_overflow_, + "%Kargument %i value %s is negative", + exp, sizidx + 1, sizstr); + + if (arg_warned) { append_attrname (access, attrstr, sizeof attrstr); - /* Avoid warning again for the same attribute. */ + /* Remember a warning has been issued and avoid warning + again below for the same attribute. */ + any_warned = true; continue; } } @@ -2006,7 +2043,6 @@ maybe_warn_rdwr_sizes (rdwr_map *rwm, tree fndecl, tree fntype, tree exp) /* Multiply ACCESS_SIZE by the size of the type the pointer argument points to. If it's incomplete the size is used as is. */ - access_size = NULL_TREE; if (tree argsize = TYPE_SIZE_UNIT (argtype)) if (TREE_CODE (argsize) == INTEGER_CST) { @@ -2028,35 +2064,44 @@ maybe_warn_rdwr_sizes (rdwr_map *rwm, tree fndecl, tree fntype, tree exp) different from also declaring the pointer argument with attribute nonnull when the function accepts null pointers only when the corresponding size is zero. 
*/ - if (tree_int_cst_equal (sizrng[0], sizrng[1])) - warned = warning_at (loc, OPT_Wnonnull, - "%Kargument %i is null but " - "the corresponding size argument %i " - "value is %E", - exp, ptridx + 1, sizidx + 1, access_size); + if (access.second.internal_p) + { + const std::string argtypestr + = access.second.array_as_string (ptrtype); + + arg_warned = warning_at (loc, OPT_Wnonnull, + "%Kargument %i of variable length " + "array %s is null but " + "the corresponding bound argument " + "%i value is %s", + exp, sizidx + 1, argtypestr.c_str (), + ptridx + 1, sizstr); + } else - warned = warning_at (loc, OPT_Wnonnull, - "%Kargument %i is null but " - "the corresponding size argument %i " - "range is [%E, %E]", - exp, ptridx + 1, sizidx + 1, - sizrng[0], sizrng[1]); + arg_warned = warning_at (loc, OPT_Wnonnull, + "%Kargument %i is null but " + "the corresponding size argument " + "%i value is %s", + exp, ptridx + 1, sizidx + 1, + sizstr); } else if (access_size && access.second.static_p) { /* Warn about null pointers for [static N] array arguments but do not warn for ordinary (i.e., nonstatic) arrays. */ - warned = warning_at (loc, OPT_Wnonnull, - "%Kargument %i to %<%T[static %E]%> null " - "where non-null expected", - exp, ptridx + 1, argtype, - sizrng[0]); + arg_warned = warning_at (loc, OPT_Wnonnull, + "%Kargument %i to %<%T[static %E]%> " + "is null where non-null expected", + exp, ptridx + 1, argtype, + access_size); } - if (warned) + if (arg_warned) { append_attrname (access, attrstr, sizeof attrstr); - /* Avoid warning again for the same attribute. */ + /* Remember a warning has been issued and avoid warning + again below for the same attribute. 
*/ + any_warned = true; continue; } } @@ -2101,7 +2146,7 @@ maybe_warn_rdwr_sizes (rdwr_map *rwm, tree fndecl, tree fntype, tree exp) if (TREE_NO_WARNING (exp)) { - warned = true; + any_warned = true; if (access.second.internal_p) inform (loc, "referencing argument %u of type %qT", @@ -2124,7 +2169,7 @@ maybe_warn_rdwr_sizes (rdwr_map *rwm, tree fndecl, tree fntype, tree exp) "in a call with type %qT and attribute %qs", fntype, attrstr); } - else if (warned) + else if (any_warned) { if (fndecl) inform (DECL_SOURCE_LOCATION (fndecl), diff --git a/gcc/testsuite/gcc.dg/Wnonnull-4.c b/gcc/testsuite/gcc.dg/Wnonnull-4.c new file mode 100644 index 0000000..180a40d --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wnonnull-4.c @@ -0,0 +1,173 @@ +/* PR middle-end/97188 - ICE passing a null VLA to a function expecting + at least one element + { dg-do compile } + { dg-options "-O -Wall -ftrack-macro-expansion=0" } */ + +#define INT_MAX __INT_MAX__ +#define INT_MIN (-INT_MAX - 1) + +/* Exercise passing nul to a one-dimensional VLA argument. */ + +void test_fca_n (int r_m1) +{ + extern void fca_n (int n, char[n]); // { dg-message "in a call to function 'fca_n'" "note" } + +#define T(n) fca_n (n, 0) + + int min = INT_MIN; + int max = INT_MAX; + if (r_m1 >= 0) + r_m1 = -1; + + // Verify negative bounds. + T (min); // { dg-warning "bound argument 1 value -\\d+ is negative for a variable length array argument 2 of type 'char\\\[n]'" } + T (r_m1); // { dg-warning "bound argument 1 value \\\[-\\d+, -1] is negative for a variable length array argument 2 of type 'char\\\[n]" } + T ( -1); // { dg-warning "bound argument 1 value -1 is negative for a variable length array argument 2 of type 'char\\\[n]" } + + T ( 0); + + // Verify positive bounds. 
+ T ( 1); // { dg-warning "argument 1 of variable length array 'char\\\[n]' is null but the corresponding bound argument 2 value is 1" } + T ( 9); // { dg-warning "argument 1 of variable length array 'char\\\[n]' is null but the corresponding bound argument 2 value is 9" } + T (max); // { dg-warning "argument 1 of variable length array 'char\\\[n]' is null but the corresponding bound argument 2 value is \\d+" } +} + + +/* Exercise passing nul to an array with unspecified bound of VLAs. */ + +void test_fsa_x_n (int r_m1) +{ + extern void fsa_x_n (int n, short[][n]); // { dg-message "in a call to function 'fsa_x_n'" "note" } + +#undef T +#define T(n) fsa_x_n (n, 0) + + int min = INT_MIN; + int max = INT_MAX; + if (r_m1 >= 0) + r_m1 = -1; + + // Verify negative bounds. + T (min); // { dg-warning "bound argument 1 value -\\d+ is negative for a variable length array argument 2 of type 'short int\\\[]\\\[n]'" } + T (r_m1); // { dg-warning "bound argument 1 value \\\[-\\d+, -1] is negative for a variable length array argument 2 of type 'short int\\\[]\\\[n]" } + T ( -1); // { dg-warning "bound argument 1 value -1 is negative for a variable length array argument 2 of type 'short int\\\[]\\\[n]" } + + T ( 0); + + // Verify positive bounds. + T ( 1); // { dg-warning "argument 1 of variable length array 'short int\\\[]\\\[n]' is null but the corresponding bound argument 2 value is 1" } + T ( 9); // { dg-warning "argument 1 of variable length array 'short int\\\[]\\\[n]' is null but the corresponding bound argument 2 value is 9" } + T (max); // { dg-warning "argument 1 of variable length array 'short int\\\[]\\\[n]' is null but the corresponding bound argument 2 value is \\d+" } +} + + +/* Exercise passing nul to an array of a single VLA. 
*/ + +void test_fia_1_n (int r_m1) +{ + extern void fia_1_n (int n, int[1][n]); // { dg-message "in a call to function 'fia_1_n'" "note" } + +#undef T +#define T(n) fia_1_n (n, 0) + + int min = INT_MIN; + int max = INT_MAX; + if (r_m1 >= 0) + r_m1 = -1; + + // Verify negative bounds. + T (min); // { dg-warning "bound argument 1 value -\\d+ is negative for a variable length array argument 2 of type 'int\\\[1]\\\[n]'" } + T (r_m1); // { dg-warning "bound argument 1 value \\\[-\\d+, -1] is negative for a variable length array argument 2 of type 'int\\\[1]\\\[n]" } + T ( -1); // { dg-warning "bound argument 1 value -1 is negative for a variable length array argument 2 of type 'int\\\[1]\\\[n]" } + + T ( 0); + + // Verify positive bounds. + T ( 1); // { dg-warning "argument 1 of variable length array 'int\\\[1]\\\[n]' is null but the corresponding bound argument 2 value is 1" } + T ( 9); // { dg-warning "argument 1 of variable length array 'int\\\[1]\\\[n]' is null but the corresponding bound argument 2 value is 9" } + T (max); // { dg-warning "argument 1 of variable length array 'int\\\[1]\\\[n]' is null but the corresponding bound argument 2 value is \\d+" } +} + + +/* Exercise passing nul to an array of three VLAs. */ + +void test_fla_3_n (int r_m1) +{ + extern void fla_3_n (int n, long[3][n]); // { dg-message "in a call to function 'fla_3_n'" "note" } + +#undef T +#define T(n) fla_3_n (n, 0) + + int min = INT_MIN; + int max = INT_MAX; + if (r_m1 >= 0) + r_m1 = -1; + + // Verify negative bounds. + T (min); // { dg-warning "bound argument 1 value -\\d+ is negative for a variable length array argument 2 of type 'long int\\\[3]\\\[n]'" } + T (r_m1); // { dg-warning "bound argument 1 value \\\[-\\d+, -1] is negative for a variable length array argument 2 of type 'long int\\\[3]\\\[n]" } + T ( -1); // { dg-warning "bound argument 1 value -1 is negative for a variable length array argument 2 of type 'long int\\\[3]\\\[n]" } + + T ( 0); + + // Verify positive bounds. 
+ T ( 1); // { dg-warning "argument 1 of variable length array 'long int\\\[3]\\\[n]' is null but the corresponding bound argument 2 value is 1" } + T ( 9); // { dg-warning "argument 1 of variable length array 'long int\\\[3]\\\[n]' is null but the corresponding bound argument 2 value is 9" } + T (max); // { dg-warning "argument 1 of variable length array 'long int\\\[3]\\\[n]' is null but the corresponding bound argument 2 value is \\d+" } +} + + +/* Exercise passing nul to a VLA of five-element arrays. */ + +void test_fda_n_5 (int r_m1) +{ + extern void fda_n_5 (int n, double[n][5]);// { dg-message "in a call to function 'fda_n_5'" "note" } + +#undef T +#define T(n) fda_n_5 (n, 0) + + int min = INT_MIN; + int max = INT_MAX; + if (r_m1 >= 0) + r_m1 = -1; + + // Verify negative bounds. + T (min); // { dg-warning "bound argument 1 value -\\d+ is negative for a variable length array argument 2 of type 'double\\\[n]\\\[5]'" } + T (r_m1); // { dg-warning "bound argument 1 value \\\[-\\d+, -1] is negative for a variable length array argument 2 of type 'double\\\[n]\\\[5]" } + T ( -1); // { dg-warning "bound argument 1 value -1 is negative for a variable length array argument 2 of type 'double\\\[n]\\\[5]" } + + T ( 0); + + // Verify positive bounds. + T ( 1); // { dg-warning "argument 1 of variable length array 'double\\\[n]\\\[5]' is null but the corresponding bound argument 2 value is 1" } + T ( 9); // { dg-warning "argument 1 of variable length array 'double\\\[n]\\\[5]' is null but the corresponding bound argument 2 value is 9" } + T (max); // { dg-warning "argument 1 of variable length array 'double\\\[n]\\\[5]' is null but the corresponding bound argument 2 value is \\d+" } +} + + +/* Exercise passing nul to a two-dimensional VLA. 
*/ + +void test_fca_n_n (int r_m1) +{ + extern void fca_n_n (int n, char[n][n]); // { dg-message "in a call to function 'fca_n_n'" "note" } + +#undef T +#define T(n) fca_n_n (n, 0) + + int min = INT_MIN; + int max = INT_MAX; + if (r_m1 >= 0) + r_m1 = -1; + + // Verify negative bounds. + T (min); // { dg-warning "bound argument 1 value -\\d+ is negative for a variable length array argument 2 of type 'char\\\[n]\\\[n]'" } + T (r_m1); // { dg-warning "bound argument 1 value \\\[-\\d+, -1] is negative for a variable length array argument 2 of type 'char\\\[n]\\\[n]" } + T ( -1); // { dg-warning "bound argument 1 value -1 is negative for a variable length array argument 2 of type 'char\\\[n]\\\[n]" } + + T ( 0); + + // Verify positive bounds. + T ( 1); // { dg-warning "argument 1 of variable length array 'char\\\[n]\\\[n]' is null but the corresponding bound argument 2 value is 1" } + T ( 9); // { dg-warning "argument 1 of variable length array 'char\\\[n]\\\[n]' is null but the corresponding bound argument 2 value is 9" } + T (max); // { dg-warning "argument 1 of variable length array 'char\\\[n]\\\[n]' is null but the corresponding bound argument 2 value is \\d+" } +} diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-23.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-23.c index bbc1210..0da916a 100644 --- a/gcc/testsuite/gcc.dg/Wstringop-overflow-23.c +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-23.c @@ -40,7 +40,11 @@ void test_rd2_1 (void) { void *null = 0; - rd2_1 (SR (1, 2), null); // { dg-warning "argument 2 is null but the corresponding size argument 1 range is \\\[1, 2]" } + /* Ideally the message would say "range" for a range and "value" + for a singular value but using the same reduces the complexity + of the code and keeps down the number of messages that need to + be translated, without sacrificing (too much) clarity. 
*/ + rd2_1 (SR (1, 2), null); // { dg-warning "argument 2 is null but the corresponding size argument 1 range|value is \\\[1, 2]" } } } @@ -59,7 +63,7 @@ void test_wr3_1 (void) void *null = 0; - wr3_1 (SR (1, 2), 1, null); // { dg-warning "argument 3 is null but the corresponding size argument 1 range is \\\[1, 2]" } + wr3_1 (SR (1, 2), 1, null); // { dg-warning "argument 3 is null but the corresponding size argument 1 range|value is \\\[1, 2]" } } @@ -71,7 +75,7 @@ void test_wrd2_1 (int n) wr2_1 (0, 0); wr2_1 (SR (-1, 1), 0); wr2_1 (SR (0, 1), 0); - wr2_1 (SR (1, 2), 0); // { dg-warning "argument 2 is null but the corresponding size argument 1 range is \\\[1, 2]" } + wr2_1 (SR (1, 2), 0); // { dg-warning "argument 2 is null but the corresponding size argument 1 range|value is \\\[1, 2]" } /* This should probably be diagnosed but to avoid false positives caused by jump threading and such it would have to be done @@ -127,7 +131,7 @@ void test_rd1_3_wr2_4 (const void *s, void *d, int n1, int n2) rd1_3_wr2_4 (s, d, -1, 2); // { dg-warning "argument 3 value -1 is negative" } const int ir_min_m1 = SR (INT_MIN, -1); - rd1_3_wr2_4 (s, d, ir_min_m1, 2); // { dg-warning "argument 3 range \\\[-\[0-9\]+, -1] is negative" } + rd1_3_wr2_4 (s, d, ir_min_m1, 2); // { dg-warning "argument 3 range|value \\\[-\[0-9\]+, -1] is negative" } rd1_3_wr2_4 (s, d, SR (-1, 0), 2); rd1_3_wr2_4 (s, d, SR (INT_MIN, INT_MAX), 2); -- cgit v1.1 From 58614b10edc9a66fa3ef23dd0fee034e3a2cbc2d Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Mon, 28 Sep 2020 23:57:52 +0000 Subject: rs6000: Use parameterized names for tablejump We have too many tablejump patterns. Using parameterized names simplifies the code a bit. 2020-09-29 Segher Boessenkool * config/rs6000/rs6000.md (tablejump): Simplify. (tablejumpsi): Merge this ... (tablejumpdi): ... and this ... (@tablejump_normal): ... into this. (tablejumpsi_nospec): Merge this ... (tablejumpdi_nospec): ... and this ... (@tablejump_nospec): ... 
into this. (*tablejump_internal1): Delete, rename to ... (@tablejump_insn_normal): ... this. (*tablejump_internal1_nospec): Delete, rename to ... (@tablejump_insn_nospec): ... this. --- gcc/config/rs6000/rs6000.md | 101 ++++++++++++++++++-------------------------- 1 file changed, 41 insertions(+), 60 deletions(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 694ff70..24ad809 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -12697,12 +12697,7 @@ "" { if (rs6000_speculate_indirect_jumps) - { - if (TARGET_32BIT) - emit_jump_insn (gen_tablejumpsi (operands[0], operands[1])); - else - emit_jump_insn (gen_tablejumpdi (operands[0], operands[1])); - } + emit_jump_insn (gen_tablejump_normal (Pmode, operands[0], operands[1])); else { rtx ccreg = gen_reg_rtx (CCmode); @@ -12716,69 +12711,55 @@ DONE; }) -(define_expand "tablejumpsi" - [(set (match_dup 3) - (plus:SI (match_operand:SI 0) - (match_dup 2))) - (parallel [(set (pc) - (match_dup 3)) - (use (label_ref (match_operand 1)))])] - "TARGET_32BIT && rs6000_speculate_indirect_jumps" +(define_expand "@tablejump_normal" + [(use (match_operand:SI 0)) + (use (match_operand:P 1))] + "rs6000_speculate_indirect_jumps" { operands[0] = force_reg (SImode, operands[0]); - operands[2] = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[1])); - operands[3] = gen_reg_rtx (SImode); + if (mode == SImode) + operands[4] = operands[0]; + else + { + operands[4] = gen_reg_rtx (Pmode); + rtx src = gen_rtx_fmt_e (SIGN_EXTEND, Pmode, operands[0]); + emit_move_insn (operands[4], src); + } + + operands[2] = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, operands[1])); + operands[3] = gen_reg_rtx (Pmode); + + emit_insn (gen_add3 (operands[3], operands[4], operands[2])); + emit_jump_insn (gen_tablejump_insn_normal (Pmode, operands[3], operands[1])); + DONE; }) -(define_expand "tablejumpsi_nospec" - [(set (match_dup 4) - (plus:SI (match_operand:SI 0) - (match_dup 3))) - 
(parallel [(set (pc) - (match_dup 4)) - (use (label_ref (match_operand 1))) - (clobber (match_operand 2))])] - "TARGET_32BIT && !rs6000_speculate_indirect_jumps" +(define_expand "@tablejump_nospec" + [(use (match_operand:SI 0)) + (use (match_operand:P 1)) + (use (match_operand:CC 2))] + "!rs6000_speculate_indirect_jumps" { operands[0] = force_reg (SImode, operands[0]); - operands[3] = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[1])); - operands[4] = gen_reg_rtx (SImode); -}) + if (mode == SImode) + operands[4] = operands[0]; + else + { + operands[4] = gen_reg_rtx (Pmode); + rtx src = gen_rtx_fmt_e (SIGN_EXTEND, Pmode, operands[0]); + emit_move_insn (operands[4], src); + } -(define_expand "tablejumpdi" - [(set (match_dup 4) - (sign_extend:DI (match_operand:SI 0 "lwa_operand"))) - (set (match_dup 3) - (plus:DI (match_dup 4) - (match_dup 2))) - (parallel [(set (pc) - (match_dup 3)) - (use (label_ref (match_operand 1)))])] - "TARGET_64BIT && rs6000_speculate_indirect_jumps" -{ - operands[2] = force_reg (DImode, gen_rtx_LABEL_REF (DImode, operands[1])); - operands[3] = gen_reg_rtx (DImode); - operands[4] = gen_reg_rtx (DImode); -}) + operands[5] = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, operands[1])); + operands[3] = gen_reg_rtx (Pmode); -(define_expand "tablejumpdi_nospec" - [(set (match_dup 5) - (sign_extend:DI (match_operand:SI 0 "lwa_operand"))) - (set (match_dup 4) - (plus:DI (match_dup 5) - (match_dup 3))) - (parallel [(set (pc) - (match_dup 4)) - (use (label_ref (match_operand 1))) - (clobber (match_operand 2))])] - "TARGET_64BIT && !rs6000_speculate_indirect_jumps" -{ - operands[3] = force_reg (DImode, gen_rtx_LABEL_REF (DImode, operands[1])); - operands[4] = gen_reg_rtx (DImode); - operands[5] = gen_reg_rtx (DImode); + emit_insn (gen_add3 (operands[3], operands[4], operands[5])); + emit_jump_insn (gen_tablejump_insn_nospec (Pmode, operands[3], operands[1], + operands[2])); + DONE; }) -(define_insn "*tablejump_internal1" +(define_insn 
"@tablejump_insn_normal" [(set (pc) (match_operand:P 0 "register_operand" "c,*l")) (use (label_ref (match_operand 1)))] @@ -12786,7 +12767,7 @@ "b%T0" [(set_attr "type" "jmpreg")]) -(define_insn "*tablejump_internal1_nospec" +(define_insn "@tablejump_insn_nospec" [(set (pc) (match_operand:P 0 "register_operand" "c,*l")) (use (label_ref (match_operand 1))) -- cgit v1.1 From 93bca37c0a6dec354592676233e40e66741fdbe4 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 30 Sep 2020 00:16:29 +0000 Subject: Daily bump. --- gcc/ChangeLog | 135 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 14 +++++ gcc/c-family/ChangeLog | 5 ++ gcc/cp/ChangeLog | 52 +++++++++++++++++++ gcc/testsuite/ChangeLog | 94 +++++++++++++++++++++++++++++++++ 6 files changed, 301 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a2c395b..4df8e96 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,138 @@ +2020-09-30 Segher Boessenkool + + * config/rs6000/rs6000.md (tablejump): Simplify. + (tablejumpsi): Merge this ... + (tablejumpdi): ... and this ... + (@tablejump_normal): ... into this. + (tablejumpsi_nospec): Merge this ... + (tablejumpdi_nospec): ... and this ... + (@tablejump_nospec): ... into this. + (*tablejump_internal1): Delete, rename to ... + (@tablejump_insn_normal): ... this. + (*tablejump_internal1_nospec): Delete, rename to ... + (@tablejump_insn_nospec): ... this. + +2020-09-29 Martin Sebor + + PR middle-end/97188 + * calls.c (maybe_warn_rdwr_sizes): Simplify warning messages. + Correct handling of VLA argumments. + +2020-09-29 Marek Polacek + + PR c++/94695 + * doc/invoke.texi: Document -Wrange-loop-construct. + +2020-09-29 Jim Wilson + + PR bootstrap/97183 + * configure.ac (gcc_cv_header_zstd_h): Check ZSTD_VERISON_NUMBER. + * configure: Regenerated. + +2020-09-29 Przemyslaw Wirkus + + * config/arm/arm-cpus.in: Add Cortex-X1 core. + * config/arm/arm-tables.opt: Regenerate. 
+ * config/arm/arm-tune.md: Regenerate. + * doc/invoke.texi: Update docs. + +2020-09-29 Przemyslaw Wirkus + + * config/aarch64/aarch64-cores.def: Add Cortex-X1 Arm core. + * config/aarch64/aarch64-tune.md: Regenerate. + * doc/invoke.texi: Add -mtune=cortex-x1 docs. + +2020-09-29 H.J. Lu + + PR target/97247 + * config/i386/enqcmdintrin.h: Replace with + . Replace _ENQCMDNTRIN_H_INCLUDED with + _ENQCMDINTRIN_H_INCLUDED. + +2020-09-29 Richard Biener + + PR tree-optimization/97241 + * tree-vect-loop.c (vectorizable_reduction): Move finding + the SLP node for the reduction stmt to a better place. + +2020-09-29 Richard Biener + + * tree-vect-slp.c (vect_analyze_slp): Move SLP reduction + re-arrangement and SLP graph load gathering... + (vect_optimize_slp): ... here. + * tree-vectorizer.h (vec_info::slp_loads): Remove. + +2020-09-29 Hongyu Wang + + PR target/97231 + * config/i386/amxbf16intrin.h: Add FSF copyright notes. + * config/i386/amxint8intrin.h: Ditto. + * config/i386/amxtileintrin.h: Ditto. + * config/i386/avx512vp2intersectintrin.h: Ditto. + * config/i386/avx512vp2intersectvlintrin.h: Ditto. + * config/i386/pconfigintrin.h: Ditto. + * config/i386/tsxldtrkintrin.h: Ditto. + * config/i386/wbnoinvdintrin.h: Ditto. + +2020-09-29 Richard Biener + + PR tree-optimization/97238 + * tree-ssa-reassoc.c (ovce_extract_ops): Fix typo. + +2020-09-29 Richard Sandiford + + * config/arm/arm.h (ARM_HAVE_NEON_V8QI_ARITH, ARM_HAVE_NEON_V4HI_ARITH) + (ARM_HAVE_NEON_V2SI_ARITH, ARM_HAVE_NEON_V16QI_ARITH): New macros. + (ARM_HAVE_NEON_V8HI_ARITH, ARM_HAVE_NEON_V4SI_ARITH): Likewise. + (ARM_HAVE_NEON_V2DI_ARITH, ARM_HAVE_NEON_V4HF_ARITH): Likewise. + (ARM_HAVE_NEON_V8HF_ARITH, ARM_HAVE_NEON_V2SF_ARITH): Likewise. 
+ (ARM_HAVE_NEON_V4SF_ARITH, ARM_HAVE_V8QI_ARITH, ARM_HAVE_V4HI_ARITH) + (ARM_HAVE_V2SI_ARITH, ARM_HAVE_V16QI_ARITH, ARM_HAVE_V8HI_ARITH) + (ARM_HAVE_V4SI_ARITH, ARM_HAVE_V2DI_ARITH, ARM_HAVE_V4HF_ARITH) + (ARM_HAVE_V2SF_ARITH, ARM_HAVE_V8HF_ARITH, ARM_HAVE_V4SF_ARITH): + Likewise. + * config/arm/iterators.md (VNIM, VNINOTM): Delete. + * config/arm/vec-common.md (add3, addv8hf3) + (add3): Replace with... + (add3): ...this new expander. + * config/arm/neon.md (*add3_neon): Use the new + ARM_HAVE_NEON__ARITH macros as the C condition. + (addv8hf3_neon, addv4hf3, add3_fp16): Delete in + favor of the above. + (neon_vadd): Use gen_add3 instead of + gen_add3_fp16. + +2020-09-29 Kito Cheng + + * config/riscv/riscv-c.c (riscv_cpu_cpp_builtins): Define + __riscv_cmodel_medany when PIC mode. + +2020-09-29 Alex Coplan + + * config/aarch64/aarch64-cores.def: Move neoverse-n2 after saphira. + * config/aarch64/aarch64-tune.md: Regenerate. + +2020-09-29 Martin Liska + + PR tree-optimization/96979 + * tree-switch-conversion.c (jump_table_cluster::can_be_handled): + Make a fast bail out. + (bit_test_cluster::can_be_handled): Likewise here. + * tree-switch-conversion.h (get_range): Use wi::to_wide instead + of a folding. + +2020-09-29 Martin Liska + + Revert: + 2020-09-22 Martin Liska + + PR tree-optimization/96979 + * doc/invoke.texi: Document new param max-switch-clustering-attempts. + * params.opt: Add new parameter. + * tree-switch-conversion.c (jump_table_cluster::find_jump_tables): + Limit number of attempts. + (bit_test_cluster::find_bit_tests): Likewise. + 2020-09-28 Aldy Hernandez * value-range.h (class irange): Add irange_allocator friend. 
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 91db4fb..f1815d1 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200929 +20200930 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 458fdae..2e40e81 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,17 @@ +2020-09-29 David Malcolm + + PR analyzer/95188 + * engine.cc (stmt_requires_new_enode_p): Split enodes before + "signal" calls. + +2020-09-29 David Malcolm + + * constraint-manager.cc + (constraint_manager::add_constraint_internal): Whitespace fixes. + Silence -Wsign-compare warning. + * engine.cc (maybe_process_run_of_before_supernode_enodes): + Silence -Wsign-compare warning. + 2020-09-28 David Malcolm * region-model.h (binop_svalue::dyn_cast_binop_svalue): Remove diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 4dd5822..03ce9ea 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,8 @@ +2020-09-29 Marek Polacek + + PR c++/94695 + * c.opt (Wrange-loop-construct): New option. + 2020-09-23 Martin Sebor PR c/97131 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 04f63a4..4dea15f 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,55 @@ +2020-09-29 Marek Polacek + + PR c++/94695 + * call.c (ref_conv_binds_directly_p): New function. + * cp-tree.h (ref_conv_binds_directly_p): Declare. + * parser.c (warn_for_range_copy): New function. + (cp_convert_range_for): Call it. + +2020-09-29 Nathan Sidwell + + * cp-tree.h (ovl_insert): Change final parm to hidden-or-using + indicator. + * name-lookup.h (HIDDEN_TYPE_BINDING_P): New. + (struct cxx_binding): Add type_is_hidden flag. + * tree.c (ovl_insert): Change using_p parm to using_or_hidden, + adjust. + (ovl_skip_hidden): Assert we never see a naked hidden decl. + * decl.c (xref_tag_1): Delete unhiding friend from here (moved to + lookup_elaborated_type_1). + * name-lookup.c (STAT_TYPE_HIDDEN_P, STAT_DECL_HIDDEN_P): New. 
+ (name_lookup::search_namespace_only): Check new hidden markers. + (cxx_binding_make): Clear HIDDEN_TYPE_BINDING_P. + (update_binding): Update new hidden markers. + (lookup_name_1): Check HIDDEN_TYPE_BINDING_P and simplify friend + ignoring. + (lookup_elaborated_type_1): Use new hidden markers. Reveal the + decl here. + +2020-09-29 Nathan Sidwell + + * name-lookup.c (create_local_binding): Do not clear + INHERITED_VALUE_BINDING_P here. + (name_lookup::process_binding): Move done hidden-decl triage to ... + (name_lookup::search_namespace_only): ... here, its only caller. + (cxx_binding_make): Clear flags here. + (push_binding): Not here. + (pop_local_binding): RAII. + (update_binding): Refactor. + (do_pushdecl): Assert we're never revealing a local binding. + (do_pushdecl_with_scope): Directly call do_pushdecl. + (get_class_binding): Do not clear LOCAL_BINDING_P here. + * pt.c (push_template_decl): Set friend & anticipated before + pushing. + +2020-09-29 Nathan Sidwell + + * name-lookup.c (update_binding): We never meet two implicit + typedefs. + (do_pushdecl): Adjust set_identifier_type_value_with_scope calls. + (set_identifier_type_value_with_scope): Do not update binding in + the namespace-case. Assert it is already there. + 2020-09-25 Nathan Sidwell * cp-tree.h (duplicate_decls): Replace 'is_friend' with 'hiding' diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index dc8ccbb..1c26589 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,97 @@ +2020-09-29 Martin Sebor + + PR middle-end/97188 + * gcc.dg/Wstringop-overflow-23.c: Adjust text of expected warnings. + * gcc.dg/Wnonnull-4.c: New test. + +2020-09-29 Marek Polacek + + PR c++/94695 + * g++.dg/warn/Wrange-loop-construct.C: New test. + +2020-09-29 David Edelsohn + + * g++.dg/debug/dwarf2/align-1.C: Remove AIX XFAIL. + * g++.dg/debug/dwarf2/align-2.C: Same. + * g++.dg/debug/dwarf2/align-3.C: Same. + * g++.dg/debug/dwarf2/align-4.C: Same. 
+ * g++.dg/debug/dwarf2/align-5.C: Same. + * g++.dg/debug/dwarf2/align-6.C: Same. + * g++.dg/debug/dwarf2/defaulted-member-function-1.C: Same. + * g++.dg/debug/dwarf2/defaulted-member-function-2.C: Same. + * g++.dg/debug/dwarf2/defaulted-member-function-3.C: Same. + * g++.dg/debug/dwarf2/inline-var-1.C: Same. + * g++.dg/debug/dwarf2/inline-var-2.C: Same. + * g++.dg/debug/dwarf2/inline-var-3.C: Same. + * g++.dg/debug/dwarf2/noreturn-function.C: Same. + * g++.dg/debug/dwarf2/ptrdmem-1.C: Same. + * g++.dg/debug/dwarf2/ref-2.C: Same. + * g++.dg/debug/dwarf2/ref-3.C: Same. + * g++.dg/debug/dwarf2/ref-4.C: Same. + * g++.dg/debug/dwarf2/refqual-1.C: Same. + * g++.dg/debug/dwarf2/refqual-2.C: Same. + * gcc.dg/debug/dwarf2/align-1.c: Same. + * gcc.dg/debug/dwarf2/align-2.c: Same. + * gcc.dg/debug/dwarf2/align-3.c: Same. + * gcc.dg/debug/dwarf2/align-4.c: Same. + * gcc.dg/debug/dwarf2/align-5.c: Same. + * gcc.dg/debug/dwarf2/align-6.c: Same. + * gcc.dg/debug/dwarf2/align-as-1.c: Same. + * gcc.dg/debug/dwarf2/dwarf2-macro.c: Same. + * gcc.dg/debug/dwarf2/dwarf2-macro2.c: Same. + * gcc.dg/debug/dwarf2/lang-c89.c: Same. + * gcc.dg/debug/dwarf2/noreturn-function-attribute.c: Same. + * gcc.dg/debug/dwarf2/noreturn-function-keyword.c: Same. + * gcc.dg/debug/dwarf2/pr71855.c: Same. + * gcc.dg/debug/dwarf2/inline5.c: Add XFAIL on AIX. + +2020-09-29 David Malcolm + + PR analyzer/95188 + * gcc.dg/analyzer/signal-registration-loc.c: New test. + +2020-09-29 David Edelsohn + + * g++.dg/spellcheck-inttypes.C: Define _STD_TYPES_T on AIX. + * gcc.dg/spellcheck-inttypes.c: Same. + +2020-09-29 Richard Biener + + PR tree-optimization/97241 + * gcc.dg/vect/pr97241.c: New testcase. + +2020-09-29 Richard Biener + + PR tree-optimization/97238 + * gcc.dg/pr97238.c: New testcase. + +2020-09-29 Richard Sandiford + + * gcc.target/arm/armv8_2-fp16-arith-2.c: Expect FP16 vectorization + even without -ffast-math. + +2020-09-29 Kito Cheng + + * gcc.target/riscv/predef-3.c: Update testcase. 
+ * gcc.target/riscv/predef-6.c: Ditto. + +2020-09-29 Martin Liska + + PR tree-optimization/96979 + * g++.dg/tree-ssa/pr96979.C: New test. + +2020-09-29 Martin Liska + + Revert: + 2020-09-29 Martin Liska + + PR tree-optimization/96979 + * g++.dg/tree-ssa/pr96979.C: New test. + +2020-09-29 David Edelsohn + + * gcc.dg/ipa/symver1.c: Skip on AIX. + 2020-09-28 David Malcolm PR analyzer/97233 -- cgit v1.1 From ac001f5ce604f40f0a0099909325837b8792294b Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Wed, 30 Sep 2020 12:34:20 +0930 Subject: Re: rs6000: Use parameterized names for tablejump * config/rs6000/rs6000.md (@tablejump_normal): Don't use non-existent operands[]. (@tablejump_nospec): Likewise. --- gcc/config/rs6000/rs6000.md | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 24ad809..779bfd1 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -12716,21 +12716,22 @@ (use (match_operand:P 1))] "rs6000_speculate_indirect_jumps" { + rtx off; operands[0] = force_reg (SImode, operands[0]); if (mode == SImode) - operands[4] = operands[0]; + off = operands[0]; else { - operands[4] = gen_reg_rtx (Pmode); + off = gen_reg_rtx (Pmode); rtx src = gen_rtx_fmt_e (SIGN_EXTEND, Pmode, operands[0]); - emit_move_insn (operands[4], src); + emit_move_insn (off, src); } - operands[2] = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, operands[1])); - operands[3] = gen_reg_rtx (Pmode); + rtx lab = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, operands[1])); + rtx addr = gen_reg_rtx (Pmode); - emit_insn (gen_add3 (operands[3], operands[4], operands[2])); - emit_jump_insn (gen_tablejump_insn_normal (Pmode, operands[3], operands[1])); + emit_insn (gen_add3 (addr, off, lab)); + emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1])); DONE; }) @@ -12740,21 +12741,22 @@ (use (match_operand:CC 2))] "!rs6000_speculate_indirect_jumps" { 
+ rtx off; operands[0] = force_reg (SImode, operands[0]); if (mode == SImode) - operands[4] = operands[0]; + off = operands[0]; else { - operands[4] = gen_reg_rtx (Pmode); + off = gen_reg_rtx (Pmode); rtx src = gen_rtx_fmt_e (SIGN_EXTEND, Pmode, operands[0]); - emit_move_insn (operands[4], src); + emit_move_insn (off, src); } - operands[5] = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, operands[1])); - operands[3] = gen_reg_rtx (Pmode); + rtx lab = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, operands[1])); + rtx addr = gen_reg_rtx (Pmode); - emit_insn (gen_add3 (operands[3], operands[4], operands[5])); - emit_jump_insn (gen_tablejump_insn_nospec (Pmode, operands[3], operands[1], + emit_insn (gen_add3 (addr, off, lab)); + emit_jump_insn (gen_tablejump_insn_nospec (Pmode, addr, operands[1], operands[2])); DONE; }) -- cgit v1.1 From de2c1d00f27bfaa1f917a2e3a933a45e3b8ba95a Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Mon, 28 Sep 2020 15:00:35 +0930 Subject: gcc/configure typo fix * configure.ac (--with-long-double-format): Typo fix. * configure: Regenerate. --- gcc/configure | 2 +- gcc/configure.ac | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/configure b/gcc/configure index b05a371..22fb852 100755 --- a/gcc/configure +++ b/gcc/configure @@ -29897,7 +29897,7 @@ supported if the default cpu is power7 or newer" "$LINENO" 5 ;; esac ;; - xpowerpc64*-*-linux*:*) + powerpc64*-*-linux*:*) as_fn_error $? 
"--with-long-double-format argument should be ibm or ieee" "$LINENO" 5 with_long_double_format="" ;; diff --git a/gcc/configure.ac b/gcc/configure.ac index f561216..1ad5bbc6 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -6260,7 +6260,7 @@ supported if the default cpu is power7 or newer]) ;; esac ;; - xpowerpc64*-*-linux*:*) + powerpc64*-*-linux*:*) AC_MSG_ERROR([--with-long-double-format argument should be ibm or ieee]) with_long_double_format="" ;; -- cgit v1.1 From f63023fafbbc13545fef67e6b32a55b48a945fcf Mon Sep 17 00:00:00 2001 From: Alex Coplan Date: Wed, 30 Sep 2020 09:02:47 +0100 Subject: arm: Fix ICEs in no-literal-pool.c on MVE [PR97251] This patch fixes ICEs when compiling gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c with -mfp16-format=ieee -mfloat-abi=hard -march=armv8.1-m.main+mve -mpure-code. The existing conditions in the movsf/movdf expanders (as well as the no_literal_pool patterns) were too restrictive, requiring TARGET_HARD_FLOAT instead of TARGET_VFP_BASE, which caused unrecognised insns when compiling this testcase with integer MVE and -mpure-code. gcc/ChangeLog: PR target/97251 * config/arm/arm.md (movsf): Relax TARGET_HARD_FLOAT to TARGET_VFP_BASE. (movdf): Likewise. * config/arm/vfp.md (no_literal_pool_df_immediate): Likewise. (no_literal_pool_sf_immediate): Likewise. 
--- gcc/config/arm/arm.md | 4 ++-- gcc/config/arm/vfp.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 147c4a5..1a8e498 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -7357,7 +7357,7 @@ if (arm_disable_literal_pool && (REG_P (operands[0]) || SUBREG_P (operands[0])) && CONST_DOUBLE_P (operands[1]) - && TARGET_HARD_FLOAT + && TARGET_VFP_BASE && !vfp3_const_double_rtx (operands[1])) { rtx clobreg = gen_reg_rtx (SFmode); @@ -7454,7 +7454,7 @@ if (arm_disable_literal_pool && (REG_P (operands[0]) || SUBREG_P (operands[0])) && CONSTANT_P (operands[1]) - && TARGET_HARD_FLOAT + && TARGET_VFP_BASE && !arm_const_double_rtx (operands[1]) && !(TARGET_VFP_DOUBLE && vfp3_const_double_rtx (operands[1]))) { diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 72707c1..e6c287c 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -2125,7 +2125,7 @@ (match_operand:DF 1 "const_double_operand" "F")) (clobber (match_operand:DF 2 "s_register_operand" "=r"))] "arm_disable_literal_pool - && TARGET_HARD_FLOAT + && TARGET_VFP_BASE && !arm_const_double_rtx (operands[1]) && !(TARGET_VFP_DOUBLE && vfp3_const_double_rtx (operands[1]))" "#" @@ -2151,7 +2151,7 @@ (match_operand:SF 1 "const_double_operand" "E")) (clobber (match_operand:SF 2 "s_register_operand" "=r"))] "arm_disable_literal_pool - && TARGET_HARD_FLOAT + && TARGET_VFP_BASE && !vfp3_const_double_rtx (operands[1])" "#" "" -- cgit v1.1 From d4f9e81976066e1d67c8dd5ddaf24ebe3b0695ed Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 30 Sep 2020 11:52:06 +0100 Subject: aarch64: Tweak movti and movtf patterns movti lacked a way of zeroing an FPR, meaning that we'd do: mov x0, 0 mov x1, 0 fmov d0, x0 fmov v0.d[1], x1 instead of just: movi v0.2d, #0 movtf had the opposite problem for GPRs: we'd generate: movi v0.2d, #0 fmov x0, d0 fmov x1, v0.d[1] instead of just: mov x0, 0 mov x1, 0 Also, there
was an unnecessary earlyclobber on the GPR<-GPR movtf alternative (but not the movti one). The splitter handles overlap correctly. The TF splitter used aarch64_reg_or_imm, but the _imm part only accepts integer constants, not floating-point ones. The patch changes it to nonmemory_operand instead. gcc/ * config/aarch64/aarch64.c (aarch64_split_128bit_move_p): Add a function comment. Tighten check for FP moves. * config/aarch64/aarch64.md (*movti_aarch64): Add a w<-Z alternative. (*movtf_aarch64): Handle r<-Y like r<-r. Remove unnecessary earlyclobber. Change splitter predicate from aarch64_reg_or_imm to nonmemory_operand. gcc/testsuite/ * gcc.target/aarch64/movtf_1.c: New test. * gcc.target/aarch64/movti_1.c: Likewise. --- gcc/config/aarch64/aarch64.c | 9 +++- gcc/config/aarch64/aarch64.md | 17 +++--- gcc/testsuite/gcc.target/aarch64/movtf_1.c | 87 ++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/movti_1.c | 87 ++++++++++++++++++++++++++++++ 4 files changed, 190 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_1.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 491fc58..9e88438 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -3422,11 +3422,16 @@ aarch64_split_128bit_move (rtx dst, rtx src) } } +/* Return true if we should split a move from 128-bit value SRC + to 128-bit register DEST. */ + bool aarch64_split_128bit_move_p (rtx dst, rtx src) { - return (! REG_P (src) - || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); + if (FP_REGNUM_P (REGNO (dst))) + return REG_P (src) && !FP_REGNUM_P (REGNO (src)); + /* All moves to GPRs need to be split. */ + return true; } /* Split a complex SIMD combine. 
*/ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 19ec9e3..78fe7c43 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1361,13 +1361,14 @@ (define_insn "*movti_aarch64" [(set (match_operand:TI 0 - "nonimmediate_operand" "= r,w, r,w,r,m,m,w,m") + "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m") (match_operand:TI 1 - "aarch64_movti_operand" " rUti,r, w,w,m,r,Z,m,w"))] + "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))] "(register_operand (operands[0], TImode) || aarch64_reg_or_zero (operands[1], TImode))" "@ # + movi\\t%0.2d, #0 # # mov\\t%0.16b, %1.16b @@ -1376,11 +1377,11 @@ stp\\txzr, xzr, %0 ldr\\t%q0, %1 str\\t%q1, %0" - [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \ + [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \ load_16,store_16,store_16,\ load_16,store_16") - (set_attr "length" "8,8,8,4,4,4,4,4,4") - (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")] + (set_attr "length" "8,4,8,8,4,4,4,4,4,4") + (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")] ) ;; Split a TImode register-register or register-immediate move into @@ -1511,9 +1512,9 @@ (define_insn "*movtf_aarch64" [(set (match_operand:TF 0 - "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m") + "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m") (match_operand:TF 1 - "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))] + "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))] "TARGET_FLOAT && (register_operand (operands[0], TFmode) || aarch64_reg_or_fp_zero (operands[1], TFmode))" "@ @@ -1536,7 +1537,7 @@ (define_split [(set (match_operand:TF 0 "register_operand" "") - (match_operand:TF 1 "aarch64_reg_or_imm" ""))] + (match_operand:TF 1 "nonmemory_operand" ""))] "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])" [(const_int 0)] { diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_1.c b/gcc/testsuite/gcc.target/aarch64/movtf_1.c new file mode 100644 index 0000000..570de93 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/aarch64/movtf_1.c @@ -0,0 +1,87 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** zero_q: +** movi v0.2d, #0 +** ret +*/ +void +zero_q () +{ + register _Float128 q0 asm ("q0"); + q0 = 0; + asm volatile ("" :: "w" (q0)); +} + +/* +** load_q: +** ldr q0, \[x0\] +** ret +*/ +void +load_q (_Float128 *ptr) +{ + register _Float128 q0 asm ("q0"); + q0 = *ptr; + asm volatile ("" :: "w" (q0)); +} + +/* +** store_q: +** str q0, \[x0\] +** ret +*/ +void +store_q (_Float128 *ptr) +{ + register _Float128 q0 asm ("q0"); + asm volatile ("" : "=w" (q0)); + *ptr = q0; +} + +/* +** zero_x: +** ( +** mov x0, #?0 +** mov x1, #?0 +** | +** mov x1, #?0 +** mov x0, #?0 +** ) +** ret +*/ +void +zero_x () +{ + register _Float128 x0 asm ("x0"); + x0 = 0; + asm volatile ("" :: "r" (x0)); +} + +/* +** load_x: +** ldp x2, x3, \[x0\] +** ret +*/ +void +load_x (_Float128 *ptr) +{ + register _Float128 x2 asm ("x2"); + x2 = *ptr; + asm volatile ("" :: "r" (x2)); +} + +/* +** store_x: +** stp x2, x3, \[x0\] +** ret +*/ +void +store_x (_Float128 *ptr) +{ + register _Float128 x2 asm ("x2"); + asm volatile ("" : "=r" (x2)); + *ptr = x2; +} diff --git a/gcc/testsuite/gcc.target/aarch64/movti_1.c b/gcc/testsuite/gcc.target/aarch64/movti_1.c new file mode 100644 index 0000000..160e1ac --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/movti_1.c @@ -0,0 +1,87 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** zero_q: +** movi v0.2d, #0 +** ret +*/ +void +zero_q () +{ + register __int128_t q0 asm ("q0"); + q0 = 0; + asm volatile ("" :: "w" (q0)); +} + +/* +** load_q: +** ldr q0, \[x0\] +** ret +*/ +void +load_q (__int128_t *ptr) +{ + register __int128_t q0 asm ("q0"); + q0 = *ptr; + asm volatile ("" :: "w" (q0)); +} + +/* +** store_q: +** str q0, \[x0\] +** ret +*/ +void +store_q (__int128_t *ptr) +{ + register __int128_t q0 asm ("q0"); + asm 
volatile ("" : "=w" (q0)); + *ptr = q0; +} + +/* +** zero_x: +** ( +** mov x0, #?0 +** mov x1, #?0 +** | +** mov x1, #?0 +** mov x0, #?0 +** ) +** ret +*/ +void +zero_x () +{ + register __int128_t x0 asm ("x0"); + x0 = 0; + asm volatile ("" :: "r" (x0)); +} + +/* +** load_x: +** ldp x2, x3, \[x0\] +** ret +*/ +void +load_x (__int128_t *ptr) +{ + register __int128_t x2 asm ("x2"); + x2 = *ptr; + asm volatile ("" :: "r" (x2)); +} + +/* +** store_x: +** stp x2, x3, \[x0\] +** ret +*/ +void +store_x (__int128_t *ptr) +{ + register __int128_t x2 asm ("x2"); + asm volatile ("" : "=r" (x2)); + *ptr = x2; +} -- cgit v1.1 From 135b043196b5575c690ef1e07bcbb49bf037c3a2 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Wed, 30 Sep 2020 12:00:20 +0100 Subject: PR target/96313 AArch64: vqmovun* return types should be unsigned In this PR we have the wrong return type for some intrinsics. It should be unsigned, but we implement it as signed. Fix this by adjusting the type qualifiers used when creating the builtins and fixing the type in the arm_neon.h intrinsic. With the adjustment in qualifiers we now don't need to cast the result when returning. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ PR target/96313 * config/aarch64/aarch64-simd-builtins.def (sqmovun): Use UNOPUS qualifiers. * config/aarch64/arm_neon.h (vqmovun_s16): Adjust builtin call. Remove unnecessary result cast. (vqmovun_s32): Likewise. (vqmovun_s64): Likewise. (vqmovunh_s16): Likewise. Fix return type. (vqmovuns_s32): Likewise. (vqmovund_s64): Likewise. gcc/testsuite/ PR target/96313 * gcc.target/aarch64/pr96313.c: New test. * gcc.target/aarch64/scalar_intrinsics.c (test_vqmovunh_s16): Adjust return type. (test_vqmovuns_s32): Likewise. (test_vqmovund_s64): Likewise. 
--- gcc/config/aarch64/aarch64-simd-builtins.def | 2 +- gcc/config/aarch64/arm_neon.h | 18 +++++++++--------- gcc/testsuite/gcc.target/aarch64/pr96313.c | 8 ++++++++ gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c | 6 +++--- 4 files changed, 21 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr96313.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 48ecd4a..3554fb0 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -159,7 +159,7 @@ BUILTIN_VQN (TERNOP, raddhn2, 0, NONE) BUILTIN_VQN (TERNOP, rsubhn2, 0, NONE) - BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0, ALL) + BUILTIN_VSQN_HSDI (UNOPUS, sqmovun, 0, ALL) /* Implemented by aarch64_qmovn. */ BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0, ALL) BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0, ALL) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 9a970e7..6729fb5 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -24046,42 +24046,42 @@ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovun_s16 (int16x8_t __a) { - return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a); + return __builtin_aarch64_sqmovunv8hi_us (__a); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovun_s32 (int32x4_t __a) { - return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a); + return __builtin_aarch64_sqmovunv4si_us (__a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovun_s64 (int64x2_t __a) { - return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a); + return __builtin_aarch64_sqmovunv2di_us (__a); } -__extension__ extern __inline int8_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
vqmovunh_s16 (int16_t __a) { - return (int8_t) __builtin_aarch64_sqmovunhi (__a); + return __builtin_aarch64_sqmovunhi_us (__a); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovuns_s32 (int32_t __a) { - return (int16_t) __builtin_aarch64_sqmovunsi (__a); + return __builtin_aarch64_sqmovunsi_us (__a); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovund_s64 (int64_t __a) { - return (int32_t) __builtin_aarch64_sqmovundi (__a); + return __builtin_aarch64_sqmovundi_us (__a); } /* vqneg */ diff --git a/gcc/testsuite/gcc.target/aarch64/pr96313.c b/gcc/testsuite/gcc.target/aarch64/pr96313.c new file mode 100644 index 0000000..0bf7604 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr96313.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ + +#include + +uint32_t (*fp3)(int64_t) = vqmovund_s64; +uint8_t (*fp4)(int16_t) = vqmovunh_s16; +uint16_t (*fp5)(int32_t) = vqmovuns_s32; + diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c index d943989..c2e13b6 100644 --- a/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/scalar_intrinsics.c @@ -661,7 +661,7 @@ test_vqnegs_s32 (int32_t a) /* { dg-final { scan-assembler-times "\\tsqxtun\\tb\[0-9\]+" 1 } } */ -int8_t +uint8_t test_vqmovunh_s16 (int16_t a) { return vqmovunh_s16 (a); @@ -669,7 +669,7 @@ test_vqmovunh_s16 (int16_t a) /* { dg-final { scan-assembler-times "\\tsqxtun\\th\[0-9\]+" 1 } } */ -int16_t +uint16_t test_vqmovuns_s32 (int32_t a) { return vqmovuns_s32 (a); @@ -677,7 +677,7 @@ test_vqmovuns_s32 (int32_t a) /* { dg-final { scan-assembler-times "\\tsqxtun\\ts\[0-9\]+" 1 } } */ -int32_t +uint32_t test_vqmovund_s64 (int64_t a) { return vqmovund_s64 (a); -- cgit v1.1 From 
2d8fbebdb1eaca8de557ab3052535a8e4b8f8972 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Wed, 30 Sep 2020 12:01:23 +0100 Subject: PR target/97150 AArch64: 2nd parameter of unsigned Neon scalar shift intrinsics should be signed In this PR the second argument to the intrinsics should be signed but we use an unsigned one erroneously. The corresponding builtins are already using the correct types so it's just a matter of correcting the signatures in arm_neon.h gcc/ PR target/97150 * config/aarch64/arm_neon.h (vqrshlb_u8): Make second argument signed. (vqrshlh_u16): Likewise. (vqrshls_u32): Likewise. (vqrshld_u64): Likewise. (vqshlb_u8): Likewise. (vqshlh_u16): Likewise. (vqshls_u32): Likewise. (vqshld_u64): Likewise. (vshld_u64): Likewise. gcc/testsuite/ PR target/97150 * gcc.target/aarch64/pr97150.c: New test. --- gcc/config/aarch64/arm_neon.h | 18 +++++++++--------- gcc/testsuite/gcc.target/aarch64/pr97150.c | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr97150.c (limited to 'gcc') diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 6729fb5..d943f63 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -24337,28 +24337,28 @@ vqrshld_s64 (int64_t __a, int64_t __b) __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlb_u8 (uint8_t __a, uint8_t __b) +vqrshlb_u8 (uint8_t __a, int8_t __b) { return __builtin_aarch64_uqrshlqi_uus (__a, __b); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlh_u16 (uint16_t __a, uint16_t __b) +vqrshlh_u16 (uint16_t __a, int16_t __b) { return __builtin_aarch64_uqrshlhi_uus (__a, __b); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshls_u32 (uint32_t __a, uint32_t __b) +vqrshls_u32 (uint32_t __a, int32_t __b) { return 
__builtin_aarch64_uqrshlsi_uus (__a, __b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshld_u64 (uint64_t __a, uint64_t __b) +vqrshld_u64 (uint64_t __a, int64_t __b) { return __builtin_aarch64_uqrshldi_uus (__a, __b); } @@ -24637,28 +24637,28 @@ vqshld_s64 (int64_t __a, int64_t __b) __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlb_u8 (uint8_t __a, uint8_t __b) +vqshlb_u8 (uint8_t __a, int8_t __b) { return __builtin_aarch64_uqshlqi_uus (__a, __b); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlh_u16 (uint16_t __a, uint16_t __b) +vqshlh_u16 (uint16_t __a, int16_t __b) { return __builtin_aarch64_uqshlhi_uus (__a, __b); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshls_u32 (uint32_t __a, uint32_t __b) +vqshls_u32 (uint32_t __a, int32_t __b) { return __builtin_aarch64_uqshlsi_uus (__a, __b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshld_u64 (uint64_t __a, uint64_t __b) +vqshld_u64 (uint64_t __a, int64_t __b) { return __builtin_aarch64_uqshldi_uus (__a, __b); } @@ -26999,7 +26999,7 @@ vshld_s64 (int64_t __a, int64_t __b) __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshld_u64 (uint64_t __a, uint64_t __b) +vshld_u64 (uint64_t __a, int64_t __b) { return __builtin_aarch64_ushldi_uus (__a, __b); } diff --git a/gcc/testsuite/gcc.target/aarch64/pr97150.c b/gcc/testsuite/gcc.target/aarch64/pr97150.c new file mode 100644 index 0000000..7abdd8c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr97150.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ + +#include + +uint8_t (*fp0)(uint8_t, int8_t) = vqshlb_u8; +uint16_t (*fp1)(uint16_t, int16_t) = vqshlh_u16; +uint32_t (*fp2)(uint32_t, 
int32_t) = vqshls_u32; +uint64_t (*fp3)(uint64_t, int64_t) = vqshld_u64; +uint8_t (*fp4)(uint8_t, int8_t) = vqrshlb_u8; +uint16_t (*fp5)(uint16_t, int16_t) = vqrshlh_u16; +uint32_t (*fp6)(uint32_t, int32_t) = vqrshls_u32; +uint64_t (*fp7)(uint64_t, int64_t) = vqrshld_u64; +uint64_t (*fp8)(uint64_t, int64_t) = vshld_u64; + -- cgit v1.1 From 7d131029918a8b8fb8676f1cc17af21b78907abf Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 30 Sep 2020 13:21:04 +0200 Subject: testsuite: Fix up amx* dg-do run tests with older binutils These tests were missing dg-requires-effective-targets to ensure they are UNSUPPORTED if the assembler doesn't have AMX support. 2020-09-30 Jakub Jelinek * gcc.target/i386/amxint8-dpbssd-2.c: Require effective targets amx_tile and amx_int8. * gcc.target/i386/amxint8-dpbsud-2.c: Likewise. * gcc.target/i386/amxint8-dpbusd-2.c: Likewise. * gcc.target/i386/amxint8-dpbuud-2.c: Likewise. * gcc.target/i386/amxbf16-dpbf16ps-2.c: Require effective targets amx_tile and amx_bf16. * gcc.target/i386/amxtile-2.c: Require effective target amx_tile. --- gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c | 2 ++ gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c | 2 ++ gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c | 2 ++ gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c | 2 ++ gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c | 2 ++ gcc/testsuite/gcc.target/i386/amxtile-2.c | 1 + 6 files changed, 11 insertions(+) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c index c819113..349ec58 100644 --- a/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c +++ b/gcc/testsuite/gcc.target/i386/amxbf16-dpbf16ps-2.c @@ -1,4 +1,6 @@ /* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-require-effective-target amx_tile } */ +/* { dg-require-effective-target amx_bf16 } */ /* { dg-options "-O2 -mamx-tile -mamx-bf16" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c index 62d31ce..74ad71b 100644 --- a/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c +++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbssd-2.c @@ -1,4 +1,6 @@ /* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_tile } */ +/* { dg-require-effective-target amx_int8 } */ /* { dg-options "-O2 -mamx-tile -mamx-int8" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c index 5007ee9..e7241bd 100644 --- a/gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c +++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbsud-2.c @@ -1,4 +1,6 @@ /* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_tile } */ +/* { dg-require-effective-target amx_int8 } */ /* { dg-options "-O2 -mamx-tile -mamx-int8" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c index 17888e2..f0b9f97 100644 --- a/gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c +++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbusd-2.c @@ -1,4 +1,6 @@ /* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_tile } */ +/* { dg-require-effective-target amx_int8 } */ /* { dg-options "-O2 -mamx-tile -mamx-int8" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c b/gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c index c39666c..eb70b2f 100644 --- a/gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c +++ b/gcc/testsuite/gcc.target/i386/amxint8-dpbuud-2.c @@ -1,4 +1,6 @@ /* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-require-effective-target amx_tile } */ +/* { dg-require-effective-target amx_int8 } */ /* { dg-options "-O2 -mamx-tile -mamx-int8" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/amxtile-2.c b/gcc/testsuite/gcc.target/i386/amxtile-2.c index cef84f9..1f4138e 100644 --- a/gcc/testsuite/gcc.target/i386/amxtile-2.c +++ b/gcc/testsuite/gcc.target/i386/amxtile-2.c @@ -1,4 +1,5 @@ /* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target amx_tile } */ /* { dg-options "-O2 -mamx-tile " } */ #include -- cgit v1.1 From 92e652d8c21bd7e66cbb0f9001542a2f55345af0 Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Tue, 29 Sep 2020 17:28:22 +0200 Subject: i386: Define __LAHF_SAHF__ and __MOVBE__ macros, based on ISA flags gcc/ * config/i386/i386-c.c (ix86_target_macros_internal): Define __LAHF_SAHF__ and __MOVBE__ based on ISA flags. --- gcc/config/i386/i386-c.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'gcc') diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 9da682a..e647fce 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -594,6 +594,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__AMX_INT8__"); if (isa_flag2 & OPTION_MASK_ISA2_AMX_BF16) def_or_undef (parse_in, "__AMX_BF16__"); + if (isa_flag & OPTION_MASK_ISA_SAHF) + def_or_undef (parse_in, "__LAHF_SAHF__"); + if (isa_flag2 & OPTION_MASK_ISA2_MOVBE) + def_or_undef (parse_in, "__MOVBE__"); if (TARGET_IAMCU) { -- cgit v1.1 From 4c0eb14bc8553d6e97f1cf70276c6a4fa67b7c38 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Wed, 30 Sep 2020 11:51:41 +0200 Subject: [testsuite] Re-enable pr94600-{1,3}.c tests for arm Before commit 7e437162001 "[testsuite] Require non_strict_align in pr94600-{1,3}.c", some tests were failing for nvptx, because volatile stores were expected, but memcpy's were found instead. This was traced back to this bit in compute_record_mode: ... 
/* If structure's known alignment is less than what the scalar mode would need, and it matters, then stick with BLKmode. */ if (mode != BLKmode && STRICT_ALIGNMENT && ! (TYPE_ALIGN (type) >= BIGGEST_ALIGNMENT || TYPE_ALIGN (type) >= GET_MODE_ALIGNMENT (mode))) { /* If this is the only reason this type is BLKmode, then don't force containing types to be BLKmode. */ TYPE_NO_FORCE_BLK (type) = 1; mode = BLKmode; } ... which got triggered for nvptx, but not for x86_64. The commit disabled the tests for non_strict_align effective target, but that had the effect for the arm target that those tests were disabled, even though they were passing before. Further investigation in compute_record_mode shows that the if-condition evaluates to false for arm, because TYPE_ALIGN (type) == 32, while it's 8 for nvptx. This again can be explained by the PCC_BITFIELD_TYPE_MATTERS setting, which is 1 for arm, but 0 for nvptx. Re-enable the test for arm by using effective target (non_strict_align || pcc_bitfield_type_matters). Tested on arm-eabi and nvptx. gcc/testsuite/ChangeLog: 2020-09-30 Tom de Vries * gcc.dg/pr94600-1.c: Use effective target (non_strict_align || pcc_bitfield_type_matters). * gcc.dg/pr94600-3.c: Same. --- gcc/testsuite/gcc.dg/pr94600-1.c | 4 ++-- gcc/testsuite/gcc.dg/pr94600-3.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/pr94600-1.c b/gcc/testsuite/gcc.dg/pr94600-1.c index 38f939a..c9a7bb9 100644 --- a/gcc/testsuite/gcc.dg/pr94600-1.c +++ b/gcc/testsuite/gcc.dg/pr94600-1.c @@ -32,5 +32,5 @@ foo(void) } /* The only volatile accesses should be the obvious writes.
*/ -/* { dg-final { scan-rtl-dump-times {\(mem/v} 6 "final" { target { non_strict_align } } } } */ -/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 6 "final" { target { non_strict_align } } } } */ +/* { dg-final { scan-rtl-dump-times {\(mem/v} 6 "final" { target { non_strict_align || pcc_bitfield_type_matters } } } } */ +/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 6 "final" { target { non_strict_align || pcc_bitfield_type_matters } } } } */ diff --git a/gcc/testsuite/gcc.dg/pr94600-3.c b/gcc/testsuite/gcc.dg/pr94600-3.c index e8776fb..ff42c7d 100644 --- a/gcc/testsuite/gcc.dg/pr94600-3.c +++ b/gcc/testsuite/gcc.dg/pr94600-3.c @@ -31,5 +31,5 @@ foo(void) } /* The loop isn't unrolled. */ -/* { dg-final { scan-rtl-dump-times {\(mem/v} 1 "final" { target { non_strict_align } } } } */ -/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 1 "final" { target { non_strict_align } } } } */ +/* { dg-final { scan-rtl-dump-times {\(mem/v} 1 "final" { target { non_strict_align || pcc_bitfield_type_matters } } } } */ +/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 1 "final" { target { non_strict_align || pcc_bitfield_type_matters } } } } */ -- cgit v1.1 From 46183c96d2aea8181efb6bc3cfdb221987fe002d Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 23 Sep 2020 12:11:45 -0700 Subject: x86: Use SET operation in MOVDIRI and MOVDIR64B Since MOVDIRI and MOVDIR64B write to memory, similar to UNSPEC_MOVNT, use SET operation in MOVDIRI and MOVDIR64B patterns with UNSPEC instead of UNSPECV. gcc/ PR target/97184 * config/i386/i386.md (UNSPECV_MOVDIRI): Renamed to ... (UNSPEC_MOVDIRI): This. (UNSPECV_MOVDIR64B): Renamed to ... (UNSPEC_MOVDIR64B): This. (movdiri): Use SET operation. (@movdir64b_): Likewise. gcc/testsuite/ PR target/97184 * gcc.target/i386/movdir64b.c: New test. * gcc.target/i386/movdiri32.c: Likewise. * gcc.target/i386/movdiri64.c: Likewise. * lib/target-supports.exp (check_effective_target_movdir): New. 
--- gcc/config/i386/i386.md | 20 ++++++++++---------- gcc/testsuite/gcc.target/i386/movdir64b.c | 23 +++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/movdiri32.c | 20 ++++++++++++++++++++ gcc/testsuite/gcc.target/i386/movdiri64.c | 20 ++++++++++++++++++++ gcc/testsuite/lib/target-supports.exp | 17 +++++++++++++++++ 5 files changed, 90 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/movdir64b.c create mode 100644 gcc/testsuite/gcc.target/i386/movdiri32.c create mode 100644 gcc/testsuite/gcc.target/i386/movdiri64.c (limited to 'gcc') diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 93aae81..9dd12cf 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -186,6 +186,10 @@ ;; IRET support UNSPEC_INTERRUPT_RETURN + + ;; For MOVDIRI and MOVDIR64B support + UNSPEC_MOVDIRI + UNSPEC_MOVDIR64B ]) (define_c_enum "unspecv" [ @@ -280,10 +284,6 @@ UNSPECV_SETSSBSY UNSPECV_CLRSSBSY - ;; For MOVDIRI and MOVDIR64B support - UNSPECV_MOVDIRI - UNSPECV_MOVDIR64B - ;; For TSXLDTRK support UNSPECV_XSUSLDTRK UNSPECV_XRESLDTRK @@ -21531,17 +21531,17 @@ ;; MOVDIRI and MOVDIR64B (define_insn "movdiri" - [(unspec_volatile:SWI48 [(match_operand:SWI48 0 "memory_operand" "m") - (match_operand:SWI48 1 "register_operand" "r")] - UNSPECV_MOVDIRI)] + [(set (match_operand:SWI48 0 "memory_operand" "=m") + (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")] + UNSPEC_MOVDIRI))] "TARGET_MOVDIRI" "movdiri\t{%1, %0|%0, %1}" [(set_attr "type" "other")]) (define_insn "@movdir64b_" - [(unspec_volatile:XI [(match_operand:P 0 "register_operand" "r") - (match_operand:XI 1 "memory_operand")] - UNSPECV_MOVDIR64B)] + [(set (mem:XI (match_operand:P 0 "register_operand" "r")) + (unspec:XI [(match_operand:XI 1 "memory_operand" "m")] + UNSPEC_MOVDIR64B))] "TARGET_MOVDIR64B" "movdir64b\t{%1, %0|%0, %1}" [(set_attr "type" "other")]) diff --git a/gcc/testsuite/gcc.target/i386/movdir64b.c b/gcc/testsuite/gcc.target/i386/movdir64b.c new file 
mode 100644 index 0000000..70d872e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/movdir64b.c @@ -0,0 +1,23 @@ +/* { dg-do run { target movdir } } */ +/* { dg-options "-mmovdir64b -O2" } */ + +#include +#include + +unsigned long long int src[8] = {1, 2, 3, 4, 5, 6, 7, 8}; +unsigned long long int dest[8] __attribute__ ((aligned (64))) + = {-1, -1, -1, -1, -1, -1, -1, -1}; + +int +main () +{ + if (!__builtin_cpu_supports ("movdir64b")) + return 0; + + _movdir64b (dest, src); + + if (memcmp (dest, src, sizeof (dest)) != 0) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/movdiri32.c b/gcc/testsuite/gcc.target/i386/movdiri32.c new file mode 100644 index 0000000..604fa0d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/movdiri32.c @@ -0,0 +1,20 @@ +/* { dg-do run { target movdir } } */ +/* { dg-options "-mmovdiri -O2" } */ + +#include + +unsigned int dest = -1; + +int +main () +{ + if (!__builtin_cpu_supports ("movdiri")) + return 0; + + _directstoreu_u32 (&dest, 0xbadbeef); + + if (dest != 0xbadbeef) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/movdiri64.c b/gcc/testsuite/gcc.target/i386/movdiri64.c new file mode 100644 index 0000000..551b98e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/movdiri64.c @@ -0,0 +1,20 @@ +/* { dg-do run { target { movdir && { ! 
ia32 } } } } */ +/* { dg-options "-mmovdiri -O2" } */ + +#include + +unsigned long long int dest = -1LL; + +int +main () +{ + if (!__builtin_cpu_supports ("movdiri")) + return 0; + + _directstoreu_u64 (&dest, 0x12345678badbeef); + + if (dest != 0x12345678badbeef) + abort (); + + return 0; +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 8314e44..2fb59e8 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -10552,3 +10552,20 @@ proc check_effective_target_ident_directive {} { int i; }] } + +# Return 1 if we're able to assemble movdiri and movdir64b + +proc check_effective_target_movdir { } { + return [check_no_compiler_messages movdir object { + void + foo (unsigned int *d, unsigned int s) + { + __builtin_ia32_directstoreu_u32 (d, s); + } + void + bar (void *d, const void *s) + { + __builtin_ia32_movdir64b (d, s); + } + } "-mmovdiri -mmovdir64b" ] +} -- cgit v1.1 From bae974e637421263e8854a69b83284fa6309f9a1 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Tue, 29 Sep 2020 10:10:06 +0200 Subject: [nvptx] Add type arg to TARGET_LIBC_HAS_FUNCTION GCC has a target hook TARGET_LIBC_HAS_FUNCTION, which tells the compiler which functions it can expect to be present in libc. The default target hook does not include the sincos functions. The nvptx port of newlib does include sincos and sincosf, but not sincosl. The target hook TARGET_LIBC_HAS_FUNCTION does not distinguish between sincos, sincosf and sincosl, so if we enable it for the sincos functions, then for test.c: ... long double x, a, b; int main (void) { x = 0.5; a = sinl (x); b = cosl (x); printf ("a: %f\n", (double)a); printf ("b: %f\n", (double)b); return 0; } ... we introduce a regression: ... $ gcc test.c -lm -O2 unresolved symbol sincosl collect2: error: ld returned 1 exit status ... 
Add a type argument to target hook TARGET_LIBC_HAS_FUNCTION, and use it in nvptx_libc_has_function to enable sincos and sincosf, but not sincosl. Built and reg-tested on x86_64-linux. Built and tested on nvptx. gcc/ChangeLog: 2020-09-28 Tobias Burnus Tom de Vries * builtins.c (expand_builtin_cexpi, fold_builtin_sincos): Update targetm.libc_has_function call. * builtins.def (DEF_C94_BUILTIN, DEF_C99_BUILTIN, DEF_C11_BUILTIN): (DEF_C2X_BUILTIN, DEF_C99_COMPL_BUILTIN, DEF_C99_C90RES_BUILTIN): Same. * config/darwin-protos.h (darwin_libc_has_function): Update prototype. * config/darwin.c (darwin_libc_has_function): Add arg. * config/linux-protos.h (linux_libc_has_function): Update prototype. * config/linux.c (linux_libc_has_function): Add arg. * config/i386/i386.c (ix86_libc_has_function): Update targetm.libc_has_function call. * config/nvptx/nvptx.c (nvptx_libc_has_function): New function. (TARGET_LIBC_HAS_FUNCTION): Redefine to nvptx_libc_has_function. * convert.c (convert_to_integer_1): Update targetm.libc_has_function call. * match.pd: Same. * target.def (libc_has_function): Add arg. * doc/tm.texi: Regenerate. * targhooks.c (default_libc_has_function, gnu_libc_has_function) (no_c99_libc_has_function): Add arg. * targhooks.h (default_libc_has_function, no_c99_libc_has_function) (gnu_libc_has_function): Update prototype. * tree-ssa-math-opts.c (pass_cse_sincos::execute): Update targetm.libc_has_function call. gcc/fortran/ChangeLog: 2020-09-30 Tom de Vries * f95-lang.c (gfc_init_builtin_functions): Update targetm.libc_has_function call.
--- gcc/builtins.c | 4 ++-- gcc/builtins.def | 20 ++++++++++++++------ gcc/config/darwin-protos.h | 2 +- gcc/config/darwin.c | 3 ++- gcc/config/i386/i386.c | 2 +- gcc/config/linux-protos.h | 2 +- gcc/config/linux.c | 3 ++- gcc/config/nvptx/nvptx.c | 20 ++++++++++++++++++++ gcc/convert.c | 8 ++++---- gcc/doc/tm.texi | 7 +++++-- gcc/fortran/f95-lang.c | 4 ++-- gcc/match.pd | 6 +++--- gcc/target.def | 7 +++++-- gcc/targhooks.c | 9 ++++++--- gcc/targhooks.h | 6 +++--- gcc/tree-ssa-math-opts.c | 8 +++++--- 16 files changed, 76 insertions(+), 35 deletions(-) (limited to 'gcc') diff --git a/gcc/builtins.c b/gcc/builtins.c index cac842f..8f2662b 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -2733,7 +2733,7 @@ expand_builtin_cexpi (tree exp, rtx target) /* Compute into op1 and op2. */ expand_twoval_unop (sincos_optab, op0, op2, op1, 0); } - else if (targetm.libc_has_function (function_sincos)) + else if (targetm.libc_has_function (function_sincos, type)) { tree call, fn = NULL_TREE; tree top1, top2; @@ -9770,7 +9770,7 @@ fold_builtin_sincos (location_t loc, } if (!call) { - if (!targetm.libc_has_function (function_c99_math_complex) + if (!targetm.libc_has_function (function_c99_math_complex, type) || !builtin_decl_implicit_p (fn)) return NULL_TREE; fndecl = builtin_decl_explicit (fn); diff --git a/gcc/builtins.def b/gcc/builtins.def index 102322b..95428c0 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -138,34 +138,41 @@ along with GCC; see the file COPYING3. If not see #undef DEF_C94_BUILTIN #define DEF_C94_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ - true, true, !flag_isoc94, ATTRS, targetm.libc_has_function (function_c94), true) + true, true, !flag_isoc94, ATTRS, \ + targetm.libc_has_function (function_c94, NULL_TREE), true) /* Like DEF_LIB_BUILTIN, except that the function is only a part of the standard in C99 or above. 
*/ #undef DEF_C99_BUILTIN #define DEF_C99_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ - true, true, !flag_isoc99, ATTRS, targetm.libc_has_function (function_c99_misc), true) + true, true, !flag_isoc99, ATTRS, \ + targetm.libc_has_function (function_c99_misc, NULL_TREE), true) /* Like DEF_LIB_BUILTIN, except that the function is only a part of the standard in C11 or above. */ #undef DEF_C11_BUILTIN #define DEF_C11_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ - true, true, !flag_isoc11, ATTRS, targetm.libc_has_function (function_c11_misc), true) + true, true, !flag_isoc11, ATTRS, \ + targetm.libc_has_function (function_c11_misc, NULL_TREE), true) /* Like DEF_LIB_BUILTIN, except that the function is only a part of the standard in C2x or above. */ #undef DEF_C2X_BUILTIN #define DEF_C2X_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ - true, true, !flag_isoc2x, ATTRS, targetm.libc_has_function (function_c2x_misc), true) + true, true, !flag_isoc2x, ATTRS, \ + targetm.libc_has_function (function_c2x_misc, NULL_TREE), true) /* Like DEF_C99_BUILTIN, but for complex math functions. */ #undef DEF_C99_COMPL_BUILTIN #define DEF_C99_COMPL_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ - true, true, !flag_isoc99, ATTRS, targetm.libc_has_function (function_c99_math_complex), true) + true, true, !flag_isoc99, ATTRS, \ + targetm.libc_has_function (function_c99_math_complex, \ + NULL_TREE), \ + true) /* Builtin that is specified by C99 and C90 reserve the name for future use. We can still recognize the builtin in C90 mode but we can't produce it @@ -173,7 +180,8 @@ along with GCC; see the file COPYING3. 
If not see #undef DEF_C99_C90RES_BUILTIN #define DEF_C99_C90RES_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ - true, true, !flag_isoc99, ATTRS, targetm.libc_has_function (function_c99_misc), true) + true, true, !flag_isoc99, ATTRS, \ + targetm.libc_has_function (function_c99_misc, NULL_TREE), true) /* Builtin that C99 reserve the name for future use. We can still recognize the builtin in C99 mode but we can't produce it implicitly. */ diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h index 54cd1e4..49c540f 100644 --- a/gcc/config/darwin-protos.h +++ b/gcc/config/darwin-protos.h @@ -125,6 +125,6 @@ extern bool darwin_kextabi_p (void); extern void darwin_override_options (void); extern void darwin_patch_builtins (void); extern void darwin_rename_builtins (void); -extern bool darwin_libc_has_function (enum function_class fn_class); +extern bool darwin_libc_has_function (enum function_class fn_class, tree); #endif /* CONFIG_DARWIN_PROTOS_H */ diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c index c8edfb8..b64aaa7 100644 --- a/gcc/config/darwin.c +++ b/gcc/config/darwin.c @@ -3542,7 +3542,8 @@ darwin_rename_builtins (void) } bool -darwin_libc_has_function (enum function_class fn_class) +darwin_libc_has_function (enum function_class fn_class, + tree type ATTRIBUTE_UNUSED) { if (fn_class == function_sincos) return (strverscmp (darwin_macosx_version_min, "10.9") >= 0); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c890a73..f684954 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1484,7 +1484,7 @@ ix86_reg_parm_stack_space (const_tree fndecl) bool ix86_libc_has_function (enum function_class fn_class) { - return targetm.libc_has_function (fn_class); + return targetm.libc_has_function (fn_class, NULL_TREE); } /* Returns value SYSV_ABI, MS_ABI dependent on fntype, diff --git a/gcc/config/linux-protos.h b/gcc/config/linux-protos.h index 3759187..c52778b 
100644 --- a/gcc/config/linux-protos.h +++ b/gcc/config/linux-protos.h @@ -19,4 +19,4 @@ along with GCC; see the file COPYING3. If not see extern bool linux_has_ifunc_p (void); -extern bool linux_libc_has_function (enum function_class fn_class); +extern bool linux_libc_has_function (enum function_class fn_class, tree); diff --git a/gcc/config/linux.c b/gcc/config/linux.c index 9876153..83ffff4 100644 --- a/gcc/config/linux.c +++ b/gcc/config/linux.c @@ -25,7 +25,8 @@ along with GCC; see the file COPYING3. If not see #include "linux-protos.h" bool -linux_libc_has_function (enum function_class fn_class) +linux_libc_has_function (enum function_class fn_class, + tree type ATTRIBUTE_UNUSED) { if (OPTION_GLIBC || OPTION_MUSL) return true; diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index de82f9a..afac1bda 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -6536,6 +6536,23 @@ nvptx_set_current_function (tree fndecl) oacc_bcast_partition = 0; } +/* Implement TARGET_LIBC_HAS_FUNCTION. */ + +bool +nvptx_libc_has_function (enum function_class fn_class, tree type) +{ + if (fn_class == function_sincos) + { + if (type != NULL_TREE) + /* Currently, newlib does not support sincosl. 
*/ + return type == float_type_node || type == double_type_node; + else + return true; + } + + return default_libc_has_function (fn_class, type); +} + #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nvptx_option_override @@ -6681,6 +6698,9 @@ nvptx_set_current_function (tree fndecl) #undef TARGET_SET_CURRENT_FUNCTION #define TARGET_SET_CURRENT_FUNCTION nvptx_set_current_function +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION nvptx_libc_has_function + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-nvptx.h" diff --git a/gcc/convert.c b/gcc/convert.c index 292c513..7b28332 100644 --- a/gcc/convert.c +++ b/gcc/convert.c @@ -501,7 +501,7 @@ convert_to_integer_1 (tree type, tree expr, bool dofold) CASE_FLT_FN (BUILT_IN_CEIL): CASE_FLT_FN_FLOATN_NX (BUILT_IN_CEIL): /* Only convert in ISO C99 mode. */ - if (!targetm.libc_has_function (function_c99_misc)) + if (!targetm.libc_has_function (function_c99_misc, intype)) break; if (outprec < TYPE_PRECISION (integer_type_node) || (outprec == TYPE_PRECISION (integer_type_node) @@ -518,7 +518,7 @@ convert_to_integer_1 (tree type, tree expr, bool dofold) CASE_FLT_FN (BUILT_IN_FLOOR): CASE_FLT_FN_FLOATN_NX (BUILT_IN_FLOOR): /* Only convert in ISO C99 mode. */ - if (!targetm.libc_has_function (function_c99_misc)) + if (!targetm.libc_has_function (function_c99_misc, intype)) break; if (outprec < TYPE_PRECISION (integer_type_node) || (outprec == TYPE_PRECISION (integer_type_node) @@ -535,7 +535,7 @@ convert_to_integer_1 (tree type, tree expr, bool dofold) CASE_FLT_FN (BUILT_IN_ROUND): CASE_FLT_FN_FLOATN_NX (BUILT_IN_ROUND): /* Only convert in ISO C99 mode and with -fno-math-errno. 
*/ - if (!targetm.libc_has_function (function_c99_misc) + if (!targetm.libc_has_function (function_c99_misc, intype) || flag_errno_math) break; if (outprec < TYPE_PRECISION (integer_type_node) @@ -559,7 +559,7 @@ convert_to_integer_1 (tree type, tree expr, bool dofold) CASE_FLT_FN (BUILT_IN_RINT): CASE_FLT_FN_FLOATN_NX (BUILT_IN_RINT): /* Only convert in ISO C99 mode and with -fno-math-errno. */ - if (!targetm.libc_has_function (function_c99_misc) + if (!targetm.libc_has_function (function_c99_misc, intype) || flag_errno_math) break; if (outprec < TYPE_PRECISION (integer_type_node) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 8e9e770..97437e8 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5602,9 +5602,12 @@ refers to the global ``variable'' @code{errno}. (On certain systems, macro, a reasonable default is used. @end defmac -@deftypefn {Target Hook} bool TARGET_LIBC_HAS_FUNCTION (enum function_class @var{fn_class}) +@deftypefn {Target Hook} bool TARGET_LIBC_HAS_FUNCTION (enum function_class @var{fn_class}, tree @var{type}) This hook determines whether a function from a class of functions -@var{fn_class} is present in the target C library. +@var{fn_class} is present in the target C library. If @var{type} is NULL, +the caller asks for support for all standard (float, double, long double) +types. If @var{type} is non-NULL, the caller asks for support for a +specific type. 
@end deftypefn @deftypefn {Target Hook} bool TARGET_LIBC_HAS_FAST_FUNCTION (int @var{fcode}) diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c index e3288d7..526b721 100644 --- a/gcc/fortran/f95-lang.c +++ b/gcc/fortran/f95-lang.c @@ -881,7 +881,7 @@ gfc_init_builtin_functions (void) BUILT_IN_POWIF, "powif", ATTR_CONST_NOTHROW_LEAF_LIST); - if (targetm.libc_has_function (function_c99_math_complex)) + if (targetm.libc_has_function (function_c99_math_complex, NULL_TREE)) { gfc_define_builtin ("__builtin_cbrtl", mfunc_longdouble[0], BUILT_IN_CBRTL, "cbrtl", @@ -903,7 +903,7 @@ gfc_init_builtin_functions (void) ATTR_CONST_NOTHROW_LEAF_LIST); } - if (targetm.libc_has_function (function_sincos)) + if (targetm.libc_has_function (function_sincos, NULL_TREE)) { gfc_define_builtin ("__builtin_sincosl", func_longdouble_longdoublep_longdoublep, diff --git a/gcc/match.pd b/gcc/match.pd index e6dcdd0..952643f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5185,7 +5185,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (with { const REAL_VALUE_TYPE *const value = TREE_REAL_CST_PTR (@0); bool use_exp2 = false; - if (targetm.libc_has_function (function_c99_misc) + if (targetm.libc_has_function (function_c99_misc, TREE_TYPE (@0)) && value->cl == rvc_normal) { REAL_VALUE_TYPE frac_rvt = *value; @@ -5484,7 +5484,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) cexpis (CEXPI) (simplify (cexps compositional_complex@0) - (if (targetm.libc_has_function (function_c99_math_complex)) + (if (targetm.libc_has_function (function_c99_math_complex, TREE_TYPE (@0))) (complex (mult (exps@1 (realpart @0)) (realpart (cexpis:type@2 (imagpart @0)))) (mult @1 (imagpart @2))))))) @@ -5536,7 +5536,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* truncl(extend(x)) and trunc(extend(x)) -> extend(truncf(x)), etc., if x is a float. 
*/ (if (optimize && canonicalize_math_p () - && targetm.libc_has_function (function_c99_misc)) + && targetm.libc_has_function (function_c99_misc, NULL_TREE)) (simplify (froms (convert float_value_p@0)) (convert (tos @0))))) diff --git a/gcc/target.def b/gcc/target.def index fc4563d..ed2da15 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -2618,8 +2618,11 @@ set via @code{__attribute__}.", DEFHOOK (libc_has_function, "This hook determines whether a function from a class of functions\n\ -@var{fn_class} is present in the target C library.", - bool, (enum function_class fn_class), +@var{fn_class} is present in the target C library. If @var{type} is NULL,\n\ +the caller asks for support for all standard (float, double, long double)\n\ +types. If @var{type} is non-NULL, the caller asks for support for a\n\ +specific type.", + bool, (enum function_class fn_class, tree type), default_libc_has_function) DEFHOOK diff --git a/gcc/targhooks.c b/gcc/targhooks.c index da4805d..5d94fce 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1641,7 +1641,8 @@ default_have_conditional_execution (void) /* By default we assume that c99 functions are present at the runtime, but sincos is not. 
*/ bool -default_libc_has_function (enum function_class fn_class) +default_libc_has_function (enum function_class fn_class, + tree type ATTRIBUTE_UNUSED) { if (fn_class == function_c94 || fn_class == function_c99_misc @@ -1660,13 +1661,15 @@ default_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED) } bool -gnu_libc_has_function (enum function_class fn_class ATTRIBUTE_UNUSED) +gnu_libc_has_function (enum function_class fn_class ATTRIBUTE_UNUSED, + tree type ATTRIBUTE_UNUSED) { return true; } bool -no_c99_libc_has_function (enum function_class fn_class ATTRIBUTE_UNUSED) +no_c99_libc_has_function (enum function_class fn_class ATTRIBUTE_UNUSED, + tree type ATTRIBUTE_UNUSED) { return false; } diff --git a/gcc/targhooks.h b/gcc/targhooks.h index b572a36..44ab926 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -208,10 +208,10 @@ extern rtx default_addr_space_convert (rtx, tree, tree); extern unsigned int default_case_values_threshold (void); extern bool default_have_conditional_execution (void); -extern bool default_libc_has_function (enum function_class); +extern bool default_libc_has_function (enum function_class, tree); extern bool default_libc_has_fast_function (int fcode); -extern bool no_c99_libc_has_function (enum function_class); -extern bool gnu_libc_has_function (enum function_class); +extern bool no_c99_libc_has_function (enum function_class, tree); +extern bool gnu_libc_has_function (enum function_class, tree); extern tree default_builtin_tm_load_store (tree); diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 8423caa..bdbb9d9 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -2176,12 +2176,14 @@ pass_cse_sincos::execute (function *fun) CASE_CFN_COS: CASE_CFN_SIN: CASE_CFN_CEXPI: + arg = gimple_call_arg (stmt, 0); /* Make sure we have either sincos or cexp. 
*/ - if (!targetm.libc_has_function (function_c99_math_complex) - && !targetm.libc_has_function (function_sincos)) + if (!targetm.libc_has_function (function_c99_math_complex, + TREE_TYPE (arg)) + && !targetm.libc_has_function (function_sincos, + TREE_TYPE (arg))) break; - arg = gimple_call_arg (stmt, 0); if (TREE_CODE (arg) == SSA_NAME) cfg_changed |= execute_cse_sincos_1 (arg); break; -- cgit v1.1 From fcc4891d7f3bff1a3f7428f12830bc942989306c Mon Sep 17 00:00:00 2001 From: Paul Thomas Date: Wed, 30 Sep 2020 13:44:39 +0100 Subject: This patch fixes PR97045 - unlimited polymorphic array element selectors. 2020-09-30 Paul Thomas gcc/fortran PR fortran/97045 * trans-array.c (gfc_conv_array_ref): Make sure that the class decl is passed to build_array_ref in the case of unlimited polymorphic entities. * trans-expr.c (gfc_conv_derived_to_class): Ensure that array refs do not precede the _len component. Free the _len expr. * trans-stmt.c (trans_associate_var): Reset 'need_len_assign' for polymorphic scalars. * trans.c (gfc_build_array_ref): When the vptr size is used for span, multiply by the _len field of unlimited polymorphic entities, when non-zero. gcc/testsuite/ PR fortran/97045 * gfortran.dg/select_type_50.f90 : New test.
--- gcc/fortran/trans-array.c | 15 +++++++- gcc/fortran/trans-expr.c | 3 +- gcc/fortran/trans-stmt.c | 1 + gcc/fortran/trans.c | 23 +++++++++++- gcc/testsuite/gfortran.dg/select_type_50.f90 | 52 ++++++++++++++++++++++++++++ 5 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/select_type_50.f90 (limited to 'gcc') diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c index 6566c47..998d4d4 100644 --- a/gcc/fortran/trans-array.c +++ b/gcc/fortran/trans-array.c @@ -3787,7 +3787,20 @@ gfc_conv_array_ref (gfc_se * se, gfc_array_ref * ar, gfc_expr *expr, decl = sym->backend_decl; } else if (sym->ts.type == BT_CLASS) - decl = NULL_TREE; + { + if (UNLIMITED_POLY (sym)) + { + gfc_expr *class_expr = gfc_find_and_cut_at_last_class_ref (expr); + gfc_init_se (&tmpse, NULL); + gfc_conv_expr (&tmpse, class_expr); + if (!se->class_vptr) + se->class_vptr = gfc_class_vptr_get (tmpse.expr); + gfc_free_expr (class_expr); + decl = tmpse.expr; + } + else + decl = NULL_TREE; + } se->expr = build_array_ref (se->expr, offset, decl, se->class_vptr); } diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index a690839..2c31ec9 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -728,7 +728,7 @@ gfc_conv_derived_to_class (gfc_se *parmse, gfc_expr *e, gfc_expr *len; gfc_se se; - len = gfc_copy_expr (e); + len = gfc_find_and_cut_at_last_class_ref (e); gfc_add_len_component (len); gfc_init_se (&se, NULL); gfc_conv_expr (&se, len); @@ -739,6 +739,7 @@ gfc_conv_derived_to_class (gfc_se *parmse, gfc_expr *e, integer_zero_node)); else tmp = se.expr; + gfc_free_expr (len); } else tmp = integer_zero_node; diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index 389fec7..adc6b8f 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -2091,6 +2091,7 @@ trans_associate_var (gfc_symbol *sym, gfc_wrapped_block *block) /* Obtain a temporary class container for the result. 
*/ gfc_conv_derived_to_class (&se, e, sym->ts, tmp, false, false); se.expr = build_fold_indirect_ref_loc (input_location, se.expr); + need_len_assign = false; } else { diff --git a/gcc/fortran/trans.c b/gcc/fortran/trans.c index ed05426..8caa625 100644 --- a/gcc/fortran/trans.c +++ b/gcc/fortran/trans.c @@ -429,7 +429,28 @@ gfc_build_array_ref (tree base, tree offset, tree decl, tree vptr) /* If decl or vptr are non-null, pointer arithmetic for the array reference is likely. Generate the 'span' for the array reference. */ if (vptr) - span = gfc_vptr_size_get (vptr); + { + span = gfc_vptr_size_get (vptr); + + /* Check if this is an unlimited polymorphic object carrying a character + payload. In this case, the 'len' field is non-zero. */ + if (decl && GFC_CLASS_TYPE_P (TREE_TYPE (decl))) + { + tmp = gfc_class_len_or_zero_get (decl); + if (!integer_zerop (tmp)) + { + tree cond; + tree stype = TREE_TYPE (span); + tmp = fold_convert (stype, tmp); + cond = fold_build2_loc (input_location, EQ_EXPR, + logical_type_node, tmp, + build_int_cst (stype, 0)); + tmp = fold_build2 (MULT_EXPR, stype, span, tmp); + span = fold_build3_loc (input_location, COND_EXPR, stype, + cond, span, tmp); + } + } + } else if (decl) span = get_array_span (type, decl); diff --git a/gcc/testsuite/gfortran.dg/select_type_50.f90 b/gcc/testsuite/gfortran.dg/select_type_50.f90 new file mode 100644 index 0000000..aea1c81 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/select_type_50.f90 @@ -0,0 +1,52 @@ +! { dg-do run } +! +! Test the fix for PR97045. The report was for the INTEGER version. Testing +! revealed a further bug with the character versions. +! +! Contributed by Igor Gayday +! 
+program test_prg + implicit none + integer :: i + integer, allocatable :: arr(:, :) + character(kind = 1, len = 2), allocatable :: chr(:, :) + character(kind = 4, len = 2), allocatable :: chr4(:, :) + + arr = reshape ([(i, i = 1, 9)], [3, 3]) + do i = 1, 3 + call write_array(arr(1:2, i), i) + end do + + chr = reshape([(char (i)//char (i+1), i = 65, 83, 2)], [3, 3]) + do i = 1, 3 + call write_array (chr(1:2, i), i) + end do + + chr4 = reshape([(char (i, kind = 4)//char (i+1, kind = 4), i = 65, 83, 2)], & + [3, 3]) + do i = 1, 3 + call write_array (chr4(1:2, i), i) + end do + +contains + + subroutine write_array(array, j) + class(*), intent(in) :: array(:) + integer :: i = 2 + integer :: j, k + + select type (elem => array(i)) + type is (integer) + k = 3*(j-1)+i + if (elem .ne. k) stop 1 + type is (character(kind = 1, len = *)) + k = 63 + 2*(3*(j-1)+i) + if (elem .ne. char (k)//char (k+1)) print *, elem, " ", char (k)//char (k+1) + type is (character(kind = 4, len = *)) + k = 63 + 2*(3*(j-1)+i) + if (elem .ne. char (k, kind = 4)//char (k+1, kind = 4)) stop 3 + end select + + end subroutine + +end program -- cgit v1.1 From 8b0a63e47cd83f4e8534d0d201739bdd10f321a2 Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Wed, 30 Sep 2020 14:59:27 +0200 Subject: OpenMP: Add implicit declare target for nested procedures gcc/ChangeLog: * omp-offload.c (omp_discover_implicit_declare_target): Also handle nested functions. libgomp/ChangeLog: * testsuite/libgomp.fortran/declare-target-3.f90: New test. 
--- gcc/omp-offload.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'gcc') diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c index a89275b..7fb3a72 100644 --- a/gcc/omp-offload.c +++ b/gcc/omp-offload.c @@ -327,11 +327,18 @@ omp_discover_implicit_declare_target (void) FOR_EACH_DEFINED_FUNCTION (node) if (DECL_SAVED_TREE (node->decl)) { + struct cgraph_node *cgn; if (omp_declare_target_fn_p (node->decl)) worklist.safe_push (node->decl); else if (DECL_STRUCT_FUNCTION (node->decl) && DECL_STRUCT_FUNCTION (node->decl)->has_omp_target) worklist.safe_push (node->decl); + for (cgn = node->nested; cgn; cgn = cgn->next_nested) + if (omp_declare_target_fn_p (cgn->decl)) + worklist.safe_push (cgn->decl); + else if (DECL_STRUCT_FUNCTION (cgn->decl) + && DECL_STRUCT_FUNCTION (cgn->decl)->has_omp_target) + worklist.safe_push (cgn->decl); } FOR_EACH_STATIC_INITIALIZER (vnode) if (omp_declare_target_var_p (vnode->decl)) -- cgit v1.1 From 65167982efa4dbb96698d026e6d7e17acb513f0a Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Wed, 30 Sep 2020 15:01:13 +0200 Subject: Fortran: add contiguous check for ptr assignment, fix non-contig check (PR97242) gcc/fortran/ChangeLog: PR fortran/97242 * expr.c (gfc_is_not_contiguous): Fix check. (gfc_check_pointer_assign): Use it. gcc/testsuite/ChangeLog: PR fortran/97242 * gfortran.dg/contiguous_11.f90: New test. * gfortran.dg/contiguous_4.f90: Update. * gfortran.dg/contiguous_7.f90: Update. 
--- gcc/fortran/expr.c | 26 ++++++++++++----- gcc/testsuite/gfortran.dg/contiguous_11.f90 | 45 +++++++++++++++++++++++++++++ gcc/testsuite/gfortran.dg/contiguous_4.f90 | 6 ++-- gcc/testsuite/gfortran.dg/contiguous_7.f90 | 16 ++++++++-- 4 files changed, 82 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/contiguous_11.f90 (limited to 'gcc') diff --git a/gcc/fortran/expr.c b/gcc/fortran/expr.c index 68784a2..b87ae3d 100644 --- a/gcc/fortran/expr.c +++ b/gcc/fortran/expr.c @@ -4366,10 +4366,18 @@ gfc_check_pointer_assign (gfc_expr *lvalue, gfc_expr *rvalue, contiguous. */ if (lhs_attr.contiguous - && lhs_attr.dimension > 0 - && !gfc_is_simply_contiguous (rvalue, false, true)) - gfc_warning (OPT_Wextra, "Assignment to contiguous pointer from " - "non-contiguous target at %L", &rvalue->where); + && lhs_attr.dimension > 0) + { + if (gfc_is_not_contiguous (rvalue)) + { + gfc_error ("Assignment to contiguous pointer from " + "non-contiguous target at %L", &rvalue->where); + return false; + } + if (!gfc_is_simply_contiguous (rvalue, false, true)) + gfc_warning (OPT_Wextra, "Assignment to contiguous pointer from " + "non-contiguous target at %L", &rvalue->where); + } /* Warn if it is the LHS pointer may lives longer than the RHS target. */ if (warn_target_lifetime @@ -5935,7 +5943,7 @@ gfc_is_not_contiguous (gfc_expr *array) { /* Array-ref shall be last ref. */ - if (ar) + if (ar && ar->type != AR_ELEMENT) return true; if (ref->type == REF_ARRAY) @@ -5955,10 +5963,11 @@ gfc_is_not_contiguous (gfc_expr *array) if (gfc_ref_dimen_size (ar, i, &ref_size, NULL)) { - if (gfc_dep_difference (ar->as->lower[i], ar->as->upper[i], &arr_size)) + if (gfc_dep_difference (ar->as->upper[i], ar->as->lower[i], &arr_size)) { /* a(2:4,2:) is known to be non-contiguous, but a(2:4,i:i) can be contiguous. 
*/ + mpz_add_ui (arr_size, arr_size, 1L); if (previous_incomplete && mpz_cmp_si (ref_size, 1) != 0) { mpz_clear (arr_size); @@ -5979,7 +5988,10 @@ gfc_is_not_contiguous (gfc_expr *array) && ar->dimen_type[i] == DIMEN_RANGE && ar->stride[i] && ar->stride[i]->expr_type == EXPR_CONSTANT && mpz_cmp_si (ar->stride[i]->value.integer, 1) != 0) - return true; + { + mpz_clear (ref_size); + return true; + } mpz_clear (ref_size); } diff --git a/gcc/testsuite/gfortran.dg/contiguous_11.f90 b/gcc/testsuite/gfortran.dg/contiguous_11.f90 new file mode 100644 index 0000000..b7eb7bf --- /dev/null +++ b/gcc/testsuite/gfortran.dg/contiguous_11.f90 @@ -0,0 +1,45 @@ +! { dg-do compile } +! +! PR fortran/97242 +! +implicit none +type t + integer, allocatable :: A(:,:,:) + integer :: D(5,5,5) +end type t + +type(t), target :: B(5) +integer, pointer, contiguous :: P(:,:,:) +integer, target :: C(5,5,5) +integer :: i + +i = 1 + +! OK: contiguous +P => B(i)%A +P => B(i)%A(:,:,:) +P => C +P => C(:,:,:) +call foo (B(i)%A) +call foo (B(i)%A(:,:,:)) +call foo (C) +call foo (C(:,:,:)) + +! Invalid - not contiguous +! "If the pointer object has the CONTIGUOUS attribute, the pointer target shall be contiguous." +! → known to be noncontigous (not always checkable, however) +P => B(i)%A(:,::3,::4) ! <<< Unknown as (1:2:3,1:3:4) is contiguous and has one element. +P => B(i)%D(:,::2,::2) ! { dg-error "Assignment to contiguous pointer from non-contiguous target" } +P => C(::2,::2,::2) ! { dg-error "Assignment to contiguous pointer from non-contiguous target" } + +! This following is stricter: +! C1541 The actual argument corresponding to a dummy pointer with the +! CONTIGUOUS attribute shall be simply contiguous (9.5.4). +call foo (B(i)%A(:,::3,::4)) ! { dg-error "must be simply contiguous" } +call foo (C(::2,::2,::2)) ! 
{ dg-error "must be simply contiguous" } + +contains + subroutine foo(Q) + integer, pointer, intent(in), contiguous :: Q(:,:,:) + end subroutine foo +end diff --git a/gcc/testsuite/gfortran.dg/contiguous_4.f90 b/gcc/testsuite/gfortran.dg/contiguous_4.f90 index 874ef8b..e784287 100644 --- a/gcc/testsuite/gfortran.dg/contiguous_4.f90 +++ b/gcc/testsuite/gfortran.dg/contiguous_4.f90 @@ -10,8 +10,10 @@ program cont_01_neg x = (/ (real(i),i=1,45) /) x2 = reshape(x,shape(x2)) - r => x(::3) - r2 => x2(2:,:) + r => x(::46) + r => x(::3) ! { dg-error "Assignment to contiguous pointer from non-contiguous target" } + r2 => x2(2:,9:) + r2 => x2(2:,:) ! { dg-error "Assignment to contiguous pointer from non-contiguous target" } r2 => x2(:,2:3) r => x2(2:3,1) r => x(::1) diff --git a/gcc/testsuite/gfortran.dg/contiguous_7.f90 b/gcc/testsuite/gfortran.dg/contiguous_7.f90 index cccc89f..7444b4c 100644 --- a/gcc/testsuite/gfortran.dg/contiguous_7.f90 +++ b/gcc/testsuite/gfortran.dg/contiguous_7.f90 @@ -8,17 +8,29 @@ program cont_01_neg implicit none real, pointer, contiguous :: r(:) real, pointer, contiguous :: r2(:,:) - real, target :: x(45) - real, target :: x2(5,9) + real, target, allocatable :: x(:) + real, target, allocatable :: x2(:,:) + real, target :: y(45) + real, target :: y2(5,9) integer :: i integer :: n=1 x = (/ (real(i),i=1,45) /) x2 = reshape(x,shape(x2)) + y = x + y2 = x2 + r => x(::3) ! { dg-warning "ssignment to contiguous pointer from non-contiguous target" } r2 => x2(2:,:) ! { dg-warning "ssignment to contiguous pointer from non-contiguous target" } r2 => x2(:,2:3) r => x2(2:3,1) r => x(::1) r => x(::n) ! { dg-warning "ssignment to contiguous pointer from non-contiguous target" } + + r => y(::3) ! { dg-error "ssignment to contiguous pointer from non-contiguous target" } + r2 => y2(2:,:) ! { dg-error "ssignment to contiguous pointer from non-contiguous target" } + r2 => y2(:,2:3) + r => y2(2:3,1) + r => y(::1) + r => y(::n) ! 
{ dg-warning "ssignment to contiguous pointer from non-contiguous target" } end program -- cgit v1.1 From 734eed68537a1a9eed43a4e409da527474fbf63d Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Wed, 30 Sep 2020 06:23:15 -0700 Subject: c++: Kill DECL_HIDDEN_FRIEND_P Now hiddenness is managed by name-lookup, we no longer need DECL_HIDDEN_FRIEND_P. This removes it. Mainly by deleting its bookkeeping, but there are a couple of uses 1) two name lookups look at it to see if they found a hidden thing. In one we have the OVERLOAD, so can record OVL_HIDDEN_P. In the other we're repeating a lookup that failed, but asking for hidden things -- so if that succeeds we know the thing was hidden. (FWIW CWG recently discussed whether template specializations and instantiations should see such hidden templates anyway, there is compiler divergence.) 2) We had a confusing setting of KOENIG_P when building a non-dependent call. We don't repeat that lookup at instantiation time anyway. gcc/cp/ * cp-tree.h (struct lang_decl_fn): Remove hidden_friend_p. (DECL_HIDDEN_FRIEND_P): Delete. * call.c (add_function_candidate): Drop assert about anticipated decl. (build_new_op_1): Drop koenig lookup flagging for hidden friend. * decl.c (duplicate_decls): Drop HIDDEN_FRIEND_P updating. * name-lookup.c (do_pushdecl): Likewise. (set_decl_namespace): Discover hiddenness from OVL_HIDDEN_P. * pt.c (check_explicit_specialization): Record found_hidden explicitly. 
--- gcc/cp/call.c | 10 ---------- gcc/cp/cp-tree.h | 9 +-------- gcc/cp/decl.c | 12 +++--------- gcc/cp/name-lookup.c | 28 ++++++++++++++++++---------- gcc/cp/pt.c | 24 ++++++++++++++---------- 5 files changed, 36 insertions(+), 47 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/call.c b/gcc/cp/call.c index 1e5fffe..dce229c 100644 --- a/gcc/cp/call.c +++ b/gcc/cp/call.c @@ -2220,11 +2220,6 @@ add_function_candidate (struct z_candidate **candidates, int viable = 1; struct rejection_reason *reason = NULL; - /* At this point we should not see any functions which haven't been - explicitly declared, except for friend functions which will have - been found using argument dependent lookup. */ - gcc_assert (!DECL_ANTICIPATED (fn) || DECL_HIDDEN_FRIEND_P (fn)); - /* The `this', `in_chrg' and VTT arguments to constructors are not considered in overload resolution. */ if (DECL_CONSTRUCTOR_P (fn)) @@ -6344,11 +6339,6 @@ build_new_op_1 (const op_location_t &loc, enum tree_code code, int flags, tree call = extract_call_expr (result); CALL_EXPR_OPERATOR_SYNTAX (call) = true; - if (processing_template_decl && DECL_HIDDEN_FRIEND_P (cand->fn)) - /* This prevents build_new_function_call from discarding this - function during instantiation of the enclosing template. */ - KOENIG_LOOKUP_P (call) = 1; - /* Specify evaluation order as per P0145R2. */ CALL_EXPR_ORDERED_ARGS (call) = false; switch (op_is_ordered (code)) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 42d0d76..48a4074 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -2720,14 +2720,13 @@ struct GTY(()) lang_decl_fn { unsigned thunk_p : 1; unsigned this_thunk_p : 1; - unsigned hidden_friend_p : 1; unsigned omp_declare_reduction_p : 1; unsigned has_dependent_explicit_spec_p : 1; unsigned immediate_fn_p : 1; unsigned maybe_deleted : 1; unsigned coroutine_p : 1; - unsigned spare : 9; + unsigned spare : 10; /* 32-bits padding on 64-bit host. 
*/ @@ -4067,12 +4066,6 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter) #define DECL_OMP_PRIVATIZED_MEMBER(NODE) \ (DECL_LANG_SPECIFIC (VAR_DECL_CHECK (NODE))->u.base.anticipated_p) -/* Nonzero if NODE is a FUNCTION_DECL which was declared as a friend - within a class but has not been declared in the surrounding scope. - The function is invisible except via argument dependent lookup. */ -#define DECL_HIDDEN_FRIEND_P(NODE) \ - (LANG_DECL_FN_CHECK (DECL_COMMON_CHECK (NODE))->hidden_friend_p) - /* Nonzero if NODE is an artificial FUNCTION_DECL for #pragma omp declare reduction. */ #define DECL_OMP_DECLARE_REDUCTION_P(NODE) \ diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 617b96e..14742c1 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -2141,10 +2141,7 @@ duplicate_decls (tree newdecl, tree olddecl, bool hiding, bool was_hidden) olddecl_hidden_friend = olddecl_friend && was_hidden; hidden_friend = olddecl_hidden_friend && hiding; if (!hidden_friend) - { - DECL_ANTICIPATED (olddecl) = 0; - DECL_HIDDEN_FRIEND_P (olddecl) = 0; - } + DECL_ANTICIPATED (olddecl) = false; } if (TREE_CODE (newdecl) == TEMPLATE_DECL) @@ -2892,12 +2889,9 @@ duplicate_decls (tree newdecl, tree olddecl, bool hiding, bool was_hidden) DECL_UID (olddecl) = olddecl_uid; if (olddecl_friend) - DECL_FRIEND_P (olddecl) = 1; + DECL_FRIEND_P (olddecl) = true; if (hidden_friend) - { - DECL_ANTICIPATED (olddecl) = 1; - DECL_HIDDEN_FRIEND_P (olddecl) = 1; - } + DECL_ANTICIPATED (olddecl) = true; /* NEWDECL contains the merged attribute lists. Update OLDDECL to be the same. */ diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index bc60d343..8cd6fe3 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -3172,7 +3172,7 @@ do_pushdecl (tree decl, bool hiding) return error_mark_node; } /* Hide it from ordinary lookup. 
*/ - DECL_ANTICIPATED (decl) = DECL_HIDDEN_FRIEND_P (decl) = true; + DECL_ANTICIPATED (decl) = true; } } @@ -4924,8 +4924,15 @@ set_decl_namespace (tree decl, tree scope, bool friendp) /* Since decl is a function, old should contain a function decl. */ if (!OVL_P (old)) - goto not_found; + { + not_found: + /* It didn't work, go back to the explicit scope. */ + DECL_CONTEXT (decl) = FROB_CONTEXT (scope); + error ("%qD should have been declared inside %qD", decl, scope); + return; + } + /* We handle these in check_explicit_instantiation_namespace. */ if (processing_explicit_instantiation) return; @@ -4935,13 +4942,14 @@ set_decl_namespace (tree decl, tree scope, bool friendp) match. But, we'll check later, when we construct the template. */ return; + /* Instantiations or specializations of templates may be declared as friends in any namespace. */ if (friendp && DECL_USE_TEMPLATE (decl)) return; - tree found; - found = NULL_TREE; + tree found = NULL_TREE; + bool hidden_p = false; for (lkp_iterator iter (old); iter; ++iter) { @@ -4957,17 +4965,20 @@ set_decl_namespace (tree decl, tree scope, bool friendp) { if (found) { - /* We found more than one matching declaration. */ + /* We found more than one matching declaration. This + can happen if we have two inline namespace children, + each containing a suitable declaration. */ DECL_CONTEXT (decl) = FROB_CONTEXT (scope); goto ambiguous; } found = ofn; + hidden_p = iter.hidden_p (); } } if (found) { - if (DECL_HIDDEN_FRIEND_P (found)) + if (hidden_p) { pedwarn (DECL_SOURCE_LOCATION (decl), 0, "%qD has not been declared within %qD", decl, scope); @@ -4978,10 +4989,7 @@ set_decl_namespace (tree decl, tree scope, bool friendp) goto found; } - not_found: - /* It didn't work, go back to the explicit scope. */ - DECL_CONTEXT (decl) = FROB_CONTEXT (scope); - error ("%qD should have been declared inside %qD", decl, scope); + goto not_found; } /* Return the namespace where the current declaration is declared. 
*/ diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index a096337..652b458 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -2988,6 +2988,7 @@ check_explicit_specialization (tree declarator, tree tmpl = NULL_TREE; tree targs = NULL_TREE; bool was_template_id = (TREE_CODE (declarator) == TEMPLATE_ID_EXPR); + bool found_hidden = false; /* Make sure that the declarator is a TEMPLATE_ID_EXPR. */ if (!was_template_id) @@ -3008,12 +3009,15 @@ check_explicit_specialization (tree declarator, fns = lookup_qualified_name (CP_DECL_CONTEXT (decl), dname, LOOK_want::NORMAL, true); if (fns == error_mark_node) - /* If lookup fails, look for a friend declaration so we can - give a better diagnostic. */ - fns = (lookup_qualified_name - (CP_DECL_CONTEXT (decl), dname, - LOOK_want::NORMAL | LOOK_want::HIDDEN_FRIEND, - /*complain*/true)); + { + /* If lookup fails, look for a friend declaration so we can + give a better diagnostic. */ + fns = (lookup_qualified_name + (CP_DECL_CONTEXT (decl), dname, + LOOK_want::NORMAL | LOOK_want::HIDDEN_FRIEND, + /*complain*/true)); + found_hidden = true; + } if (fns == error_mark_node || !is_overloaded_fn (fns)) { @@ -3122,8 +3126,7 @@ check_explicit_specialization (tree declarator, return error_mark_node; else { - if (TREE_CODE (decl) == FUNCTION_DECL - && DECL_HIDDEN_FRIEND_P (tmpl)) + if (found_hidden && TREE_CODE (decl) == FUNCTION_DECL) { auto_diagnostic_group d; if (pedwarn (DECL_SOURCE_LOCATION (decl), 0, @@ -3132,8 +3135,9 @@ check_explicit_specialization (tree declarator, inform (DECL_SOURCE_LOCATION (tmpl), "friend declaration here"); } - else if (!ctype && !is_friend - && CP_DECL_CONTEXT (decl) == current_namespace) + + if (!ctype && !is_friend + && CP_DECL_CONTEXT (decl) == current_namespace) check_unqualified_spec_or_inst (tmpl, DECL_SOURCE_LOCATION (decl)); tree gen_tmpl = most_general_template (tmpl); -- cgit v1.1 From aa248b8db9a7594fcc84e52a84d56526d4284ca8 Mon Sep 17 00:00:00 2001 From: Tamar Christina Date: Wed, 30 Sep 2020 14:46:16 +0100 
Subject: middle-end: Refactor refcnt to use SLP_TREE_REF_COUNT for consistency This is a small refactoring which introduces SLP_TREE_REF_COUNT and replaces the uses of refcnt with it. This is for consistency with the other properties. A similar patch was pre-approved last year but since there are more uses now I am sending it for review anyway. gcc/ChangeLog: * tree-vectorizer.h (SLP_TREE_REF_COUNT): New. * tree-vect-slp.c (_slp_tree::_slp_tree, _slp_tree::~_slp_tree, vect_free_slp_tree, vect_build_slp_tree, vect_print_slp_tree, slp_copy_subtree, vect_attempt_slp_rearrange_stmts): Use it. --- gcc/tree-vect-slp.c | 19 ++++++++++--------- gcc/tree-vectorizer.h | 1 + 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 8de2480..e11037e 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -66,7 +66,7 @@ _slp_tree::_slp_tree () SLP_TREE_CODE (this) = ERROR_MARK; SLP_TREE_VECTYPE (this) = NULL_TREE; SLP_TREE_REPRESENTATIVE (this) = NULL; - this->refcnt = 1; + SLP_TREE_REF_COUNT (this) = 1; this->max_nunits = 1; this->lanes = 0; } @@ -92,7 +92,7 @@ vect_free_slp_tree (slp_tree node) int i; slp_tree child; - if (--node->refcnt != 0) + if (--SLP_TREE_REF_COUNT (node) != 0) return; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) @@ -1180,7 +1180,7 @@ vect_build_slp_tree (vec_info *vinfo, *leader ? "" : "failed ", *leader); if (*leader) { - (*leader)->refcnt++; + SLP_TREE_REF_COUNT (*leader)++; vect_update_max_nunits (max_nunits, (*leader)->max_nunits); } return *leader; @@ -1194,7 +1194,7 @@ vect_build_slp_tree (vec_info *vinfo, res->max_nunits = this_max_nunits; vect_update_max_nunits (max_nunits, this_max_nunits); /* Keep a reference for the bst_map use. 
*/ - res->refcnt++; + SLP_TREE_REF_COUNT (res)++; } bst_map->put (stmts.copy (), res); return res; @@ -1590,7 +1590,7 @@ fail: SLP_TREE_CHILDREN (two).safe_splice (children); slp_tree child; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (two), i, child) - child->refcnt++; + SLP_TREE_REF_COUNT (child)++; /* Here we record the original defs since this node represents the final lane configuration. */ @@ -1650,7 +1650,8 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, : (SLP_TREE_DEF_TYPE (node) == vect_constant_def ? " (constant)" : ""), node, - estimated_poly_value (node->max_nunits), node->refcnt); + estimated_poly_value (node->max_nunits), + SLP_TREE_REF_COUNT (node)); if (SLP_TREE_SCALAR_STMTS (node).exists ()) FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt); @@ -1802,7 +1803,7 @@ slp_copy_subtree (slp_tree node, hash_map &map) SLP_TREE_REPRESENTATIVE (copy) = SLP_TREE_REPRESENTATIVE (node); SLP_TREE_LANES (copy) = SLP_TREE_LANES (node); copy->max_nunits = node->max_nunits; - copy->refcnt = 0; + SLP_TREE_REF_COUNT (copy) = 0; if (SLP_TREE_SCALAR_STMTS (node).exists ()) SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy (); if (SLP_TREE_SCALAR_OPS (node).exists ()) @@ -1819,7 +1820,7 @@ slp_copy_subtree (slp_tree node, hash_map &map) FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child) { SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map); - SLP_TREE_CHILDREN (copy)[i]->refcnt++; + SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (copy)[i])++; } return copy; } @@ -1935,7 +1936,7 @@ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn) hash_map map; slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map); vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn)); - unshared->refcnt++; + SLP_TREE_REF_COUNT (unshared)++; SLP_INSTANCE_TREE (slp_instn) = unshared; FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) SLP_INSTANCE_LOADS 
(slp_instn)[i] = *map.get (node); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index e62f1cc..37b0915 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -204,6 +204,7 @@ public: #define SLP_TREE_CHILDREN(S) (S)->children #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts #define SLP_TREE_SCALAR_OPS(S) (S)->ops +#define SLP_TREE_REF_COUNT(S) (S)->refcnt #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts #define SLP_TREE_VEC_DEFS(S) (S)->vec_defs #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size -- cgit v1.1 From 97b798d80baf945ea28236eef3fa69f36626b579 Mon Sep 17 00:00:00 2001 From: Joel Hutton Date: Wed, 30 Sep 2020 15:08:13 +0100 Subject: [SLP][VECT] Add check to fix 96837 The following patch adds a simple check to prevent slp stmts from vector constructors being rearranged. vect_attempt_slp_rearrange_stmts tries to rearrange to avoid a load permutation. This fixes PR target/96837 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96837 gcc/ChangeLog: 2020-09-29 Joel Hutton PR target/96837 * tree-vect-slp.c (vect_analyze_slp): Do not call vect_attempt_slp_rearrange_stmts for vector constructors. gcc/testsuite/ChangeLog: 2020-09-29 Joel Hutton PR target/96837 * gcc.dg/vect/bb-slp-49.c: New test. --- gcc/testsuite/gcc.dg/vect/bb-slp-49.c | 28 ++++++++++++++++++++++++++++ gcc/tree-vect-slp.c | 3 ++- 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-49.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c new file mode 100644 index 0000000..e7101fc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c @@ -0,0 +1,28 @@ +/* This checks that vectorized constructors have the correct ordering. 
*/ +/* { dg-require-effective-target vect_int } */ + +typedef int V __attribute__((__vector_size__(16))); + +__attribute__((__noipa__)) void +foo (unsigned int x, V *y) +{ + unsigned int a[4] = { x + 0, x + 2, x + 4, x + 6 }; + for (unsigned int i = 0; i < 3; ++i) + if (a[i] == 1234) + a[i]--; + *y = (V) { a[3], a[2], a[1], a[0] }; +} + +int +main () +{ + V b; + foo (0, &b); + if (b[0] != 6 || b[1] != 4 || b[2] != 2 || b[3] != 0) + __builtin_abort (); + return 0; +} + +/* See that we vectorize an SLP instance. */ +/* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } */ +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index e11037e..e0614fb 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2503,7 +2503,8 @@ vect_optimize_slp (vec_info *vinfo) /* Reduction (there are no data-refs in the root). In reduction chain the order of the loads is not important. */ if (!STMT_VINFO_DATA_REF (stmt_info) - && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) + && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) + && !SLP_INSTANCE_ROOT_STMT (instance)) vect_attempt_slp_rearrange_stmts (instance); } -- cgit v1.1 From 6bd4ce64eb48a72eca300cb52773e6101d646004 Mon Sep 17 00:00:00 2001 From: Srinath Parvathaneni Date: Wed, 30 Sep 2020 15:19:17 +0100 Subject: [GCC][PATCH] arm: Fix MVE intrinsics polymorphic variants wrongly generating __ARM_undef type (pr96795). Hello, This patch fixes (PR96795) MVE intrinsic polymorphic variants vaddq, vaddq_m, vaddq_x, vcmpeqq_m, vcmpeqq, vcmpgeq_m, vcmpgeq, vcmpgtq_m, vcmpgtq, vcmpleq_m, vcmpleq, vcmpltq_m, vcmpltq, vcmpneq_m, vcmpneq, vfmaq_m, vfmaq, vfmasq_m, vfmasq, vmaxnmavq, vmaxnmavq_p, vmaxnmvq, vmaxnmvq_p, vminnmavq, vminnmavq_p, vminnmvq, vminnmvq_p, vmulq_m, vmulq, vmulq_x, vsetq_lane, vsubq_m, vsubq and vsubq_x which are incorrectly generating __ARM_undef and mismatching the passed floating point scalar arguments. 
Bootstrapped on arm-none-linux-gnueabihf and regression tested on arm-none-eabi and found no regressions. Ok for master? Ok for GCC-10 branch? Regards, Srinath. gcc/ChangeLog: 2020-09-30 Srinath Parvathaneni PR target/96795 * config/arm/arm_mve.h (__ARM_mve_coerce2): Define. (__arm_vaddq): Correct the scalar argument. (__arm_vaddq_m): Likewise. (__arm_vaddq_x): Likewise. (__arm_vcmpeqq_m): Likewise. (__arm_vcmpeqq): Likewise. (__arm_vcmpgeq_m): Likewise. (__arm_vcmpgeq): Likewise. (__arm_vcmpgtq_m): Likewise. (__arm_vcmpgtq): Likewise. (__arm_vcmpleq_m): Likewise. (__arm_vcmpleq): Likewise. (__arm_vcmpltq_m): Likewise. (__arm_vcmpltq): Likewise. (__arm_vcmpneq_m): Likewise. (__arm_vcmpneq): Likewise. (__arm_vfmaq_m): Likewise. (__arm_vfmaq): Likewise. (__arm_vfmasq_m): Likewise. (__arm_vfmasq): Likewise. (__arm_vmaxnmavq): Likewise. (__arm_vmaxnmavq_p): Likewise. (__arm_vmaxnmvq): Likewise. (__arm_vmaxnmvq_p): Likewise. (__arm_vminnmavq): Likewise. (__arm_vminnmavq_p): Likewise. (__arm_vminnmvq): Likewise. (__arm_vminnmvq_p): Likewise. (__arm_vmulq_m): Likewise. (__arm_vmulq): Likewise. (__arm_vmulq_x): Likewise. (__arm_vsetq_lane): Likewise. (__arm_vsubq_m): Likewise. (__arm_vsubq): Likewise. (__arm_vsubq_x): Likewise. gcc/testsuite/ChangeLog: PR target/96795 * gcc.target/arm/mve/intrinsics/mve_fp_vaddq_n.c: New Test. * gcc.target/arm/mve/intrinsics/mve_vaddq_n.c: Likewise. * gcc.target/arm/mve/intrinsics/vaddq_m_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vaddq_m_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vaddq_x_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vaddq_x_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f16-1.c: Likewise. 
* gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpleq_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpleq_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpltq_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpltq_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpneq_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vcmpneq_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmaq_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmaq_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmasq_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vfmasq_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmvq_f16-1.c: Likewise. 
* gcc.target/arm/mve/intrinsics/vmaxnmvq_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_p_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_p_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmvq_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmvq_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmvq_p_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmvq_p_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmulq_m_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmulq_m_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmulq_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmulq_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmulq_x_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vmulq_x_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vsetq_lane_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vsetq_lane_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vsubq_m_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vsubq_m_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vsubq_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vsubq_n_f32-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vsubq_x_n_f16-1.c: Likewise. * gcc.target/arm/mve/intrinsics/vsubq_x_n_f32-1.c: Likewise. 
--- gcc/config/arm/arm_mve.h | 167 +++++++++++---------- .../gcc.target/arm/mve/intrinsics/mve_fp_vaddq_n.c | 47 ++++++ .../gcc.target/arm/mve/intrinsics/mve_vaddq_n.c | 31 ++++ .../arm/mve/intrinsics/vaddq_m_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vaddq_m_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vaddq_x_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vaddq_x_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpeqq_m_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpeqq_m_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpeqq_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpeqq_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpgeq_m_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpgeq_m_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpgeq_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpgeq_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpgtq_m_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpgtq_m_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpgtq_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpgtq_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpleq_m_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpleq_m_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpleq_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpleq_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpltq_m_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpltq_m_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpltq_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpltq_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpneq_m_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpneq_m_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vcmpneq_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vcmpneq_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vfmaq_m_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vfmaq_m_n_f32-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vfmaq_n_f16-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vfmaq_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vfmasq_m_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vfmasq_m_n_f32-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vfmasq_n_f16-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vfmasq_n_f32-1.c | 
12 ++ .../arm/mve/intrinsics/vmaxnmavq_f16-1.c | 12 ++ .../arm/mve/intrinsics/vmaxnmavq_f32-1.c | 12 ++ .../arm/mve/intrinsics/vmaxnmavq_p_f16-1.c | 12 ++ .../arm/mve/intrinsics/vmaxnmavq_p_f32-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vmaxnmvq_f16-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vmaxnmvq_f32-1.c | 12 ++ .../arm/mve/intrinsics/vmaxnmvq_p_f16-1.c | 12 ++ .../arm/mve/intrinsics/vmaxnmvq_p_f32-1.c | 12 ++ .../arm/mve/intrinsics/vminnmavq_f16-1.c | 12 ++ .../arm/mve/intrinsics/vminnmavq_f32-1.c | 12 ++ .../arm/mve/intrinsics/vminnmavq_p_f16-1.c | 12 ++ .../arm/mve/intrinsics/vminnmavq_p_f32-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vminnmvq_f16-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vminnmvq_f32-1.c | 12 ++ .../arm/mve/intrinsics/vminnmvq_p_f16-1.c | 12 ++ .../arm/mve/intrinsics/vminnmvq_p_f32-1.c | 12 ++ .../arm/mve/intrinsics/vmulq_m_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vmulq_m_n_f32-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vmulq_n_f16-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vmulq_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vmulq_x_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vmulq_x_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vsetq_lane_f16-1.c | 13 ++ .../arm/mve/intrinsics/vsetq_lane_f32-1.c | 13 ++ .../arm/mve/intrinsics/vsubq_m_n_f16-1.c | 12 ++ .../arm/mve/intrinsics/vsubq_m_n_f32-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vsubq_n_f16-1.c | 12 ++ .../gcc.target/arm/mve/intrinsics/vsubq_n_f32-1.c | 12 ++ .../arm/mve/intrinsics/vsubq_x_n_f16-1.c | 13 ++ .../arm/mve/intrinsics/vsubq_x_n_f32-1.c | 13 ++ 69 files changed, 959 insertions(+), 82 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_fp_vaddq_n.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vaddq_n.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f32-1.c create mode 100644 
gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_x_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_x_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f32-1.c create mode 100644 
gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f32-1.c create mode 100644 
gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_x_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_x_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vsetq_lane_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vsetq_lane_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_n_f32-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_x_n_f16-1.c create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_x_n_f32-1.c (limited to 'gcc') diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index a801705..99cff41 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -35651,6 +35651,7 @@ enum { short: __ARM_mve_type_int_n, \ int: __ARM_mve_type_int_n, \ long: __ARM_mve_type_int_n, \ + double: __ARM_mve_type_fp_n, \ long long: __ARM_mve_type_int_n, \ unsigned char: __ARM_mve_type_int_n, \ unsigned short: __ARM_mve_type_int_n, \ @@ -35723,6 +35724,8 @@ extern void *__ARM_undef; _Generic(param, type: param, default: *(type *)__ARM_undef) #define __ARM_mve_coerce1(param, type) \ _Generic(param, type: param, const type: param, default: *(type *)__ARM_undef) +#define __ARM_mve_coerce2(param, type) \ + 
_Generic(param, type: param, float16_t: param, float32_t: param, default: *(type *)__ARM_undef) #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ @@ -35939,14 +35942,14 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vaddq_f16 (__ARM_mve_coerce(p0, float16x8_t), __ARM_mve_coerce(p1, float16x8_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vaddq_f32 (__ARM_mve_coerce(p0, float32x4_t), __ARM_mve_coerce(p1, float32x4_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), 
__ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));}) #define __arm_vandq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -35997,8 +36000,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmulq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmulq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \ + int 
(*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ @@ -36029,8 +36032,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpeqq_s8 (__ARM_mve_coerce(__p0, int8x16_t), 
__ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpeqq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpeqq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ @@ -36069,8 +36072,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t), p2), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpeqq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpeqq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));}) #define __arm_vcmpgtq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36083,8 +36086,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgtq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ int 
(*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgtq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));}) #define __arm_vcmpleq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36097,8 +36100,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));}) #define 
__arm_vcmpltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36111,8 +36114,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpltq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpltq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));}) #define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36123,8 +36126,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int 
(*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ @@ -36179,8 +36182,8 @@ extern void *__ARM_undef; #define __arm_vmaxnmavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) #define __arm_vmaxnmq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36191,14 +36194,14 @@ extern void *__ARM_undef; #define __arm_vmaxnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int 
(*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) #define __arm_vmaxnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) #define __arm_vminnmaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36209,8 +36212,8 @@ extern void *__ARM_undef; #define __arm_vminnmavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_f32 
(__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) #define __arm_vbrsrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ @@ -36232,8 +36235,8 @@ extern void *__ARM_undef; #define __arm_vsubq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vsubq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vsubq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vsubq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ @@ -36252,8 +36255,8 @@ extern void *__ARM_undef; #define __arm_vminnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int 
(*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) #define __arm_vshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ @@ -37011,8 +37014,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgtq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), 
__ARM_mve_coerce(__p1, float16x8_t), p2), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgtq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) @@ -37027,8 +37030,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));}) #define __arm_vcmpltq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37041,8 +37044,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \ - int 
(*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));}) #define __arm_vcmpneq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37061,8 +37064,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t), p2), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t), p2), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));}) #define __arm_vcvtbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ 
-37116,8 +37119,8 @@ extern void *__ARM_undef; __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double)), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmaq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmaq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));}) @@ -37132,8 +37135,8 @@ extern void *__ARM_undef; __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t)), \ - int 
(*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t)));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double)));}) #define __arm_vmaxnmaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37156,14 +37159,14 @@ extern void *__ARM_undef; #define __arm_vmaxnmavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_p_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) #define __arm_vmaxnmvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: 
__arm_vmaxnmvq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_p_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) #define __arm_vminnmaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37174,14 +37177,14 @@ extern void *__ARM_undef; #define __arm_vminnmavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_p_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) #define __arm_vminnmvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_p_f16 (__ARM_mve_coerce2(__p0, double), 
__ARM_mve_coerce(__p1, float16x8_t), p2), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) #define __arm_vrndnq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37248,8 +37251,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));}) #define __arm_vrshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37353,8 +37356,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s32 (__ARM_mve_coerce(__p0, 
int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) @@ -37389,8 +37392,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int), p3), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int), p3), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int 
(*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vandq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37531,15 +37534,15 @@ extern void *__ARM_undef; _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, 
float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vfmasq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vfmsq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37580,8 +37583,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmulq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmulq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ int 
(*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37614,8 +37617,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vsubq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vsubq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vsubq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: 
__arm_vsubq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vorrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -38113,8 +38116,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vaddq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vaddq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f32 
(__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vandq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ @@ -38248,8 +38251,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmulq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmulq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vnegq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \ @@ -38337,8 +38340,8 @@ extern void *__ARM_undef; _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vsubq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vsubq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f16 
(__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vcmulq_rot90_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ @@ -38370,8 +38373,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vsetq_lane_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vsetq_lane_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint64x2_t]: __arm_vsetq_lane_u64 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint64x2_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vsetq_lane_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vsetq_lane_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vsetq_lane_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vsetq_lane_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) #else /* MVE Integer. 
*/ @@ -38895,12 +38898,12 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vaddq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, 
int)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int)));}) #define __arm_vandq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_fp_vaddq_n.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_fp_vaddq_n.c new file mode 100644 index 0000000..714fbf9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_fp_vaddq_n.c @@ -0,0 +1,47 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include <arm_mve.h> +int8x16_t foo (int8x16_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} +int16x8_t foo1 (int16x8_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} +int32x4_t foo2 (int32x4_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} +uint8x16_t foo3 (uint8x16_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} +uint16x8_t foo4 (uint16x8_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} +uint32x4_t foo5 (uint32x4_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} +float16x8_t foo6 (float16x8_t a) +{ + return vaddq (a, (float16_t)23.6); +} +float32x4_t foo7 (float32x4_t a) +{ + return vaddq (a, (float32_t)23.46); +} +float16x8_t foo8 (float16x8_t a) +{ + return vaddq (a, 23.6); +} +float32x4_t foo9 (float32x4_t a) +{ + return vaddq (a, 23.46); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vaddq_n.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vaddq_n.c new file mode 100644 index 0000000..baa7fab --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/mve_vaddq_n.c @@ -0,0 +1,31 @@ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* {
dg-additional-options "-O2" } */ + +#include <arm_mve.h> +int8x16_t foo (int8x16_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} +int16x8_t foo1 (int16x8_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} +int32x4_t foo2 (int32x4_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} +uint8x16_t foo3 (uint8x16_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} +uint16x8_t foo4 (uint16x8_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} +uint32x4_t foo5 (uint32x4_t a, int16_t b) +{ + return vaddq (a, (b<<3)); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f16-1.c new file mode 100644 index 0000000..8348098 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vaddq_m (inactive, a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f32-1.c new file mode 100644 index 0000000..c34cc98 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_m_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vaddq_m (inactive, a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_x_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_x_n_f16-1.c new file mode 100644
index 0000000..3bb0167 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_x_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo1 (float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vaddq_x (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_x_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_x_n_f32-1.c new file mode 100644 index 0000000..66dedc7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vaddq_x_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo1 (float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vaddq_x (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f16-1.c new file mode 100644 index 0000000..909ca93 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vcmpeqq_m (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f32-1.c new file mode 100644 index 0000000..8f993af --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target 
arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vcmpeqq_m (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16-1.c new file mode 100644 index 0000000..223cffc --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b) +{ + return vcmpeqq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32-1.c new file mode 100644 index 0000000..81669bd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b) +{ + return vcmpeqq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f16-1.c new file mode 100644 index 0000000..4a4e4b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b, mve_pred16_t p) +{ + 
return vcmpgeq_m (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f32-1.c new file mode 100644 index 0000000..c406a63 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vcmpgeq_m (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16-1.c new file mode 100644 index 0000000..a65ed44 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b) +{ + return vcmpgeq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32-1.c new file mode 100644 index 0000000..2e2fc01 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b) +{ + return vcmpgeq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f16-1.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f16-1.c new file mode 100644 index 0000000..08c91a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vcmpgtq_m (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f32-1.c new file mode 100644 index 0000000..0b74482 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vcmpgtq_m (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16-1.c new file mode 100644 index 0000000..3b2faea --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b) +{ + return vcmpgtq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32-1.c new file mode 100644 index 0000000..16862e0 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b) +{ + return vcmpgtq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f16-1.c new file mode 100644 index 0000000..50e53bd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vcmpleq_m (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f32-1.c new file mode 100644 index 0000000..b16da27 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vcmpleq_m (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f16-1.c new file mode 100644 index 0000000..4a4b973 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options 
arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b) +{ + return vcmpleq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f32-1.c new file mode 100644 index 0000000..8d8f105 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpleq_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b) +{ + return vcmpleq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f16-1.c new file mode 100644 index 0000000..62ab53f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vcmpltq_m (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f32-1.c new file mode 100644 index 0000000..55886fc --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vcmpltq_m (a, 23.23, p); +} + +/* 
{ dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f16-1.c new file mode 100644 index 0000000..cd95dae --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b) +{ + return vcmpltq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f32-1.c new file mode 100644 index 0000000..db76687 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpltq_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b) +{ + return vcmpltq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f16-1.c new file mode 100644 index 0000000..30618e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vcmpneq_m (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f32-1.c new 
file mode 100644 index 0000000..4ecfda6 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vcmpneq_m (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f16-1.c new file mode 100644 index 0000000..75a0090 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float16x8_t a, float16_t b) +{ + return vcmpneq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f32-1.c new file mode 100644 index 0000000..11ae14c --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcmpneq_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +mve_pred16_t +foo1 (float32x4_t a, float32_t b) +{ + return vcmpneq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16-1.c new file mode 100644 index 0000000..e47ae6d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { 
dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p) +{ + return vfmaq_m (a, b, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32-1.c new file mode 100644 index 0000000..78c39f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo1 (float32x4_t a, float32x4_t b, float32_t c, mve_pred16_t p) +{ + return vfmaq_m (a, b, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_n_f16-1.c new file mode 100644 index 0000000..f7867f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16_t c) +{ + return vfmaq (a, b, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_n_f32-1.c new file mode 100644 index 0000000..f0bc45b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmaq_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo1 (float32x4_t a, float32x4_t b, 
float32_t c) +{ + return vfmaq (a, b, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16-1.c new file mode 100644 index 0000000..4750e10 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16_t c, mve_pred16_t p) +{ + return vfmasq_m (a, b, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32-1.c new file mode 100644 index 0000000..4a37971 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo1 (float32x4_t a, float32x4_t b, float32_t c, mve_pred16_t p) +{ + return vfmasq_m (a, b, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_n_f16-1.c new file mode 100644 index 0000000..db82451 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo1 (float16x8_t a, float16x8_t b, float16_t c) +{ + return vfmasq (a, b, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_n_f32-1.c new file mode 100644 index 0000000..12b1410 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vfmasq_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo1 (float32x4_t a, float32x4_t b, float32_t c) +{ + return vfmasq (a, b, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f16-1.c new file mode 100644 index 0000000..7c2349d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16_t +foo1 (float16_t a, float16x8_t b) +{ + return vmaxnmavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f32-1.c new file mode 100644 index 0000000..0deef7948 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32_t +foo1 (float32_t a, float32x4_t b) +{ + return vmaxnmavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16-1.c new file mode 100644 index 0000000..56a7ac0 --- /dev/null +++ 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16_t +foo1 (float16_t a, float16x8_t b, mve_pred16_t p) +{ + return vmaxnmavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32-1.c new file mode 100644 index 0000000..36c10a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32_t +foo1 (float32_t a, float32x4_t b, mve_pred16_t p) +{ + return vmaxnmavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f16-1.c new file mode 100644 index 0000000..f60641f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16_t +foo1 (float16_t a, float16x8_t b) +{ + return vmaxnmvq (23.35, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f32-1.c new file mode 100644 index 0000000..f8c9f44 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { 
dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32_t +foo1 (float32_t a, float32x4_t b) +{ + return vmaxnmvq (34.56, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16-1.c new file mode 100644 index 0000000..96820ec --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16_t +foo1 (float16_t a, float16x8_t b, mve_pred16_t p) +{ + return vmaxnmvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32-1.c new file mode 100644 index 0000000..826ee8f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32_t +foo1 (float32_t a, float32x4_t b, mve_pred16_t p) +{ + return vmaxnmvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f16-1.c new file mode 100644 index 0000000..37d5136 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16_t +foo1 (float16_t a, float16x8_t b) +{ + return vminnmavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ 
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f32-1.c new file mode 100644 index 0000000..78978d0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32_t +foo1 (float32_t a, float32x4_t b) +{ + return vminnmavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f16-1.c new file mode 100644 index 0000000..7170b74 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16_t +foo1 (float16_t a, float16x8_t b, mve_pred16_t p) +{ + return vminnmavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f32-1.c new file mode 100644 index 0000000..0955905 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32_t +foo1 (float32_t a, float32x4_t b, mve_pred16_t p) +{ + return vminnmavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f16-1.c new file mode 100644 index 0000000..132d1a1 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16_t +foo1 (float16_t a, float16x8_t b) +{ + return vminnmvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f32-1.c new file mode 100644 index 0000000..7490907 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32_t +foo1 (float32_t a, float32x4_t b) +{ + return vminnmvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f16-1.c new file mode 100644 index 0000000..c88c3b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16_t +foo1 (float16_t a, float16x8_t b, mve_pred16_t p) +{ + return vminnmvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f32-1.c new file mode 100644 index 0000000..e4db972 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options 
"-O2" } */ + +#include "arm_mve.h" +float32_t +foo1 (float32_t a, float32x4_t b, mve_pred16_t p) +{ + return vminnmvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f16-1.c new file mode 100644 index 0000000..c8222c5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vmulq_m (inactive, a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f32-1.c new file mode 100644 index 0000000..2fae3a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_m_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vmulq_m (inactive, a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_n_f16-1.c new file mode 100644 index 0000000..cef311d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo1 (float16x8_t a, float16_t b) +{ + return vmulq (a, 23.23); +} + 
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_n_f32-1.c new file mode 100644 index 0000000..d6d4b9a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo1 (float32x4_t a, float32_t b) +{ + return vmulq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_x_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_x_n_f16-1.c new file mode 100644 index 0000000..ea4cab0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_x_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo1 (float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vmulq_x (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_x_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_x_n_f32-1.c new file mode 100644 index 0000000..a7a54c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmulq_x_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo1 (float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vmulq_x (a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsetq_lane_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsetq_lane_f16-1.c new 
file mode 100644 index 0000000..608dd30 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsetq_lane_f16-1.c @@ -0,0 +1,13 @@ +/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo (float16_t a, float16x8_t b) +{ + return vsetq_lane (23.26, b, 0); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsetq_lane_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsetq_lane_f32-1.c new file mode 100644 index 0000000..c5f5db7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsetq_lane_f32-1.c @@ -0,0 +1,13 @@ +/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=soft" } {""} } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo (float32_t a, float32x4_t b) +{ + return vsetq_lane (23.34, b, 0); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f16-1.c new file mode 100644 index 0000000..f3e1961 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo1 (float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vsubq_m (inactive, a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f32-1.c 
new file mode 100644 index 0000000..4b5cd90 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_m_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo1 (float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vsubq_m (inactive, a, 23.23, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_n_f16-1.c new file mode 100644 index 0000000..f883254 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_n_f16-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo1 (float16x8_t a, float16_t b) +{ + return vsubq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_n_f32-1.c new file mode 100644 index 0000000..88d9675 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_n_f32-1.c @@ -0,0 +1,12 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo1 (float32x4_t a, float32_t b) +{ + return vsubq (a, 23.23); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_x_n_f16-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_x_n_f16-1.c new file mode 100644 index 0000000..b3a67bb --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_x_n_f16-1.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok 
} */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float16x8_t +foo (float16x8_t a, float16_t b, mve_pred16_t p) +{ + return vsubq_x_n_f16 (a, 23.23, p); +} + + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_x_n_f32-1.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_x_n_f32-1.c new file mode 100644 index 0000000..dcb2425 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsubq_x_n_f32-1.c @@ -0,0 +1,13 @@ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O2" } */ + +#include "arm_mve.h" +float32x4_t +foo (float32x4_t a, float32_t b, mve_pred16_t p) +{ + return vsubq_x_n_f32 (a, 23.23, p); +} + + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ -- cgit v1.1 From b6860cb96d038fe7519797adfb9c3c2e635234de Mon Sep 17 00:00:00 2001 From: Przemyslaw Wirkus Date: Wed, 30 Sep 2020 15:38:49 +0100 Subject: aarch64: add support for Cortex-A78 and Cortex-A78AE This patch introduces support for Cortex-A78 [0] and Cortex-A78AE [1] cpus. [0]: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78 [1]: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78ae OK for master branch ? kind regards Przemyslaw Wirkus gcc/ChangeLog: * config/aarch64/aarch64-cores.def: Add Cortex-A78 and Cortex-A78AE cores. * config/aarch64/aarch64-tune.md: Regenerate. * doc/invoke.texi: Add -mtune=cortex-a78 and -mtune=cortex-a78ae. 
--- gcc/config/aarch64/aarch64-cores.def | 2 ++ gcc/config/aarch64/aarch64-tune.md | 2 +- gcc/doc/invoke.texi | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index e6833bf..ac2a9b4 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -103,6 +103,8 @@ AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) +AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) 
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index fc3e0a5..e060302 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 9a49033..4fd578b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -17552,6 +17552,7 @@ performance of the code. 
Permissible values for this option are: @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75}, @samp{cortex-a76}, @samp{cortex-a76ae}, @samp{cortex-a77}, @samp{cortex-a65}, @samp{cortex-a65ae}, @samp{cortex-a34}, +@samp{cortex-a78}, @samp{cortex-a78ae}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, @samp{neoverse-e1}, @samp{neoverse-n1}, @samp{neoverse-n2}, @samp{neoverse-v1}, @samp{qdf24xx}, @samp{saphira}, -- cgit v1.1 From 60e4b3cade5c63f919df4ddc0f0d23261f968e13 Mon Sep 17 00:00:00 2001 From: Przemyslaw Wirkus Date: Wed, 30 Sep 2020 15:39:24 +0100 Subject: arm: add support for Cortex-A78 and Cortex-A78AE This patch introduces support for Cortex-A78 [0] and Cortex-A78AE [1] cpus. [0]: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78 [1]: https://www.arm.com/products/silicon-ip-cpu/cortex-a/cortex-a78ae OK for master branch ? kind regards Przemyslaw Wirkus gcc/ChangeLog: * config/arm/arm-cpus.in: Add Cortex-A78 and Cortex-A78AE cores. * config/arm/arm-tables.opt: Regenerate. * config/arm/arm-tune.md: Regenerate. * doc/invoke.texi: Update docs. 
--- gcc/config/arm/arm-cpus.in | 22 ++++++++++++++++++++++ gcc/config/arm/arm-tables.opt | 6 ++++++ gcc/config/arm/arm-tune.md | 9 +++++---- gcc/doc/invoke.texi | 1 + 4 files changed, 34 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index 47a343d..d47f943 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -1447,6 +1447,28 @@ begin cpu cortex-a77 part d0d end cpu cortex-a77 +begin cpu cortex-a78 + cname cortexa78 + tune for cortex-a57 + tune flags LDSCHED + architecture armv8.2-a+fp16+dotprod + option crypto add FP_ARMv8 CRYPTO + costs cortex_a57 + vendor 41 + part d41 +end cpu cortex-a78 + +begin cpu cortex-a78ae + cname cortexa78ae + tune for cortex-a57 + tune flags LDSCHED + architecture armv8.2-a+fp16+dotprod + option crypto add FP_ARMv8 CRYPTO + costs cortex_a57 + vendor 41 + part d42 +end cpu cortex-a78ae + begin cpu cortex-x1 cname cortexx1 tune for cortex-a57 diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index dac8818..9f65824 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -241,6 +241,12 @@ EnumValue Enum(processor_type) String(cortex-a77) Value( TARGET_CPU_cortexa77) EnumValue +Enum(processor_type) String(cortex-a78) Value( TARGET_CPU_cortexa78) + +EnumValue +Enum(processor_type) String(cortex-a78ae) Value( TARGET_CPU_cortexa78ae) + +EnumValue Enum(processor_type) String(cortex-x1) Value( TARGET_CPU_cortexx1) EnumValue diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 86a117d..269e627 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -45,8 +45,9 @@ cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35, cortexa73cortexa53,cortexa55,cortexa75, cortexa76,cortexa76ae,cortexa77, - cortexx1,neoversen1,neoversen2, - cortexa75cortexa55,cortexa76cortexa55,neoversev1, - cortexm23,cortexm33,cortexm35p, - cortexm55,cortexr52" + 
cortexa78,cortexa78ae,cortexx1, + neoversen1,neoversen2,cortexa75cortexa55, + cortexa76cortexa55,neoversev1,cortexm23, + cortexm33,cortexm35p,cortexm55, + cortexr52" (const (symbol_ref "((enum attr_tune) arm_tune)"))) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 4fd578b..c049932 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -19395,6 +19395,7 @@ Permissible names are: @samp{arm7tdmi}, @samp{arm7tdmi-s}, @samp{arm710t}, @samp{cortex-a32}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55}, @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75}, @samp{cortex-a76}, @samp{cortex-a76ae}, @samp{cortex-a77}, +@samp{cortex-a78}, @samp{cortex-a78ae}, @samp{ares}, @samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-r5}, @samp{cortex-r7}, @samp{cortex-r8}, @samp{cortex-r52}, @samp{cortex-m0}, @samp{cortex-m0plus}, @samp{cortex-m1}, @samp{cortex-m3}, -- cgit v1.1 From 9ff2bcd9df8f189dcc94e3bef33f7f282dcaa780 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 30 Sep 2020 16:12:20 +0200 Subject: amend SLP reduction testcases This amends SLP reduction testcases that currently trigger vect_attempt_slp_rearrange_stmts eliding load permutations to verify this is actually happening. 2020-09-30 Richard Biener * gcc.dg/vect/pr37027.c: Amend. * gcc.dg/vect/pr67790.c: Likewise. * gcc.dg/vect/pr92324-4.c: Likewise. * gcc.dg/vect/pr92558.c: Likewise. * gcc.dg/vect/pr95495.c: Likewise. * gcc.dg/vect/slp-reduc-1.c: Likewise. * gcc.dg/vect/slp-reduc-2.c: Likewise. * gcc.dg/vect/slp-reduc-3.c: Likewise. * gcc.dg/vect/slp-reduc-4.c: Likewise. * gcc.dg/vect/slp-reduc-5.c: Likewise. * gcc.dg/vect/slp-reduc-7.c: Likewise. * gcc.dg/vect/vect-reduc-in-order-4.c: Likewise. 
--- gcc/testsuite/gcc.dg/vect/pr37027.c | 2 +- gcc/testsuite/gcc.dg/vect/pr67790.c | 1 + gcc/testsuite/gcc.dg/vect/pr92324-4.c | 2 ++ gcc/testsuite/gcc.dg/vect/pr92558.c | 2 ++ gcc/testsuite/gcc.dg/vect/pr95495.c | 2 ++ gcc/testsuite/gcc.dg/vect/slp-reduc-1.c | 2 +- gcc/testsuite/gcc.dg/vect/slp-reduc-2.c | 1 + gcc/testsuite/gcc.dg/vect/slp-reduc-3.c | 1 + gcc/testsuite/gcc.dg/vect/slp-reduc-4.c | 1 + gcc/testsuite/gcc.dg/vect/slp-reduc-5.c | 2 +- gcc/testsuite/gcc.dg/vect/slp-reduc-7.c | 2 +- gcc/testsuite/gcc.dg/vect/vect-reduc-in-order-4.c | 1 + 12 files changed, 15 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/pr37027.c b/gcc/testsuite/gcc.dg/vect/pr37027.c index ef6760e..69f5826 100644 --- a/gcc/testsuite/gcc.dg/vect/pr37027.c +++ b/gcc/testsuite/gcc.dg/vect/pr37027.c @@ -33,4 +33,4 @@ foo (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_add } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_add } } } */ - +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr67790.c b/gcc/testsuite/gcc.dg/vect/pr67790.c index 5e2d506..32eacd9 100644 --- a/gcc/testsuite/gcc.dg/vect/pr67790.c +++ b/gcc/testsuite/gcc.dg/vect/pr67790.c @@ -38,3 +38,4 @@ int main() } /* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr92324-4.c b/gcc/testsuite/gcc.dg/vect/pr92324-4.c index 8347985..57e117c 100644 --- a/gcc/testsuite/gcc.dg/vect/pr92324-4.c +++ b/gcc/testsuite/gcc.dg/vect/pr92324-4.c @@ -28,3 +28,5 @@ int main () __builtin_abort (); return 0; } + +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr92558.c b/gcc/testsuite/gcc.dg/vect/pr92558.c index 1d24fa0..11f4132 100644 --- 
a/gcc/testsuite/gcc.dg/vect/pr92558.c +++ b/gcc/testsuite/gcc.dg/vect/pr92558.c @@ -21,3 +21,5 @@ int main() __builtin_abort (); return 0; } + +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr95495.c b/gcc/testsuite/gcc.dg/vect/pr95495.c index a961aef..683f0f2 100644 --- a/gcc/testsuite/gcc.dg/vect/pr95495.c +++ b/gcc/testsuite/gcc.dg/vect/pr95495.c @@ -14,3 +14,5 @@ h() d += e[f].b >> 1 | e[f].b & 1; } } + +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-1.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-1.c index b353dd7..b9bddb8 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-1.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-1.c @@ -44,4 +44,4 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_add } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_add } } } */ - +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c index 15dd599..aa09d01 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c @@ -41,4 +41,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_add } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_add } } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c index 7358275..4969fe8 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-3.c @@ -60,3 +60,4 @@ int main (void) /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { 
scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_short_mult && { vect_widen_sum_hi_to_si && vect_unpack } } } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_widen_sum_hi_to_si_pattern || { ! vect_unpack } } } } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c index d58e5b0..266b439 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c @@ -58,4 +58,5 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_min_max } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_no_int_min_max || vect_variable_length } } } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c index f457c11..11f5a741 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c @@ -46,4 +46,4 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_int_min_max } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail vect_no_int_min_max } } } */ - +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-7.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-7.c index 43d1cee..05cc9ed 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-7.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-7.c @@ -56,4 +56,4 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_add } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_no_int_add || vect_variable_length } } } } */ - +/* { dg-final { scan-tree-dump-times 
"VEC_PERM_EXPR" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-in-order-4.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-in-order-4.c index 1cc046e..7706a2d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-reduc-in-order-4.c +++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-in-order-4.c @@ -43,3 +43,4 @@ main () /* { dg-final { scan-tree-dump {in-order unchained SLP reductions not supported} "vect" } } */ /* { dg-final { scan-tree-dump-not {vectorizing stmts using SLP} "vect" } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ -- cgit v1.1 From ef11f5b37b0a62dbad9ed37613a3799dc98f6f8b Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Mon, 7 Sep 2020 14:53:38 +0000 Subject: arm: [testsuite] Skip thumb2-cond-cmp tests on Cortex-M [PR94595] Since r204778 (g571880a0a4c512195aa7d41929ba6795190887b2), we favor branches over IT blocks on Cortex-M. As a result, instead of generating two nested IT blocks in thumb2-cond-cmp-[1234].c, we generate either a single IT block, or use branches depending on conditions tested by the program. Since this was a deliberate change and the tests still pass as expected on Cortex-A, this patch skips them when targeting Cortex-M. This avoids the failures on Cortex M3, M4, and M33. This patch makes the testcases unsupported on Cortex-M7 although they pass in this case because this CPU has different branch costs. I tried to relax the scan-assembler directives using eg. cmpne|subne or cmpgt|ble but that seemed fragile. 2020-09-07 Christophe Lyon gcc/testsuite/ PR target/94595 * gcc.target/arm/thumb2-cond-cmp-1.c: Skip if arm_cortex_m. * gcc.target/arm/thumb2-cond-cmp-2.c: Skip if arm_cortex_m. * gcc.target/arm/thumb2-cond-cmp-3.c: Skip if arm_cortex_m. * gcc.target/arm/thumb2-cond-cmp-4.c: Skip if arm_cortex_m. 
--- gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c | 2 +- gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c | 2 +- gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c | 2 +- gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c b/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c index 45ab605..36204f4 100644 --- a/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c +++ b/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-1.c @@ -1,6 +1,6 @@ /* Use conditional compare */ /* { dg-options "-O2" } */ -/* { dg-skip-if "" { arm_thumb1_ok } } */ +/* { dg-skip-if "" { arm_thumb1_ok || arm_cortex_m } } */ /* { dg-final { scan-assembler "cmpne" } } */ int f(int i, int j) diff --git a/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c b/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c index 17d9a8f..108d1c3 100644 --- a/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c +++ b/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-2.c @@ -1,6 +1,6 @@ /* Use conditional compare */ /* { dg-options "-O2" } */ -/* { dg-skip-if "" { arm_thumb1_ok } } */ +/* { dg-skip-if "" { arm_thumb1_ok || arm_cortex_m } } */ /* { dg-final { scan-assembler "cmpeq" } } */ int f(int i, int j) diff --git a/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c b/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c index 6b2a79b..ca7fd9f 100644 --- a/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c +++ b/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-3.c @@ -1,6 +1,6 @@ /* Use conditional compare */ /* { dg-options "-O2" } */ -/* { dg-skip-if "" { arm_thumb1_ok } } */ +/* { dg-skip-if "" { arm_thumb1_ok || arm_cortex_m } } */ /* { dg-final { scan-assembler "cmpgt" } } */ int f(int i, int j) diff --git a/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c b/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c index 80e1076..91cc8f4 100644 --- a/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c +++ 
b/gcc/testsuite/gcc.target/arm/thumb2-cond-cmp-4.c @@ -1,6 +1,6 @@ /* Use conditional compare */ /* { dg-options "-O2" } */ -/* { dg-skip-if "" { arm_thumb1_ok } } */ +/* { dg-skip-if "" { arm_thumb1_ok || arm_cortex_m } } */ /* { dg-final { scan-assembler "cmpgt" } } */ int f(int i, int j) -- cgit v1.1 From 373b99dc40949efa697326f378e5022a02e0328b Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Wed, 30 Sep 2020 08:13:21 -0700 Subject: Add a testcase for PR target/96827 Add a testcase for PR target/96827 which was fixed by r11-3559: commit 97b798d80baf945ea28236eef3fa69f36626b579 Author: Joel Hutton Date: Wed Sep 30 15:08:13 2020 +0100 [SLP][VECT] Add check to fix 96837 PR target/96827 * gcc.target/i386/pr96827.c: New test. --- gcc/testsuite/gcc.target/i386/pr96827.c | 41 +++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr96827.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/i386/pr96827.c b/gcc/testsuite/gcc.target/i386/pr96827.c new file mode 100644 index 0000000..309e9e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr96827.c @@ -0,0 +1,41 @@ +/* { dg-do run { target sse2_runtime } } */ +/* { dg-options "-O3 -msse2 -mfpmath=sse" } */ + +typedef unsigned short int __uint16_t; +typedef unsigned int __uint32_t; +typedef __uint16_t uint16_t; +typedef __uint32_t uint32_t; +typedef int __v4si __attribute__ ((__vector_size__ (16))); +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_si128 (__m128i *__P, __m128i __B) +{ + *__P = __B; +} +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) +{ + return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; +} +typedef uint16_t u16; +typedef uint32_t u32; +extern int printf (const char *__restrict __format, ...); 
+void do_the_thing(u32 idx, __m128i *dude) +{ + u32 dude_[4] = { idx+0, idx+2, idx+4, idx+6 }; + for (u32 i = 0; i < 3; ++i) + if (dude_[i] == 1234) + dude_[i]--; + *dude = _mm_set_epi32(dude_[0], dude_[1], dude_[2], dude_[3]); +} +int main() +{ + __m128i dude; + u32 idx = 0; + do_the_thing(idx, &dude); + __attribute__((aligned(16))) u32 dude_[4]; + _mm_store_si128((__m128i*)dude_, dude); + if (!(6 == dude_[0] && 4 == dude_[1] && 2 == dude_[2] && 0 == dude_[3])) + __builtin_abort (); + return 0; +} -- cgit v1.1 From 1814c828a021adaab7eb98124e9db3a3341a86e3 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Wed, 30 Sep 2020 17:23:55 +0200 Subject: Add trailing dots so length of spec string matches number of arguments. 2020-09-30 Jan Hubicka * trans-io.c (gfc_build_io_library_fndecls): Add trailing dots so length of spec string matches number of arguments. --- gcc/fortran/trans-io.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'gcc') diff --git a/gcc/fortran/trans-io.c b/gcc/fortran/trans-io.c index 21bdd5e..363cca5 100644 --- a/gcc/fortran/trans-io.c +++ b/gcc/fortran/trans-io.c @@ -328,86 +328,86 @@ gfc_build_io_library_fndecls (void) dt_parm_type = build_pointer_type (st_parameter[IOPARM_ptype_dt].type); iocall[IOCALL_X_INTEGER] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_integer")), ".wW", + get_identifier (PREFIX("transfer_integer")), ".wW.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_INTEGER_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_integer_write")), ".wR", + get_identifier (PREFIX("transfer_integer_write")), ".wR.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_LOGICAL] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_logical")), ".wW", + get_identifier (PREFIX("transfer_logical")), ".wW.", 
void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_LOGICAL_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_logical_write")), ".wR", + get_identifier (PREFIX("transfer_logical_write")), ".wR.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_CHARACTER] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_character")), ".wW", + get_identifier (PREFIX("transfer_character")), ".wW.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_charlen_type_node); iocall[IOCALL_X_CHARACTER_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_character_write")), ".wR", + get_identifier (PREFIX("transfer_character_write")), ".wR.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_charlen_type_node); iocall[IOCALL_X_CHARACTER_WIDE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_character_wide")), ".wW", + get_identifier (PREFIX("transfer_character_wide")), ".wW..", void_type_node, 4, dt_parm_type, pvoid_type_node, gfc_charlen_type_node, gfc_int4_type_node); iocall[IOCALL_X_CHARACTER_WIDE_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_character_wide_write")), ".wR", + get_identifier (PREFIX("transfer_character_wide_write")), ".wR..", void_type_node, 4, dt_parm_type, pvoid_type_node, gfc_charlen_type_node, gfc_int4_type_node); iocall[IOCALL_X_REAL] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_real")), ".wW", + get_identifier (PREFIX("transfer_real")), ".wW.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_REAL_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_real_write")), ".wR", + get_identifier (PREFIX("transfer_real_write")), ".wR.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); 
iocall[IOCALL_X_COMPLEX] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_complex")), ".wW", + get_identifier (PREFIX("transfer_complex")), ".wW.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_COMPLEX_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_complex_write")), ".wR", + get_identifier (PREFIX("transfer_complex_write")), ".wR.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); /* Version for __float128. */ iocall[IOCALL_X_REAL128] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_real128")), ".wW", + get_identifier (PREFIX("transfer_real128")), ".wW.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_REAL128_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_real128_write")), ".wR", + get_identifier (PREFIX("transfer_real128_write")), ".wR.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_COMPLEX128] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_complex128")), ".wW", + get_identifier (PREFIX("transfer_complex128")), ".wW.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_COMPLEX128_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_complex128_write")), ".wR", + get_identifier (PREFIX("transfer_complex128_write")), ".wR.", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_ARRAY] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_array")), ".ww", + get_identifier (PREFIX("transfer_array")), ".ww..", void_type_node, 4, dt_parm_type, pvoid_type_node, integer_type_node, gfc_charlen_type_node); iocall[IOCALL_X_ARRAY_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier 
(PREFIX("transfer_array_write")), ".wr", + get_identifier (PREFIX("transfer_array_write")), ".wr..", void_type_node, 4, dt_parm_type, pvoid_type_node, integer_type_node, gfc_charlen_type_node); iocall[IOCALL_X_DERIVED] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_derived")), ".wrR", - void_type_node, 2, dt_parm_type, pvoid_type_node, pchar_type_node); + get_identifier (PREFIX("transfer_derived")), ".wr", + void_type_node, 2, dt_parm_type, pvoid_type_node); /* Library entry points */ @@ -475,18 +475,18 @@ gfc_build_io_library_fndecls (void) void_type_node, 1, dt_parm_type); iocall[IOCALL_SET_NML_VAL] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_set_nml_var")), ".w.R", + get_identifier (PREFIX("st_set_nml_var")), ".w.R...", void_type_node, 6, dt_parm_type, pvoid_type_node, pvoid_type_node, gfc_int4_type_node, gfc_charlen_type_node, get_dtype_type_node()); iocall[IOCALL_SET_NML_DTIO_VAL] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_set_nml_dtio_var")), ".w.R", + get_identifier (PREFIX("st_set_nml_dtio_var")), ".w.R.....", void_type_node, 8, dt_parm_type, pvoid_type_node, pvoid_type_node, gfc_int4_type_node, gfc_charlen_type_node, get_dtype_type_node(), pvoid_type_node, pvoid_type_node); iocall[IOCALL_SET_NML_VAL_DIM] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_set_nml_var_dim")), ".w", + get_identifier (PREFIX("st_set_nml_var_dim")), ".w....", void_type_node, 5, dt_parm_type, gfc_int4_type_node, gfc_array_index_type, gfc_array_index_type, gfc_array_index_type); } -- cgit v1.1 From ecd700c1bc6b4da70a439ac233f1515131c29f86 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Wed, 30 Sep 2020 18:00:42 +0200 Subject: Fix some fnspec strings in trans-decl.c * trans-decl.c (gfc_build_intrinsic_function_decls): Add trailing dots to spec strings so they match the number of parameters; do not use R and W for non-pointer parameters. 
Drop pointless specifier on caf_stop_numeric and caf_get_team. --- gcc/fortran/trans-decl.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'gcc') diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 9224277..2be9df4 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -3484,16 +3484,16 @@ gfc_build_intrinsic_function_decls (void) /* Misc. functions. */ gfor_fndecl_ttynam = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("ttynam")), ".W", + get_identifier (PREFIX("ttynam")), ".W..", void_type_node, 3, pchar_type_node, gfc_charlen_type_node, integer_type_node); gfor_fndecl_fdate = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("fdate")), ".W", + get_identifier (PREFIX("fdate")), ".W.", void_type_node, 2, pchar_type_node, gfc_charlen_type_node); gfor_fndecl_ctime = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("ctime")), ".W", + get_identifier (PREFIX("ctime")), ".W..", void_type_node, 3, pchar_type_node, gfc_charlen_type_node, gfc_int8_type_node); @@ -3662,7 +3662,7 @@ gfc_build_intrinsic_function_decls (void) TREE_NOTHROW (gfor_fndecl_size0) = 1; gfor_fndecl_size1 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("size1")), ".R", + get_identifier (PREFIX("size1")), ".R.", gfc_array_index_type, 2, pvoid_type_node, gfc_array_index_type); DECL_PURE_P (gfor_fndecl_size1) = 1; TREE_NOTHROW (gfor_fndecl_size1) = 1; @@ -3701,7 +3701,7 @@ gfc_build_builtin_function_decls (void) TREE_THIS_VOLATILE (gfor_fndecl_stop_numeric) = 1; gfor_fndecl_stop_string = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("stop_string")), ".R.", + get_identifier (PREFIX("stop_string")), ".R..", void_type_node, 3, pchar_type_node, size_type_node, boolean_type_node); /* STOP doesn't return. 
*/ @@ -3714,7 +3714,7 @@ gfc_build_builtin_function_decls (void) TREE_THIS_VOLATILE (gfor_fndecl_error_stop_numeric) = 1; gfor_fndecl_error_stop_string = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("error_stop_string")), ".R.", + get_identifier (PREFIX("error_stop_string")), ".R..", void_type_node, 3, pchar_type_node, size_type_node, boolean_type_node); /* ERROR STOP doesn't return. */ @@ -3892,15 +3892,15 @@ gfc_build_builtin_function_decls (void) integer_type_node); gfor_fndecl_caf_sync_all = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_sync_all")), ".WW", void_type_node, + get_identifier (PREFIX("caf_sync_all")), ".WW.", void_type_node, 3, pint_type, pchar_type_node, size_type_node); gfor_fndecl_caf_sync_memory = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_sync_memory")), ".WW", void_type_node, + get_identifier (PREFIX("caf_sync_memory")), ".WW.", void_type_node, 3, pint_type, pchar_type_node, size_type_node); gfor_fndecl_caf_sync_images = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_sync_images")), ".RRWW", void_type_node, + get_identifier (PREFIX("caf_sync_images")), "..RWW.", void_type_node, 5, integer_type_node, pint_type, pint_type, pchar_type_node, size_type_node); @@ -3916,8 +3916,8 @@ gfc_build_builtin_function_decls (void) /* CAF's ERROR STOP doesn't return. */ TREE_THIS_VOLATILE (gfor_fndecl_caf_error_stop_str) = 1; - gfor_fndecl_caf_stop_numeric = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_stop_numeric")), ".R.", + gfor_fndecl_caf_stop_numeric = gfc_build_library_function_decl ( + get_identifier (PREFIX("caf_stop_numeric")), void_type_node, 1, integer_type_node); /* CAF's STOP doesn't return. 
*/ TREE_THIS_VOLATILE (gfor_fndecl_caf_stop_numeric) = 1; @@ -4003,8 +4003,8 @@ gfc_build_builtin_function_decls (void) get_identifier (PREFIX("caf_end_team")), void_type_node, 0); gfor_fndecl_caf_get_team - = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_get_team")), "R", + = gfc_build_library_function_decl ( + get_identifier (PREFIX("caf_get_team")), void_type_node, 1, integer_type_node); gfor_fndecl_caf_sync_team -- cgit v1.1 From e808f3fdfa8f31066da19011b55acb8c0446c72d Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Wed, 30 Sep 2020 09:31:29 -0600 Subject: PR c/97206 - ICE in composite_type on declarations of a similar array types gcc/ChangeLog: PR c/97206 * attribs.c (attr_access::array_as_string): Avoid modifying a shared type in place and use build_type_attribute_qual_variant instead. gcc/testsuite/ChangeLog: PR c/97206 * gcc.dg/Warray-parameter-7.c: New test. * gcc.dg/Warray-parameter-8.c: New test. * gcc.dg/Wvla-parameter-5.c: New test. --- gcc/attribs.c | 37 ++++++++++++------------------- gcc/testsuite/gcc.dg/Warray-parameter-7.c | 25 +++++++++++++++++++++ gcc/testsuite/gcc.dg/Warray-parameter-8.c | 36 ++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/Wvla-parameter-5.c | 22 ++++++++++++++++++ 4 files changed, 97 insertions(+), 23 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/Warray-parameter-7.c create mode 100644 gcc/testsuite/gcc.dg/Warray-parameter-8.c create mode 100644 gcc/testsuite/gcc.dg/Wvla-parameter-5.c (limited to 'gcc') diff --git a/gcc/attribs.c b/gcc/attribs.c index abc7536..3f6ec3d 100644 --- a/gcc/attribs.c +++ b/gcc/attribs.c @@ -2256,15 +2256,14 @@ attr_access::array_as_string (tree type) const if (this->str) { - /* For array parameters (but not pointers) create an array type - that corresponds to the form of the parameter including its + /* For array parameters (but not pointers) create a temporary array + type that corresponds to the form of the parameter including its qualifiers even though 
they apply to the pointer, not the array type. */ const bool vla_p = minsize == HOST_WIDE_INT_M1U; tree eltype = TREE_TYPE (type); - tree artype; - tree index_type = NULL_TREE; + if (minsize == HOST_WIDE_INT_M1U) { /* Determine if this is a VLA (an array whose most significant @@ -2278,28 +2277,24 @@ attr_access::array_as_string (tree type) const else if (minsize) index_type = build_index_type (size_int (minsize - 1)); - artype = build_array_type (eltype, index_type); - + tree arat = NULL_TREE; if (static_p || vla_p) { tree flag = static_p ? integer_one_node : NULL_TREE; /* Hack: there's no language-independent way to encode the "static" specifier or the "*" notation in an array type. - Temporarily add an attribute to have the pretty printer add - "static" or "*", and remove it later. The static notation - is only valid in the most significant bound but [*] can be - used for any bound. Because [*] is represented the same as - [0] this hack only works for the most significant bound like - static and the others are rendered as [0]. */ - tree at = tree_cons (get_identifier ("array"), flag, NULL_TREE); - TYPE_ATTRIBUTES (artype) = at; + Add a "fake" attribute to have the pretty-printer add "static" + or "*". The "[static N]" notation is only valid in the most + significant bound but [*] can be used for any bound. Because + [*] is represented the same as [0] this hack only works for + the most significant bound like static and the others are + rendered as [0]. */ + arat = build_tree_list (get_identifier ("array"), flag); } - TYPE_ATOMIC (artype) = TYPE_ATOMIC (type); - TYPE_READONLY (artype) = TYPE_READONLY (type); - TYPE_RESTRICT (artype) = TYPE_RESTRICT (type); - TYPE_VOLATILE (artype) = TYPE_VOLATILE (type); - type = artype; + const int quals = TYPE_QUALS (type); + type = build_array_type (eltype, index_type); + type = build_type_attribute_qual_variant (type, arat, quals); } /* Format the type using the current pretty printer. 
The generic tree @@ -2309,10 +2304,6 @@ attr_access::array_as_string (tree type) const typstr = pp_formatted_text (pp); delete pp; - if (this->str) - /* Remove the attribute that wasn't installed by decl_attributes. */ - TYPE_ATTRIBUTES (type) = NULL_TREE; - return typstr; } diff --git a/gcc/testsuite/gcc.dg/Warray-parameter-7.c b/gcc/testsuite/gcc.dg/Warray-parameter-7.c new file mode 100644 index 0000000..4863045 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Warray-parameter-7.c @@ -0,0 +1,25 @@ +/* PR c/97206 - ICE in composite_type on declarations of a similar array types + { dg-do compile } + { dg-options "-Wall" } */ + +__attribute__((__access__(__write_only__, 1))) void +f1 (char* restrict); + +void f1 (char*); + +char a1[]; +char a1[] = { }; + + +void f2 (char[restrict]); +void f2 (char*); + +char a2[]; +char a2[] = { }; + + +void f3 (char*); +void f3 (char[const]); + +extern const char a3[]; +extern const char a3[1]; diff --git a/gcc/testsuite/gcc.dg/Warray-parameter-8.c b/gcc/testsuite/gcc.dg/Warray-parameter-8.c new file mode 100644 index 0000000..b152702 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Warray-parameter-8.c @@ -0,0 +1,36 @@ +/* Verify that combinations of array type qualifiers render correctly. 
+ { dg-do compile } + { dg-options "-Warray-parameter" } */ + +void fatm (int[_Atomic 1]); // { dg-message "previously declared as 'int\\\[_Atomic 1]" } +void fatm (int[_Atomic 2]); // { dg-warning "argument 1 of type 'int\\\[_Atomic 2]' with mismatched bound" } + + +void fcst (int[const 2]); // { dg-message "previously declared as 'int\\\[const 2]" } +void fcst (int[const 3]); // { dg-warning "argument 1 of type 'int\\\[const 3]' with mismatched bound" } + + +void frst (int[restrict 3]); // { dg-message "previously declared as 'int\\\[restrict 3]" } +void frst (int[restrict 4]); // { dg-warning "argument 1 of type 'int\\\[restrict 4]' with mismatched bound" } + +void fvol (int[volatile 4]); // { dg-message "previously declared as 'int\\\[volatile 4]" } +void fvol (int[volatile 5]); // { dg-warning "argument 1 of type 'int\\\[volatile 5]' with mismatched bound" } + + +void fcr (int[const restrict 1]); // { dg-message "previously declared as 'int\\\[\(const restrict|restrict const\) 1]" } +void fcr (int[restrict volatile 2]); // { dg-warning "argument 1 of type 'int\\\[\(restrict volatile|volatile restrict\) 2]' with mismatched bound" } +void fcr (int[const restrict volatile 3]); // { dg-warning "argument 1 of type 'int\\\[const volatile restrict 3]' with mismatched bound" } + + +extern int n; + +void fcx_n (int [const 1][n]); // { dg-message "previously declared as 'int\\\[const 1]\\\[n]'" "note" } +void fcx_n (int [restrict 2][n]); // { dg-warning "argument 1 of type 'int\\\[restrict 2]\\\[n]' with mismatched bound" } + + +extern int n1, n2; + +/* The mismatch in the array bound should be diagnosed but the mismatch + in the VLA should not be without -Wvla-parameter. 
*/ +void fc3_n1 (int [const 3][n1]); // { dg-message "previously declared as 'int\\\[const 3]\\\[n1]'" "note" } +void fc3_n1 (int [const 5][n2]); // { dg-warning "argument 1 of type 'int\\\[const 5]\\\[n2]' with mismatched bound" } diff --git a/gcc/testsuite/gcc.dg/Wvla-parameter-5.c b/gcc/testsuite/gcc.dg/Wvla-parameter-5.c new file mode 100644 index 0000000..16b40d9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wvla-parameter-5.c @@ -0,0 +1,22 @@ +/* Verify that combinations of array type qualifiers render correctly. + { dg-do compile } + { dg-options "-Wvla-parameter" } */ + +extern int n1, n2; + +void fcx_n1 (int [const][n1]); // { dg-message "previously declared as 'int\\\[const]\\\[n1]' with bound 'n1'" "note" } +void fcx_n1 (int [const][n2]); // { dg-warning "argument 1 of type 'int\\\[const]\\\[n2]' declared with mismatched bound 'n2'" } + +/* The mismatch in the array bound should not be diagnosed without + -Warray-parameter but the mismatch in the VLA should still be + diagnosed. */ +void fc3_n1 (int [const 3][n1]); // { dg-message "previously declared as 'int\\\[const 3]\\\[n1]' with bound 'n1'" "note" } +void fc3_n1 (int [const 5][n2]); // { dg-warning "argument 1 of type 'int\\\[const 5]\\\[n2]' declared with mismatched bound 'n2'" } + + +void frx_n1 (int [restrict][n1]); // { dg-message "previously declared as 'int\\\[restrict]\\\[n1]' with bound 'n1'" "note" } +void frx_n1 (int [restrict][n2]); // { dg-warning "argument 1 of type 'int\\\[restrict]\\\[n2]' declared with mismatched bound 'n2'" } + + +void fvx_n2 (int [volatile][n2]); // { dg-message "previously declared as 'int\\\[volatile]\\\[n2]' with bound 'n2'" "note" } +void fvx_n2 (int [volatile][n1]); // { dg-warning "argument 1 of type 'int\\\[volatile]\\\[n1]' declared with mismatched bound 'n1'" } -- cgit v1.1 From 7dbc7ad524a540e34ce25d120d0968f36c571bbb Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Wed, 30 Sep 2020 12:58:09 -0600 Subject: Avoid assuming a VLA access specification string 
contains a closing bracket (PR middle-end/97189). Resolves: PR middle-end/97189 - ICE on redeclaration of a function with VLA argument and attribute access gcc/ChangeLog: PR middle-end/97189 * attribs.c (attr_access::array_as_string): Avoid assuming a VLA access specification string contains a closing bracket. gcc/c-family/ChangeLog: PR middle-end/97189 * c-attribs.c (append_access_attr): Use the function declaration location for a warning about an attribute access argument. gcc/testsuite/ChangeLog: PR middle-end/97189 * gcc.dg/attr-access-2.c: Adjust caret location. * gcc.dg/Wvla-parameter-6.c: New test. * gcc.dg/Wvla-parameter-7.c: New test. --- gcc/attribs.c | 4 ++-- gcc/c-family/c-attribs.c | 18 ++++++----------- gcc/testsuite/gcc.dg/Wvla-parameter-6.c | 34 +++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/Wvla-parameter-7.c | 36 +++++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/attr-access-2.c | 10 +++++++-- 5 files changed, 86 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/Wvla-parameter-6.c create mode 100644 gcc/testsuite/gcc.dg/Wvla-parameter-7.c (limited to 'gcc') diff --git a/gcc/attribs.c b/gcc/attribs.c index 3f6ec3d..94b9e02 100644 --- a/gcc/attribs.c +++ b/gcc/attribs.c @@ -2270,11 +2270,11 @@ attr_access::array_as_string (tree type) const bound is nonconstant and whose access string has "$]" in it) extract the bound expression from SIZE. 
*/ const char *p = end; - for ( ; *p-- != ']'; ); + for ( ; p != str && *p-- != ']'; ); if (*p == '$') index_type = build_index_type (TREE_VALUE (size)); } - else if (minsize) + else if (minsize) index_type = build_index_type (size_int (minsize - 1)); tree arat = NULL_TREE; diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c index 70b0003..c779d13 100644 --- a/gcc/c-family/c-attribs.c +++ b/gcc/c-family/c-attribs.c @@ -4151,18 +4151,12 @@ append_access_attr (tree node[3], tree attrs, const char *attrstr, "missing in previous designation", attrstr); else if (newa->internal_p || cura->internal_p) - { - /* Mismatch in the value of the size argument and a VLA - bound. */ - location_t argloc = curloc; - if (tree arg = get_argument (node[2], newa->sizarg)) - argloc = DECL_SOURCE_LOCATION (arg); - warned = warning_at (argloc, OPT_Wattributes, - "attribute %qs positional argument 2 " - "conflicts with previous designation " - "by argument %u", - attrstr, cura->sizarg + 1); - } + /* Mismatch in the value of the size argument and a VLA bound. */ + warned = warning_at (curloc, OPT_Wattributes, + "attribute %qs positional argument 2 " + "conflicts with previous designation " + "by argument %u", + attrstr, cura->sizarg + 1); else /* Mismatch in the value of the size argument between two explicit access attributes. */ diff --git a/gcc/testsuite/gcc.dg/Wvla-parameter-6.c b/gcc/testsuite/gcc.dg/Wvla-parameter-6.c new file mode 100644 index 0000000..268aeec --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wvla-parameter-6.c @@ -0,0 +1,34 @@ +/* PR middle-end/97189 - ICE on redeclaration of a function with VLA argument + and attribute access + Also verify the right arguments are underlined in the notes. + { dg-do compile } + { dg-options "-Wall -fdiagnostics-show-caret" } */ + +#define RW(...) 
__attribute__ ((access (read_write, __VA_ARGS__))) + +RW (2, 3) void f1 (int n, int[n], int); +/* { dg-warning "attribute 'access \\(read_write, 2, 3\\)' positional argument 2 conflicts with previous designation by argument 3" "warning" { target *-*-* } .-1 } + { dg-begin-multiline-output "" } + RW (2, 3) void f1 (int n, int[n], int); + ^~ + { dg-end-multiline-output "" } + { dg-message "designating the bound of variable length array argument 2" "note" { target *-*-* } .-6 } + { dg-begin-multiline-output "" } + RW (2, 3) void f1 (int n, int[n], int); + ~~~~^ ~~~~~~ + { dg-end-multiline-output "" } */ + + +RW (2) void f2 (int, int[*], int); +/* { dg-message "previously declared as a variable length array 'int\\\[\\\*]'" "note" { target *-*-* } .-1 } + { dg-begin-multiline-output "" } + RW (2, 3) void f2 (int, int[], int); + ^~~~~ + { dg-end-multiline-output "" } */ + +RW (2, 3) void f2 (int, int[], int); +/* { dg-warning "argument 2 of type 'int\\\[]' declared as an ordinary array" "warning" { target *-*-* } .-1 } + { dg-begin-multiline-output "" } + RW (2) void f2 (int, int[*], int); + ^~~~~~ + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/Wvla-parameter-7.c b/gcc/testsuite/gcc.dg/Wvla-parameter-7.c new file mode 100644 index 0000000..14ce75f --- /dev/null +++ b/gcc/testsuite/gcc.dg/Wvla-parameter-7.c @@ -0,0 +1,36 @@ +/* PR middle-end/97189 - ICE on redeclaration of a function with VLA argument + and attribute access + { dg-do compile } + { dg-options "-Wall" } */ + +#define RW(...) __attribute__ ((access (read_write, __VA_ARGS__))) + +RW (2, 3) void f1 (int n, int[n], int); +/* { dg-warning "attribute 'access \\(read_write, 2, 3\\)' positional argument 2 conflicts with previous designation by argument 3" "warning" { target *-*-* } .-1 } + { dg-message "designating the bound of variable length array argument 2" "note" { target *-*-* } .-2 } */ + +void call_f1 (int *p) +{ + /* Verify that a warning is issued. 
Ideally, it seems the VLA bound + should take precedence over the attribute and the warning would + reference argument 1 but since the conflict in the redeclarations + of the function is already diagnosed don't test that (and let it + be acceptable for this warning to reference argument 3). */ + f1 (-1, p, -1); + // { dg-warning "argument \\d value -1 is negative" "warning" { target *-*-* } .-1 } +} + +RW (2) void f2 (int, int[*], int); +// { dg-message "previously declared as a variable length array 'int\\\[\\\*]'" "note" { target *-*-* } .-1 } +RW (2, 3) void f2 (int, int[], int); +// { dg-warning "argument 2 of type 'int\\\[]' declared as an ordinary array" "warning" { target *-*-* } .-1 } + +void call_f2 (int *p) +{ + f2 (-1, p, 0); + + /* Verify that the attribute access on the redeclaration of f2() takes + precedence over the one on the first declaration. */ + f2 (0, p, -1); + // { dg-warning "argument 3 value -1 is negative" "warning" { target *-*-* } .-1 } +} diff --git a/gcc/testsuite/gcc.dg/attr-access-2.c b/gcc/testsuite/gcc.dg/attr-access-2.c index 7476261..76baddf 100644 --- a/gcc/testsuite/gcc.dg/attr-access-2.c +++ b/gcc/testsuite/gcc.dg/attr-access-2.c @@ -112,5 +112,11 @@ typedef void G1 (int n, int[n], int); G1 g1; -RW (2, 3) void g1 (int n, int[n], int); // { dg-warning "24: attribute 'access *\\\(read_write, 2, 3\\\)' positional argument 2 conflicts with previous designation by argument 3" } -// { dg-message "designating the bound of variable length array argument 2" "note" { target *-*-* } .-1 } +/* The warning is about the attribute positional argument 2 which refers + to the last function argument. Ideally, the caret would be under + the corresponding function argument, i.e., the last one here) but + that location isn't available yet. Verify that the caret doesn't + point to function argument 1 which is the VLA bound (that's what + the caret in the note points to). 
*/ +RW (2, 3) void g1 (int n, int[n], int); // { dg-warning "16: attribute 'access *\\\(read_write, 2, 3\\\)' positional argument 2 conflicts with previous designation by argument 3" } +// { dg-message "24:designating the bound of variable length array argument 2" "note" { target *-*-* } .-1 } -- cgit v1.1 From 660bfe61d4045c7931a7c1c3a166d0a2cd199412 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 1 Oct 2020 00:16:30 +0000 Subject: Daily bump. --- gcc/ChangeLog | 181 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/c-family/ChangeLog | 6 ++ gcc/cp/ChangeLog | 13 ++++ gcc/fortran/ChangeLog | 37 ++++++++++ gcc/testsuite/ChangeLog | 175 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 413 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4df8e96..988351b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,184 @@ +2020-09-30 Martin Sebor + + PR middle-end/97189 + * attribs.c (attr_access::array_as_string): Avoid assuming a VLA + access specification string contains a closing bracket. + +2020-09-30 Martin Sebor + + PR c/97206 + * attribs.c (attr_access::array_as_string): Avoid modifying a shared + type in place and use build_type_attribute_qual_variant instead. + +2020-09-30 Przemyslaw Wirkus + + * config/arm/arm-cpus.in: Add Cortex-A78 and Cortex-A78AE cores. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Regenerate. + * doc/invoke.texi: Update docs. + +2020-09-30 Przemyslaw Wirkus + + * config/aarch64/aarch64-cores.def: Add Cortex-A78 and Cortex-A78AE cores. + * config/aarch64/aarch64-tune.md: Regenerate. + * doc/invoke.texi: Add -mtune=cortex-a78 and -mtune=cortex-a78ae. + +2020-09-30 Srinath Parvathaneni + + PR target/96795 + * config/arm/arm_mve.h (__ARM_mve_coerce2): Define. + (__arm_vaddq): Correct the scalar argument. + (__arm_vaddq_m): Likewise. + (__arm_vaddq_x): Likewise. + (__arm_vcmpeqq_m): Likewise. + (__arm_vcmpeqq): Likewise. 
+ (__arm_vcmpgeq_m): Likewise. + (__arm_vcmpgeq): Likewise. + (__arm_vcmpgtq_m): Likewise. + (__arm_vcmpgtq): Likewise. + (__arm_vcmpleq_m): Likewise. + (__arm_vcmpleq): Likewise. + (__arm_vcmpltq_m): Likewise. + (__arm_vcmpltq): Likewise. + (__arm_vcmpneq_m): Likewise. + (__arm_vcmpneq): Likewise. + (__arm_vfmaq_m): Likewise. + (__arm_vfmaq): Likewise. + (__arm_vfmasq_m): Likewise. + (__arm_vfmasq): Likewise. + (__arm_vmaxnmavq): Likewise. + (__arm_vmaxnmavq_p): Likewise. + (__arm_vmaxnmvq): Likewise. + (__arm_vmaxnmvq_p): Likewise. + (__arm_vminnmavq): Likewise. + (__arm_vminnmavq_p): Likewise. + (__arm_vminnmvq): Likewise. + (__arm_vminnmvq_p): Likewise. + (__arm_vmulq_m): Likewise. + (__arm_vmulq): Likewise. + (__arm_vmulq_x): Likewise. + (__arm_vsetq_lane): Likewise. + (__arm_vsubq_m): Likewise. + (__arm_vsubq): Likewise. + (__arm_vsubq_x): Likewise. + +2020-09-30 Joel Hutton + + PR target/96837 + * tree-vect-slp.c (vect_analyze_slp): Do not call + vect_attempt_slp_rearrange_stmts for vector constructors. + +2020-09-30 Tamar Christina + + * tree-vectorizer.h (SLP_TREE_REF_COUNT): New. + * tree-vect-slp.c (_slp_tree::_slp_tree, _slp_tree::~_slp_tree, + vect_free_slp_tree, vect_build_slp_tree, vect_print_slp_tree, + slp_copy_subtree, vect_attempt_slp_rearrange_stmts): Use it. + +2020-09-30 Tobias Burnus + + * omp-offload.c (omp_discover_implicit_declare_target): Also + handle nested functions. + +2020-09-30 Tobias Burnus + Tom de Vries + + * builtins.c (expand_builtin_cexpi, fold_builtin_sincos): Update + targetm.libc_has_function call. + * builtins.def (DEF_C94_BUILTIN, DEF_C99_BUILTIN, DEF_C11_BUILTIN): + (DEF_C2X_BUILTIN, DEF_C99_COMPL_BUILTIN, DEF_C99_C90RES_BUILTIN): + Same. + * config/darwin-protos.h (darwin_libc_has_function): Update prototype. + * config/darwin.c (darwin_libc_has_function): Add arg. + * config/linux-protos.h (linux_libc_has_function): Update prototype. + * config/linux.c (linux_libc_has_function): Add arg.
+ * config/i386/i386.c (ix86_libc_has_function): Update + targetm.libc_has_function call. + * config/nvptx/nvptx.c (nvptx_libc_has_function): New function. + (TARGET_LIBC_HAS_FUNCTION): Redefine to nvptx_libc_has_function. + * convert.c (convert_to_integer_1): Update targetm.libc_has_function + call. + * match.pd: Same. + * target.def (libc_has_function): Add arg. + * doc/tm.texi: Regenerate. + * targhooks.c (default_libc_has_function, gnu_libc_has_function) + (no_c99_libc_has_function): Add arg. + * targhooks.h (default_libc_has_function, no_c99_libc_has_function) + (gnu_libc_has_function): Update prototype. + * tree-ssa-math-opts.c (pass_cse_sincos::execute): Update + targetm.libc_has_function call. + +2020-09-30 H.J. Lu + + PR target/97184 + * config/i386/i386.md (UNSPECV_MOVDIRI): Renamed to ... + (UNSPEC_MOVDIRI): This. + (UNSPECV_MOVDIR64B): Renamed to ... + (UNSPEC_MOVDIR64B): This. + (movdiri): Use SET operation. + (@movdir64b_): Likewise. + +2020-09-30 Florian Weimer + + * config/i386/i386-c.c (ix86_target_macros_internal): Define + __LAHF_SAHF__ and __MOVBE__ based on ISA flags. + +2020-09-30 Kyrylo Tkachov + + PR target/97150 + * config/aarch64/arm_neon.h (vqrshlb_u8): Make second argument + signed. + (vqrshlh_u16): Likewise. + (vqrshls_u32): Likewise. + (vqrshld_u64): Likewise. + (vqshlb_u8): Likewise. + (vqshlh_u16): Likewise. + (vqshls_u32): Likewise. + (vqshld_u64): Likewise. + (vshld_u64): Likewise. + +2020-09-30 Kyrylo Tkachov + + PR target/96313 + * config/aarch64/aarch64-simd-builtins.def (sqmovun): Use UNOPUS + qualifiers. + * config/aarch64/arm_neon.h (vqmovun_s16): Adjust builtin call. + Remove unnecessary result cast. + (vqmovun_s32): Likewise. + (vqmovun_s64): Likewise. + (vqmovunh_s16): Likewise. Fix return type. + (vqmovuns_s32): Likewise. + (vqmovund_s64): Likewise. + +2020-09-30 Richard Sandiford + + * config/aarch64/aarch64.c (aarch64_split_128bit_move_p): Add a + function comment. Tighten check for FP moves. 
+ * config/aarch64/aarch64.md (*movti_aarch64): Add a w<-Z alternative. + (*movtf_aarch64): Handle r<-Y like r<-r. Remove unnecessary + earlyclobber. Change splitter predicate from aarch64_reg_or_imm + to nonmemory_operand. + +2020-09-30 Alex Coplan + + PR target/97251 + * config/arm/arm.md (movsf): Relax TARGET_HARD_FLOAT to + TARGET_VFP_BASE. + (movdf): Likewise. + * config/arm/vfp.md (no_literal_pool_df_immediate): Likewise. + (no_literal_pool_sf_immediate): Likewise. + +2020-09-30 Alan Modra + + * configure.ac (--with-long-double-format): Typo fix. + * configure: Regenerate. + +2020-09-30 Alan Modra + + * config/rs6000/rs6000.md (@tablejump_normal): Don't use + non-existent operands[]. + (@tablejump_nospec): Likewise. + 2020-09-30 Segher Boessenkool * config/rs6000/rs6000.md (tablejump): Simplify. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index f1815d1..c5ffab1 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20200930 +20201001 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 03ce9ea..1e36632 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,9 @@ +2020-09-30 Martin Sebor + + PR middle-end/97189 + * c-attribs.c (append_access_attr): Use the function declaration + location for a warning about an attribute access argument. + 2020-09-29 Marek Polacek PR c++/94695 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 4dea15f..eeb6f8a 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,16 @@ +2020-09-30 Nathan Sidwell + + * cp-tree.h (struct lang_decl_fn): Remove hidden_friend_p. + (DECL_HIDDEN_FRIEND_P): Delete. + * call.c (add_function_candidate): Drop assert about anticipated + decl. + (build_new_op_1): Drop koenig lookup flagging for hidden friend. + * decl.c (duplicate_decls): Drop HIDDEN_FRIEND_P updating. + * name-lookup.c (do_pushdecl): Likewise. + (set_decl_namespace): Discover hiddenness from OVL_HIDDEN_P. + * pt.c (check_explicit_specialization): Record found_hidden + explicitly. 
+ 2020-09-29 Marek Polacek PR c++/94695 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index faa7897..fc65592 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,40 @@ +2020-09-30 Jan Hubicka + + * trans-decl.c (gfc_build_intrinsic_function_decls): Add trailing dots + to spec strings so they match the number of parameters; do not use + R and W for non-pointer parameters. Drop pointless specifier on + caf_stop_numeric and caf_get_team. + +2020-09-30 Jan Hubicka + + * trans-io.c (gfc_build_io_library_fndecls): Add trailing dots so + length of spec string matches number of arguments. + +2020-09-30 Tobias Burnus + + PR fortran/97242 + * expr.c (gfc_is_not_contiguous): Fix check. + (gfc_check_pointer_assign): Use it. + +2020-09-30 Paul Thomas + + PR fortran/97045 + * trans-array.c (gfc_conv_array_ref): Make sure that the class + decl is passed to build_array_ref in the case of unlimited + polymorphic entities. + * trans-expr.c (gfc_conv_derived_to_class): Ensure that array + refs do not precede the _len component. Free the _len expr. + * trans-stmt.c (trans_associate_var): Reset 'need_len_assign' + for polymorphic scalars. + * trans.c (gfc_build_array_ref): When the vptr size is used for + span, multiply by the _len field of unlimited polymorphic + entities, when non-zero. + +2020-09-30 Tom de Vries + + * f95-lang.c (gfc_init_builtin_functions): Update + targetm.libc_has_function call. + 2020-09-28 Mark Eggleston Revert: diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 1c26589..72508ab 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,178 @@ +2020-09-30 Martin Sebor + + PR middle-end/97189 + * gcc.dg/attr-access-2.c: Adjust caret location. + * gcc.dg/Wvla-parameter-6.c: New test. + * gcc.dg/Wvla-parameter-7.c: New test. + +2020-09-30 Martin Sebor + + PR c/97206 + * gcc.dg/Warray-parameter-7.c: New test. + * gcc.dg/Warray-parameter-8.c: New test. + * gcc.dg/Wvla-parameter-5.c: New test.
+ +2020-09-30 H.J. Lu + + PR target/96827 + * gcc.target/i386/pr96827.c: New test. + +2020-09-30 Christophe Lyon + + PR target/94595 + * gcc.target/arm/thumb2-cond-cmp-1.c: Skip if arm_cortex_m. + * gcc.target/arm/thumb2-cond-cmp-2.c: Skip if arm_cortex_m. + * gcc.target/arm/thumb2-cond-cmp-3.c: Skip if arm_cortex_m. + * gcc.target/arm/thumb2-cond-cmp-4.c: Skip if arm_cortex_m. + +2020-09-30 Richard Biener + + * gcc.dg/vect/pr37027.c: Amend. + * gcc.dg/vect/pr67790.c: Likewise. + * gcc.dg/vect/pr92324-4.c: Likewise. + * gcc.dg/vect/pr92558.c: Likewise. + * gcc.dg/vect/pr95495.c: Likewise. + * gcc.dg/vect/slp-reduc-1.c: Likewise. + * gcc.dg/vect/slp-reduc-2.c: Likewise. + * gcc.dg/vect/slp-reduc-3.c: Likewise. + * gcc.dg/vect/slp-reduc-4.c: Likewise. + * gcc.dg/vect/slp-reduc-5.c: Likewise. + * gcc.dg/vect/slp-reduc-7.c: Likewise. + * gcc.dg/vect/vect-reduc-in-order-4.c: Likewise. + +2020-09-30 Srinath Parvathaneni + + PR target/96795 + * gcc.target/arm/mve/intrinsics/mve_fp_vaddq_n.c: New Test. + * gcc.target/arm/mve/intrinsics/mve_vaddq_n.c: Likewise. + * gcc.target/arm/mve/intrinsics/vaddq_m_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vaddq_m_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vaddq_x_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vaddq_x_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpeqq_m_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpeqq_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpeqq_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpgeq_m_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpgeq_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpgeq_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpgtq_m_n_f32-1.c: Likewise. 
+ * gcc.target/arm/mve/intrinsics/vcmpgtq_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpgtq_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpleq_m_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpleq_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpleq_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpltq_m_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpltq_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpltq_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpneq_m_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpneq_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vcmpneq_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmaq_m_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmaq_m_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmaq_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmaq_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmasq_m_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmasq_m_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmasq_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vfmasq_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmavq_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmavq_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmvq_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmvq_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmavq_f16-1.c: Likewise. 
+ * gcc.target/arm/mve/intrinsics/vminnmavq_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmavq_p_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmavq_p_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmvq_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmvq_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmvq_p_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmvq_p_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmulq_m_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmulq_m_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmulq_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmulq_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmulq_x_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmulq_x_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsetq_lane_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsetq_lane_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsubq_m_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsubq_m_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsubq_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsubq_n_f32-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsubq_x_n_f16-1.c: Likewise. + * gcc.target/arm/mve/intrinsics/vsubq_x_n_f32-1.c: Likewise. + +2020-09-30 Joel Hutton + + PR target/96837 + * gcc.dg/vect/bb-slp-49.c: New test. + +2020-09-30 Tobias Burnus + + PR fortran/97242 + * gfortran.dg/contiguous_11.f90: New test. + * gfortran.dg/contiguous_4.f90: Update. + * gfortran.dg/contiguous_7.f90: Update. + +2020-09-30 Paul Thomas + + PR fortran/97045 + * gfortran.dg/select_type_50.f90 : New test. + +2020-09-30 H.J. Lu + + PR target/97184 + * gcc.target/i386/movdir64b.c: New test. + * gcc.target/i386/movdiri32.c: Likewise. + * gcc.target/i386/movdiri64.c: Likewise. + * lib/target-supports.exp (check_effective_target_movdir): New. 
+ +2020-09-30 Tom de Vries + + * gcc.dg/pr94600-1.c: Use effective target + (non_strict_align || pcc_bitfield_type_matters). + * gcc.dg/pr94600-3.c: Same. + +2020-09-30 Jakub Jelinek + + * gcc.target/i386/amxint8-dpbssd-2.c: Require effective targets + amx_tile and amx_int8. + * gcc.target/i386/amxint8-dpbsud-2.c: Likewise. + * gcc.target/i386/amxint8-dpbusd-2.c: Likewise. + * gcc.target/i386/amxint8-dpbuud-2.c: Likewise. + * gcc.target/i386/amxbf16-dpbf16ps-2.c: Require effective targets + amx_tile and amx_bf16. + * gcc.target/i386/amxtile-2.c: Require effective target amx_tile. + +2020-09-30 Kyrylo Tkachov + + PR target/97150 + * gcc.target/aarch64/pr97150.c: New test. + +2020-09-30 Kyrylo Tkachov + + PR target/96313 + * gcc.target/aarch64/pr96313.c: New test. + * gcc.target/aarch64/scalar_intrinsics.c (test_vqmovunh_s16): + Adjust return type. + (test_vqmovuns_s32): Likewise. + (test_vqmovund_s64): Likewise. + +2020-09-30 Richard Sandiford + + * gcc.target/aarch64/movtf_1.c: New test. + * gcc.target/aarch64/movti_1.c: Likewise. + 2020-09-29 Martin Sebor PR middle-end/97188 -- cgit v1.1 From cf7dae01734eea0dfb4c387e4cd40e1f9a682f56 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Fri, 18 Sep 2020 16:57:34 -0400 Subject: c++: CTAD and explicit deduction guides for copy-list-init [PR90210] This PR points out that we accept template struct tuple { tuple(T); }; // #1 template explicit tuple(T t) -> tuple; // #2 tuple t = { 1 }; despite the 'explicit' deduction guide in a copy-list-initialization context. That's because in deduction_guides_for we first find the user-defined deduction guide (#2), and then ctor_deduction_guides_for creates artificial deduction guides: one from the tuple(T) constructor and a copy guide. So we end up with these three guides: (1) template tuple(T) -> tuple [DECL_NONCONVERTING_P] (2) template tuple(tuple) -> tuple (3) template tuple(T) -> tuple Then, in do_class_deduction, we prune this set, and get rid of (1). 
Then overload resolution selects (3) and we succeed. But [over.match.list]p1 says "In copy-list-initialization, if an explicit constructor is chosen, the initialization is ill-formed." It also goes on to say that this differs from other situations where only converting constructors are considered for copy-initialization. Therefore for list-initialization we consider explicit constructors and complain if one is chosen. E.g. convert_like_internal/ck_user can give an error. So my logic runs that we should not prune the deduction_guides_for guides in a copy-list-initialization context, and only complain if we actually choose an explicit deduction guide. This matches clang++/EDG/msvc++. gcc/cp/ChangeLog: PR c++/90210 * pt.c (do_class_deduction): Don't prune explicit deduction guides in copy-list-initialization. In copy-list-initialization, if an explicit deduction guide was selected, give an error. gcc/testsuite/ChangeLog: PR c++/90210 * g++.dg/cpp1z/class-deduction73.C: New test. --- gcc/cp/pt.c | 49 ++++++++++++++++++++------ gcc/testsuite/g++.dg/cpp1z/class-deduction73.C | 41 +++++++++++++++++++++ 2 files changed, 79 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp1z/class-deduction73.C (limited to 'gcc') diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 652b458..869477f 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -28977,6 +28977,7 @@ do_class_deduction (tree ptype, tree tmpl, tree init, tree type = TREE_TYPE (tmpl); bool try_list_ctor = false; + bool list_init_p = false; releasing_vec rv_args = NULL; vec *&args = *&rv_args; @@ -28984,6 +28985,7 @@ do_class_deduction (tree ptype, tree tmpl, tree init, args = make_tree_vector (); else if (BRACE_ENCLOSED_INITIALIZER_P (init)) { + list_init_p = true; try_list_ctor = TYPE_HAS_LIST_CTOR (type); if (try_list_ctor && CONSTRUCTOR_NELTS (init) == 1) { @@ -29016,9 +29018,10 @@ do_class_deduction (tree ptype, tree tmpl, tree init, if (cands == error_mark_node) return error_mark_node; - /* Prune explicit 
deduction guides in copy-initialization context. */ + /* Prune explicit deduction guides in copy-initialization context (but + not copy-list-initialization). */ bool elided = false; - if (flags & LOOKUP_ONLYCONVERTING) + if (!list_init_p && (flags & LOOKUP_ONLYCONVERTING)) { for (lkp_iterator iter (cands); !elided && iter; ++iter) if (DECL_NONCONVERTING_P (STRIP_TEMPLATE (*iter))) @@ -29087,18 +29090,42 @@ do_class_deduction (tree ptype, tree tmpl, tree init, --cp_unevaluated_operand; } - if (call == error_mark_node - && (complain & tf_warning_or_error)) + if (call == error_mark_node) { - error ("class template argument deduction failed:"); + if (complain & tf_warning_or_error) + { + error ("class template argument deduction failed:"); - ++cp_unevaluated_operand; - call = build_new_function_call (cands, &args, complain | tf_decltype); - --cp_unevaluated_operand; + ++cp_unevaluated_operand; + call = build_new_function_call (cands, &args, + complain | tf_decltype); + --cp_unevaluated_operand; - if (elided) - inform (input_location, "explicit deduction guides not considered " - "for copy-initialization"); + if (elided) + inform (input_location, "explicit deduction guides not considered " + "for copy-initialization"); + } + return error_mark_node; + } + /* [over.match.list]/1: In copy-list-initialization, if an explicit + constructor is chosen, the initialization is ill-formed. */ + else if (flags & LOOKUP_ONLYCONVERTING) + { + tree fndecl = cp_get_callee_fndecl_nofold (call); + if (fndecl && DECL_NONCONVERTING_P (fndecl)) + { + if (complain & tf_warning_or_error) + { + // TODO: Pass down location from cp_finish_decl. 
+ error ("class template argument deduction for %qT failed: " + "explicit deduction guide selected in " + "copy-list-initialization", type); + inform (DECL_SOURCE_LOCATION (fndecl), + "explicit deduction guide declared here"); + + } + return error_mark_node; + } } /* If CTAD succeeded but the type doesn't have any explicit deduction diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction73.C b/gcc/testsuite/g++.dg/cpp1z/class-deduction73.C new file mode 100644 index 0000000..b37dded --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction73.C @@ -0,0 +1,41 @@ +// PR c++/90210 +// { dg-do compile { target c++17 } } + +template struct tuple { tuple(T); }; +template explicit tuple(T t) -> tuple; +tuple t = { 1 }; // { dg-error "explicit deduction guide selected" } +tuple t1 = tuple{ 1 }; +tuple t2{ 1 }; + +template struct A { A(T, T); }; +template explicit A(T, T) -> A; +A a = {1, 1}; // { dg-error "explicit deduction guide selected" } +A a1 = A{1, 1}; +A a2{1, 1}; + +template +struct B { + B(T, U); +}; +template +B(T, U) -> B; // SFINAEd-out +B b = { 1, 2 }; // OK +B b1 = B{ 1, 2 }; // OK +B b2{ 1, 2 }; // OK + +// Overridden implicit default constructor deduction guide: +template +struct C { }; +explicit C() -> C; +C c = {}; // { dg-error "explicit deduction guide selected" } +C c1 = C{}; +C c2{}; + +// Overridden copy guide: +template +struct D { }; +template explicit D(D) -> D; +D d; +D d1 = {d}; // { dg-error "explicit deduction guide selected" } +D d2 = D{d}; +D d3{d}; -- cgit v1.1 From c6be439b37702f6ac4c2fc447c6f3ed1042b80a3 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Mon, 28 Sep 2020 16:23:01 +0930 Subject: [RS6000] -mno-minimal-toc vs. power10 pcrelative We've had this hack in the libgcc config to build libgcc with -mcmodel=small for powerpc64 for a long time. It wouldn't be a bad thing if someone who knows the multilib machinery well could arrange for -mcmodel=small to be passed just for ppc64 when building for earlier than power10. 
But for now, make -mno-minimal-toc do nothing when pcrel. Which will do the right thing for any project that has copied libgcc's trick. We want this if configuring using --with-cpu=power10 to build a power10 pcrel libgcc. --mcmodel=small turns off pcrel. gcc/ * config/rs6000/linux64.h (SUBSUBTARGET_OVERRIDE_OPTIONS): Don't set -mcmodel=small for -mno-minimal-toc when pcrel. libgcc/ * config/rs6000/t-linux: Document purpose of -mno-minimal-toc. --- gcc/config/rs6000/linux64.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h index 2ded330..5c9f8e3 100644 --- a/gcc/config/rs6000/linux64.h +++ b/gcc/config/rs6000/linux64.h @@ -132,20 +132,29 @@ extern int dot_symbols; if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) \ { \ rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ - error ("%<-m64%> requires a PowerPC64 cpu"); \ + error ("%<-m64%> requires a PowerPC64 cpu"); \ } \ + if (!global_options_set.x_rs6000_current_cmodel) \ + SET_CMODEL (CMODEL_MEDIUM); \ if ((rs6000_isa_flags_explicit \ & OPTION_MASK_MINIMAL_TOC) != 0) \ { \ if (global_options_set.x_rs6000_current_cmodel \ && rs6000_current_cmodel != CMODEL_SMALL) \ error ("%<-mcmodel incompatible with other toc options%>"); \ - SET_CMODEL (CMODEL_SMALL); \ + if (TARGET_MINIMAL_TOC) \ + SET_CMODEL (CMODEL_SMALL); \ + else if (TARGET_PCREL \ + || (PCREL_SUPPORTED_BY_OS \ + && (rs6000_isa_flags_explicit \ + & OPTION_MASK_PCREL) == 0)) \ + /* Ignore -mno-minimal-toc. 
*/ \ + ; \ + else \ + SET_CMODEL (CMODEL_SMALL); \ } \ else \ { \ - if (!global_options_set.x_rs6000_current_cmodel) \ - SET_CMODEL (CMODEL_MEDIUM); \ if (rs6000_current_cmodel != CMODEL_SMALL) \ { \ if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \ -- cgit v1.1 From 2dd7b93778d551b6981c8086ecb38e26f677bd2b Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Mon, 28 Sep 2020 16:42:33 +0930 Subject: [RS6000] Adjust gcc asm for power10 Generate assembly with .localentry,1 functions using @notoc calls. This patch makes libgcc.a asm look the same as power10 pcrel as far as toc/notoc is concerned. Otherwise calling between functions that advertise as using the TOC and those that don't, will require linker call stubs in statically linked code. gcc/ * config/rs6000/ppc-asm.h: Support __PCREL__ code. libgcc/ * config/rs6000/morestack.S, * config/rs6000/tramp.S: Support __PCREL__ code. libitm/ * config/powerpc/sjlj.S: Support __PCREL__ code. --- gcc/config/rs6000/ppc-asm.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/ppc-asm.h b/gcc/config/rs6000/ppc-asm.h index 48edc99..e0bce9c 100644 --- a/gcc/config/rs6000/ppc-asm.h +++ b/gcc/config/rs6000/ppc-asm.h @@ -262,6 +262,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see #undef toc #define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name) +#ifdef __PCREL__ +#define JUMP_TARGET(name) GLUE(FUNC_NAME(name),@notoc) +#define FUNC_START(name) \ + .type FUNC_NAME(name),@function; \ + .globl FUNC_NAME(name); \ +FUNC_NAME(name): \ + .localentry FUNC_NAME(name),1 +#else #define JUMP_TARGET(name) FUNC_NAME(name) #define FUNC_START(name) \ .type FUNC_NAME(name),@function; \ @@ -270,6 +278,7 @@ FUNC_NAME(name): \ 0: addis 2,12,(.TOC.-0b)@ha; \ addi 2,2,(.TOC.-0b)@l; \ .localentry FUNC_NAME(name),.-FUNC_NAME(name) +#endif /* !__PCREL__ */ #define HIDDEN_FUNC(name) \ FUNC_START(name) \ -- cgit v1.1 From 2c5499b57cf4a68ebc8decce90d3eb1e281c31a9 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 28 Aug 2020 16:05:56 +0100 Subject: libgo: add 32-bit RISC-V (RV32) support Add support for the 32-bit RISC-V (RV32) ISA matching the 64-bit RISC-V (RV64) port except for async preemption added as a stub only. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/251179 --- gcc/go/gofrontend/MERGE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 314ffd2..8d9fda5 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -9e55baf44ab63ba06af0b57038e7b3aab8216222 +c9c084bce713e258721e12041a351ec8ad33ad17 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. -- cgit v1.1 From 324bec558e95584e8c1997575ae9d75978af59f1 Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Thu, 1 Oct 2020 10:08:24 +0200 Subject: PR target/97250: i386: Add support for x86-64-v2, x86-64-v3, x86-64-v4 levels for x86-64 These micro-architecture levels are defined in the x86-64 psABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9 PTA_NO_TUNE is introduced so that the new processor alias table entries do not affect the CPU tuning setting in ix86_tune. 
The tests depend on the macros added in commit 92e652d8c21bd7e66cbb0f900 ("i386: Define __LAHF_SAHF__ and __MOVBE__ macros, based on ISA flags"). gcc/: PR target/97250 * config/i386/i386.h (PTA_NO_TUNE, PTA_X86_64_BASELINE) (PTA_X86_64_V2, PTA_X86_64_V3, PTA_X86_64_V4): New. * common/config/i386/i386-common.c (processor_alias_table): Add "x86-64-v2", "x86-64-v3", "x86-64-v4". * config/i386/i386-options.c (ix86_option_override_internal): Handle new PTA_NO_TUNE processor table entries. * doc/invoke.texi (x86 Options): Document new -march values. gcc/testsuite/: PR target/97250 * gcc.target/i386/x86-64-v2.c: New test. * gcc.target/i386/x86-64-v3.c: New test. * gcc.target/i386/x86-64-v3-haswell.c: New test. * gcc.target/i386/x86-64-v3-skylake.c: New test. * gcc.target/i386/x86-64-v4.c: New test. --- gcc/common/config/i386/i386-common.c | 10 +- gcc/config/i386/i386-options.c | 29 +++++- gcc/config/i386/i386.h | 11 +- gcc/doc/invoke.texi | 15 ++- gcc/testsuite/gcc.target/i386/x86-64-v2.c | 116 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/x86-64-v3-haswell.c | 18 ++++ gcc/testsuite/gcc.target/i386/x86-64-v3-skylake.c | 21 ++++ gcc/testsuite/gcc.target/i386/x86-64-v3.c | 116 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/x86-64-v4.c | 116 ++++++++++++++++++++++ 9 files changed, 442 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/x86-64-v2.c create mode 100644 gcc/testsuite/gcc.target/i386/x86-64-v3-haswell.c create mode 100644 gcc/testsuite/gcc.target/i386/x86-64-v3-skylake.c create mode 100644 gcc/testsuite/gcc.target/i386/x86-64-v3.c create mode 100644 gcc/testsuite/gcc.target/i386/x86-64-v4.c (limited to 'gcc') diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 1014214..62a620b 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -1795,9 +1795,13 @@ const pta processor_alias_table[] = PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR, 
0, P_NONE}, {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON, PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR, 0, P_NONE}, - {"x86-64", PROCESSOR_K8, CPU_K8, - PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR, - 0, P_NONE}, + {"x86-64", PROCESSOR_K8, CPU_K8, PTA_X86_64_BASELINE, 0, P_NONE}, + {"x86-64-v2", PROCESSOR_K8, CPU_GENERIC, PTA_X86_64_V2 | PTA_NO_TUNE, + 0, P_NONE}, + {"x86-64-v3", PROCESSOR_K8, CPU_GENERIC, PTA_X86_64_V3 | PTA_NO_TUNE, + 0, P_NONE}, + {"x86-64-v4", PROCESSOR_K8, CPU_GENERIC, PTA_X86_64_V4 | PTA_NO_TUNE, + 0, P_NONE}, {"eden-x2", PROCESSOR_K8, CPU_K8, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR, 0, P_NONE}, diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index 597de53..a59bd70 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -2058,10 +2058,27 @@ ix86_option_override_internal (bool main_args_p, return false; } + /* The feature-only micro-architecture levels that use + PTA_NO_TUNE are only defined for the x86-64 psABI. */ + if ((processor_alias_table[i].flags & PTA_NO_TUNE) != 0 + && (!TARGET_64BIT_P (opts->x_ix86_isa_flags) + || opts->x_ix86_abi != SYSV_ABI)) + { + error (G_("%<%s%> architecture level is only defined" + " for the x86-64 psABI"), opts->x_ix86_arch_string); + return false; + } + ix86_schedule = processor_alias_table[i].schedule; ix86_arch = processor_alias_table[i].processor; - /* Default cpu tuning to the architecture. */ - ix86_tune = ix86_arch; + + /* Default cpu tuning to the architecture, unless the table + entry requests not to do this. Used by the x86-64 psABI + micro-architecture levels. 
*/ + if ((processor_alias_table[i].flags & PTA_NO_TUNE) == 0) + ix86_tune = ix86_arch; + else + ix86_tune = PROCESSOR_GENERIC; if (((processor_alias_table[i].flags & PTA_MMX) != 0) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) @@ -2384,7 +2401,8 @@ ix86_option_override_internal (bool main_args_p, ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); for (i = 0; i < pta_size; i++) - if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name)) + if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name) + && (processor_alias_table[i].flags & PTA_NO_TUNE) == 0) { ix86_schedule = processor_alias_table[i].schedule; ix86_tune = processor_alias_table[i].processor; @@ -2428,8 +2446,9 @@ ix86_option_override_internal (bool main_args_p, auto_vec candidates; for (i = 0; i < pta_size; i++) - if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) - || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) + if ((!TARGET_64BIT_P (opts->x_ix86_isa_flags) + || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) + && (processor_alias_table[i].flags & PTA_NO_TUNE) == 0) candidates.safe_push (processor_alias_table[i].name); #ifdef HAVE_LOCAL_CPU_DETECT diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index a449653..9a5de6a 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2433,7 +2433,7 @@ const wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << 40); const wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41); const wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42); const wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43); -/* Hole after PTA_MPX was removed. 
*/ +const wide_int_bitmask PTA_NO_TUNE (HOST_WIDE_INT_1U << 44); const wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45); const wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46); const wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47); @@ -2476,6 +2476,15 @@ const wide_int_bitmask PTA_AMX_TILE(0, HOST_WIDE_INT_1U << 19); const wide_int_bitmask PTA_AMX_INT8(0, HOST_WIDE_INT_1U << 20); const wide_int_bitmask PTA_AMX_BF16(0, HOST_WIDE_INT_1U << 21); +const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE + | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR; +const wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE & (~PTA_NO_SAHF)) + | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3; +const wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2 + | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT + | PTA_MOVBE | PTA_XSAVE; +const wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3 + | PTA_AVX512F | PTA_AVX512BW | PTA_AVX512CD | PTA_AVX512DQ | PTA_AVX512VL; const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR; const wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index c049932..a5ecb1b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -29244,7 +29244,7 @@ Generate instructions for the machine type @var{cpu-type}. In contrast to for the specified @var{cpu-type}, @option{-march=@var{cpu-type}} allows GCC to generate code that may not run at all on processors other than the one indicated. Specifying @option{-march=@var{cpu-type}} implies -@option{-mtune=@var{cpu-type}}. +@option{-mtune=@var{cpu-type}}, except where noted otherwise. The choices for @var{cpu-type} are: @@ -29260,6 +29260,19 @@ of the selected instruction set. @item x86-64 A generic CPU with 64-bit extensions. 
+@item x86-64-v2 +@itemx x86-64-v3 +@itemx x86-64-v4 +These choices for @var{cpu-type} select the corresponding +micro-architecture level from the x86-64 psABI. They are only available +when compiling for an x86-64 target that uses the System V psABI@. + +Since these @var{cpu-type} values do not have a corresponding +@option{-mtune} setting, using @option{-march} with these values enables +generic tuning. Specific tuning can be enabled using the +@option{-mtune=@var{other-cpu-type}} option with an appropriate +@var{other-cpu-type} value. + @item i386 Original Intel i386 CPU@. diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v2.c b/gcc/testsuite/gcc.target/i386/x86-64-v2.c new file mode 100644 index 0000000..0f3df36 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/x86-64-v2.c @@ -0,0 +1,116 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mabi=sysv -march=x86-64-v2" } */ + +/* Verify that the CPU features required by x86-64-v2 are enabled. */ + +#ifndef __MMX__ +# error __MMX__ not defined +#endif +#ifndef __SSE__ +# error __SSE__ not defined +#endif +#ifndef __SSE2__ +# error __SSE2__ not defined +#endif +#ifndef __LAHF_SAHF__ +# error __LAHF_SAHF__ not defined +#endif +#ifndef __POPCNT__ +# error __POPCNT__ not defined +#endif +#ifndef __SSE3__ +# error __SSE3__ not defined +#endif +#ifndef __SSE4_1__ +# error __SSE4_1__ not defined +#endif +#ifndef __SSE4_2__ +# error __SSE4_2__ not defined +#endif +#ifndef __SSSE3__ +# error __SSSE3__ not defined +#endif +#ifdef __SSE4A__ +# error __SSE4A__ defined +#endif +#ifdef __AVX__ +# error __AVX__ defined +#endif +#ifdef __AVX2__ +# error __AVX2__ defined +#endif +#ifdef __F16C__ +# error __F16C__ defined +#endif +#ifdef __FMA__ +# error __FMA__ defined +#endif +#ifdef __LZCNT__ +# error __LZCNT__ defined +#endif +#ifdef __MOVBE__ +# error __MOVBE__ defined +#endif +#ifdef __XSAVE__ +# error __XSAVE__ defined +#endif +#ifdef __XSAVEC__ +# error __XSAVEC__ defined +#endif +#ifdef __AVX512F__ +# error 
__AVX512F__ defined +#endif +#ifdef __AVX512BW__ +# error __AVX512BW__ defined +#endif +#ifdef __AVX512CD__ +# error __AVX512CD__ defined +#endif +#ifdef __AVX512DQ__ +# error __AVX512DQ__ defined +#endif +#ifdef __AVX512VL__ +# error __AVX512VL__ defined +#endif +#ifdef __AVX512PF__ +# error __AVX512PF__ defined +#endif +#ifdef __AVX512VBMI__ +# error __AVX512VBMI__ defined +#endif +#ifdef __AVX512IFMA__ +# error __AVX512IFMA__ defined +#endif +#ifdef __AVX512VNNIW__ +# error __AVX512VNNIW__ defined +#endif +#ifdef __AVX512VBMI2__ +# error __AVX512VBMI2__ defined +#endif +#ifdef __AVX5124FMAPS__ +# error __AVX5124FMAPS__ defined +#endif +#ifdef __AVX5124BITALG__ +# error __AVX5124BITALG__ defined +#endif +#ifdef __AVX5124VPOPCNTDQ__ +# error __AVX5124VPOPCNTDQ__ defined +#endif +#ifdef __AVX5124BF16__ +# error __AVX5124BF16__ defined +#endif +#ifdef __AVX512VP2INTERSECT__ +# error __AVX512VP2INTERSECT__ defined +#endif +#ifdef __AVX512VNNI__ +# error __AVX512VNNI__ defined +#endif +#ifdef __FMA4__ +# error __FMA4__ defined +#endif +#ifdef __3dNOW__ +# error __3dNOW__ defined +#endif +#ifdef __tune_k8__ +# error __tune_k8__ defined +#endif diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v3-haswell.c b/gcc/testsuite/gcc.target/i386/x86-64-v3-haswell.c new file mode 100644 index 0000000..216467e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/x86-64-v3-haswell.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mabi=sysv -mtune=haswell -march=x86-64-v3" } */ + +/* Check that -march=x86-64-v3 preserves tuning. */ + +/* PCLMUL is not in x86-64-v3, but in -march=haswell. Make sure that + it is absent. */ +#ifdef __PCLMUL__ +# error __PCLMUL__ is defined +#endif + +/* -mtune=haswell tuning is preserved. 
*/ +#ifndef __k8__ +# error __k8__ is not defined +#endif +#ifndef __tune_haswell__ +# error __tune_haswell__ is not defined +#endif diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v3-skylake.c b/gcc/testsuite/gcc.target/i386/x86-64-v3-skylake.c new file mode 100644 index 0000000..aa34862 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/x86-64-v3-skylake.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mabi=sysv -march=skylake -march=x86-64-v3" } */ + +/* Check that -march=x86-64-v3 overrides a previous -march= setting. */ + +/* PCLMUL is not in x86-64-v3, but in -march=skylake. Make sure that + it is absent. */ +#ifdef __PCLMUL__ +# error __PCLMUL__ is defined +#endif + +/* -march=skylake tuning is deactivated. */ +#ifndef __k8__ +# error __k8__ is not defined +#endif +#ifdef __skylake__ +# error __skylake__ is defined +#endif +#ifdef __tune_skylake__ +# error __tune_skylake__ is defined +#endif diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v3.c b/gcc/testsuite/gcc.target/i386/x86-64-v3.c new file mode 100644 index 0000000..16a94b180 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/x86-64-v3.c @@ -0,0 +1,116 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mabi=sysv -march=x86-64-v3" } */ + +/* Verify that the CPU features required by x86-64-v3 are enabled. 
*/ + +#ifndef __MMX__ +# error __MMX__ not defined +#endif +#ifndef __SSE__ +# error __SSE__ not defined +#endif +#ifndef __SSE2__ +# error __SSE2__ not defined +#endif +#ifndef __LAHF_SAHF__ +# error __LAHF_SAHF__ not defined +#endif +#ifndef __POPCNT__ +# error __POPCNT__ not defined +#endif +#ifndef __SSE3__ +# error __SSE3__ not defined +#endif +#ifndef __SSE4_1__ +# error __SSE4_1__ not defined +#endif +#ifndef __SSE4_2__ +# error __SSE4_2__ not defined +#endif +#ifndef __SSSE3__ +# error __SSSE3__ not defined +#endif +#ifdef __SSE4A__ +# error __SSE4A__ defined +#endif +#ifndef __AVX__ +# error __AVX__ not defined +#endif +#ifndef __AVX2__ +# error __AVX2__ not defined +#endif +#ifndef __F16C__ +# error __F16C__ not defined +#endif +#ifndef __FMA__ +# error __FMA__ not defined +#endif +#ifndef __LZCNT__ +# error __LZCNT__ not defined +#endif +#ifndef __MOVBE__ +# error __MOVBE__ not defined +#endif +#ifndef __XSAVE__ +# error __XSAVE__ not defined +#endif +#ifdef __XSAVEC__ +# error __XSAVEC__ defined +#endif +#ifdef __AVX512F__ +# error __AVX512F__ defined +#endif +#ifdef __AVX512BW__ +# error __AVX512BW__ defined +#endif +#ifdef __AVX512CD__ +# error __AVX512CD__ defined +#endif +#ifdef __AVX512DQ__ +# error __AVX512DQ__ defined +#endif +#ifdef __AVX512VL__ +# error __AVX512VL__ defined +#endif +#ifdef __AVX512PF__ +# error __AVX512PF__ defined +#endif +#ifdef __AVX512VBMI__ +# error __AVX512VBMI__ defined +#endif +#ifdef __AVX512IFMA__ +# error __AVX512IFMA__ defined +#endif +#ifdef __AVX512VNNIW__ +# error __AVX512VNNIW__ defined +#endif +#ifdef __AVX512VBMI2__ +# error __AVX512VBMI2__ defined +#endif +#ifdef __AVX5124FMAPS__ +# error __AVX5124FMAPS__ defined +#endif +#ifdef __AVX5124BITALG__ +# error __AVX5124BITALG__ defined +#endif +#ifdef __AVX5124VPOPCNTDQ__ +# error __AVX5124VPOPCNTDQ__ defined +#endif +#ifdef __AVX5124BF16__ +# error __AVX5124BF16__ defined +#endif +#ifdef __AVX512VP2INTERSECT__ +# error __AVX512VP2INTERSECT__ defined +#endif 
+#ifdef __AVX512VNNI__ +# error __AVX512VNNI__ defined +#endif +#ifdef __FMA4__ +# error __FMA4__ defined +#endif +#ifdef __3dNOW__ +# error __3dNOW__ defined +#endif +#ifdef __tune_k8__ +# error __tune_k8__ defined +#endif diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v4.c b/gcc/testsuite/gcc.target/i386/x86-64-v4.c new file mode 100644 index 0000000..48e928c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/x86-64-v4.c @@ -0,0 +1,116 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mabi=sysv -march=x86-64-v4" } */ + +/* Verify that the CPU features required by x86-64-v4 are enabled. */ + +#ifndef __MMX__ +# error __MMX__ not defined +#endif +#ifndef __SSE__ +# error __SSE__ not defined +#endif +#ifndef __SSE2__ +# error __SSE2__ not defined +#endif +#ifndef __LAHF_SAHF__ +# error __LAHF_SAHF__ not defined +#endif +#ifndef __POPCNT__ +# error __POPCNT__ not defined +#endif +#ifndef __SSE3__ +# error __SSE3__ not defined +#endif +#ifndef __SSE4_1__ +# error __SSE4_1__ not defined +#endif +#ifndef __SSE4_2__ +# error __SSE4_2__ not defined +#endif +#ifndef __SSSE3__ +# error __SSSE3__ not defined +#endif +#ifdef __SSE4A__ +# error __SSE4A__ defined +#endif +#ifndef __AVX__ +# error __AVX__ not defined +#endif +#ifndef __AVX2__ +# error __AVX2__ not defined +#endif +#ifndef __F16C__ +# error __F16C__ not defined +#endif +#ifndef __FMA__ +# error __FMA__ not defined +#endif +#ifndef __LZCNT__ +# error __LZCNT__ not defined +#endif +#ifndef __MOVBE__ +# error __MOVBE__ not defined +#endif +#ifndef __XSAVE__ +# error __XSAVE__ not defined +#endif +#ifdef __XSAVEC__ +# error __XSAVEC__ defined +#endif +#ifndef __AVX512F__ +# error __AVX512F__ not defined +#endif +#ifndef __AVX512BW__ +# error __AVX512BW__ not defined +#endif +#ifndef __AVX512CD__ +# error __AVX512CD__ not defined +#endif +#ifndef __AVX512DQ__ +# error __AVX512DQ__ not defined +#endif +#ifndef __AVX512VL__ +# error __AVX512VL__ not defined +#endif +#ifdef __AVX512PF__ +# error 
__AVX512PF__ defined +#endif +#ifdef __AVX512VBMI__ +# error __AVX512VBMI__ defined +#endif +#ifdef __AVX512IFMA__ +# error __AVX512IFMA__ defined +#endif +#ifdef __AVX512VNNIW__ +# error __AVX512VNNIW__ defined +#endif +#ifdef __AVX512VBMI2__ +# error __AVX512VBMI2__ defined +#endif +#ifdef __AVX5124FMAPS__ +# error __AVX5124FMAPS__ defined +#endif +#ifdef __AVX5124BITALG__ +# error __AVX5124BITALG__ defined +#endif +#ifdef __AVX5124VPOPCNTDQ__ +# error __AVX5124VPOPCNTDQ__ defined +#endif +#ifdef __AVX5124BF16__ +# error __AVX5124BF16__ defined +#endif +#ifdef __AVX512VP2INTERSECT__ +# error __AVX512VP2INTERSECT__ defined +#endif +#ifdef __AVX512VNNI__ +# error __AVX512VNNI__ defined +#endif +#ifdef __FMA4__ +# error __FMA4__ defined +#endif +#ifdef __3dNOW__ +# error __3dNOW__ defined +#endif +#ifdef __tune_k8__ +# error __tune_k8__ defined +#endif -- cgit v1.1 From 36e691d3a62145fda1f4a1b3143d215cc113c10a Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 1 Oct 2020 09:29:32 +0200 Subject: tree-optimization/97255 - missing vector bool pattern of SRAed bool SRA tends to use VIEW_CONVERT_EXPR when replacing bool fields with unsigned char fields. Those are not handled in vector bool pattern detection causing vector true values to leak. The following fixes this by turning those into b ? 1 : 0 as well. 2020-10-01 Richard Biener * tree-vect-patterns.c (vect_recog_bool_pattern): Also handle VIEW_CONVERT_EXPR. * g++.dg/vect/pr97255.cc: New testcase. 
--- gcc/testsuite/g++.dg/vect/pr97255.cc | 44 ++++++++++++++++++++++++++++++++++++ gcc/tree-vect-patterns.c | 8 +++++-- 2 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/vect/pr97255.cc (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/vect/pr97255.cc b/gcc/testsuite/g++.dg/vect/pr97255.cc new file mode 100644 index 0000000..efb7f53 --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/pr97255.cc @@ -0,0 +1,44 @@ +// { dg-require-effective-target c++11 } +// { dg-additional-options "-O3" } + +template +class Array{ +public: + T& operator[](unsigned x) {return m_arr[x];} +private: + T m_arr[N]; +}; + +int +__attribute__((noipa)) +logicalOr(Array< char, 4 > in1[60], + Array< bool, 4 > out[60]) +{ + for (unsigned k0 = 0u; k0 < 60u; ++k0) { + Array< char, 4 > in1m = in1[k0]; + Array< bool, 4 > x; + for (unsigned k1 = 0u; k1 < 4u; ++k1) { + char in1s = in1m[k1]; + x[k1] = in1s != char(0) || in1s != char(0); + } + out[k0] = x; + } + return out[0][0]; +} + + +int main() +{ + Array< char, 4 > In1[60]{}; + Array< bool, 4 > Out7[60]{}; + + for( int i = 0; i < 60; ++i){ + for( int j = 0; j < 4; ++j){ + In1[i][j] = 240 - i*4 - j; + } + } + + if (logicalOr(In1, Out7) != 1) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index db45740..d626c5f 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -4028,14 +4028,18 @@ vect_recog_bool_pattern (vec_info *vinfo, var = gimple_assign_rhs1 (last_stmt); lhs = gimple_assign_lhs (last_stmt); + rhs_code = gimple_assign_rhs_code (last_stmt); + + if (rhs_code == VIEW_CONVERT_EXPR) + var = TREE_OPERAND (var, 0); if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var))) return NULL; hash_set bool_stmts; - rhs_code = gimple_assign_rhs_code (last_stmt); - if (CONVERT_EXPR_CODE_P (rhs_code)) + if (CONVERT_EXPR_CODE_P (rhs_code) + || rhs_code == VIEW_CONVERT_EXPR) { if (! 
INTEGRAL_TYPE_P (TREE_TYPE (lhs)) || TYPE_PRECISION (TREE_TYPE (lhs)) == 1) -- cgit v1.1 From 85516b71730d8f9401c34407ac3fadf5f1ebfc4e Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 1 Oct 2020 11:04:56 +0200 Subject: s390: Fix up s390_atomic_assign_expand_fenv The following patch fixes -FAIL: gcc.dg/pr94780.c (internal compiler error) -FAIL: gcc.dg/pr94780.c (test for excess errors) -FAIL: gcc.dg/pr94842.c (internal compiler error) -FAIL: gcc.dg/pr94842.c (test for excess errors) on s390x-linux. The fix is essentially the same as has been applied to many other targets (i386, aarch64, arm, rs6000, alpha, riscv). 2020-10-01 Jakub Jelinek * config/s390/s390.c (s390_atomic_assign_expand_fenv): Use TARGET_EXPR instead of MODIFY_EXPR for the first assignments to fenv_var and old_fpc. Formatting fixes. --- gcc/config/s390/s390.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'gcc') diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index c762840..9389430 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -16082,12 +16082,13 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) fenv_var = __builtin_s390_efpc (); __builtin_s390_sfpc (fenv_var & mask) */ - tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc); - tree new_fpc = - build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, - build_int_cst (unsigned_type_node, - ~(FPC_DXC_MASK | FPC_FLAGS_MASK | - FPC_EXCEPTION_MASK))); + tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc, + NULL_TREE, NULL_TREE); + tree new_fpc + = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, + build_int_cst (unsigned_type_node, + ~(FPC_DXC_MASK | FPC_FLAGS_MASK + | FPC_EXCEPTION_MASK))); tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc); *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc); @@ -16106,8 +16107,8 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 
__atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */ old_fpc = create_tmp_var_raw (unsigned_type_node); - tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node, - old_fpc, call_efpc); + tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc, + NULL_TREE, NULL_TREE); set_new_fpc = build_call_expr (sfpc, 1, fenv_var); -- cgit v1.1 From 2805fcb32660bc0cdcd5ba54310f1f02651e039f Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 1 Oct 2020 11:16:44 +0200 Subject: c++: Handle std::construct_at on automatic vars during constant evaluation [PR97195] As mentioned in the PR, we only support due to a bug in constant expressions std::construct_at on non-automatic variables, because we VERIFY_CONSTANT the second argument of placement new, which fails verification if it is an address of an automatic variable. The following patch fixes it by not performing that verification, the placement new evaluation later on will verify it after it is dereferenced. 2020-10-01 Jakub Jelinek PR c++/97195 * constexpr.c (cxx_eval_call_expression): Don't VERIFY_CONSTANT the second argument. * g++.dg/cpp2a/constexpr-new14.C: New test. 
--- gcc/cp/constexpr.c | 3 +- gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C | 73 ++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C (limited to 'gcc') diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c index dacce58..a118f8a 100644 --- a/gcc/cp/constexpr.c +++ b/gcc/cp/constexpr.c @@ -2342,9 +2342,10 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, tree arg = CALL_EXPR_ARG (t, i); arg = cxx_eval_constant_expression (ctx, arg, false, non_constant_p, overflow_p); - VERIFY_CONSTANT (arg); if (i == 1) arg1 = arg; + else + VERIFY_CONSTANT (arg); } gcc_assert (arg1); return arg1; diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C b/gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C new file mode 100644 index 0000000..fd6f607 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-new14.C @@ -0,0 +1,73 @@ +// PR c++/97195 +// { dg-do compile { target c++20 } } + +namespace std +{ + typedef __SIZE_TYPE__ size_t; + + template + struct allocator + { + constexpr allocator () noexcept {} + + constexpr T *allocate (size_t n) + { return static_cast (::operator new (n * sizeof(T))); } + + constexpr void + deallocate (T *p, size_t n) + { ::operator delete (p); } + }; + + template + U __declval (int); + template + T __declval (long); + template + auto declval () noexcept -> decltype (__declval (0)); + + template + struct remove_reference + { typedef T type; }; + template + struct remove_reference + { typedef T type; }; + template + struct remove_reference + { typedef T type; }; + + template + constexpr T && + forward (typename std::remove_reference::type &t) noexcept + { return static_cast (t); } + + template + constexpr T && + forward (typename std::remove_reference::type &&t) noexcept + { return static_cast (t); } + + template + constexpr auto + construct_at (T *l, A &&... 
a) + noexcept (noexcept (::new ((void *) 0) T (std::declval ()...))) + -> decltype (::new ((void *) 0) T (std::declval ()...)) + { return ::new ((void *) l) T (std::forward (a)...); } + + template + constexpr inline void + destroy_at (T *l) + { l->~T (); } +} + +inline void *operator new (std::size_t, void *p) noexcept +{ return p; } + +constexpr bool +foo () +{ + int a = 5; + int *p = std::construct_at (&a, -1); + if (p[0] != -1) + throw 1; + return true; +} +constexpr bool b = foo (); -- cgit v1.1 From 56da736cc6ced0f1c339744321a14ae569db8606 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 1 Oct 2020 11:18:35 +0200 Subject: c++: Fix up default initialization with consteval default ctor [PR96994] > > The following testcase is miscompiled (in particular the a and i > > initialization). The problem is that build_special_member_call due to > > the immediate constructors (but not evaluated in constant expression mode) > > doesn't create a CALL_EXPR, but returns a TARGET_EXPR with CONSTRUCTOR > > as the initializer for it, > > That seems like the bug; at the end of build_over_call, after you > > > call = cxx_constant_value (call, obj_arg); > > You need to build an INIT_EXPR if obj_arg isn't a dummy. That works. obj_arg is NULL if it is a dummy from the earlier code. 2020-10-01 Jakub Jelinek PR c++/96994 * call.c (build_over_call): If obj_arg is non-NULL, return INIT_EXPR setting obj_arg to call. * g++.dg/cpp2a/consteval18.C: New test. 
--- gcc/cp/call.c | 2 ++ gcc/testsuite/g++.dg/cpp2a/consteval18.C | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp2a/consteval18.C (limited to 'gcc') diff --git a/gcc/cp/call.c b/gcc/cp/call.c index dce229c..d67e8fe 100644 --- a/gcc/cp/call.c +++ b/gcc/cp/call.c @@ -9212,6 +9212,8 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) } } call = cxx_constant_value (call, obj_arg); + if (obj_arg && !error_operand_p (call)) + call = build2 (INIT_EXPR, void_type_node, obj_arg, call); } } return call; diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval18.C b/gcc/testsuite/g++.dg/cpp2a/consteval18.C new file mode 100644 index 0000000..586fede --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/consteval18.C @@ -0,0 +1,26 @@ +// PR c++/96994 +// { dg-do run { target c++20 } } + +struct A { consteval A () { i = 1; } consteval A (int x) : i (x) {} int i = 0; }; +struct B { constexpr B () { i = 1; } constexpr B (int x) : i (x) {} int i = 0; }; +A const a; +constexpr A b; +B const c; +A const constinit d; +A const e = 2; +constexpr A f = 3; +B const g = 4; +A const constinit h = 5; +A i; +B j; +A k = 6; +B l = 7; +static_assert (b.i == 1 && f.i == 3); + +int +main() +{ + if (a.i != 1 || c.i != 1 || d.i != 1 || e.i != 2 || g.i != 4 || h.i != 5 + || i.i != 1 || j.i != 1 || k.i != 6 || l.i != 7) + __builtin_abort (); +} -- cgit v1.1 From 8d268d75ad74772a7e97b86c72da0b5906d8c4d7 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Thu, 1 Oct 2020 11:08:58 +0200 Subject: [testsuite] Enable pr94600-{1,3}.c tests for nvptx When compiling test-case pr94600-1.c for nvptx, this gimple mem move: ... MEM[(volatile struct t0 *)655404B] ={v} a0[0]; ... is expanded into a memcpy, but when compiling pr94600-2.c instead, this similar gimple mem move: ... MEM[(volatile struct t0 *)655404B] ={v} a00; ... is expanded into a 32-bit load/store pair. In both cases, emit_block_move is called. 
In the latter case, can_move_by_pieces (4 /* byte-size */, 32 /* bit-align */) is called, which returns true (because by_pieces_ninsns returns 1, which is smaller than the MOVE_RATIO of 4). In the former case, can_move_by_pieces (4 /* byte-size */, 8 /* bit-align */) is called, which returns false (because by_pieces_ninsns returns 4, which is not smaller than the MOVE_RATIO of 4). So the difference in code generation is explained by the alignment. The difference in alignment comes from the move sources: a0[0] vs. a00. Both have the same type with 8-bit alignment, but a00 is on stack, which based on the base stack align and stack variable placement happens to result in a 32-bit alignment. Enable test-cases pr94600-{1,3}.c for nvptx by forcing the currently 8-bit aligned variables to have a 32-bit alignment for STRICT_ALIGNMENT targets. Tested on nvptx. gcc/testsuite/ChangeLog: 2020-10-01 Tom de Vries * gcc.dg/pr94600-1.c: Force 32-bit alignment for a0 for !non_strict_align targets. Remove target clauses from scan tests. * gcc.dg/pr94600-3.c: Same. --- gcc/testsuite/gcc.dg/pr94600-1.c | 11 ++++++++--- gcc/testsuite/gcc.dg/pr94600-3.c | 11 ++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/pr94600-1.c b/gcc/testsuite/gcc.dg/pr94600-1.c index c9a7bb9..149e4f3 100644 --- a/gcc/testsuite/gcc.dg/pr94600-1.c +++ b/gcc/testsuite/gcc.dg/pr94600-1.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target size32plus } */ /* { dg-options "-fdump-rtl-final -O2" } */ +/* { dg-additional-options "-DALIGN_VAR" { target { ! non_strict_align } } } */ /* Assignments to a whole struct of suitable size (32 bytes) must not be picked apart into field accesses. 
*/ @@ -12,7 +13,11 @@ typedef struct { unsigned int f3 : 7; } t0; -static t0 a0[] = { +static t0 a0[] +#ifdef ALIGN_VAR +__attribute__((aligned (4))) +#endif + = { { .f0 = 7, .f1 = 99, .f3 = 1, }, { .f0 = 7, .f1 = 251, .f3 = 1, }, { .f0 = 8, .f1 = 127, .f3 = 5, }, @@ -32,5 +37,5 @@ foo(void) } /* The only volatile accesses should be the obvious writes. */ -/* { dg-final { scan-rtl-dump-times {\(mem/v} 6 "final" { target { non_strict_align || pcc_bitfield_type_matters } } } } */ -/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 6 "final" { target { non_strict_align || pcc_bitfield_type_matters } } } } */ +/* { dg-final { scan-rtl-dump-times {\(mem/v} 6 "final" } } */ +/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 6 "final" } } */ diff --git a/gcc/testsuite/gcc.dg/pr94600-3.c b/gcc/testsuite/gcc.dg/pr94600-3.c index ff42c7d..2fce9f1 100644 --- a/gcc/testsuite/gcc.dg/pr94600-3.c +++ b/gcc/testsuite/gcc.dg/pr94600-3.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target size32plus } */ /* { dg-options "-fdump-rtl-final -O2 -fno-unroll-loops" } */ +/* { dg-additional-options "-DALIGN_VAR" { target { ! non_strict_align } } } */ /* Same-address version of pr94600-1.c. */ @@ -11,7 +12,11 @@ typedef struct { unsigned int f3 : 7; } t0; -static t0 a0[] = { +static t0 a0[] +#ifdef ALIGN_VAR +__attribute__((aligned (4))) +#endif + = { { .f0 = 7, .f1 = 99, .f3 = 1, }, { .f0 = 7, .f1 = 251, .f3 = 1, }, { .f0 = 8, .f1 = 127, .f3 = 5, }, @@ -31,5 +36,5 @@ foo(void) } /* The loop isn't unrolled. 
*/ -/* { dg-final { scan-rtl-dump-times {\(mem/v} 1 "final" { target { non_strict_align || pcc_bitfield_type_matters } } } } */ -/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 1 "final" { target { non_strict_align || pcc_bitfield_type_matters } } } } */ +/* { dg-final { scan-rtl-dump-times {\(mem/v} 1 "final" } } */ +/* { dg-final { scan-rtl-dump-times {\(set \(mem/v} 1 "final" } } */ -- cgit v1.1 From f9c86e3105d786cccc88ac5bdbfd2393dc75f8b5 Mon Sep 17 00:00:00 2001 From: Alex Coplan Date: Thu, 1 Oct 2020 12:30:09 +0100 Subject: arm: Fix ordering in arm-cpus.in This moves the recent entry for Neoverse N2 down and adds a comment in order to preserve the existing order/structure in arm-cpus.in. gcc/ChangeLog: * config/arm/arm-cpus.in: Fix ordering, move Neoverse N2 down. * config/arm/arm-tables.opt: Regenerate. * config/arm/arm-tune.md: Regenerate. --- gcc/config/arm/arm-cpus.in | 23 ++++++++++++----------- gcc/config/arm/arm-tables.opt | 6 +++--- gcc/config/arm/arm-tune.md | 4 ++-- 3 files changed, 17 insertions(+), 16 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index d47f943..9abb59a 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -1492,17 +1492,6 @@ begin cpu neoverse-n1 part d0c end cpu neoverse-n1 -begin cpu neoverse-n2 - cname neoversen2 - tune for cortex-a57 - tune flags LDSCHED - architecture armv8.5-a+fp16+bf16+i8mm - option crypto add FP_ARMv8 CRYPTO - costs cortex_a57 - vendor 41 - part 0xd49 -end cpu neoverse-n2 - # ARMv8.2 A-profile ARM DynamIQ big.LITTLE implementations begin cpu cortex-a75.cortex-a55 cname cortexa75cortexa55 @@ -1532,6 +1521,18 @@ begin cpu neoverse-v1 costs cortex_a57 end cpu neoverse-v1 +# Armv8.5 A-profile Architecture Processors +begin cpu neoverse-n2 + cname neoversen2 + tune for cortex-a57 + tune flags LDSCHED + architecture armv8.5-a+fp16+bf16+i8mm + option crypto add FP_ARMv8 CRYPTO + costs cortex_a57 + vendor 41 + part 0xd49 +end cpu 
neoverse-n2 + # V8 M-profile implementations. begin cpu cortex-m23 cname cortexm23 diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index 9f65824..05f5c08 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -253,9 +253,6 @@ EnumValue Enum(processor_type) String(neoverse-n1) Value( TARGET_CPU_neoversen1) EnumValue -Enum(processor_type) String(neoverse-n2) Value( TARGET_CPU_neoversen2) - -EnumValue Enum(processor_type) String(cortex-a75.cortex-a55) Value( TARGET_CPU_cortexa75cortexa55) EnumValue @@ -265,6 +262,9 @@ EnumValue Enum(processor_type) String(neoverse-v1) Value( TARGET_CPU_neoversev1) EnumValue +Enum(processor_type) String(neoverse-n2) Value( TARGET_CPU_neoversen2) + +EnumValue Enum(processor_type) String(cortex-m23) Value( TARGET_CPU_cortexm23) EnumValue diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 269e627..32657da 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -46,8 +46,8 @@ cortexa73cortexa53,cortexa55,cortexa75, cortexa76,cortexa76ae,cortexa77, cortexa78,cortexa78ae,cortexx1, - neoversen1,neoversen2,cortexa75cortexa55, - cortexa76cortexa55,neoversev1,cortexm23, + neoversen1,cortexa75cortexa55,cortexa76cortexa55, + neoversev1,neoversen2,cortexm23, cortexm33,cortexm35p,cortexm55, cortexr52" (const (symbol_ref "((enum attr_tune) arm_tune)"))) -- cgit v1.1 From 9bab2a0dc84e7c3e14cb44fcd6ac41df079baa0f Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Thu, 1 Oct 2020 05:05:06 -0700 Subject: c++: Refactor lookup_and_check_tag It turns out I'd already found lookup_and_check_tag's control flow confusing, and had refactored it on the modules branch. For instance, it continually checks 'if (decl &&$ condition)' before finally getting to 'else if (!decl)'. why not just check !decl first and be done? Well, it is done thusly. gcc/cp/ * decl.c (lookup_and_check_tag): Refactor. 
--- gcc/cp/decl.c | 116 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 59 insertions(+), 57 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 14742c1..d2a8d40 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -14885,71 +14885,73 @@ lookup_and_check_tag (enum tag_types tag_code, tree name, else decl = lookup_elaborated_type (name, how); - if (decl - && (DECL_CLASS_TEMPLATE_P (decl) - /* If scope is TAG_how::CURRENT_ONLY we're defining a class, - so ignore a template template parameter. */ - || (how != TAG_how::CURRENT_ONLY - && DECL_TEMPLATE_TEMPLATE_PARM_P (decl)))) - decl = DECL_TEMPLATE_RESULT (decl); - - if (decl && TREE_CODE (decl) == TYPE_DECL) - { - /* Look for invalid nested type: - class C { - class C {}; - }; */ - if (how == TAG_how::CURRENT_ONLY && DECL_SELF_REFERENCE_P (decl)) - { - error ("%qD has the same name as the class in which it is " - "declared", decl); - return error_mark_node; - } - - /* Two cases we need to consider when deciding if a class - template is allowed as an elaborated type specifier: - 1. It is a self reference to its own class. - 2. It comes with a template header. 
- For example: - - template class C { - class C *c1; // DECL_SELF_REFERENCE_P is true - class D; - }; - template class C; // template_header_p is true - template class C::D { - class C *c2; // DECL_SELF_REFERENCE_P is true - }; */ - - tree t = check_elaborated_type_specifier (tag_code, - decl, - template_header_p - | DECL_SELF_REFERENCE_P (decl)); - if (template_header_p && t && CLASS_TYPE_P (t) - && (!CLASSTYPE_TEMPLATE_INFO (t) - || (!PRIMARY_TEMPLATE_P (CLASSTYPE_TI_TEMPLATE (t))))) - { - error ("%qT is not a template", t); - inform (location_of (t), "previous declaration here"); - if (TYPE_CLASS_SCOPE_P (t) - && CLASSTYPE_TEMPLATE_INFO (TYPE_CONTEXT (t))) - inform (input_location, - "perhaps you want to explicitly add %<%T::%>", - TYPE_CONTEXT (t)); - t = error_mark_node; - } + if (!decl) + /* We found nothing. */ + return NULL_TREE; - return t; - } - else if (decl && TREE_CODE (decl) == TREE_LIST) + if (TREE_CODE (decl) == TREE_LIST) { error ("reference to %qD is ambiguous", name); print_candidates (decl); return error_mark_node; } - else + + if (DECL_CLASS_TEMPLATE_P (decl) + /* If scope is TAG_how::CURRENT_ONLY we're defining a class, + so ignore a template template parameter. */ + || (how != TAG_how::CURRENT_ONLY && DECL_TEMPLATE_TEMPLATE_PARM_P (decl))) + decl = DECL_TEMPLATE_RESULT (decl); + + if (TREE_CODE (decl) != TYPE_DECL) + /* Found not-a-type. */ return NULL_TREE; + + /* Look for invalid nested type: + class C { + class C {}; + }; */ + if (how == TAG_how::CURRENT_ONLY && DECL_SELF_REFERENCE_P (decl)) + { + error ("%qD has the same name as the class in which it is " + "declared", decl); + return error_mark_node; + } + + /* Two cases we need to consider when deciding if a class + template is allowed as an elaborated type specifier: + 1. It is a self reference to its own class. + 2. It comes with a template header. 
+ + For example: + + template class C { + class C *c1; // DECL_SELF_REFERENCE_P is true + class D; + }; + template class C; // template_header_p is true + template class C::D { + class C *c2; // DECL_SELF_REFERENCE_P is true + }; */ + + tree t = check_elaborated_type_specifier (tag_code, decl, + template_header_p + | DECL_SELF_REFERENCE_P (decl)); + if (template_header_p && t && CLASS_TYPE_P (t) + && (!CLASSTYPE_TEMPLATE_INFO (t) + || (!PRIMARY_TEMPLATE_P (CLASSTYPE_TI_TEMPLATE (t))))) + { + error ("%qT is not a template", t); + inform (location_of (t), "previous declaration here"); + if (TYPE_CLASS_SCOPE_P (t) + && CLASSTYPE_TEMPLATE_INFO (TYPE_CONTEXT (t))) + inform (input_location, + "perhaps you want to explicitly add %<%T::%>", + TYPE_CONTEXT (t)); + return error_mark_node; + } + + return t; } /* Get the struct, enum or union (TAG_CODE says which) with tag NAME. -- cgit v1.1 From 6eda9fa5f61e784638f0b1522840338e97fc5f7f Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Thu, 17 Sep 2020 09:23:12 +0200 Subject: Initial implementation of value query class. gcc/ChangeLog: * Makefile.in: Add value-query.o. * value-query.cc: New file. * value-query.h: New file. --- gcc/Makefile.in | 1 + gcc/value-query.cc | 162 +++++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/value-query.h | 107 +++++++++++++++++++++++++++++++++++ 3 files changed, 270 insertions(+) create mode 100644 gcc/value-query.cc create mode 100644 gcc/value-query.h (limited to 'gcc') diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 9c6c1c9..50d6c83 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1646,6 +1646,7 @@ OBJS = \ typed-splay-tree.o \ unique-ptr-tests.o \ valtrack.o \ + value-query.o \ value-range.o \ value-range-equiv.o \ value-prof.o \ diff --git a/gcc/value-query.cc b/gcc/value-query.cc new file mode 100644 index 0000000..5370a23 --- /dev/null +++ b/gcc/value-query.cc @@ -0,0 +1,162 @@ +/* Support routines for value queries. + Copyright (C) 2020 Free Software Foundation, Inc. 
+ Contributed by Aldy Hernandez and + Andrew MacLeod . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "gimple.h" +#include "ssa.h" +#include "tree-pretty-print.h" +#include "fold-const.h" +#include "value-range-equiv.h" +#include "value-query.h" +#include "alloc-pool.h" + +// value_query default methods. + +tree +value_query::value_on_edge (edge, tree name) +{ + return value_of_expr (name); +} + +tree +value_query::value_of_stmt (gimple *stmt, tree name) +{ + if (!name) + name = gimple_get_lhs (stmt); + + gcc_checking_assert (!name || name == gimple_get_lhs (stmt)); + + if (name) + return value_of_expr (name); + return NULL_TREE; +} + +// range_query default methods. 
+ +bool +range_query::range_on_edge (irange &r, edge, tree name) +{ + return range_of_expr (r, name); +} + +bool +range_query::range_of_stmt (irange &r, gimple *stmt, tree name) +{ + if (!name) + name = gimple_get_lhs (stmt); + + gcc_checking_assert (!name || name == gimple_get_lhs (stmt)); + + if (name) + return range_of_expr (r, name); + return false; +} + +tree +range_query::value_of_expr (tree name, gimple *stmt) +{ + tree t; + value_range r; + + if (!irange::supports_type_p (TREE_TYPE (name))) + return NULL_TREE; + if (range_of_expr (r, name, stmt) && r.singleton_p (&t)) + return t; + return NULL_TREE; +} + +tree +range_query::value_on_edge (edge e, tree name) +{ + tree t; + value_range r; + + if (!irange::supports_type_p (TREE_TYPE (name))) + return NULL_TREE; + if (range_on_edge (r, e, name) && r.singleton_p (&t)) + return t; + return NULL_TREE; + +} + +tree +range_query::value_of_stmt (gimple *stmt, tree name) +{ + tree t; + value_range r; + + if (!name) + name = gimple_get_lhs (stmt); + + gcc_checking_assert (!name || name == gimple_get_lhs (stmt)); + + if (!name || !irange::supports_type_p (TREE_TYPE (name))) + return NULL_TREE; + if (range_of_stmt (r, stmt, name) && r.singleton_p (&t)) + return t; + return NULL_TREE; + +} + +// valuation_query support routines for value_range_equiv's. 
+ +class equiv_allocator : public object_allocator +{ +public: + equiv_allocator () + : object_allocator ("equiv_allocator pool") { } +}; + +value_range_equiv * +range_query::allocate_value_range_equiv () +{ + return new (equiv_alloc->allocate ()) value_range_equiv; +} + +void +range_query::free_value_range_equiv (value_range_equiv *v) +{ + equiv_alloc->remove (v); +} + +const class value_range_equiv * +range_query::get_value_range (const_tree expr, gimple *stmt) +{ + int_range_max r; + if (range_of_expr (r, const_cast (expr), stmt)) + return new (equiv_alloc->allocate ()) value_range_equiv (r); + return new (equiv_alloc->allocate ()) value_range_equiv (TREE_TYPE (expr)); +} + +range_query::range_query () +{ + equiv_alloc = new equiv_allocator; +} + +range_query::~range_query () +{ + equiv_alloc->release (); + delete equiv_alloc; +} diff --git a/gcc/value-query.h b/gcc/value-query.h new file mode 100644 index 0000000..cf0b6ed --- /dev/null +++ b/gcc/value-query.h @@ -0,0 +1,107 @@ +/* Support routines for value queries. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Aldy Hernandez and + Andrew Macleod . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_QUERY_H +#define GCC_QUERY_H + +// The value_query class is used by optimization passes that require +// valueizing SSA names in terms of a tree value, but have no need +// for ranges. +// +// value_of_expr must be provided. 
The default for value_on_edge and +// value_of_stmt is to call value_of_expr. +// +// This implies the valuation is global in nature. If a pass can make +// use of more specific information, it can override the other queries. +// +// Proper usage of the correct query in passes will enable other +// valuation mechanisms to produce more precise results. + +class value_query +{ +public: + value_query () { } + // Return the singleton expression for NAME at a gimple statement, + // or NULL if none found. + virtual tree value_of_expr (tree name, gimple * = NULL) = 0; + // Return the singleton expression for NAME at an edge, or NULL if + // none found. + virtual tree value_on_edge (edge, tree name); + // Return the singleton expression for the LHS of a gimple + // statement, assuming an (optional) initial value of NAME. Returns + // NULL if none found. + // + // Note that this method calculates the range the LHS would have + // *after* the statement has executed. + virtual tree value_of_stmt (gimple *, tree name = NULL); + +private: + DISABLE_COPY_AND_ASSIGN (value_query); +}; + +// The range_query class is used by optimization passes which are +// range aware. +// +// range_of_expr must be provided. The default for range_on_edge and +// range_of_stmt is to call range_of_expr. If a pass can make use of +// more specific information, then it can override the other queries. +// +// The default for the value_* routines is to call the equivalent +// range_* routines, check if the range is a singleton, and return it +// if so. +// +// The get_value_range method is currently provided for compatibility +// with vr-values. It will be deprecated when possible. 
+ +class range_query : public value_query +{ +public: + range_query (); + virtual ~range_query (); + + virtual tree value_of_expr (tree name, gimple * = NULL) OVERRIDE; + virtual tree value_on_edge (edge, tree name) OVERRIDE; + virtual tree value_of_stmt (gimple *, tree name = NULL) OVERRIDE; + + // These are the range equivalents of the value_* methods. Instead + // of returning a singleton, they calculate a range and return it in + // R. TRUE is returned on success or FALSE if no range was found. + // + // Note that range_of_expr must always return TRUE unless ranges are + // unsupported for NAME's type (supports_type_p is false). + virtual bool range_of_expr (irange &r, tree name, gimple * = NULL) = 0; + virtual bool range_on_edge (irange &r, edge, tree name); + virtual bool range_of_stmt (irange &r, gimple *, tree name = NULL); + + // DEPRECATED: This method is used from vr-values. The plan is to + // rewrite all uses of it to the above API. + virtual const class value_range_equiv *get_value_range (const_tree, + gimple * = NULL); + +protected: + class value_range_equiv *allocate_value_range_equiv (); + void free_value_range_equiv (class value_range_equiv *); + +private: + class equiv_allocator *equiv_alloc; +}; + +#endif // GCC_QUERY_H -- cgit v1.1 From ba663ac1b1add0f1625862166a946fcb583d3686 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 1 Oct 2020 14:57:36 +0200 Subject: Fix handling of fnspec for internal functions. * internal-fn.c (DEF_INTERNAL_FN): Fix handling of fnspec --- gcc/internal-fn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 8ea3195..c897082 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -93,7 +93,7 @@ init_internal_fns () { #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \ if (FNSPEC) internal_fn_fnspec_array[IFN_##CODE] = \ - build_string ((int) sizeof (FNSPEC), FNSPEC ? FNSPEC : ""); + build_string ((int) sizeof (FNSPEC) - 1, FNSPEC ? 
FNSPEC : ""); #include "internal-fn.def" internal_fn_fnspec_array[IFN_LAST] = 0; } -- cgit v1.1 From bc2fcccd9d5cc9d346543a98c98dc00d71e9a5b8 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 1 Oct 2020 15:03:19 +0200 Subject: Fix ICE in ipa_edge_args_sum_t::duplicate PR ipa/97244 * ipa-fnsummary.c (pass_free_fnsummary::execute): Free also indirect inlining datastructure. * ipa-modref.c (pass_ipa_modref::execute): Do not free them here. * ipa-prop.c (ipa_free_all_node_params): Do not crash when info does not exist. (ipa_unregister_cgraph_hooks): Likewise. --- gcc/ipa-fnsummary.c | 2 ++ gcc/ipa-modref.c | 2 -- gcc/ipa-prop.c | 6 ++++-- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index 4c1c1f9..8285cc0 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -4680,6 +4680,8 @@ public: virtual unsigned int execute (function *) { ipa_free_fn_summary (); + /* Free ipa-prop structures if they are no longer needed. */ + ipa_free_all_structures_after_iinln (); if (!flag_wpa) ipa_free_size_summary (); return 0; diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 6225552..2f4da8f 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -1681,8 +1681,6 @@ pass_ipa_modref::execute (function *) } ((modref_summaries *)summaries)->ipa = false; ipa_free_postorder_info (); - /* Free ipa-prop structures if they are no longer needed. 
*/ - ipa_free_all_structures_after_iinln (); return 0; } diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index b28c78e..ea88fd3 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -4124,7 +4124,8 @@ ipa_free_all_edge_args (void) void ipa_free_all_node_params (void) { - ggc_delete (ipa_node_params_sum); + if (ipa_node_params_sum) + ggc_delete (ipa_node_params_sum); ipa_node_params_sum = NULL; } @@ -4368,7 +4369,8 @@ ipa_register_cgraph_hooks (void) static void ipa_unregister_cgraph_hooks (void) { - symtab->remove_cgraph_insertion_hook (function_insertion_hook_holder); + if (function_insertion_hook_holder) + symtab->remove_cgraph_insertion_hook (function_insertion_hook_holder); function_insertion_hook_holder = NULL; } -- cgit v1.1 From a12041339e8032f40acd5f1c0365f5b3a1469bf5 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 1 Oct 2020 15:03:45 +0200 Subject: Add -fno-ipa-modref to gcc.dg/ipa/remref-2a.c PR ipa/97244 * gcc.dg/ipa/remref-2a.c: Add -fno-ipa-modref --- gcc/testsuite/gcc.dg/ipa/remref-2a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/ipa/remref-2a.c b/gcc/testsuite/gcc.dg/ipa/remref-2a.c index 34a6188..c2f3eac 100644 --- a/gcc/testsuite/gcc.dg/ipa/remref-2a.c +++ b/gcc/testsuite/gcc.dg/ipa/remref-2a.c @@ -1,7 +1,7 @@ /* Verify that indirect inlining can also remove references of the functions it discovers calls for. */ /* { dg-do compile } */ -/* { dg-options "-O3 -fno-early-inlining -fno-ipa-cp -fdump-ipa-inline -fdump-tree-optimized -fno-ipa-icf" } */ +/* { dg-options "-O3 -fno-early-inlining -fno-ipa-cp -fdump-ipa-inline -fdump-tree-optimized -fno-ipa-icf -fno-ipa-modref" } */ int global; -- cgit v1.1 From 899c10c9d6caba9ec372174527d5bf2866edd32c Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 1 Oct 2020 15:27:00 +0200 Subject: Fix ICE in compute_parm_map gcc/ChangeLog: * ipa-modref.c (compute_parm_map): Be ready for callee_pi to be NULL. 
--- gcc/ipa-modref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 2f4da8f..71a7955 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -1363,7 +1363,7 @@ compute_parm_map (cgraph_edge *callee_edge, vec *parm_map) struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i); - if (jf) + if (jf && callee_pi) { tree cst = ipa_value_from_jfunc (caller_parms_info, jf, -- cgit v1.1 From 73c977cb0a112ac58fae18af44576ade8ab3aa26 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Thu, 1 Oct 2020 15:43:56 +0200 Subject: Add gcc.c-torture/compile/pr97243.c testcase. PR ipa/97243 * gcc.c-torture/compile/pr97243.c: New test. --- gcc/testsuite/gcc.c-torture/compile/pr97243.c | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr97243.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.c-torture/compile/pr97243.c b/gcc/testsuite/gcc.c-torture/compile/pr97243.c new file mode 100644 index 0000000..4d10a22 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr97243.c @@ -0,0 +1,10 @@ +/* { dg-options "-fipa-modref -fipa-icf" } */ +float fma_test1(float a, float b, float c) { + float x = a * b + c; + return x; +} +float fma_test2(float a, float b, float c) { + float x = a * b + c; + return x; +} + -- cgit v1.1 From 80a9c584e339c4f87a3f48ca6397d4a29b7e4ab6 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Thu, 1 Oct 2020 06:47:36 -0700 Subject: c++: pushdecl_top_level must set context I discovered pushdecl_top_level was not setting the decl's context, and we ended up with namespace-scope decls with NULL context. That broke modules. Then I discovered a couple of places where we set the context to a FUNCTION_DECL, which is also wrong. AFAICT the literals in question belong in global scope, as they're comdatable entities. But create_temporary would use current_scope for the context before we pushed it into namespace scope. 
This patch asserts the context is NULL and then sets it to the frobbed global_namespace. gcc/cp/ * name-lookup.c (pushdecl_top_level): Assert incoming context is null, add global_namespace context. (pushdecl_top_level_and_finish): Likewise. * pt.c (get_template_parm_object): Clear decl context before pushing. * semantics.c (finish_compound_literal): Likewise. --- gcc/cp/name-lookup.c | 4 ++++ gcc/cp/pt.c | 2 +- gcc/cp/semantics.c | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index 8cd6fe3..6204444 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -7404,6 +7404,8 @@ pushdecl_top_level (tree x) { bool subtime = timevar_cond_start (TV_NAME_LOOKUP); do_push_to_top_level (); + gcc_checking_assert (!DECL_CONTEXT (x)); + DECL_CONTEXT (x) = FROB_CONTEXT (global_namespace); x = pushdecl_namespace_level (x); do_pop_from_top_level (); timevar_cond_stop (TV_NAME_LOOKUP, subtime); @@ -7418,6 +7420,8 @@ pushdecl_top_level_and_finish (tree x, tree init) { bool subtime = timevar_cond_start (TV_NAME_LOOKUP); do_push_to_top_level (); + gcc_checking_assert (!DECL_CONTEXT (x)); + DECL_CONTEXT (x) = FROB_CONTEXT (global_namespace); x = pushdecl_namespace_level (x); cp_finish_decl (x, init, false, NULL_TREE, 0); do_pop_from_top_level (); diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 869477f..45b18f6 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -7094,12 +7094,12 @@ get_template_parm_object (tree expr, tsubst_flags_t complain) tree type = cp_build_qualified_type (TREE_TYPE (expr), TYPE_QUAL_CONST); decl = create_temporary_var (type); + DECL_CONTEXT (decl) = NULL_TREE; TREE_STATIC (decl) = true; DECL_DECLARED_CONSTEXPR_P (decl) = true; TREE_READONLY (decl) = true; DECL_NAME (decl) = name; SET_DECL_ASSEMBLER_NAME (decl, name); - DECL_CONTEXT (decl) = global_namespace; comdat_linkage (decl); if (!zero_init_p (type)) diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index b093044..1e42cd7 
100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -3030,6 +3030,7 @@ finish_compound_literal (tree type, tree compound_literal, && initializer_constant_valid_p (compound_literal, type)) { tree decl = create_temporary_var (type); + DECL_CONTEXT (decl) = NULL_TREE; DECL_INITIAL (decl) = compound_literal; TREE_STATIC (decl) = 1; if (literal_type_p (type) && CP_TYPE_CONST_NON_VOLATILE_P (type)) -- cgit v1.1 From 04b99da898a9817e72fedb4063589648b7961ac5 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 1 Oct 2020 15:12:35 +0200 Subject: tree-optimization/97236 - fix bad use of VMAT_CONTIGUOUS This avoids using VMAT_CONTIGUOUS with single-element interleaving when using V1mode vectors. Instead keep VMAT_ELEMENTWISE but continue to avoid load-lanes and gathers. 2020-10-01 Richard Biener PR tree-optimization/97236 * tree-vect-stmts.c (get_group_load_store_type): Keep VMAT_ELEMENTWISE for single-element vectors. * gcc.dg/vect/pr97236.c: New testcase. --- gcc/testsuite/gcc.dg/vect/pr97236.c | 43 +++++++++++++++++++++++++++++++++++++ gcc/tree-vect-stmts.c | 20 ++++++++--------- 2 files changed, 52 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr97236.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/pr97236.c b/gcc/testsuite/gcc.dg/vect/pr97236.c new file mode 100644 index 0000000..9d3dc20 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr97236.c @@ -0,0 +1,43 @@ +typedef unsigned char __uint8_t; +typedef __uint8_t uint8_t; +typedef struct plane_t { + uint8_t *p_pixels; + int i_lines; + int i_pitch; +} plane_t; + +typedef struct { + plane_t p[5]; +} picture_t; + +#define N 4 + +void __attribute__((noipa)) +picture_Clone(picture_t *picture, picture_t *res) +{ + for (int i = 0; i < N; i++) { + res->p[i].p_pixels = picture->p[i].p_pixels; + res->p[i].i_lines = picture->p[i].i_lines; + res->p[i].i_pitch = picture->p[i].i_pitch; + } +} + +int +main() +{ + picture_t aaa, bbb; + uint8_t pixels[10] = {1, 1, 1, 1, 1, 1, 1, 1}; + + 
for (unsigned i = 0; i < N; i++) + aaa.p[i].p_pixels = pixels; + + picture_Clone (&aaa, &bbb); + + uint8_t c = 0; + for (unsigned i = 0; i < N; i++) + c += bbb.p[i].p_pixels[0]; + + if (c != N) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index 191957c..3575f25 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -2235,25 +2235,23 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, /* First cope with the degenerate case of a single-element vector. */ if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)) - *memory_access_type = VMAT_CONTIGUOUS; + ; /* Otherwise try using LOAD/STORE_LANES. */ - if (*memory_access_type == VMAT_ELEMENTWISE - && (vls_type == VLS_LOAD - ? vect_load_lanes_supported (vectype, group_size, masked_p) - : vect_store_lanes_supported (vectype, group_size, - masked_p))) + else if (vls_type == VLS_LOAD + ? vect_load_lanes_supported (vectype, group_size, masked_p) + : vect_store_lanes_supported (vectype, group_size, + masked_p)) { *memory_access_type = VMAT_LOAD_STORE_LANES; overrun_p = would_overrun_p; } /* If that fails, try using permuting loads. */ - if (*memory_access_type == VMAT_ELEMENTWISE - && (vls_type == VLS_LOAD - ? vect_grouped_load_supported (vectype, single_element_p, - group_size) - : vect_grouped_store_supported (vectype, group_size))) + else if (vls_type == VLS_LOAD + ? vect_grouped_load_supported (vectype, single_element_p, + group_size) + : vect_grouped_store_supported (vectype, group_size)) { *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; overrun_p = would_overrun_p; -- cgit v1.1 From a889e06ac680e0aafa62cd5dec99e75d3f1ca1b6 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Thu, 17 Sep 2020 09:34:29 +0200 Subject: Convert vr-values to value query class. gcc/ChangeLog: * gimple-loop-versioning.cc (lv_dom_walker::before_dom_children): Pass m_range_analyzer instead of get_vr_values. (loop_versioning::name_prop::get_value): Rename to... 
(loop_versioning::name_prop::value_of_expr): ...this. * gimple-ssa-evrp-analyze.c (evrp_range_analyzer::evrp_range_analyzer): Adjust for evrp_range_analyzer inheriting from vr_values. (evrp_range_analyzer::try_find_new_range): Same. (evrp_range_analyzer::record_ranges_from_incoming_edge): Same. (evrp_range_analyzer::record_ranges_from_phis): Same. (evrp_range_analyzer::record_ranges_from_stmt): Same. (evrp_range_analyzer::push_value_range): Same. (evrp_range_analyzer::pop_value_range): Same. * gimple-ssa-evrp-analyze.h (class evrp_range_analyzer): Inherit from vr_values. Adjust accordingly. * gimple-ssa-evrp.c: Adjust for evrp_range_analyzer inheriting from vr_values. (evrp_folder::value_of_evrp): Rename from get_value. * tree-ssa-ccp.c (class ccp_folder): Rename get_value to value_of_expr. (ccp_folder::get_value): Rename to... (ccp_folder::value_of_expr): ...this. * tree-ssa-copy.c (class copy_folder): Rename get_value to value_of_expr. (copy_folder::get_value): Rename to... (copy_folder::value_of_expr): ...this. * tree-ssa-dom.c (dom_opt_dom_walker::after_dom_children): Adjust for evrp_range_analyzer inheriting from vr_values. (dom_opt_dom_walker::optimize_stmt): Same. * tree-ssa-propagate.c (substitute_and_fold_engine::replace_uses_in): Call value_of_* instead of get_value. (substitute_and_fold_engine::replace_phi_args_in): Same. (substitute_and_fold_engine::propagate_into_phi_args): Same. (substitute_and_fold_dom_walker::before_dom_children): Same. * tree-ssa-propagate.h: Include value-query.h. (class substitute_and_fold_engine): Inherit from value_query. * tree-ssa-strlen.c (strlen_dom_walker::before_dom_children): Adjust for evrp_range_analyzer inheriting from vr_values. * tree-ssa-threadedge.c (record_temporary_equivalences_from_phis): Same. * tree-vrp.c (class vrp_folder): Same. (vrp_folder::get_value): Rename to value_of_expr. * vr-values.c (vr_values::get_lattice_entry): Adjust for vr_values inheriting from range_query. (vr_values::range_of_expr): New. 
(vr_values::value_of_expr): New. (vr_values::value_on_edge): New. (vr_values::value_of_stmt): New. (simplify_using_ranges::op_with_boolean_value_range_p): Call get_value_range through query. (check_for_binary_op_overflow): Rename store to query. (vr_values::vr_values): Remove vrp_value_range_pool. (vr_values::~vr_values): Same. (simplify_using_ranges::get_vr_for_comparison): Call get_value_range through query. (simplify_using_ranges::compare_names): Same. (simplify_using_ranges::vrp_evaluate_conditional): Same. (simplify_using_ranges::vrp_visit_cond_stmt): Same. (simplify_using_ranges::simplify_abs_using_ranges): Same. (simplify_using_ranges::simplify_cond_using_ranges_1): Same. (simplify_cond_using_ranges_2): Same. (simplify_using_ranges::simplify_switch_using_ranges): Same. (simplify_using_ranges::two_valued_val_range_p): Same. (simplify_using_ranges::simplify_using_ranges): Rename store to query. (simplify_using_ranges::simplify): Assert that we have a query. * vr-values.h (class range_query): Remove. (class simplify_using_ranges): Remove inheritance of range_query. (class vr_values): Add virtuals for range_of_expr, value_of_expr, value_on_edge, value_of_stmt, and get_value_range. Call range_query allocator instead of using vrp_value_range_pool. Remove vrp_value_range_pool. (simplify_using_ranges::get_value_range): Remove. 
--- gcc/gimple-loop-versioning.cc | 8 ++-- gcc/gimple-ssa-evrp-analyze.c | 31 ++++++------- gcc/gimple-ssa-evrp-analyze.h | 21 +-------- gcc/gimple-ssa-evrp.c | 20 ++++---- gcc/tree-ssa-ccp.c | 4 +- gcc/tree-ssa-copy.c | 4 +- gcc/tree-ssa-dom.c | 4 +- gcc/tree-ssa-propagate.c | 13 +++--- gcc/tree-ssa-propagate.h | 5 +- gcc/tree-ssa-strlen.c | 2 +- gcc/tree-ssa-threadedge.c | 6 +-- gcc/tree-vrp.c | 19 ++------ gcc/vr-values.c | 105 +++++++++++++++++++++++++++++------------- gcc/vr-values.h | 43 ++++++----------- 14 files changed, 139 insertions(+), 146 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc index 2687088..afe353e 100644 --- a/gcc/gimple-loop-versioning.cc +++ b/gcc/gimple-loop-versioning.cc @@ -277,7 +277,7 @@ private: { public: name_prop (loop_info &li) : m_li (li) {} - tree get_value (tree, gimple *) FINAL OVERRIDE; + tree value_of_expr (tree name, gimple *) FINAL OVERRIDE; private: /* Information about the versioning we've performed on the loop. */ @@ -512,8 +512,7 @@ loop_versioning::lv_dom_walker::before_dom_children (basic_block bb) m_range_analyzer.enter (bb); if (bb == bb->loop_father->header) - m_lv.prune_loop_conditions (bb->loop_father, - m_range_analyzer.get_vr_values ()); + m_lv.prune_loop_conditions (bb->loop_father, &m_range_analyzer); for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) @@ -534,8 +533,7 @@ loop_versioning::lv_dom_walker::after_dom_children (basic_block bb) Return the new value if so, otherwise return null. 
*/ tree -loop_versioning::name_prop::get_value (tree val, - gimple *stmt ATTRIBUTE_UNUSED) +loop_versioning::name_prop::value_of_expr (tree val, gimple *) { if (TREE_CODE (val) == SSA_NAME && bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val))) diff --git a/gcc/gimple-ssa-evrp-analyze.c b/gcc/gimple-ssa-evrp-analyze.c index 9f8ce55..485774d 100644 --- a/gcc/gimple-ssa-evrp-analyze.c +++ b/gcc/gimple-ssa-evrp-analyze.c @@ -54,7 +54,6 @@ evrp_range_analyzer::evrp_range_analyzer (bool update_global_ranges) FOR_EACH_EDGE (e, ei, bb->preds) e->flags |= EDGE_EXECUTABLE; } - vr_values = new class vr_values; } /* Push an unwinding marker onto the unwinding stack. */ @@ -87,15 +86,14 @@ evrp_range_analyzer::try_find_new_range (tree name, const value_range_equiv *old_vr = get_value_range (name); /* Discover VR when condition is true. */ - vr_values->extract_range_for_var_from_comparison_expr (name, code, op, - limit, &vr); + extract_range_for_var_from_comparison_expr (name, code, op, limit, &vr); /* If we found any usable VR, set the VR to ssa_name and create a PUSH old value in the stack with the old VR. 
*/ if (!vr.undefined_p () && !vr.varying_p ()) { if (old_vr->equal_p (vr, /*ignore_equivs=*/true)) return NULL; - value_range_equiv *new_vr = vr_values->allocate_value_range_equiv (); + value_range_equiv *new_vr = allocate_value_range_equiv (); new_vr->move (&vr); return new_vr; } @@ -214,7 +212,7 @@ evrp_range_analyzer::record_ranges_from_incoming_edge (basic_block bb) tem.intersect (vrs[i].second); if (tem.equal_p (*old_vr)) { - vr_values->free_value_range (vrs[i].second); + free_value_range (vrs[i].second); continue; } push_value_range (vrs[i].first, vrs[i].second); @@ -261,7 +259,7 @@ evrp_range_analyzer::record_ranges_from_phis (basic_block bb) value_range_equiv vr_result; bool interesting = stmt_interesting_for_vrp (phi); if (!has_unvisited_preds && interesting) - vr_values->extract_range_from_phi_node (phi, &vr_result); + extract_range_from_phi_node (phi, &vr_result); else { vr_result.set_varying (TREE_TYPE (lhs)); @@ -274,9 +272,9 @@ evrp_range_analyzer::record_ranges_from_phis (basic_block bb) && interesting && (l = loop_containing_stmt (phi)) && l->header == gimple_bb (phi)) - vr_values->adjust_range_with_scev (&vr_result, l, phi, lhs); + adjust_range_with_scev (&vr_result, l, phi, lhs); } - vr_values->update_value_range (lhs, &vr_result); + update_value_range (lhs, &vr_result); /* Set the SSA with the value range. */ if (m_update_global_ranges) @@ -303,7 +301,7 @@ evrp_range_analyzer::record_ranges_from_stmt (gimple *stmt, bool temporary) { edge taken_edge; value_range_equiv vr; - vr_values->extract_range_from_stmt (stmt, &taken_edge, &output, &vr); + extract_range_from_stmt (stmt, &taken_edge, &output, &vr); if (output) { /* Set the SSA with the value range. There are two cases to @@ -321,7 +319,7 @@ evrp_range_analyzer::record_ranges_from_stmt (gimple *stmt, bool temporary) { /* Case one. We can just update the underlying range information as well as the global information. 
*/ - vr_values->update_value_range (output, &vr); + update_value_range (output, &vr); if (m_update_global_ranges) set_ssa_range_info (output, &vr); } @@ -332,18 +330,17 @@ evrp_range_analyzer::record_ranges_from_stmt (gimple *stmt, bool temporary) a new range and push the old range onto the stack. We also have to be very careful about sharing the underlying bitmaps. Ugh. */ - value_range_equiv *new_vr - = vr_values->allocate_value_range_equiv (); + value_range_equiv *new_vr = allocate_value_range_equiv (); new_vr->set (vr.min (), vr.max (), NULL, vr.kind ()); vr.equiv_clear (); push_value_range (output, new_vr); } } else - vr_values->set_defs_to_varying (stmt); + set_defs_to_varying (stmt); } else - vr_values->set_defs_to_varying (stmt); + set_defs_to_varying (stmt); /* See if we can derive a range for any of STMT's operands. */ tree op; @@ -429,7 +426,7 @@ evrp_range_analyzer::push_value_range (tree var, value_range_equiv *vr) dump_value_range (dump_file, vr); fprintf (dump_file, "\n"); } - value_range_equiv *old_vr = vr_values->swap_vr_value (var, vr); + value_range_equiv *old_vr = swap_vr_value (var, vr); stack.safe_push (std::make_pair (var, old_vr)); } @@ -451,7 +448,7 @@ evrp_range_analyzer::pop_value_range () } /* We saved off a lattice entry, now give it back and release the one we popped. */ - value_range_equiv *popped_vr = vr_values->swap_vr_value (var, vr); + value_range_equiv *popped_vr = swap_vr_value (var, vr); if (popped_vr) - vr_values->free_value_range (popped_vr); + free_value_range (popped_vr); } diff --git a/gcc/gimple-ssa-evrp-analyze.h b/gcc/gimple-ssa-evrp-analyze.h index 8abbbe3..c6d27f5 100644 --- a/gcc/gimple-ssa-evrp-analyze.h +++ b/gcc/gimple-ssa-evrp-analyze.h @@ -20,13 +20,12 @@ along with GCC; see the file COPYING3. 
If not see #ifndef GCC_GIMPLE_SSA_EVRP_ANALYZE_H #define GCC_GIMPLE_SSA_EVRP_ANALYZE_H -class evrp_range_analyzer +class evrp_range_analyzer : public vr_values { public: evrp_range_analyzer (bool update_global_ranges); ~evrp_range_analyzer (void) { - delete vr_values; stack.release (); } @@ -36,34 +35,18 @@ class evrp_range_analyzer void leave (basic_block); void record_ranges_from_stmt (gimple *, bool); - /* Main interface to retrieve range information. */ - const value_range_equiv *get_value_range (const_tree op) - { return vr_values->get_value_range (op); } - /* Record a new unwindable range. */ void push_value_range (tree var, value_range_equiv *vr); - /* Dump all the current value ranges. This is primarily - a debugging interface. */ - void dump_all_value_ranges (FILE *fp) - { vr_values->dump_all_value_ranges (fp); } - /* A bit of a wart. This should ideally go away. */ void vrp_visit_cond_stmt (gcond *cond, edge *e) { - simplify_using_ranges simpl (vr_values); + simplify_using_ranges simpl (this); simpl.vrp_visit_cond_stmt (cond, e); } - /* Get the underlying vr_values class instance. If TRANSFER is - true, then we are transferring ownership. Else we keep ownership. - - This should be converted to a unique_ptr. */ - class vr_values *get_vr_values (void) { return vr_values; } - private: DISABLE_COPY_AND_ASSIGN (evrp_range_analyzer); - class vr_values *vr_values; void pop_value_range (); value_range_equiv *try_find_new_range (tree, tree op, tree_code code, diff --git a/gcc/gimple-ssa-evrp.c b/gcc/gimple-ssa-evrp.c index e8fde63..60bf82a 100644 --- a/gcc/gimple-ssa-evrp.c +++ b/gcc/gimple-ssa-evrp.c @@ -45,11 +45,11 @@ along with GCC; see the file COPYING3. 
If not see class evrp_folder : public substitute_and_fold_engine { public: - evrp_folder () : m_range_analyzer (/*update_global_ranges=*/true), - m_vr_values (m_range_analyzer.get_vr_values ()), - simplifier (m_vr_values) - { - } + evrp_folder () : + substitute_and_fold_engine (), + m_range_analyzer (/*update_global_ranges=*/true), + simplifier (&m_range_analyzer) + { } ~evrp_folder () { @@ -61,9 +61,9 @@ public: } } - tree get_value (tree op, gimple *stmt ATTRIBUTE_UNUSED) OVERRIDE + tree value_of_expr (tree name, gimple *stmt) OVERRIDE { - return m_vr_values->op_with_constant_singleton_value_range (op); + return m_range_analyzer.value_of_expr (name, stmt); } void pre_fold_bb (basic_block bb) OVERRIDE @@ -95,14 +95,12 @@ public: void post_new_stmt (gimple *stmt) OVERRIDE { - m_range_analyzer.get_vr_values ()->set_defs_to_varying (stmt); + m_range_analyzer.set_defs_to_varying (stmt); } private: DISABLE_COPY_AND_ASSIGN (evrp_folder); - class evrp_range_analyzer m_range_analyzer; - class vr_values *m_vr_values; - + evrp_range_analyzer m_range_analyzer; simplify_using_ranges simplifier; }; diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c index 65dffe0..0432fe5 100644 --- a/gcc/tree-ssa-ccp.c +++ b/gcc/tree-ssa-ccp.c @@ -946,7 +946,7 @@ do_dbg_cnt (void) class ccp_folder : public substitute_and_fold_engine { public: - tree get_value (tree, gimple *) FINAL OVERRIDE; + tree value_of_expr (tree, gimple *) FINAL OVERRIDE; bool fold_stmt (gimple_stmt_iterator *) FINAL OVERRIDE; }; @@ -955,7 +955,7 @@ class ccp_folder : public substitute_and_fold_engine of calling member functions. 
*/ tree -ccp_folder::get_value (tree op, gimple *stmt ATTRIBUTE_UNUSED) +ccp_folder::value_of_expr (tree op, gimple *) { return get_constant_value (op); } diff --git a/gcc/tree-ssa-copy.c b/gcc/tree-ssa-copy.c index 9bcb708..3d77982 100644 --- a/gcc/tree-ssa-copy.c +++ b/gcc/tree-ssa-copy.c @@ -492,13 +492,13 @@ init_copy_prop (void) class copy_folder : public substitute_and_fold_engine { public: - tree get_value (tree, gimple *) FINAL OVERRIDE; + tree value_of_expr (tree name, gimple *) FINAL OVERRIDE; }; /* Callback for substitute_and_fold to get at the final copy-of values. */ tree -copy_folder::get_value (tree name, gimple *stmt ATTRIBUTE_UNUSED) +copy_folder::value_of_expr (tree name, gimple *) { tree val; if (SSA_NAME_VERSION (name) >= n_copy_of) diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c index de5025f..c21bfe9 100644 --- a/gcc/tree-ssa-dom.c +++ b/gcc/tree-ssa-dom.c @@ -1500,7 +1500,7 @@ dom_opt_dom_walker::before_dom_children (basic_block bb) void dom_opt_dom_walker::after_dom_children (basic_block bb) { - x_vr_values = evrp_range_analyzer.get_vr_values (); + x_vr_values = &evrp_range_analyzer; thread_outgoing_edges (bb, m_dummy_cond, m_const_and_copies, m_avail_exprs_stack, &evrp_range_analyzer, @@ -1970,7 +1970,7 @@ dom_opt_dom_walker::optimize_stmt (basic_block bb, gimple_stmt_iterator *si, opt_stats.num_stmts++; /* Const/copy propagate into USES, VUSES and the RHS of VDEFs. */ - cprop_into_stmt (stmt, evrp_range_analyzer.get_vr_values ()); + cprop_into_stmt (stmt, &evrp_range_analyzer); /* If the statement has been modified with constant replacements, fold its RHS before checking for redundant computations. 
*/ diff --git a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c index 5a30176d..87dbf55 100644 --- a/gcc/tree-ssa-propagate.c +++ b/gcc/tree-ssa-propagate.c @@ -868,7 +868,7 @@ substitute_and_fold_engine::replace_uses_in (gimple *stmt) FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE) { tree tuse = USE_FROM_PTR (use); - tree val = get_value (tuse, stmt); + tree val = value_of_expr (tuse, stmt); if (val == tuse || val == NULL_TREE) continue; @@ -909,12 +909,11 @@ substitute_and_fold_engine::replace_phi_args_in (gphi *phi) if (TREE_CODE (arg) == SSA_NAME) { - tree val = get_value (arg, phi); + edge e = gimple_phi_arg_edge (phi, i); + tree val = value_on_edge (e, arg); if (val && val != arg && may_propagate_copy (arg, val)) { - edge e = gimple_phi_arg_edge (phi, i); - if (TREE_CODE (val) != SSA_NAME) prop_stats.num_const_prop++; else @@ -1036,7 +1035,7 @@ substitute_and_fold_engine::propagate_into_phi_args (basic_block bb) if (TREE_CODE (arg) != SSA_NAME || virtual_operand_p (arg)) continue; - tree val = get_value (arg, phi); + tree val = value_on_edge (e, arg); if (val && is_gimple_min_invariant (val) && may_propagate_copy (arg, val)) @@ -1070,7 +1069,7 @@ substitute_and_fold_dom_walker::before_dom_children (basic_block bb) } if (res && TREE_CODE (res) == SSA_NAME) { - tree sprime = substitute_and_fold_engine->get_value (res, phi); + tree sprime = substitute_and_fold_engine->value_of_expr (res, phi); if (sprime && sprime != res && may_propagate_copy (res, sprime)) @@ -1110,7 +1109,7 @@ substitute_and_fold_dom_walker::before_dom_children (basic_block bb) tree lhs = gimple_get_lhs (stmt); if (lhs && TREE_CODE (lhs) == SSA_NAME) { - tree sprime = substitute_and_fold_engine->get_value (lhs, stmt); + tree sprime = substitute_and_fold_engine->value_of_expr (lhs, stmt); if (sprime && sprime != lhs && may_propagate_copy (lhs, sprime) diff --git a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h index 9406cdf..da362ab 100644 --- a/gcc/tree-ssa-propagate.h +++ 
b/gcc/tree-ssa-propagate.h @@ -22,6 +22,8 @@ along with GCC; see the file COPYING3. If not see #ifndef _TREE_SSA_PROPAGATE_H #define _TREE_SSA_PROPAGATE_H 1 +#include "value-query.h" + /* If SIM_P is true, statement S will be simulated again. */ static inline void @@ -97,14 +99,13 @@ class ssa_propagation_engine void simulate_block (basic_block); }; -class substitute_and_fold_engine +class substitute_and_fold_engine : public value_query { public: substitute_and_fold_engine (bool fold_all_stmts = false) : fold_all_stmts (fold_all_stmts) { } virtual ~substitute_and_fold_engine (void) { } virtual bool fold_stmt (gimple_stmt_iterator *) { return false; } - virtual tree get_value (tree, gimple *) { return NULL_TREE; } bool substitute_and_fold (basic_block = NULL); bool replace_uses_in (gimple *); diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c index 47f537a..9907cc0 100644 --- a/gcc/tree-ssa-strlen.c +++ b/gcc/tree-ssa-strlen.c @@ -5860,7 +5860,7 @@ strlen_dom_walker::before_dom_children (basic_block bb) can be used by printf argument processing. */ evrp.record_ranges_from_stmt (stmt, false); - if (check_and_optimize_stmt (&gsi, &cleanup_eh, evrp.get_vr_values ())) + if (check_and_optimize_stmt (&gsi, &cleanup_eh, &evrp)) gsi_next (&gsi); } diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c index 03a2108..f43d581 100644 --- a/gcc/tree-ssa-threadedge.c +++ b/gcc/tree-ssa-threadedge.c @@ -163,8 +163,8 @@ record_temporary_equivalences_from_phis (edge e, { /* Get an empty new VR we can pass to update_value_range and save away in the VR stack. */ - vr_values *vr_values = evrp_range_analyzer->get_vr_values (); - value_range_equiv *new_vr = vr_values->allocate_value_range_equiv (); + value_range_equiv *new_vr + = evrp_range_analyzer->allocate_value_range_equiv (); new (new_vr) value_range_equiv (); /* There are three cases to consider: @@ -178,7 +178,7 @@ record_temporary_equivalences_from_phis (edge e, Otherwise set NEW_VR to varying. 
This may be overly conservative. */ if (TREE_CODE (src) == SSA_NAME) - new_vr->deep_copy (vr_values->get_value_range (src)); + new_vr->deep_copy (evrp_range_analyzer->get_value_range (src)); else if (TREE_CODE (src) == INTEGER_CST) new_vr->set (src); else diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c index b493e40..0e19690 100644 --- a/gcc/tree-vrp.c +++ b/gcc/tree-vrp.c @@ -4010,9 +4010,12 @@ class vrp_folder : public substitute_and_fold_engine : substitute_and_fold_engine (/* Fold all stmts. */ true), m_vr_values (v), simplifier (v) { } - tree get_value (tree, gimple *stmt) FINAL OVERRIDE; bool fold_stmt (gimple_stmt_iterator *) FINAL OVERRIDE; + tree value_of_expr (tree name, gimple *stmt) OVERRIDE + { + return m_vr_values->value_of_expr (name, stmt); + } class vr_values *m_vr_values; private: @@ -4023,8 +4026,6 @@ private: { return simplifier.vrp_evaluate_conditional (code, op0, op1, stmt); } bool simplify_stmt_using_ranges (gimple_stmt_iterator *gsi) { return simplifier.simplify (gsi); } - tree op_with_constant_singleton_value_range (tree op) - { return m_vr_values->op_with_constant_singleton_value_range (op); } simplify_using_ranges simplifier; }; @@ -4102,18 +4103,6 @@ vrp_folder::fold_stmt (gimple_stmt_iterator *si) return simplify_stmt_using_ranges (si); } -/* If OP has a value range with a single constant value return that, - otherwise return NULL_TREE. This returns OP itself if OP is a - constant. - - Implemented as a pure wrapper right now, but this will change. */ - -tree -vrp_folder::get_value (tree op, gimple *stmt ATTRIBUTE_UNUSED) -{ - return op_with_constant_singleton_value_range (op); -} - /* Return the LHS of any ASSERT_EXPR where OP appears as the first argument to the ASSERT_EXPR and in which the ASSERT_EXPR dominates BB. If no such ASSERT_EXPR is found, return OP. 
*/ diff --git a/gcc/vr-values.c b/gcc/vr-values.c index 9b21441..4d7dfd0 100644 --- a/gcc/vr-values.c +++ b/gcc/vr-values.c @@ -92,7 +92,7 @@ vr_values::get_lattice_entry (const_tree var) return vr; /* Create a default value range. */ - vr = new (vrp_value_range_pool.allocate ()) value_range_equiv; + vr = allocate_value_range_equiv (); vr_value[ver] = vr; /* After propagation finished return varying. */ @@ -173,6 +173,49 @@ vr_values::get_value_range (const_tree var, return vr; } +bool +vr_values::range_of_expr (irange &r, tree name, gimple *stmt) +{ + if (const value_range *vr = get_value_range (name, stmt)) + { + if (vr->undefined_p () || vr->varying_p () || vr->constant_p ()) + r = *vr; + else + { + value_range tmp = *vr; + tmp.normalize_symbolics (); + r = tmp; + } + return true; + } + return false; +} + +tree +vr_values::value_of_expr (tree op, gimple *) +{ + return op_with_constant_singleton_value_range (op); +} + +tree +vr_values::value_on_edge (edge, tree op) +{ + return op_with_constant_singleton_value_range (op); +} + +tree +vr_values::value_of_stmt (gimple *stmt, tree op) +{ + if (!op) + op = gimple_get_lhs (stmt); + + gcc_checking_assert (!op|| op == gimple_get_lhs (stmt)); + + if (op) + return op_with_constant_singleton_value_range (op); + return NULL_TREE; +} + /* Set the lattice entry for DEF to VARYING. */ void @@ -451,7 +494,7 @@ simplify_using_ranges::op_with_boolean_value_range_p (tree op) /* ?? Errr, this should probably check for [0,0] and [1,1] as well as [0,1]. */ - const value_range *vr = get_value_range (op); + const value_range *vr = query->get_value_range (op); return *vr == value_range (build_zero_cst (TREE_TYPE (op)), build_one_cst (TREE_TYPE (op))); } @@ -1006,20 +1049,20 @@ vr_values::extract_range_from_comparison (value_range_equiv *vr, overflow. 
*/ static bool -check_for_binary_op_overflow (range_query *store, +check_for_binary_op_overflow (range_query *query, enum tree_code subcode, tree type, tree op0, tree op1, bool *ovf) { value_range vr0, vr1; if (TREE_CODE (op0) == SSA_NAME) - vr0 = *store->get_value_range (op0); + vr0 = *query->get_value_range (op0); else if (TREE_CODE (op0) == INTEGER_CST) vr0.set (op0); else vr0.set_varying (TREE_TYPE (op0)); if (TREE_CODE (op1) == SSA_NAME) - vr1 = *store->get_value_range (op1); + vr1 = *query->get_value_range (op1); else if (TREE_CODE (op1) == INTEGER_CST) vr1.set (op1); else @@ -1948,8 +1991,7 @@ vr_values::dump_all_value_ranges (FILE *file) /* Initialize VRP lattice. */ -vr_values::vr_values () : vrp_value_range_pool ("Tree VRP value ranges"), - simplifier (this) +vr_values::vr_values () : simplifier (this) { values_propagated = false; num_vr_values = num_ssa_names * 2; @@ -1966,7 +2008,6 @@ vr_values::~vr_values () free (vr_value); free (vr_phi_edge_counts); bitmap_obstack_release (&vrp_equiv_obstack); - vrp_value_range_pool.release (); /* So that we can distinguish between VRP data being available and not available. */ @@ -2092,7 +2133,7 @@ const value_range_equiv * simplify_using_ranges::get_vr_for_comparison (int i, value_range_equiv *tem) { /* Shallow-copy equiv bitmap. */ - const value_range_equiv *vr = get_value_range (ssa_name (i)); + const value_range_equiv *vr = query->get_value_range (ssa_name (i)); /* If name N_i does not have a valid range, use N_i as its own range. This allows us to compare against names that may @@ -2117,7 +2158,7 @@ simplify_using_ranges::compare_name_with_value bool *strict_overflow_p, bool use_equiv_p) { /* Get the set of equivalences for VAR. */ - bitmap e = get_value_range (var)->equiv (); + bitmap e = query->get_value_range (var)->equiv (); /* Start at -1. Set it to 0 if we do a comparison without relying on overflow, or 1 if all comparisons rely on overflow. 
*/ @@ -2197,8 +2238,8 @@ simplify_using_ranges::compare_names (enum tree_code comp, tree n1, tree n2, { /* Compare the ranges of every name equivalent to N1 against the ranges of every name equivalent to N2. */ - bitmap e1 = get_value_range (n1)->equiv (); - bitmap e2 = get_value_range (n2)->equiv (); + bitmap e1 = query->get_value_range (n1)->equiv (); + bitmap e2 = query->get_value_range (n2)->equiv (); /* Use the fake bitmaps if e1 or e2 are not available. */ static bitmap s_e1 = NULL, s_e2 = NULL; @@ -2310,8 +2351,8 @@ simplify_using_ranges::vrp_evaluate_conditional_warnv_with_ops_using_ranges (enum tree_code code, tree op0, tree op1, bool * strict_overflow_p) { const value_range_equiv *vr0, *vr1; - vr0 = (TREE_CODE (op0) == SSA_NAME) ? get_value_range (op0) : NULL; - vr1 = (TREE_CODE (op1) == SSA_NAME) ? get_value_range (op1) : NULL; + vr0 = (TREE_CODE (op0) == SSA_NAME) ? query->get_value_range (op0) : NULL; + vr1 = (TREE_CODE (op1) == SSA_NAME) ? query->get_value_range (op1) : NULL; tree res = NULL_TREE; if (vr0 && vr1) @@ -2390,7 +2431,7 @@ simplify_using_ranges::vrp_evaluate_conditional_warnv_with_ops } else gcc_unreachable (); - const value_range_equiv *vr0 = get_value_range (op0, stmt); + const value_range_equiv *vr0 = query->get_value_range (op0, stmt); /* If vro, the range for OP0 to pass the overflow test, has no intersection with *vr0, OP0's known range, then the overflow test can't pass, so return the node for false. @@ -2496,7 +2537,7 @@ simplify_using_ranges::vrp_evaluate_conditional (tree_code code, tree op0, always fold regardless of the value of OP0. If -Wtype-limits was specified, emit a warning. 
*/ tree type = TREE_TYPE (op0); - const value_range_equiv *vr0 = get_value_range (op0, stmt); + const value_range_equiv *vr0 = query->get_value_range (op0, stmt); if (vr0->varying_p () && INTEGRAL_TYPE_P (type) @@ -2547,7 +2588,7 @@ simplify_using_ranges::vrp_visit_cond_stmt (gcond *stmt, edge *taken_edge_p) fprintf (dump_file, "\t"); print_generic_expr (dump_file, use); fprintf (dump_file, ": "); - dump_value_range (dump_file, get_value_range (use, stmt)); + dump_value_range (dump_file, query->get_value_range (use, stmt)); } fprintf (dump_file, "\n"); @@ -3123,7 +3164,7 @@ simplify_using_ranges::simplify_div_or_mod_using_ranges } else { - vr = get_value_range (op0, stmt); + vr = query->get_value_range (op0, stmt); if (range_int_cst_p (vr)) { op0min = vr->min (); @@ -3134,7 +3175,7 @@ simplify_using_ranges::simplify_div_or_mod_using_ranges if (rhs_code == TRUNC_MOD_EXPR && TREE_CODE (op1) == SSA_NAME) { - const value_range_equiv *vr1 = get_value_range (op1, stmt); + const value_range_equiv *vr1 = query->get_value_range (op1, stmt); if (range_int_cst_p (vr1)) op1min = vr1->min (); } @@ -3283,7 +3324,7 @@ simplify_using_ranges::simplify_abs_using_ranges (gimple_stmt_iterator *gsi, gimple *stmt) { tree op = gimple_assign_rhs1 (stmt); - const value_range *vr = get_value_range (op, stmt); + const value_range *vr = query->get_value_range (op, stmt); if (vr) { @@ -3373,14 +3414,14 @@ simplify_using_ranges::simplify_bit_ops_using_ranges wide_int mask; if (TREE_CODE (op0) == SSA_NAME) - vr0 = *(get_value_range (op0, stmt)); + vr0 = *(query->get_value_range (op0, stmt)); else if (is_gimple_min_invariant (op0)) vr0.set (op0); else return false; if (TREE_CODE (op1) == SSA_NAME) - vr1 = *(get_value_range (op1, stmt)); + vr1 = *(query->get_value_range (op1, stmt)); else if (is_gimple_min_invariant (op1)) vr1.set (op1); else @@ -3599,7 +3640,7 @@ simplify_using_ranges::simplify_cond_using_ranges_1 (gcond *stmt) && INTEGRAL_TYPE_P (TREE_TYPE (op0)) && is_gimple_min_invariant 
(op1)) { - const value_range *vr = get_value_range (op0, stmt); + const value_range *vr = query->get_value_range (op0, stmt); /* If we have range information for OP0, then we might be able to simplify this conditional. */ @@ -3672,7 +3713,7 @@ simplify_using_ranges::simplify_cond_using_ranges_1 (gcond *stmt) subsequent passes. */ void -simplify_cond_using_ranges_2 (vr_values *store, gcond *stmt) +simplify_cond_using_ranges_2 (vr_values *query, gcond *stmt) { tree op0 = gimple_cond_lhs (stmt); tree op1 = gimple_cond_rhs (stmt); @@ -3702,7 +3743,7 @@ simplify_cond_using_ranges_2 (vr_values *store, gcond *stmt) && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (innerop) && desired_pro_or_demotion_p (TREE_TYPE (innerop), TREE_TYPE (op0))) { - const value_range *vr = store->get_value_range (innerop); + const value_range *vr = query->get_value_range (innerop); if (range_int_cst_p (vr) && range_fits_type_p (vr, @@ -3743,7 +3784,7 @@ simplify_using_ranges::simplify_switch_using_ranges (gswitch *stmt) if (TREE_CODE (op) == SSA_NAME) { - vr = get_value_range (op, stmt); + vr = query->get_value_range (op, stmt); /* We can only handle integer ranges. 
*/ if (vr->varying_p () @@ -4036,7 +4077,7 @@ simplify_using_ranges::simplify_float_conversion_using_ranges gimple *stmt) { tree rhs1 = gimple_assign_rhs1 (stmt); - const value_range *vr = get_value_range (rhs1, stmt); + const value_range *vr = query->get_value_range (rhs1, stmt); scalar_float_mode fltmode = SCALAR_FLOAT_TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))); scalar_int_mode mode; @@ -4141,7 +4182,7 @@ simplify_using_ranges::simplify_internal_call_using_ranges return false; else type = TREE_TYPE (TREE_TYPE (gimple_call_lhs (stmt))); - if (!check_for_binary_op_overflow (store, subcode, type, op0, op1, &ovf) + if (!check_for_binary_op_overflow (query, subcode, type, op0, op1, &ovf) || (is_ubsan && ovf)) return false; @@ -4200,7 +4241,7 @@ simplify_using_ranges::simplify_internal_call_using_ranges bool simplify_using_ranges::two_valued_val_range_p (tree var, tree *a, tree *b) { - value_range vr = *get_value_range (var); + value_range vr = *query->get_value_range (var); vr.normalize_symbolics (); if (vr.varying_p () || vr.undefined_p ()) return false; @@ -4217,8 +4258,8 @@ simplify_using_ranges::two_valued_val_range_p (tree var, tree *a, tree *b) return false; } -simplify_using_ranges::simplify_using_ranges (range_query *store) - : store (store) +simplify_using_ranges::simplify_using_ranges (range_query *query) + : query (query) { to_remove_edges = vNULL; to_update_switch_stmts = vNULL; @@ -4234,6 +4275,8 @@ simplify_using_ranges::~simplify_using_ranges () bool simplify_using_ranges::simplify (gimple_stmt_iterator *gsi) { + gcc_checking_assert (query); + gimple *stmt = gsi_stmt (*gsi); if (is_gimple_assign (stmt)) { diff --git a/gcc/vr-values.h b/gcc/vr-values.h index 7051e13..a30f05c 100644 --- a/gcc/vr-values.h +++ b/gcc/vr-values.h @@ -21,28 +21,19 @@ along with GCC; see the file COPYING3. If not see #define GCC_VR_VALUES_H #include "value-range-equiv.h" +#include "value-query.h" // Abstract class to return a range for a given SSA. 
-class range_query -{ -public: - virtual const value_range_equiv *get_value_range (const_tree, - gimple * = NULL) = 0; - virtual ~range_query () { } -}; - // Class to simplify a statement using range information. -// -// The constructor takes a full vr_values, but all it needs is -// get_value_range() from it. This class could be made to work with -// any range repository. -class simplify_using_ranges : public range_query +class simplify_using_ranges { public: - simplify_using_ranges (class range_query *); + simplify_using_ranges (class range_query *query = NULL); ~simplify_using_ranges (); + void set_range_query (class range_query *q) { query = q; } + bool simplify (gimple_stmt_iterator *); // ?? These should be cleaned, merged, and made private. @@ -53,8 +44,6 @@ public: bool *, bool *); private: - const value_range_equiv *get_value_range (const_tree op, - gimple *stmt = NULL) OVERRIDE; bool simplify_truth_ops_using_ranges (gimple_stmt_iterator *, gimple *); bool simplify_div_or_mod_using_ranges (gimple_stmt_iterator *, gimple *); bool simplify_abs_using_ranges (gimple_stmt_iterator *, gimple *); @@ -89,7 +78,7 @@ private: vec to_remove_edges; vec to_update_switch_stmts; - class range_query *store; + class range_query *query; }; /* The VR_VALUES class holds the current view of range information @@ -112,7 +101,12 @@ class vr_values : public range_query vr_values (void); ~vr_values (void); - const value_range_equiv *get_value_range (const_tree, gimple * = NULL); + virtual bool range_of_expr (irange &r, tree name, gimple *stmt) OVERRIDE; + virtual tree value_of_expr (tree, gimple * = NULL) OVERRIDE; + virtual tree value_on_edge (edge, tree) OVERRIDE; + virtual tree value_of_stmt (gimple *, tree = NULL_TREE) OVERRIDE; + virtual const value_range_equiv *get_value_range (const_tree, + gimple * = NULL) OVERRIDE; void set_vr_value (tree, value_range_equiv *); value_range_equiv *swap_vr_value (tree, value_range_equiv *); @@ -136,9 +130,9 @@ class vr_values : public 
range_query /* Allocate a new value_range object. */ value_range_equiv *allocate_value_range_equiv (void) - { return vrp_value_range_pool.allocate (); } + { return range_query::allocate_value_range_equiv (); } void free_value_range (value_range_equiv *vr) - { vrp_value_range_pool.remove (vr); } + { free_value_range_equiv (vr); } private: value_range_equiv *get_lattice_entry (const_tree); @@ -155,9 +149,6 @@ class vr_values : public range_query void vrp_visit_assignment_or_call (gimple*, tree *, value_range_equiv *); void vrp_visit_switch_stmt (gswitch *, edge *); - /* Allocation pools for value_range objects. */ - object_allocator vrp_value_range_pool; - /* This probably belongs in the lattice rather than in here. */ bool values_propagated; @@ -176,12 +167,6 @@ class vr_values : public range_query simplify_using_ranges simplifier; }; -inline const value_range_equiv * -simplify_using_ranges::get_value_range (const_tree op, gimple *stmt) -{ - return store->get_value_range (op, stmt); -} - extern tree get_output_for_vrp (gimple *); // FIXME: Move this to tree-vrp.c. -- cgit v1.1 From f5299992827048274f2146746ab4abab3accd124 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Thu, 17 Sep 2020 09:34:03 +0200 Subject: Convert sprintf/strlen passes to value query class. gcc/ChangeLog: * builtins.c (compute_objsize): Replace vr_values with range_query. (get_range): Same. (gimple_call_alloc_size): Same. * builtins.h (class vr_values): Remove. (gimple_call_alloc_size): Replace vr_values with range_query. * gimple-ssa-sprintf.c (get_int_range): Same. (struct directive): Pass gimple context to fmtfunc callback. (directive::set_width): Replace inline with out-of-line version. (directive::set_precision): Same. (format_none): New gimple argument. (format_percent): New gimple argument. (format_integer): New gimple argument. (format_floating): New gimple argument. (get_string_length): Use range_query API. (format_character): New gimple argument. 
(format_string): New gimple argument. (format_plain): New gimple argument. (format_directive): New gimple argument. (parse_directive): Replace vr_values with range_query. (compute_format_length): Same. (handle_printf_call): Same. Adjust for range_query API. * tree-ssa-strlen.c (get_range): Same. (compare_nonzero_chars): Same. (get_addr_stridx): Replace vr_values with range_query. (get_stridx): Same. (dump_strlen_info): Same. (get_range_strlen_dynamic): Adjust for range_query API. (set_strlen_range): Same. (maybe_warn_overflow): Replace vr_values with range_query. (handle_builtin_strcpy): Same. (maybe_diag_stxncpy_trunc): Add FIXME comment. (handle_builtin_memcpy): Replace vr_values with range_query. (handle_builtin_memset): Same. (get_len_or_size): Same. (strxcmp_eqz_result): Same. (handle_builtin_string_cmp): Same. (count_nonzero_bytes_addr): Same, plus adjust for range_query API. (count_nonzero_bytes): Replace vr_values with range_query. (handle_store): Same. (strlen_check_and_optimize_call): Same. (handle_integral_assign): Same. (check_and_optimize_stmt): Same. * tree-ssa-strlen.h (class vr_values): Remove. (get_range): Replace vr_values with range_query. (get_range_strlen_dynamic): Same. (handle_printf_call): Same. 
--- gcc/builtins.c | 30 ++++---- gcc/builtins.h | 10 +-- gcc/gimple-ssa-sprintf.c | 126 ++++++++++++++++--------------- gcc/tree-ssa-strlen.c | 189 +++++++++++++++++++++++------------------------ gcc/tree-ssa-strlen.h | 9 ++- 5 files changed, 186 insertions(+), 178 deletions(-) (limited to 'gcc') diff --git a/gcc/builtins.c b/gcc/builtins.c index 8f2662b..f91266e 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -183,7 +183,8 @@ static void maybe_emit_chk_warning (tree, enum built_in_function); static void maybe_emit_sprintf_chk_warning (tree, enum built_in_function); static void maybe_emit_free_warning (tree); static tree fold_builtin_object_size (tree, tree); -static bool get_range (tree, signop, offset_int[2], const vr_values * = NULL); +static bool get_range (tree, gimple *, signop, offset_int[2], + range_query * = NULL); static bool check_read_access (tree, tree, tree = NULL_TREE, int = 1); unsigned HOST_WIDE_INT target_newline; @@ -4152,7 +4153,7 @@ check_read_access (tree exp, tree src, tree bound /* = NULL_TREE */, tree gimple_call_alloc_size (gimple *stmt, wide_int rng1[2] /* = NULL */, - const vr_values *rvals /* = NULL */) + range_query *rvals /* = NULL */) { if (!stmt) return NULL_TREE; @@ -4206,7 +4207,7 @@ gimple_call_alloc_size (gimple *stmt, wide_int rng1[2] /* = NULL */, const int prec = ADDR_MAX_PRECISION; const tree size_max = TYPE_MAX_VALUE (sizetype); - if (!get_range (size, rng1, rvals)) + if (!get_range (size, stmt, rng1, rvals)) { /* Use the full non-negative range on failure. */ rng1[0] = wi::zero (prec); @@ -4220,7 +4221,7 @@ gimple_call_alloc_size (gimple *stmt, wide_int rng1[2] /* = NULL */, of the upper bounds as a constant. Ignore anti-ranges. */ tree n = argidx2 < nargs ? gimple_call_arg (stmt, argidx2) : integer_one_node; wide_int rng2[2]; - if (!get_range (n, rng2, rvals)) + if (!get_range (n, stmt, rng2, rvals)) { /* As above, use the full non-negative range on failure. 
*/ rng2[0] = wi::zero (prec); @@ -4252,8 +4253,7 @@ gimple_call_alloc_size (gimple *stmt, wide_int rng1[2] /* = NULL */, Return the function parameter on success and null otherwise. */ tree -gimple_parm_array_size (tree ptr, wide_int rng[2], - const vr_values * /* = NULL */) +gimple_parm_array_size (tree ptr, wide_int rng[2], range_query * /* = NULL */) { /* For a function argument try to determine the byte size of the array from the current function declaratation (e.g., attribute access or @@ -4305,11 +4305,11 @@ gimple_parm_array_size (tree ptr, wide_int rng[2], result but accepts offset_int instead. */ static bool -get_range (tree x, signop sgn, offset_int r[2], - const vr_values *rvals /* = NULL */) +get_range (tree x, gimple *stmt, signop sgn, offset_int r[2], + range_query *rvals /* = NULL */) { wide_int wr[2]; - if (!get_range (x, wr, rvals)) + if (!get_range (x, stmt, wr, rvals)) return false; r[0] = offset_int::from (wr[0], sgn); @@ -4333,7 +4333,7 @@ get_range (tree x, signop sgn, offset_int r[2], static bool compute_objsize (tree ptr, int ostype, access_ref *pref, - bitmap *visited, const vr_values *rvals /* = NULL */) + bitmap *visited, range_query *rvals /* = NULL */) { const bool addr = TREE_CODE (ptr) == ADDR_EXPR; if (addr) @@ -4431,7 +4431,7 @@ compute_objsize (tree ptr, int ostype, access_ref *pref, offset_int orng[2]; tree off = TREE_OPERAND (ptr, 1); - if (!get_range (off, SIGNED, orng, rvals)) + if (!get_range (off, NULL, SIGNED, orng, rvals)) /* Fail unless the size of the object is zero. */ return pref->sizrng[0] == 0 && pref->sizrng[0] == pref->sizrng[1]; @@ -4527,7 +4527,7 @@ compute_objsize (tree ptr, int ostype, access_ref *pref, offset to the maximum. 
*/ offset_int orng[2]; tree off = gimple_assign_rhs2 (stmt); - if (!get_range (off, SIGNED, orng, rvals)) + if (!get_range (off, stmt, SIGNED, orng, rvals)) { orng[0] = wi::to_offset (TYPE_MIN_VALUE (ptrdiff_type_node)); orng[1] = wi::to_offset (TYPE_MAX_VALUE (ptrdiff_type_node)); @@ -4551,7 +4551,7 @@ compute_objsize (tree ptr, int ostype, access_ref *pref, && !array_at_struct_end_p (ptr)) { if (tree size = TYPE_SIZE_UNIT (type)) - return get_range (size, UNSIGNED, pref->sizrng, rvals); + return get_range (size, NULL, UNSIGNED, pref->sizrng, rvals); } return false; @@ -4562,7 +4562,7 @@ compute_objsize (tree ptr, int ostype, access_ref *pref, tree compute_objsize (tree ptr, int ostype, access_ref *pref, - const vr_values *rvals /* = NULL */) + range_query *rvals /* = NULL */) { bitmap visited = NULL; @@ -4603,7 +4603,7 @@ compute_objsize (tree ptr, int ostype, access_ref *pref, tree compute_objsize (tree ptr, int ostype, tree *pdecl /* = NULL */, - tree *poff /* = NULL */, const vr_values *rvals /* = NULL */) + tree *poff /* = NULL */, class range_query *rvals /* = NULL */) { /* Set the initial offsets to zero and size to negative to indicate none has been computed yet. 
*/ diff --git a/gcc/builtins.h b/gcc/builtins.h index 8136b76..504c618 100644 --- a/gcc/builtins.h +++ b/gcc/builtins.h @@ -195,13 +195,13 @@ struct access_data access_mode mode; }; -class vr_values; +class range_query; extern tree gimple_call_alloc_size (gimple *, wide_int[2] = NULL, - const vr_values * = NULL); -extern tree gimple_parm_array_size (tree, wide_int[2], const vr_values * = NULL); + range_query * = NULL); +extern tree gimple_parm_array_size (tree, wide_int[2], range_query * = NULL); extern tree compute_objsize (tree, int, tree * = NULL, tree * = NULL, - const vr_values * = NULL); -extern tree compute_objsize (tree, int, access_ref *, const vr_values * = NULL); + range_query * = NULL); +extern tree compute_objsize (tree, int, access_ref *, range_query * = NULL); extern bool check_access (tree, tree, tree, tree, tree, access_mode, const access_data * = NULL); diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c index 70b031f..fff034f 100644 --- a/gcc/gimple-ssa-sprintf.c +++ b/gcc/gimple-ssa-sprintf.c @@ -546,8 +546,8 @@ fmtresult::type_max_digits (tree type, int base) } static bool -get_int_range (tree, HOST_WIDE_INT *, HOST_WIDE_INT *, bool, HOST_WIDE_INT, - const vr_values *); +get_int_range (tree, gimple *, HOST_WIDE_INT *, HOST_WIDE_INT *, + bool, HOST_WIDE_INT, range_query *); struct call_info; @@ -597,7 +597,7 @@ struct directive /* Format conversion function that given a directive and an argument returns the formatting result. */ - fmtresult (*fmtfunc) (const directive &, tree, const vr_values *); + fmtresult (*fmtfunc) (const directive &, tree, range_query *); /* Return True when the format flag CHR has been used. */ bool get_flag (char chr) const @@ -634,10 +634,7 @@ struct directive or 0, whichever is greater. For a non-constant ARG in some range set width to its range adjusting each bound to -1 if it's less. For an indeterminate ARG set width to [0, INT_MAX]. 
*/ - void set_width (tree arg, const vr_values *vr) - { - get_int_range (arg, width, width + 1, true, 0, vr); - } + void set_width (tree arg, range_query *); /* Set both bounds of the precision range to VAL. */ void set_precision (HOST_WIDE_INT val) @@ -650,10 +647,7 @@ struct directive or -1 whichever is greater. For a non-constant ARG in some range set precision to its range adjusting each bound to -1 if it's less. For an indeterminate ARG set precision to [-1, INT_MAX]. */ - void set_precision (tree arg, const vr_values *vr) - { - get_int_range (arg, prec, prec + 1, false, -1, vr); - } + void set_precision (tree arg, range_query *query); /* Return true if both width and precision are known to be either constant or in some range, false otherwise. */ @@ -956,10 +950,22 @@ struct call_info } }; +void +directive::set_width (tree arg, range_query *query) +{ + get_int_range (arg, info->callstmt, width, width + 1, true, 0, query); +} + +void +directive::set_precision (tree arg, range_query *query) +{ + get_int_range (arg, info->callstmt, prec, prec + 1, false, -1, query); +} + /* Return the result of formatting a no-op directive (such as '%n'). */ static fmtresult -format_none (const directive &, tree, const vr_values *) +format_none (const directive &, tree, range_query *) { fmtresult res (0); return res; @@ -968,7 +974,7 @@ format_none (const directive &, tree, const vr_values *) /* Return the result of formatting the '%%' directive. */ static fmtresult -format_percent (const directive &, tree, const vr_values *) +format_percent (const directive &, tree, range_query *) { fmtresult res (1); return res; @@ -1026,9 +1032,10 @@ build_intmax_type_nodes (tree *pintmax, tree *puintmax) the determined range are replaced with NEGBOUND. 
*/ static bool -get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax, +get_int_range (tree arg, gimple *stmt, + HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax, bool absolute, HOST_WIDE_INT negbound, - const class vr_values *vr_values) + range_query *query) { /* The type of the result. */ const_tree type = integer_type_node; @@ -1067,10 +1074,10 @@ get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax, && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type)) { /* Try to determine the range of values of the integer argument. */ - const value_range_equiv *vr - = CONST_CAST (class vr_values *, vr_values)->get_value_range (arg); + value_range vr; + query->range_of_expr (vr, arg, stmt); - if (!vr->undefined_p () && !vr->varying_p () && !vr->symbolic_p ()) + if (!vr.undefined_p () && !vr.varying_p ()) { HOST_WIDE_INT type_min = (TYPE_UNSIGNED (argtype) @@ -1080,8 +1087,8 @@ get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax, HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype)); tree type = TREE_TYPE (arg); - tree tmin = wide_int_to_tree (type, vr->lower_bound ()); - tree tmax = wide_int_to_tree (type, vr->upper_bound ()); + tree tmin = wide_int_to_tree (type, vr.lower_bound ()); + tree tmax = wide_int_to_tree (type, vr.upper_bound ()); *pmin = TREE_INT_CST_LOW (tmin); *pmax = TREE_INT_CST_LOW (tmax); @@ -1103,8 +1110,8 @@ get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax, /* Handle an argument with an unknown range as if none had been provided. */ if (unknown) - return get_int_range (NULL_TREE, pmin, pmax, absolute, - negbound, vr_values); + return get_int_range (NULL_TREE, NULL, pmin, pmax, absolute, + negbound, query); } /* Adjust each bound as specified by ABSOLUTE and NEGBOUND. */ @@ -1189,7 +1196,7 @@ adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax) used when the directive argument or its value isn't known. 
*/ static fmtresult -format_integer (const directive &dir, tree arg, const vr_values *vr_values) +format_integer (const directive &dir, tree arg, range_query *query) { tree intmax_type_node; tree uintmax_type_node; @@ -1372,13 +1379,13 @@ format_integer (const directive &dir, tree arg, const vr_values *vr_values) { /* Try to determine the range of values of the integer argument (range information is not available for pointers). */ - const value_range_equiv *vr - = CONST_CAST (class vr_values *, vr_values)->get_value_range (arg); + value_range vr; + query->range_of_expr (vr, arg, dir.info->callstmt); - if (!vr->varying_p () && !vr->undefined_p () && !vr->symbolic_p ()) + if (!vr.varying_p () && !vr.undefined_p ()) { - argmin = wide_int_to_tree (TREE_TYPE (arg), vr->lower_bound ()); - argmax = wide_int_to_tree (TREE_TYPE (arg), vr->upper_bound ()); + argmin = wide_int_to_tree (TREE_TYPE (arg), vr.lower_bound ()); + argmax = wide_int_to_tree (TREE_TYPE (arg), vr.upper_bound ()); /* Set KNOWNRANGE if the argument is in a known subrange of the directive's type and neither width nor precision @@ -1404,7 +1411,7 @@ format_integer (const directive &dir, tree arg, const vr_values *vr_values) if (code == INTEGER_CST) { arg = gimple_assign_rhs1 (def); - return format_integer (dir, arg, vr_values); + return format_integer (dir, arg, query); } if (code == NOP_EXPR) @@ -1449,16 +1456,16 @@ format_integer (const directive &dir, tree arg, const vr_values *vr_values) /* For unsigned conversions/directives or signed when the minimum is positive, use the minimum and maximum to compute the shortest and longest output, respectively. 
*/ - res.range.min = format_integer (dir, argmin, vr_values).range.min; - res.range.max = format_integer (dir, argmax, vr_values).range.max; + res.range.min = format_integer (dir, argmin, query).range.min; + res.range.max = format_integer (dir, argmax, query).range.max; } else if (tree_int_cst_sgn (argmax) < 0) { /* For signed conversions/directives if maximum is negative, use the minimum as the longest output and maximum as the shortest output. */ - res.range.min = format_integer (dir, argmax, vr_values).range.min; - res.range.max = format_integer (dir, argmin, vr_values).range.max; + res.range.min = format_integer (dir, argmax, query).range.min; + res.range.max = format_integer (dir, argmin, query).range.max; } else { @@ -1467,11 +1474,11 @@ format_integer (const directive &dir, tree arg, const vr_values *vr_values) length of the output of both minimum and maximum and pick the longer. */ unsigned HOST_WIDE_INT max1 - = format_integer (dir, argmin, vr_values).range.max; + = format_integer (dir, argmin, query).range.max; unsigned HOST_WIDE_INT max2 - = format_integer (dir, argmax, vr_values).range.max; + = format_integer (dir, argmax, query).range.max; res.range.min - = format_integer (dir, integer_zero_node, vr_values).range.min; + = format_integer (dir, integer_zero_node, query).range.min; res.range.max = MAX (max1, max2); } @@ -1820,7 +1827,7 @@ format_floating (const directive &dir, const HOST_WIDE_INT prec[2]) ARG. */ static fmtresult -format_floating (const directive &dir, tree arg, const vr_values *) +format_floating (const directive &dir, tree arg, range_query *) { HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] }; tree type = (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll @@ -2014,7 +2021,8 @@ format_floating (const directive &dir, tree arg, const vr_values *) Used by the format_string function below. 
*/ static fmtresult -get_string_length (tree str, unsigned eltsize, const vr_values *vr) +get_string_length (tree str, gimple *stmt, unsigned eltsize, + range_query *query) { if (!str) return fmtresult (); @@ -2025,7 +2033,7 @@ get_string_length (tree str, unsigned eltsize, const vr_values *vr) c_strlen_data lendata = { }; lendata.maxbound = str; if (eltsize == 1) - get_range_strlen_dynamic (str, &lendata, vr); + get_range_strlen_dynamic (str, stmt, &lendata, query); else { /* Determine the length of the shortest and longest string referenced @@ -2122,7 +2130,7 @@ get_string_length (tree str, unsigned eltsize, const vr_values *vr) vsprinf). */ static fmtresult -format_character (const directive &dir, tree arg, const vr_values *vr_values) +format_character (const directive &dir, tree arg, range_query *query) { fmtresult res; @@ -2135,7 +2143,7 @@ format_character (const directive &dir, tree arg, const vr_values *vr_values) res.range.min = 0; HOST_WIDE_INT min, max; - if (get_int_range (arg, &min, &max, false, 0, vr_values)) + if (get_int_range (arg, dir.info->callstmt, &min, &max, false, 0, query)) { if (min == 0 && max == 0) { @@ -2433,7 +2441,7 @@ alias_offset (tree arg, tree dst, HOST_WIDE_INT dst_fld) vsprinf). */ static fmtresult -format_string (const directive &dir, tree arg, const vr_values *vr_values) +format_string (const directive &dir, tree arg, range_query *query) { fmtresult res; @@ -2462,7 +2470,7 @@ format_string (const directive &dir, tree arg, const vr_values *vr_values) gcc_checking_assert (count_by == 2 || count_by == 4); } - fmtresult slen = get_string_length (arg, count_by, vr_values); + fmtresult slen = get_string_length (arg, dir.info->callstmt, count_by, query); if (slen.range.min == slen.range.max && slen.range.min < HOST_WIDE_INT_MAX) { @@ -2634,7 +2642,7 @@ format_string (const directive &dir, tree arg, const vr_values *vr_values) /* Format plain string (part of the format string itself). 
*/ static fmtresult -format_plain (const directive &dir, tree, const vr_values *) +format_plain (const directive &dir, tree, range_query *) { fmtresult res (dir.len); return res; @@ -3030,7 +3038,7 @@ bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res) static bool format_directive (const call_info &info, format_result *res, const directive &dir, - const class vr_values *vr_values) + range_query *query) { /* Offset of the beginning of the directive from the beginning of the format string. */ @@ -3055,7 +3063,7 @@ format_directive (const call_info &info, return false; /* Compute the range of lengths of the formatted output. */ - fmtresult fmtres = dir.fmtfunc (dir, dir.arg, vr_values); + fmtresult fmtres = dir.fmtfunc (dir, dir.arg, query); /* Record whether the output of all directives is known to be bounded by some maximum, implying that their arguments are @@ -3386,7 +3394,7 @@ static size_t parse_directive (call_info &info, directive &dir, format_result *res, const char *str, unsigned *argno, - const vr_values *vr_values) + range_query *query) { const char *pcnt = strchr (str, target_percent); dir.beg = str; @@ -3711,7 +3719,7 @@ parse_directive (call_info &info, if (star_width) { if (INTEGRAL_TYPE_P (TREE_TYPE (star_width))) - dir.set_width (star_width, vr_values); + dir.set_width (star_width, query); else { /* Width specified by a va_list takes on the range [0, -INT_MIN] @@ -3744,7 +3752,7 @@ parse_directive (call_info &info, if (star_precision) { if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision))) - dir.set_precision (star_precision, vr_values); + dir.set_precision (star_precision, query); else { /* Precision specified by a va_list takes on the range [-1, INT_MAX] @@ -3958,7 +3966,7 @@ maybe_warn_overlap (call_info &info, format_result *res) that caused the processing to be terminated early). 
*/ static bool -compute_format_length (call_info &info, format_result *res, const vr_values *vr) +compute_format_length (call_info &info, format_result *res, range_query *query) { if (dump_file) { @@ -3995,10 +4003,10 @@ compute_format_length (call_info &info, format_result *res, const vr_values *vr) { directive dir (&info, dirno); - size_t n = parse_directive (info, dir, res, pf, &argno, vr); + size_t n = parse_directive (info, dir, res, pf, &argno, query); /* Return failure if the format function fails. */ - if (!format_directive (info, res, dir, vr)) + if (!format_directive (info, res, dir, query)) return false; /* Return success when the directive is zero bytes long and it's @@ -4288,7 +4296,7 @@ get_user_idx_format (tree fndecl, unsigned *idx_args) gsi_next should not be performed in the caller. */ bool -handle_printf_call (gimple_stmt_iterator *gsi, const vr_values *vr_values) +handle_printf_call (gimple_stmt_iterator *gsi, range_query *query) { init_target_to_host_charmap (); @@ -4557,14 +4565,14 @@ handle_printf_call (gimple_stmt_iterator *gsi, const vr_values *vr_values) /* Try to determine the range of values of the argument and use the greater of the two at level 1 and the smaller of them at level 2. */ - const value_range_equiv *vr - = CONST_CAST (class vr_values *, vr_values)->get_value_range (size); + value_range vr; + query->range_of_expr (vr, size, info.callstmt); - if (!vr->undefined_p () && !vr->symbolic_p ()) + if (!vr.undefined_p ()) { tree type = TREE_TYPE (size); - tree tmin = wide_int_to_tree (type, vr->lower_bound ()); - tree tmax = wide_int_to_tree (type, vr->upper_bound ()); + tree tmin = wide_int_to_tree (type, vr.lower_bound ()); + tree tmax = wide_int_to_tree (type, vr.upper_bound ()); unsigned HOST_WIDE_INT minsize = TREE_INT_CST_LOW (tmin); unsigned HOST_WIDE_INT maxsize = TREE_INT_CST_LOW (tmax); dstsize = warn_level < 2 ? 
maxsize : minsize; @@ -4675,7 +4683,7 @@ handle_printf_call (gimple_stmt_iterator *gsi, const vr_values *vr_values) never set to true again). */ res.posunder4k = posunder4k && dstptr; - bool success = compute_format_length (info, &res, vr_values); + bool success = compute_format_length (info, &res, query); if (res.warned) gimple_set_no_warning (info.callstmt, true); diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c index 9907cc0..f4d1c5c 100644 --- a/gcc/tree-ssa-strlen.c +++ b/gcc/tree-ssa-strlen.c @@ -200,7 +200,8 @@ static void handle_builtin_stxncpy_strncat (bool, gimple_stmt_iterator *); to determine the range, otherwise get_range_info. */ tree -get_range (tree val, wide_int minmax[2], const vr_values *rvals /* = NULL */) +get_range (tree val, gimple *stmt, wide_int minmax[2], + range_query *rvals /* = NULL */) { if (TREE_CODE (val) == INTEGER_CST) { @@ -211,21 +212,17 @@ get_range (tree val, wide_int minmax[2], const vr_values *rvals /* = NULL */) if (TREE_CODE (val) != SSA_NAME) return NULL_TREE; - if (rvals) - { - /* The range below may be "inaccurate" if a constant has been - substituted earlier for VAL by this pass that hasn't been - propagated through the CFG. This shoud be fixed by the new - on-demand VRP if/when it becomes available (hopefully in - GCC 11). 
*/ - const value_range *vr - = (CONST_CAST (class vr_values *, rvals)->get_value_range (val)); - value_range_kind rng = vr->kind (); - if (rng != VR_RANGE || !range_int_cst_p (vr)) + if (rvals && stmt) + { + value_range vr; + if (!rvals->range_of_expr (vr, val, stmt)) + return NULL_TREE; + value_range_kind rng = vr.kind (); + if (rng != VR_RANGE) return NULL_TREE; - minmax[0] = wi::to_wide (vr->min ()); - minmax[1] = wi::to_wide (vr->max ()); + minmax[0] = wi::to_wide (vr.min ()); + minmax[1] = wi::to_wide (vr.max ()); return val; } @@ -263,7 +260,7 @@ compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off) static int compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off, - const vr_values *rvals) + range_query *rvals) { if (!si->nonzero_chars) return -1; @@ -274,20 +271,19 @@ compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off, if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME) return -1; - const value_range_equiv *vr - = (CONST_CAST (class vr_values *, rvals) - ->get_value_range (si->nonzero_chars)); - - value_range_kind rng = vr->kind (); - if (rng != VR_RANGE || !range_int_cst_p (vr)) + value_range vr; + if (!rvals->range_of_expr (vr, si->nonzero_chars, si->stmt)) + return -1; + value_range_kind rng = vr.kind (); + if (rng != VR_RANGE) return -1; /* If the offset is less than the minimum length or if the bounds of the length range are equal return the result of the comparison same as in the constant case. Otherwise return a conservative result. 
*/ - int cmpmin = compare_tree_int (vr->min (), off); - if (cmpmin > 0 || tree_int_cst_equal (vr->min (), vr->max ())) + int cmpmin = compare_tree_int (vr.min (), off); + if (cmpmin > 0 || tree_int_cst_equal (vr.min (), vr.max ())) return cmpmin; return -1; @@ -332,7 +328,7 @@ get_next_strinfo (strinfo *si) static int get_addr_stridx (tree exp, tree ptr, unsigned HOST_WIDE_INT *offset_out, - const vr_values *rvals = NULL) + range_query *rvals = NULL) { HOST_WIDE_INT off; struct stridxlist *list, *last = NULL; @@ -392,7 +388,7 @@ get_addr_stridx (tree exp, tree ptr, unsigned HOST_WIDE_INT *offset_out, When nonnull, uses RVALS to determine range information. */ static int -get_stridx (tree exp, wide_int offrng[2] = NULL, const vr_values *rvals = NULL) +get_stridx (tree exp, wide_int offrng[2] = NULL, range_query *rvals = NULL) { if (offrng) offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node)); @@ -474,7 +470,7 @@ get_stridx (tree exp, wide_int offrng[2] = NULL, const vr_values *rvals = NULL) return the index corresponding to the SSA_NAME. Do this irrespective of the whether the offset is known. */ - if (get_range (off, offrng, rvals)) + if (get_range (off, def_stmt, offrng, rvals)) { /* When the offset range is known, increment it it by the constant offset computed in prior @@ -864,11 +860,11 @@ get_string_length (strinfo *si) } /* Dump strlen data to FP for statement STMT. When non-null, RVALS - points to EVRP info and is used to dump strlen range for non-constant - results. */ + points to the valuation engine used to calculate ranges, and is + used to dump strlen range for non-constant results. 
*/ DEBUG_FUNCTION void -dump_strlen_info (FILE *fp, gimple *stmt, const vr_values *rvals) +dump_strlen_info (FILE *fp, gimple *stmt, range_query *rvals) { if (stmt) { @@ -909,14 +905,14 @@ dump_strlen_info (FILE *fp, gimple *stmt, const vr_values *rvals) wide_int min, max; if (rvals) { - const value_range *vr - = CONST_CAST (class vr_values *, rvals) - ->get_value_range (si->nonzero_chars); - rng = vr->kind (); - if (range_int_cst_p (vr)) + value_range vr; + rvals->range_of_expr (vr, si->nonzero_chars, + si->stmt); + rng = vr.kind (); + if (range_int_cst_p (&vr)) { - min = wi::to_wide (vr->min ()); - max = wi::to_wide (vr->max ()); + min = wi::to_wide (vr.min ()); + max = wi::to_wide (vr.max ()); } else rng = VR_UNDEFINED; @@ -1004,13 +1000,14 @@ dump_strlen_info (FILE *fp, gimple *stmt, const vr_values *rvals) /* Attempt to determine the length of the string SRC. On success, store the length in *PDATA and return true. Otherwise, return false. - VISITED is a bitmap of visited PHI nodes. RVALS points to EVRP info - and PSSA_DEF_MAX to an SSA_NAME assignment limit used to prevent runaway - recursion. */ + VISITED is a bitmap of visited PHI nodes. RVALS points to the valuation + engine used to calculate ranges. PSSA_DEF_MAX to an SSA_NAME + assignment limit used to prevent runaway recursion. 
*/ static bool -get_range_strlen_dynamic (tree src, c_strlen_data *pdata, bitmap *visited, - const vr_values *rvals, unsigned *pssa_def_max) +get_range_strlen_dynamic (tree src, gimple *stmt, + c_strlen_data *pdata, bitmap *visited, + range_query *rvals, unsigned *pssa_def_max) { int idx = get_stridx (src); if (!idx) @@ -1042,8 +1039,8 @@ get_range_strlen_dynamic (tree src, c_strlen_data *pdata, bitmap *visited, continue; c_strlen_data argdata = { }; - if (get_range_strlen_dynamic (arg, &argdata, visited, rvals, - pssa_def_max)) + if (get_range_strlen_dynamic (arg, phi, &argdata, visited, + rvals, pssa_def_max)) { /* Set the DECL of an unterminated array this argument refers to if one hasn't been found yet. */ @@ -1110,14 +1107,12 @@ get_range_strlen_dynamic (tree src, c_strlen_data *pdata, bitmap *visited, pdata->minlen = si->nonzero_chars; else if (TREE_CODE (si->nonzero_chars) == SSA_NAME) { - const value_range_equiv *vr - = CONST_CAST (class vr_values *, rvals) - ->get_value_range (si->nonzero_chars); - if (vr->kind () == VR_RANGE - && range_int_cst_p (vr)) + value_range vr; + rvals->range_of_expr (vr, si->nonzero_chars, si->stmt); + if (range_int_cst_p (&vr)) { - pdata->minlen = vr->min (); - pdata->maxlen = vr->max (); + pdata->minlen = vr.min (); + pdata->maxlen = vr.max (); } else pdata->minlen = build_zero_cst (size_type_node); @@ -1156,14 +1151,12 @@ get_range_strlen_dynamic (tree src, c_strlen_data *pdata, bitmap *visited, } else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME) { - const value_range_equiv *vr - = CONST_CAST (class vr_values *, rvals) - ->get_value_range (si->nonzero_chars); - if (vr->kind () == VR_RANGE - && range_int_cst_p (vr)) + value_range vr; + rvals->range_of_expr (vr, si->nonzero_chars, stmt); + if (range_int_cst_p (&vr)) { - pdata->minlen = vr->min (); - pdata->maxlen = vr->max (); + pdata->minlen = vr.min (); + pdata->maxlen = vr.max (); pdata->maxbound = pdata->maxlen; } else @@ -1198,17 +1191,17 @@ 
get_range_strlen_dynamic (tree src, c_strlen_data *pdata, bitmap *visited, Try to obtain the range of the lengths of the string(s) referenced by SRC, or the size of the largest array SRC refers to if the range of lengths cannot be determined, and store all in *PDATA. RVALS - points to EVRP info. */ + points to the valuation engine used to calculate ranges. */ void -get_range_strlen_dynamic (tree src, c_strlen_data *pdata, - const vr_values *rvals) +get_range_strlen_dynamic (tree src, gimple *stmt, c_strlen_data *pdata, + range_query *rvals) { bitmap visited = NULL; tree maxbound = pdata->maxbound; unsigned limit = param_ssa_name_def_chain_limit; - if (!get_range_strlen_dynamic (src, pdata, &visited, rvals, &limit)) + if (!get_range_strlen_dynamic (src, stmt, pdata, &visited, rvals, &limit)) { /* On failure extend the length range to an impossible maximum (a valid MAXLEN must be less than PTRDIFF_MAX - 1). Other @@ -1803,6 +1796,7 @@ set_strlen_range (tree lhs, wide_int min, wide_int max, else if (TREE_CODE (bound) == SSA_NAME) { wide_int minbound, maxbound; + // FIXME: Use range_query instead of global ranges. value_range_kind rng = get_range_info (bound, &minbound, &maxbound); if (rng == VR_RANGE) { @@ -1907,7 +1901,7 @@ maybe_set_strlen_range (tree lhs, tree src, tree bound) static void maybe_warn_overflow (gimple *stmt, tree len, - const vr_values *rvals = NULL, + range_query *rvals = NULL, strinfo *si = NULL, bool plus_one = false, bool rawmem = false) { @@ -1959,7 +1953,7 @@ maybe_warn_overflow (gimple *stmt, tree len, tree off = TREE_OPERAND (ref, 1); ref = TREE_OPERAND (ref, 0); wide_int rng[2]; - if (get_range (off, rng, rvals)) + if (get_range (off, stmt, rng, rvals)) { /* Convert offsets to the maximum precision. 
*/ offrng[0] = widest_int::from (rng[0], SIGNED); @@ -1977,7 +1971,7 @@ maybe_warn_overflow (gimple *stmt, tree len, tree mem_off = TREE_OPERAND (ref, 1); ref = TREE_OPERAND (ref, 0); wide_int rng[2]; - if (get_range (mem_off, rng, rvals)) + if (get_range (mem_off, stmt, rng, rvals)) { offrng[0] += widest_int::from (rng[0], SIGNED); offrng[1] += widest_int::from (rng[1], SIGNED); @@ -2049,7 +2043,7 @@ maybe_warn_overflow (gimple *stmt, tree len, } wide_int rng[2]; - if (get_range (destsize, rng, rvals)) + if (get_range (destsize, stmt, rng, rvals)) { sizrng[0] = widest_int::from (rng[0], UNSIGNED); sizrng[1] = widest_int::from (rng[1], UNSIGNED); @@ -2080,7 +2074,7 @@ maybe_warn_overflow (gimple *stmt, tree len, return; wide_int rng[2]; - if (!get_range (len, rng, rvals)) + if (!get_range (len, stmt, rng, rvals)) return; widest_int lenrng[2] = @@ -2231,7 +2225,7 @@ maybe_warn_overflow (gimple *stmt, tree len, if (destoff) { wide_int rng[2]; - if (get_range (destoff, rng)) + if (get_range (destoff, stmt, rng)) { offrng[0] = widest_int::from (rng[0], SIGNED); offrng[1] = widest_int::from (rng[1], SIGNED); @@ -2339,7 +2333,7 @@ maybe_warn_overflow (gimple *stmt, tree len, static inline void maybe_warn_overflow (gimple *stmt, unsigned HOST_WIDE_INT len, - const vr_values *rvals = NULL, strinfo *si = NULL, + range_query *rvals = NULL, strinfo *si = NULL, bool plus_one = false, bool rawmem = false) { maybe_warn_overflow (stmt, build_int_cst (size_type_node, len), rvals, @@ -2642,7 +2636,7 @@ handle_builtin_strchr (gimple_stmt_iterator *gsi) static void handle_builtin_strcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi, - const vr_values *rvals) + range_query *rvals) { int idx, didx; tree src, dst, srclen, len, lhs, type, fn, oldlen; @@ -3036,6 +3030,7 @@ maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt) cntrange[0] = cntrange[1] = wi::to_wide (cnt); else if (TREE_CODE (cnt) == SSA_NAME) { + // FIXME: Use range_query instead of global 
ranges. enum value_range_kind rng = get_range_info (cnt, cntrange, cntrange + 1); if (rng == VR_RANGE) ; @@ -3444,7 +3439,7 @@ handle_builtin_stxncpy_strncat (bool append_p, gimple_stmt_iterator *gsi) static void handle_builtin_memcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi, - const vr_values *rvals) + range_query *rvals) { tree lhs, oldlen, newlen; gimple *stmt = gsi_stmt (*gsi); @@ -3909,7 +3904,7 @@ handle_alloc_call (enum built_in_function bcode, gimple_stmt_iterator *gsi) static bool handle_builtin_memset (gimple_stmt_iterator *gsi, bool *zero_write, - const vr_values *rvals) + range_query *rvals) { gimple *memset_stmt = gsi_stmt (*gsi); tree ptr = gimple_call_arg (memset_stmt, 0); @@ -4103,9 +4098,10 @@ handle_builtin_memcmp (gimple_stmt_iterator *gsi) determine range information. Returns true on success. */ static bool -get_len_or_size (tree arg, int idx, unsigned HOST_WIDE_INT lenrng[2], +get_len_or_size (gimple *stmt, tree arg, int idx, + unsigned HOST_WIDE_INT lenrng[2], unsigned HOST_WIDE_INT *size, bool *nulterm, - const vr_values *rvals) + range_query *rvals) { /* Invalidate. */ *size = HOST_WIDE_INT_M1U; @@ -4140,6 +4136,7 @@ get_len_or_size (tree arg, int idx, unsigned HOST_WIDE_INT lenrng[2], else if (TREE_CODE (si->nonzero_chars) == SSA_NAME) { wide_int min, max; + // FIXME: Use range_query instead of global ranges. value_range_kind rng = get_range_info (si->nonzero_chars, &min, &max); if (rng == VR_RANGE) { @@ -4158,7 +4155,7 @@ get_len_or_size (tree arg, int idx, unsigned HOST_WIDE_INT lenrng[2], /* Set MAXBOUND to an arbitrary non-null non-integer node as a request to have it set to the length of the longest string in a PHI. 
*/ lendata.maxbound = arg; - get_range_strlen_dynamic (arg, &lendata, rvals); + get_range_strlen_dynamic (arg, stmt, &lendata, rvals); unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U; if (tree_fits_uhwi_p (lendata.maxbound) @@ -4216,17 +4213,17 @@ get_len_or_size (tree arg, int idx, unsigned HOST_WIDE_INT lenrng[2], Otherwise return null. */ static tree -strxcmp_eqz_result (tree arg1, int idx1, tree arg2, int idx2, +strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1, tree arg2, int idx2, unsigned HOST_WIDE_INT bound, unsigned HOST_WIDE_INT len[2], - unsigned HOST_WIDE_INT *psize, const vr_values *rvals) + unsigned HOST_WIDE_INT *psize, range_query *rvals) { /* Determine the range the length of each string is in and whether it's known to be nul-terminated, or the size of the array it's stored in. */ bool nul1, nul2; unsigned HOST_WIDE_INT siz1, siz2; unsigned HOST_WIDE_INT len1rng[2], len2rng[2]; - if (!get_len_or_size (arg1, idx1, len1rng, &siz1, &nul1, rvals) - || !get_len_or_size (arg2, idx2, len2rng, &siz2, &nul2, rvals)) + if (!get_len_or_size (stmt, arg1, idx1, len1rng, &siz1, &nul1, rvals) + || !get_len_or_size (stmt, arg2, idx2, len2rng, &siz2, &nul2, rvals)) return NULL_TREE; /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG @@ -4375,7 +4372,7 @@ maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound, another and false otherwise. */ static bool -handle_builtin_string_cmp (gimple_stmt_iterator *gsi, const vr_values *rvals) +handle_builtin_string_cmp (gimple_stmt_iterator *gsi, range_query *rvals) { gcall *stmt = as_a (gsi_stmt (*gsi)); tree lhs = gimple_call_lhs (stmt); @@ -4420,7 +4417,7 @@ handle_builtin_string_cmp (gimple_stmt_iterator *gsi, const vr_values *rvals) /* Try to determine if the two strings are either definitely equal or definitely unequal and if so, either fold the result to zero (when equal) or set the range of the result to ~[0, 0] otherwise. 
*/ - if (tree eqz = strxcmp_eqz_result (arg1, idx1, arg2, idx2, bound, + if (tree eqz = strxcmp_eqz_result (stmt, arg1, idx1, arg2, idx2, bound, len, &siz, rvals)) { if (integer_zerop (eqz)) @@ -4457,8 +4454,9 @@ handle_builtin_string_cmp (gimple_stmt_iterator *gsi, const vr_values *rvals) unsigned HOST_WIDE_INT arsz1, arsz2; bool nulterm[2]; - if (!get_len_or_size (arg1, idx1, len1rng, &arsz1, nulterm, rvals) - || !get_len_or_size (arg2, idx2, len2rng, &arsz2, nulterm + 1, rvals)) + if (!get_len_or_size (stmt, arg1, idx1, len1rng, &arsz1, nulterm, rvals) + || !get_len_or_size (stmt, arg2, idx2, len2rng, &arsz2, nulterm + 1, + rvals)) return false; if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX) @@ -4623,7 +4621,7 @@ int ssa_name_limit_t::next_ssa_name (tree ssa_name) static bool count_nonzero_bytes_addr (tree, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, unsigned [3], bool *, bool *, bool *, - const vr_values *, ssa_name_limit_t &); + range_query *, ssa_name_limit_t &); /* Determines the minimum and maximum number of leading non-zero bytes in the representation of EXP and set LENRANGE[0] and LENRANGE[1] @@ -4644,7 +4642,7 @@ static bool count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset, unsigned HOST_WIDE_INT nbytes, unsigned lenrange[3], bool *nulterm, - bool *allnul, bool *allnonnul, const vr_values *rvals, + bool *allnul, bool *allnonnul, range_query *rvals, ssa_name_limit_t &snlim) { if (TREE_CODE (exp) == SSA_NAME) @@ -4836,7 +4834,7 @@ count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset, unsigned HOST_WIDE_INT nbytes, unsigned lenrange[3], bool *nulterm, bool *allnul, bool *allnonnul, - const vr_values *rvals, ssa_name_limit_t &snlim) + range_query *rvals, ssa_name_limit_t &snlim) { int idx = get_stridx (exp); if (idx > 0) @@ -4853,13 +4851,13 @@ count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset, else if (si->nonzero_chars && TREE_CODE (si->nonzero_chars) == SSA_NAME) { - vr_values *v = CONST_CAST 
(vr_values *, rvals); - const value_range_equiv *vr = v->get_value_range (si->nonzero_chars); - if (vr->kind () != VR_RANGE || !range_int_cst_p (vr)) + value_range vr; + rvals->range_of_expr (vr, si->nonzero_chars, si->stmt); + if (vr.kind () != VR_RANGE) return false; - minlen = tree_to_uhwi (vr->min ()); - maxlen = tree_to_uhwi (vr->max ()); + minlen = tree_to_uhwi (vr.min ()); + maxlen = tree_to_uhwi (vr.max ()); } else return false; @@ -4948,7 +4946,7 @@ count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset, static bool count_nonzero_bytes (tree exp, unsigned lenrange[3], bool *nulterm, - bool *allnul, bool *allnonnul, const vr_values *rvals) + bool *allnul, bool *allnonnul, range_query *rvals) { /* Set to optimistic values so the caller doesn't have to worry about initializing these and to what. On success, the function will clear @@ -4972,7 +4970,7 @@ count_nonzero_bytes (tree exp, unsigned lenrange[3], bool *nulterm, static bool handle_store (gimple_stmt_iterator *gsi, bool *zero_write, - const vr_values *rvals) + range_query *rvals) { int idx = -1; strinfo *si = NULL; @@ -5382,7 +5380,7 @@ is_char_type (tree type) static bool strlen_check_and_optimize_call (gimple_stmt_iterator *gsi, bool *zero_write, - const vr_values *rvals) + range_query *rvals) { gimple *stmt = gsi_stmt (*gsi); @@ -5473,7 +5471,7 @@ strlen_check_and_optimize_call (gimple_stmt_iterator *gsi, bool *zero_write, static void handle_integral_assign (gimple_stmt_iterator *gsi, bool *cleanup_eh, - const vr_values *rvals) + range_query *rvals) { gimple *stmt = gsi_stmt (*gsi); tree lhs = gimple_assign_lhs (stmt); @@ -5565,6 +5563,7 @@ handle_integral_assign (gimple_stmt_iterator *gsi, bool *cleanup_eh, wide_int min, max; signop sign = TYPE_SIGN (lhs_type); int prec = TYPE_PRECISION (lhs_type); + // FIXME: Use range_query instead of global ranges. 
value_range_kind vr = get_range_info (lhs, &min, &max); if (vr == VR_VARYING || (vr == VR_RANGE @@ -5617,7 +5616,7 @@ handle_integral_assign (gimple_stmt_iterator *gsi, bool *cleanup_eh, static bool check_and_optimize_stmt (gimple_stmt_iterator *gsi, bool *cleanup_eh, - const vr_values *rvals) + range_query *rvals) { gimple *stmt = gsi_stmt (*gsi); diff --git a/gcc/tree-ssa-strlen.h b/gcc/tree-ssa-strlen.h index a11c4d5..225f64b 100644 --- a/gcc/tree-ssa-strlen.h +++ b/gcc/tree-ssa-strlen.h @@ -25,13 +25,14 @@ extern bool is_strlen_related_p (tree, tree); extern bool maybe_diag_stxncpy_trunc (gimple_stmt_iterator, tree, tree); extern tree set_strlen_range (tree, wide_int, wide_int, tree = NULL_TREE); -class vr_values; -extern tree get_range (tree, wide_int[2], const vr_values * = NULL); +extern tree get_range (tree, gimple *, wide_int[2], + class range_query * = NULL); struct c_strlen_data; -extern void get_range_strlen_dynamic (tree , c_strlen_data *, const vr_values *); +extern void get_range_strlen_dynamic (tree, gimple *, c_strlen_data *, + class range_query *); /* APIs internal to strlen pass. Defined in gimple-ssa-sprintf.c. */ -extern bool handle_printf_call (gimple_stmt_iterator *, const vr_values *); +extern bool handle_printf_call (gimple_stmt_iterator *, class range_query *); #endif // GCC_TREE_SSA_STRLEN_H -- cgit v1.1 From 63a32847b0e8d14ae1eafe047ea43441f6e3a11c Mon Sep 17 00:00:00 2001 From: Michael Davidsaver Date: Thu, 1 Oct 2020 10:16:08 -0500 Subject: config/i386/t-rtems: Change from mtune to march for multilibs * config/i386/t-rtems: Change from mtune to march when building multilibs. The mtune argument tunes or optimizes for a specific CPU model but does not ensure the generated code is appropriate for the CPU model. Prior to this patch, i386 compatible code was always generated but tuned for later models. 
--- gcc/config/i386/t-rtems | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/config/i386/t-rtems b/gcc/config/i386/t-rtems index 7626970..5f078c6 100644 --- a/gcc/config/i386/t-rtems +++ b/gcc/config/i386/t-rtems @@ -17,10 +17,10 @@ # . # -MULTILIB_OPTIONS = mtune=i486/mtune=pentium/mtune=pentiumpro msoft-float +MULTILIB_OPTIONS = march=i486/march=pentium/march=pentiumpro msoft-float MULTILIB_DIRNAMES= m486 mpentium mpentiumpro soft-float MULTILIB_MATCHES = msoft-float=mno-80387 -MULTILIB_MATCHES += mtune?pentium=mtune?k6 mtune?pentiumpro=mtune?athlon +MULTILIB_MATCHES += march?pentium=march?k6 march?pentiumpro=march?athlon MULTILIB_EXCEPTIONS = \ -mtune=pentium/*msoft-float* \ -mtune=pentiumpro/*msoft-float* +march=pentium/*msoft-float* \ +march=pentiumpro/*msoft-float* -- cgit v1.1 From 968ec08efefeea6fbc0cdc379e98ce3e28904083 Mon Sep 17 00:00:00 2001 From: Andrea Corallo Date: Thu, 1 Oct 2020 17:16:00 +0200 Subject: arm: Fix testcase selection for Low Overhead Loop tests [PR96375] gcc/testsuite/ PR target/96375 * gcc.target/arm/lob1.c: Fix missing flag. * gcc.target/arm/lob2.c: Likewise. * gcc.target/arm/lob3.c: Likewise. * gcc.target/arm/lob4.c: Likewise. * gcc.target/arm/lob5.c: Likewise. * gcc.target/arm/lob6.c: Likewise. * lib/target-supports.exp (check_effective_target_arm_v8_1_lob_ok): Return 1 only for cortex-m targets, add '-mthumb' flag. 
--- gcc/testsuite/gcc.target/arm/lob1.c | 2 +- gcc/testsuite/gcc.target/arm/lob2.c | 2 +- gcc/testsuite/gcc.target/arm/lob3.c | 2 +- gcc/testsuite/gcc.target/arm/lob4.c | 2 +- gcc/testsuite/gcc.target/arm/lob5.c | 2 +- gcc/testsuite/gcc.target/arm/lob6.c | 2 +- gcc/testsuite/lib/target-supports.exp | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/arm/lob1.c b/gcc/testsuite/gcc.target/arm/lob1.c index b92dc55..ba5c82c 100644 --- a/gcc/testsuite/gcc.target/arm/lob1.c +++ b/gcc/testsuite/gcc.target/arm/lob1.c @@ -3,7 +3,7 @@ /* { dg-do run } */ /* { dg-require-effective-target arm_v8_1_lob_ok } */ /* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */ -/* { dg-options "-march=armv8.1-m.main -O3 --save-temps" } */ +/* { dg-options "-march=armv8.1-m.main -mthumb -O3 --save-temps" } */ #include #include "lob.h" diff --git a/gcc/testsuite/gcc.target/arm/lob2.c b/gcc/testsuite/gcc.target/arm/lob2.c index 1fe9a9d..fdeb268 100644 --- a/gcc/testsuite/gcc.target/arm/lob2.c +++ b/gcc/testsuite/gcc.target/arm/lob2.c @@ -2,7 +2,7 @@ if a non-inlineable function call takes place inside the loop. */ /* { dg-do compile } */ /* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */ -/* { dg-options "-march=armv8.1-m.main -O3 --save-temps" } */ +/* { dg-options "-march=armv8.1-m.main -mthumb -O3 --save-temps" } */ #include #include "lob.h" diff --git a/gcc/testsuite/gcc.target/arm/lob3.c b/gcc/testsuite/gcc.target/arm/lob3.c index 17cba00..70314ea 100644 --- a/gcc/testsuite/gcc.target/arm/lob3.c +++ b/gcc/testsuite/gcc.target/arm/lob3.c @@ -2,7 +2,7 @@ if causes VFP emulation library calls to happen inside the loop. 
*/ /* { dg-do compile } */ /* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */ -/* { dg-options "-march=armv8.1-m.main -O3 --save-temps -mfloat-abi=soft" } */ +/* { dg-options "-march=armv8.1-m.main -mthumb -O3 --save-temps -mfloat-abi=soft" } */ /* { dg-require-effective-target arm_softfloat } */ #include #include "lob.h" diff --git a/gcc/testsuite/gcc.target/arm/lob4.c b/gcc/testsuite/gcc.target/arm/lob4.c index 444a2c7..792f352 100644 --- a/gcc/testsuite/gcc.target/arm/lob4.c +++ b/gcc/testsuite/gcc.target/arm/lob4.c @@ -2,7 +2,7 @@ if LR is modified within the loop. */ /* { dg-do compile } */ /* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */ -/* { dg-options "-march=armv8.1-m.main -O3 --save-temps -mfloat-abi=soft" } */ +/* { dg-options "-march=armv8.1-m.main -mthumb -O3 --save-temps -mfloat-abi=soft" } */ /* { dg-require-effective-target arm_softfloat } */ #include #include "lob.h" diff --git a/gcc/testsuite/gcc.target/arm/lob5.c b/gcc/testsuite/gcc.target/arm/lob5.c index c4f46e4..1a6adf1 100644 --- a/gcc/testsuite/gcc.target/arm/lob5.c +++ b/gcc/testsuite/gcc.target/arm/lob5.c @@ -3,7 +3,7 @@ therefore is not optimizable. Outer loops are not optimized. 
*/ /* { dg-do compile } */ /* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */ -/* { dg-options "-march=armv8.1-m.main -O3 --save-temps" } */ +/* { dg-options "-march=armv8.1-m.main -mthumb -O3 --save-temps" } */ #include #include "lob.h" diff --git a/gcc/testsuite/gcc.target/arm/lob6.c b/gcc/testsuite/gcc.target/arm/lob6.c index 5612676..17b6124 100644 --- a/gcc/testsuite/gcc.target/arm/lob6.c +++ b/gcc/testsuite/gcc.target/arm/lob6.c @@ -3,7 +3,7 @@ /* { dg-do run } */ /* { dg-require-effective-target arm_v8_1_lob_ok } */ /* { dg-skip-if "avoid conflicting multilib options" { *-*-* } { "-marm" "-mcpu=*" } } */ -/* { dg-options "-march=armv8.1-m.main -O3 --save-temps" } */ +/* { dg-options "-march=armv8.1-m.main -mthumb -O3 --save-temps" } */ #include #include "lob.h" diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 2fb59e8..38af678 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -10380,7 +10380,7 @@ proc check_effective_target_arm_v8_3a_bkey_directive { } { # Overhead Loop, 0 otherwise. The test is valid for ARM. proc check_effective_target_arm_v8_1_lob_ok { } { - if { ![istarget arm*-*-*] } { + if { ![check_effective_target_arm_cortex_m] } { return 0; } else { return [check_runtime arm_v8_1_lob_hw_available { @@ -10394,7 +10394,7 @@ proc check_effective_target_arm_v8_1_lob_ok { } { asm goto ("le lr, %l0" : : : "lr" : loop); return i != 10; } - } "-march=armv8.1-m.main" ] + } "-march=armv8.1-m.main -mthumb" ] } } -- cgit v1.1 From 92f2c04d382c325f9930ab12e1b7c7d680222ae3 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 1 Oct 2020 17:37:52 +0100 Subject: aarch64: Restrict asm-matching tests to lp64 gcc/testsuite/ * gcc.target/aarch64/movtf_1.c: Restrict the asm matching to lp64. * gcc.target/aarch64/movti_1.c: Likewise. 
--- gcc/testsuite/gcc.target/aarch64/movtf_1.c | 2 +- gcc/testsuite/gcc.target/aarch64/movti_1.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_1.c b/gcc/testsuite/gcc.target/aarch64/movtf_1.c index 570de93..b975b20 100644 --- a/gcc/testsuite/gcc.target/aarch64/movtf_1.c +++ b/gcc/testsuite/gcc.target/aarch64/movtf_1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O" } */ -/* { dg-final { check-function-bodies "**" "" } } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ /* ** zero_q: diff --git a/gcc/testsuite/gcc.target/aarch64/movti_1.c b/gcc/testsuite/gcc.target/aarch64/movti_1.c index 160e1ac..5595b3e 100644 --- a/gcc/testsuite/gcc.target/aarch64/movti_1.c +++ b/gcc/testsuite/gcc.target/aarch64/movti_1.c @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O" } */ -/* { dg-final { check-function-bodies "**" "" } } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ /* ** zero_q: -- cgit v1.1 From c2978b3405884e38429c1937f416753ca88d3cd6 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 1 Oct 2020 17:41:15 +0100 Subject: arm: Add missing vec_cmp and vcond patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch does several things at once: (1) Add vector compare patterns (vec_cmp and vec_cmpu). (2) Add vector selects between floating-point modes when the values being compared are integers (affects vcond and vcondu). (3) Add vector selects between integer modes when the values being compared are floating-point (affects vcond). (4) Add standalone vector select patterns (vcond_mask). (5) Tweak the handling of compound comparisons with zeros. Unfortunately it proved too difficult (for me) to separate this out into a series of smaller patches, since everything is so inter-related. Defining only some of the new patterns does not leave things in a happy state. 
The handling of comparisons is mostly taken from the vcond patterns. This means that it remains non-compliant with IEEE: “quiet” comparisons use signalling instructions. But that shouldn't matter for floats, since we require -funsafe-math-optimizations to vectorize for them anyway. It remains the case that comparisons and selects aren't implemented at all for HF vectors. Implementing those feels like separate work. gcc/ PR target/96528 PR target/97288 * config/arm/arm-protos.h (arm_expand_vector_compare): Declare. (arm_expand_vcond): Likewise. * config/arm/arm.c (arm_expand_vector_compare): New function. (arm_expand_vcond): Likewise. * config/arm/neon.md (vec_cmp): New pattern. (vec_cmpu): Likewise. (vcond): Require operand 5 to be a register or zero. Use arm_expand_vcond. (vcond): New pattern. (vcondu): Generalize to... (vcondu): New pattern. (neon_vc, neon_vc_insn): Add "@" marker. (neon_vbsl): Likewise. (neon_vcu): Reexpress as... (@neon_vc): ...this. gcc/testsuite/ * lib/target-supports.exp (check_effective_target_vect_cond_mixed): Add arm neon targets. * gcc.target/arm/neon-compare-1.c: New test. * gcc.target/arm/neon-compare-2.c: Likewise. * gcc.target/arm/neon-compare-3.c: Likewise. * gcc.target/arm/neon-compare-4.c: Likewise. * gcc.target/arm/neon-compare-5.c: Likewise. * gcc.target/arm/neon-vcond-gt.c: Expect comparisons with zero. * gcc.target/arm/neon-vcond-ltgt.c: Likewise. * gcc.target/arm/neon-vcond-unordered.c: Likewise. 
--- gcc/config/arm/arm-protos.h | 2 + gcc/config/arm/arm.c | 121 +++++++++ gcc/config/arm/neon.md | 281 +++++---------------- gcc/testsuite/gcc.target/arm/neon-compare-1.c | 84 ++++++ gcc/testsuite/gcc.target/arm/neon-compare-2.c | 45 ++++ gcc/testsuite/gcc.target/arm/neon-compare-3.c | 44 ++++ gcc/testsuite/gcc.target/arm/neon-compare-4.c | 38 +++ gcc/testsuite/gcc.target/arm/neon-compare-5.c | 37 +++ gcc/testsuite/gcc.target/arm/neon-vcond-gt.c | 2 +- gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c | 3 +- .../gcc.target/arm/neon-vcond-unordered.c | 4 +- gcc/testsuite/lib/target-supports.exp | 2 + 12 files changed, 442 insertions(+), 221 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/neon-compare-1.c create mode 100644 gcc/testsuite/gcc.target/arm/neon-compare-2.c create mode 100644 gcc/testsuite/gcc.target/arm/neon-compare-3.c create mode 100644 gcc/testsuite/gcc.target/arm/neon-compare-4.c create mode 100644 gcc/testsuite/gcc.target/arm/neon-compare-5.c (limited to 'gcc') diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 9bb9c61..703d616 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -372,9 +372,11 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx, extern bool arm_fusion_enabled_p (tune_params::fuse_ops); extern bool arm_valid_symbolic_address_p (rtx); extern bool arm_validize_comparison (rtx *, rtx *, rtx *); +extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool); #endif /* RTX_CODE */ extern bool arm_gen_setmem (rtx *); +extern void arm_expand_vcond (rtx *, machine_mode); extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 8105b39..0e23246 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -30634,6 +30634,127 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx 
new_out, rtx mem, arm_post_atomic_barrier (model); } +/* Expand code to compare vectors OP0 and OP1 using condition CODE. + If CAN_INVERT, store either the result or its inverse in TARGET + and return true if TARGET contains the inverse. If !CAN_INVERT, + always store the result in TARGET, never its inverse. + + Note that the handling of floating-point comparisons is not + IEEE compliant. */ + +bool +arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, + bool can_invert) +{ + machine_mode cmp_result_mode = GET_MODE (target); + machine_mode cmp_mode = GET_MODE (op0); + + bool inverted; + switch (code) + { + /* For these we need to compute the inverse of the requested + comparison. */ + case UNORDERED: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case UNEQ: + case NE: + code = reverse_condition_maybe_unordered (code); + if (!can_invert) + { + /* Recursively emit the inverted comparison into a temporary + and then store its inverse in TARGET. This avoids reusing + TARGET (which for integer NE could be one of the inputs). */ + rtx tmp = gen_reg_rtx (cmp_result_mode); + if (arm_expand_vector_compare (tmp, code, op0, op1, true)) + gcc_unreachable (); + emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp))); + return false; + } + inverted = true; + break; + + default: + inverted = false; + break; + } + + switch (code) + { + /* These are natively supported for zero comparisons, but otherwise + require the operands to be swapped. */ + case LE: + case LT: + if (op1 != CONST0_RTX (cmp_mode)) + { + code = swap_condition (code); + std::swap (op0, op1); + } + /* Fall through. */ + + /* These are natively supported for both register and zero operands. */ + case EQ: + case GE: + case GT: + emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1)); + return inverted; + + /* These are natively supported for register operands only. + Comparisons with zero aren't useful and should be folded + or canonicalized by target-independent code. 
*/ + case GEU: + case GTU: + emit_insn (gen_neon_vc (code, cmp_mode, target, + op0, force_reg (cmp_mode, op1))); + return inverted; + + /* These require the operands to be swapped and likewise do not + support comparisons with zero. */ + case LEU: + case LTU: + emit_insn (gen_neon_vc (swap_condition (code), cmp_mode, + target, force_reg (cmp_mode, op1), op0)); + return inverted; + + /* These need a combination of two comparisons. */ + case LTGT: + case ORDERED: + { + /* Operands are LTGT iff (a > b || a < b). + Operands are ORDERED iff (a > b || a <= b). */ + rtx gt_res = gen_reg_rtx (cmp_result_mode); + rtx alt_res = gen_reg_rtx (cmp_result_mode); + rtx_code alt_code = (code == LTGT ? LT : LE); + if (arm_expand_vector_compare (gt_res, GT, op0, op1, true) + || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true)) + gcc_unreachable (); + emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode, + gt_res, alt_res))); + return inverted; + } + + default: + gcc_unreachable (); + } +} + +/* Expand a vcond or vcondu pattern with operands OPERANDS. + CMP_RESULT_MODE is the mode of the comparison result.
*/ + +void +arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode) +{ + rtx mask = gen_reg_rtx (cmp_result_mode); + bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]), + operands[4], operands[5], true); + if (inverted) + std::swap (operands[1], operands[2]); + emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0], + mask, operands[1], operands[2])); +} + #define MAX_VECT_LEN 16 struct expand_vec_perm_d diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 96bf277..58832cb 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1530,6 +1530,30 @@ [(set_attr "type" "neon_qsub")] ) +(define_expand "vec_cmp" + [(set (match_operand: 0 "s_register_operand") + (match_operator: 1 "comparison_operator" + [(match_operand:VDQW 2 "s_register_operand") + (match_operand:VDQW 3 "reg_or_zero_operand")]))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false); + DONE; +}) + +(define_expand "vec_cmpu" + [(set (match_operand:VDQIW 0 "s_register_operand") + (match_operator:VDQIW 1 "comparison_operator" + [(match_operand:VDQIW 2 "s_register_operand") + (match_operand:VDQIW 3 "reg_or_zero_operand")]))] + "TARGET_NEON" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false); + DONE; +}) + ;; Conditional instructions. These are comparisons with conditional moves for ;; vectors. They perform the assignment: ;; @@ -1543,230 +1567,53 @@ (if_then_else:VDQW (match_operator 3 "comparison_operator" [(match_operand:VDQW 4 "s_register_operand") - (match_operand:VDQW 5 "nonmemory_operand")]) + (match_operand:VDQW 5 "reg_or_zero_operand")]) (match_operand:VDQW 1 "s_register_operand") (match_operand:VDQW 2 "s_register_operand")))] "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" { - int inverse = 0; - int use_zero_form = 0; - int swap_bsl_operands = 0; - rtx mask = gen_reg_rtx (mode); - rtx tmp = gen_reg_rtx (mode); - - rtx (*base_comparison) (rtx, rtx, rtx); - rtx (*complimentary_comparison) (rtx, rtx, rtx); - - switch (GET_CODE (operands[3])) - { - case GE: - case GT: - case LE: - case LT: - case EQ: - if (operands[5] == CONST0_RTX (mode)) - { - use_zero_form = 1; - break; - } - /* Fall through. */ - default: - if (!REG_P (operands[5])) - operands[5] = force_reg (mode, operands[5]); - } - - switch (GET_CODE (operands[3])) - { - case LT: - case UNLT: - inverse = 1; - /* Fall through. */ - case GE: - case UNGE: - case ORDERED: - case UNORDERED: - base_comparison = gen_neon_vcge; - complimentary_comparison = gen_neon_vcgt; - break; - case LE: - case UNLE: - inverse = 1; - /* Fall through. */ - case GT: - case UNGT: - base_comparison = gen_neon_vcgt; - complimentary_comparison = gen_neon_vcge; - break; - case EQ: - case NE: - case UNEQ: - base_comparison = gen_neon_vceq; - complimentary_comparison = gen_neon_vceq; - break; - default: - gcc_unreachable (); - } - - switch (GET_CODE (operands[3])) - { - case LT: - case LE: - case GT: - case GE: - case EQ: - /* The easy case. Here we emit one of vcge, vcgt or vceq. - As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are: - a GE b -> a GE b - a GT b -> a GT b - a LE b -> b GE a - a LT b -> b GT a - a EQ b -> a EQ b - Note that there also exist direct comparison against 0 forms, - so catch those as a special case. */ - if (use_zero_form) - { - inverse = 0; - switch (GET_CODE (operands[3])) - { - case LT: - base_comparison = gen_neon_vclt; - break; - case LE: - base_comparison = gen_neon_vcle; - break; - default: - /* Do nothing, other zero form cases already have the correct - base_comparison. 
*/ - break; - } - } - - if (!inverse) - emit_insn (base_comparison (mask, operands[4], operands[5])); - else - emit_insn (complimentary_comparison (mask, operands[5], operands[4])); - break; - case UNLT: - case UNLE: - case UNGT: - case UNGE: - case NE: - /* Vector compare returns false for lanes which are unordered, so if we use - the inverse of the comparison we actually want to emit, then - swap the operands to BSL, we will end up with the correct result. - Note that a NE NaN and NaN NE b are true for all a, b. - - Our transformations are: - a GE b -> !(b GT a) - a GT b -> !(b GE a) - a LE b -> !(a GT b) - a LT b -> !(a GE b) - a NE b -> !(a EQ b) */ - - if (inverse) - emit_insn (base_comparison (mask, operands[4], operands[5])); - else - emit_insn (complimentary_comparison (mask, operands[5], operands[4])); - - swap_bsl_operands = 1; - break; - case UNEQ: - /* We check (a > b || b > a). combining these comparisons give us - true iff !(a != b && a ORDERED b), swapping the operands to BSL - will then give us (a == b || a UNORDERED b) as intended. */ - - emit_insn (gen_neon_vcgt (mask, operands[4], operands[5])); - emit_insn (gen_neon_vcgt (tmp, operands[5], operands[4])); - emit_insn (gen_ior3 (mask, mask, tmp)); - swap_bsl_operands = 1; - break; - case UNORDERED: - /* Operands are ORDERED iff (a > b || b >= a). - Swapping the operands to BSL will give the UNORDERED case. */ - swap_bsl_operands = 1; - /* Fall through. 
*/ - case ORDERED: - emit_insn (gen_neon_vcgt (tmp, operands[4], operands[5])); - emit_insn (gen_neon_vcge (mask, operands[5], operands[4])); - emit_insn (gen_ior3 (mask, mask, tmp)); - break; - default: - gcc_unreachable (); - } + arm_expand_vcond (operands, mode); + DONE; +}) - if (swap_bsl_operands) - emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], - operands[1])); - else - emit_insn (gen_neon_vbsl (operands[0], mask, operands[1], - operands[2])); +(define_expand "vcond" + [(set (match_operand: 0 "s_register_operand") + (if_then_else: + (match_operator 3 "comparison_operator" + [(match_operand:V32 4 "s_register_operand") + (match_operand:V32 5 "reg_or_zero_operand")]) + (match_operand: 1 "s_register_operand") + (match_operand: 2 "s_register_operand")))] + "TARGET_NEON && (! || flag_unsafe_math_optimizations)" +{ + arm_expand_vcond (operands, mode); DONE; }) -(define_expand "vcondu" - [(set (match_operand:VDQIW 0 "s_register_operand") - (if_then_else:VDQIW +(define_expand "vcondu" + [(set (match_operand:VDQW 0 "s_register_operand") + (if_then_else:VDQW (match_operator 3 "arm_comparison_operator" - [(match_operand:VDQIW 4 "s_register_operand") - (match_operand:VDQIW 5 "s_register_operand")]) - (match_operand:VDQIW 1 "s_register_operand") - (match_operand:VDQIW 2 "s_register_operand")))] + [(match_operand: 4 "s_register_operand") + (match_operand: 5 "reg_or_zero_operand")]) + (match_operand:VDQW 1 "s_register_operand") + (match_operand:VDQW 2 "s_register_operand")))] "TARGET_NEON" { - rtx mask; - int inverse = 0, immediate_zero = 0; - - mask = gen_reg_rtx (mode); - - if (operands[5] == CONST0_RTX (mode)) - immediate_zero = 1; - else if (!REG_P (operands[5])) - operands[5] = force_reg (mode, operands[5]); - - switch (GET_CODE (operands[3])) - { - case GEU: - emit_insn (gen_neon_vcgeu (mask, operands[4], operands[5])); - break; - - case GTU: - emit_insn (gen_neon_vcgtu (mask, operands[4], operands[5])); - break; - - case EQ: - emit_insn (gen_neon_vceq 
(mask, operands[4], operands[5])); - break; - - case LEU: - if (immediate_zero) - emit_insn (gen_neon_vcle (mask, operands[4], operands[5])); - else - emit_insn (gen_neon_vcgeu (mask, operands[5], operands[4])); - break; - - case LTU: - if (immediate_zero) - emit_insn (gen_neon_vclt (mask, operands[4], operands[5])); - else - emit_insn (gen_neon_vcgtu (mask, operands[5], operands[4])); - break; - - case NE: - emit_insn (gen_neon_vceq (mask, operands[4], operands[5])); - inverse = 1; - break; - - default: - gcc_unreachable (); - } - - if (inverse) - emit_insn (gen_neon_vbsl (operands[0], mask, operands[2], - operands[1])); - else - emit_insn (gen_neon_vbsl (operands[0], mask, operands[1], - operands[2])); + arm_expand_vcond (operands, mode); + DONE; +}) +(define_expand "vcond_mask_" + [(set (match_operand:VDQW 0 "s_register_operand") + (if_then_else:VDQW + (match_operand: 3 "s_register_operand") + (match_operand:VDQW 1 "s_register_operand") + (match_operand:VDQW 2 "s_register_operand")))] + "TARGET_NEON" +{ + emit_insn (gen_neon_vbsl (operands[0], operands[3], operands[1], + operands[2])); DONE; }) @@ -2601,7 +2448,7 @@ ;; These may expand to an UNSPEC pattern when a floating point mode is used ;; without unsafe math optimizations. 
-(define_expand "neon_vc" +(define_expand "@neon_vc" [(match_operand: 0 "s_register_operand") (neg: (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand") @@ -2641,7 +2488,7 @@ } ) -(define_insn "neon_vc_insn" +(define_insn "@neon_vc_insn" [(set (match_operand: 0 "s_register_operand" "=w,w") (neg: (COMPARISONS: @@ -2685,7 +2532,7 @@ [(set_attr "type" "neon_fp_compare_s")] ) -(define_expand "neon_vc" +(define_expand "@neon_vc" [(match_operand: 0 "s_register_operand") (neg: (COMPARISONS:VH @@ -2751,7 +2598,7 @@ } [(set_attr "type" "neon_fp_compare_s")]) -(define_insn "neon_vcu" +(define_insn "@neon_vc" [(set (match_operand: 0 "s_register_operand" "=w") (neg: (GTUGEU: @@ -4708,7 +4555,7 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_bsl")] ) -(define_expand "neon_vbsl" +(define_expand "@neon_vbsl" [(set (match_operand:VDQX 0 "s_register_operand") (unspec:VDQX [(match_operand: 1 "s_register_operand") (match_operand:VDQX 2 "s_register_operand") diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-1.c b/gcc/testsuite/gcc.target/arm/neon-compare-1.c new file mode 100644 index 0000000..c915eca --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-compare-1.c @@ -0,0 +1,84 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O1" } */ +/* { dg-add-options arm_neon } */ + +#define COMPARE_REG(NAME, OP, TYPE) \ + TYPE \ + cmp_##NAME##_##TYPE##_reg (TYPE a, TYPE b) \ + { \ + return a OP b; \ + } + +#define COMPARE_REG_AND_ZERO(NAME, OP, TYPE) \ + COMPARE_REG (NAME, OP, TYPE) \ + \ + TYPE \ + cmp_##NAME##_##TYPE##_zero (TYPE a) \ + { \ + return a OP (TYPE) {}; \ + } + +#define COMPARE_TYPE(TYPE, COMPARE_ORDERED) \ + COMPARE_REG_AND_ZERO (eq, ==, TYPE) \ + COMPARE_REG_AND_ZERO (ne, !=, TYPE) \ + COMPARE_ORDERED (lt, <, TYPE) \ + COMPARE_ORDERED (le, <=, TYPE) \ + COMPARE_ORDERED (gt, >, TYPE) \ + COMPARE_ORDERED (ge, >=, TYPE) + +#define TEST_TYPE(NAME, ELEM, COMPARE_ORDERED) \ + typedef ELEM NAME 
__attribute__((vector_size(16))); \ + COMPARE_TYPE (NAME, COMPARE_ORDERED) + +TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO) +TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG) +TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO) +TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG) +TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO) +TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG) + +/* { s8, u8 } x { eq, ne }. +/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvceq.i8\tq[0-9]+, q[0-9]+, #0\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvclt.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcle.s8\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.u8\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ + +/* { s16, u16 } x { eq, ne }. 
+/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvceq.i16\tq[0-9]+, q[0-9]+, #0\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvclt.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcle.s16\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.u16\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ + +/* { s32, u32 } x { eq, ne }. +/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvceq.i32\tq[0-9]+, q[0-9]+, #0\n} 4 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvclt.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcle.s32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.u32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-2.c b/gcc/testsuite/gcc.target/arm/neon-compare-2.c new file mode 100644 index 0000000..559c5e5 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-compare-2.c @@ -0,0 
+1,45 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O1 -funsafe-math-optimizations" } */ +/* { dg-add-options arm_neon } */ + +#ifndef ELEM_TYPE +#define ELEM_TYPE float +#endif +#ifndef INT_ELEM_TYPE +#define INT_ELEM_TYPE __INT32_TYPE__ +#endif + +#define COMPARE(NAME, OP) \ + int_vec \ + cmp_##NAME##_reg (vec a, vec b) \ + { \ + return a OP b; \ + } \ + \ + int_vec \ + cmp_##NAME##_zero (vec a) \ + { \ + return a OP (vec) {}; \ + } + +typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16))); +typedef ELEM_TYPE vec __attribute__((vector_size(16))); + +COMPARE (eq, ==) +COMPARE (ne, !=) +COMPARE (lt, <) +COMPARE (le, <=) +COMPARE (gt, >) +COMPARE (ge, >=) + +/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvceq.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-3.c b/gcc/testsuite/gcc.target/arm/neon-compare-3.c new file mode 100644 index 0000000..efbe797 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-compare-3.c @@ -0,0 +1,44 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */ +/* { dg-add-options arm_neon } */ + +#define ult(a, b) (!__builtin_isgreaterequal (a, b)) +#define ule(a, b) (!__builtin_isgreater (a, b)) +#define ugt(a, b) (!__builtin_islessequal (a, b)) +#define 
uge(a, b) (!__builtin_isless (a, b)) + +int x[16]; +float a[16]; +float b[16]; + +#define COMPARE(NAME) \ + void \ + cmp_##NAME##_reg (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], b[i]) ? 2 : 0; \ + } \ + \ + void \ + cmp_##NAME##_zero (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], 0) ? 2 : 0; \ + } + +typedef int int_vec __attribute__((vector_size(16))); +typedef float vec __attribute__((vector_size(16))); + +COMPARE (ult) +COMPARE (ule) +COMPARE (ugt) +COMPARE (uge) + +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-4.c b/gcc/testsuite/gcc.target/arm/neon-compare-4.c new file mode 100644 index 0000000..3f8cc90 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-compare-4.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */ +/* { dg-add-options arm_neon } */ + +#define ordered(a, b) (!__builtin_isunordered (a, b)) +#define unordered(a, b) (__builtin_isunordered (a, b)) + +int x[16]; +float a[16]; +float b[16]; + +#define COMPARE(NAME) \ + void \ + cmp_##NAME##_reg (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], b[i]) ? 2 : 0; \ + } \ + \ + void \ + cmp_##NAME##_zero (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], 0) ? 
2 : 0; \ + } + +typedef int int_vec __attribute__((vector_size(16))); +typedef float vec __attribute__((vector_size(16))); + +COMPARE (ordered) +COMPARE (unordered) + +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tvcge.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcle.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-compare-5.c b/gcc/testsuite/gcc.target/arm/neon-compare-5.c new file mode 100644 index 0000000..cb6428d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/neon-compare-5.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-O1 -ftree-vectorize -funsafe-math-optimizations" } */ +/* { dg-add-options arm_neon } */ + +#define uneq(a, b) (!__builtin_islessgreater (a, b)) +/* RTL's LTGT is a signaling comparison. */ +#define ltgt(a, b) (a < b || b < a) + +int x[16]; +float a[16]; +float b[16]; + +#define COMPARE(NAME) \ + void \ + cmp_##NAME##_reg (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], b[i]) ? 2 : 0; \ + } \ + \ + void \ + cmp_##NAME##_zero (void) \ + { \ + for (int i = 0; i < 16; ++i) \ + x[i] = NAME (a[i], 0) ? 
2 : 0; \ + } + +typedef int int_vec __attribute__((vector_size(16))); +typedef float vec __attribute__((vector_size(16))); + +COMPARE (uneq) +COMPARE (ltgt) + +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvcgt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvclt.f32\tq[0-9]+, q[0-9]+, #0\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vcond-gt.c b/gcc/testsuite/gcc.target/arm/neon-vcond-gt.c index 8e9f378..9f601a1 100644 --- a/gcc/testsuite/gcc.target/arm/neon-vcond-gt.c +++ b/gcc/testsuite/gcc.target/arm/neon-vcond-gt.c @@ -13,5 +13,5 @@ void foo (int ilast,float* w, float* w2) } } -/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ +/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */ /* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c b/gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c index c8306e3..74bc220 100644 --- a/gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c +++ b/gcc/testsuite/gcc.target/arm/neon-vcond-ltgt.c @@ -13,6 +13,7 @@ void foo (int ilast,float* w, float* w2) } } -/* { dg-final { scan-assembler-times "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" 2 } } */ +/* { dg-final { scan-assembler "vclt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */ +/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */ /* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ /* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c b/gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c index 
3bb67d3..8d31875 100644 --- a/gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c +++ b/gcc/testsuite/gcc.target/arm/neon-vcond-unordered.c @@ -13,7 +13,7 @@ void foo (int ilast,float* w, float* w2) } } -/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ -/* { dg-final { scan-assembler "vcge\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ +/* { dg-final { scan-assembler "vcgt\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */ +/* { dg-final { scan-assembler "vcle\\.f32\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*#0" } } */ /* { dg-final { scan-assembler "vorr\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ /* { dg-final { scan-assembler "vbsl|vbit|vbif\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+,\[\\t \]*q\[0-9\]+" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 38af678..15f0649 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -7233,6 +7233,8 @@ proc check_effective_target_vect_cond_mixed { } { expr { [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget aarch64*-*-*] || [istarget powerpc*-*-*] + || ([istarget arm*-*-*] + && [check_effective_target_arm_neon_ok]) || ([istarget mips*-*-*] && [et-is-effective-target mips_msa]) || ([istarget s390*-*-*] -- cgit v1.1 From b3ec0de08250d7e0599e36895d5cb727016c81d3 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Thu, 1 Oct 2020 11:07:20 +0200 Subject: [nvptx] Emit mov.u32 instead of cvt.u32.u32 for truncsiqi2 When running: ... $ gcc.sh src/gcc/testsuite/gcc.target/nvptx/abi-complex-arg.c -S -dP ... we have in abi-complex-arg.s: ... //(insn 3 5 4 2 // (set // (reg:QI 23) // (truncate:QI (reg:SI 22))) "abi-complex-arg.c":38:1 29 {truncsiqi2} // (nil)) cvt.u32.u32 %r23, %r22; // 3 [c=4] truncsiqi2/0 ... The cvt.u32.u32 can be written shorter and clearer as mov.u32. Fix this in define_insn "truncsi2". Tested on nvptx. 
gcc/ChangeLog: 2020-10-01 Tom de Vries PR target/80845 * config/nvptx/nvptx.md (define_insn "truncsi2"): Emit mov.u32 instead of cvt.u32.u32. --- gcc/config/nvptx/nvptx.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 035f6e0..ccbcd09 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -383,9 +383,13 @@ [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m") (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))] "" - "@ - %.\\tcvt%t0.u32\\t%0, %1; - %.\\tst%A0.u%T0\\t%0, %1;" + { + if (which_alternative == 1) + return "%.\\tst%A0.u%T0\\t%0, %1;"; + if (GET_MODE (operands[0]) == QImode) + return "%.\\tmov%t0\\t%0, %1;"; + return "%.\\tcvt%t0.u32\\t%0, %1;"; + } [(set_attr "subregs_ok" "true")]) (define_insn "truncdi2" -- cgit v1.1 From 2eb3c3daf6f5e2c09ade7a237749ba1f64a0a0a8 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Thu, 1 Oct 2020 20:57:48 +0200 Subject: Fix build of ppc64 target. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since a889e06ac68 the following fails. 
In file included from ../../gcc/tree-ssa-propagate.h:25:0, from ../../gcc/config/rs6000/rs6000.c:78: ../../gcc/value-query.h:90:31: error: ‘irange’ has not been declared virtual bool range_of_expr (irange &r, tree name, gimple * = NULL) = 0; ^~~~~~ ../../gcc/value-query.h:91:31: error: ‘irange’ has not been declared virtual bool range_on_edge (irange &r, edge, tree name); ^~~~~~ ../../gcc/value-query.h:92:31: error: ‘irange’ has not been declared virtual bool range_of_stmt (irange &r, gimple *, tree name = NULL); ^~~~~~ In file included from ../../gcc/tree-ssa-propagate.h:25:0, from ../../gcc/config/rs6000/rs6000-call.c:67: ../../gcc/value-query.h:90:31: error: ‘irange’ has not been declared virtual bool range_of_expr (irange &r, tree name, gimple * = NULL) = 0; ^~~~~~ ../../gcc/value-query.h:91:31: error: ‘irange’ has not been declared virtual bool range_on_edge (irange &r, edge, tree name); ^~~~~~ ../../gcc/value-query.h:92:31: error: ‘irange’ has not been declared virtual bool range_of_stmt (irange &r, gimple *, tree name = NULL); gcc/ChangeLog: * config/rs6000/rs6000-call.c: Include value-range.h. * config/rs6000/rs6000.c: Likewise. 
--- gcc/config/rs6000/rs6000-call.c | 1 + gcc/config/rs6000/rs6000.c | 1 + 2 files changed, 2 insertions(+) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index a8b52083..d10119b 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -64,6 +64,7 @@ #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ #endif #include "ppc-auxv.h" +#include "value-range.h" #include "tree-ssa-propagate.h" #include "tree-vrp.h" #include "tree-ssanames.h" diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 375fff5..6a05f84 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -75,6 +75,7 @@ #endif #include "case-cfn-macros.h" #include "ppc-auxv.h" +#include "value-range.h" #include "tree-ssa-propagate.h" #include "tree-vrp.h" #include "tree-ssanames.h" -- cgit v1.1 From dfaa24c974bab4bc1bd3840d67ca1701acc0010c Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Thu, 1 Oct 2020 12:36:46 -0700 Subject: c++: Kill DECL_HIDDEN_P There are only a couple of asserts remaining using this macro, and nothing using TYPE_HIDDEN_P. Killed thusly. gcc/cp/ * cp-tree.h (DECL_ANTICIPATED): Adjust comment. (DECL_HIDDEN_P, TYPE_HIDDEN_P): Delete. * tree.c (ovl_insert): Delete DECL_HIDDEN_P assert. (ovl_skip_hidden): Likewise. --- gcc/cp/cp-tree.h | 13 +------------ gcc/cp/tree.c | 12 ++---------- 2 files changed, 3 insertions(+), 22 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 48a4074..3ccd54c 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -4045,22 +4045,11 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter) /* Nonzero if NODE is a DECL which we know about but which has not been explicitly declared, such as a built-in function or a friend - declared inside a class. In the latter case DECL_HIDDEN_FRIEND_P - will be set. */ + declared inside a class. 
*/ #define DECL_ANTICIPATED(NODE) \ (DECL_LANG_SPECIFIC (TYPE_FUNCTION_OR_TEMPLATE_DECL_CHECK (NODE)) \ ->u.base.anticipated_p) -/* Is DECL NODE a hidden name? */ -#define DECL_HIDDEN_P(NODE) \ - (DECL_LANG_SPECIFIC (NODE) && TYPE_FUNCTION_OR_TEMPLATE_DECL_P (NODE) \ - && DECL_ANTICIPATED (NODE)) - -/* True if this is a hidden class type. */ -#define TYPE_HIDDEN_P(NODE) \ - (DECL_LANG_SPECIFIC (TYPE_NAME (NODE)) \ - && DECL_ANTICIPATED (TYPE_NAME (NODE))) - /* True for artificial decls added for OpenMP privatized non-static data members. */ #define DECL_OMP_PRIVATIZED_MEMBER(NODE) \ diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c index 0b80d8e..8b7c679 100644 --- a/gcc/cp/tree.c +++ b/gcc/cp/tree.c @@ -2261,8 +2261,6 @@ ovl_insert (tree fn, tree maybe_ovl, int using_or_hidden) { maybe_ovl = ovl_make (fn, maybe_ovl); - gcc_checking_assert ((using_or_hidden < 0) == DECL_HIDDEN_P (fn)); - if (using_or_hidden < 0) OVL_HIDDEN_P (maybe_ovl) = true; if (using_or_hidden > 0) @@ -2287,14 +2285,8 @@ ovl_insert (tree fn, tree maybe_ovl, int using_or_hidden) tree ovl_skip_hidden (tree ovl) { - for (; - ovl && TREE_CODE (ovl) == OVERLOAD && OVL_HIDDEN_P (ovl); - ovl = OVL_CHAIN (ovl)) - gcc_checking_assert (DECL_HIDDEN_P (OVL_FUNCTION (ovl))); - - /* We should not see a naked hidden decl. */ - gcc_checking_assert (!(ovl && TREE_CODE (ovl) != OVERLOAD - && DECL_HIDDEN_P (ovl))); + while (ovl && TREE_CODE (ovl) == OVERLOAD && OVL_HIDDEN_P (ovl)) + ovl = OVL_CHAIN (ovl); return ovl; } -- cgit v1.1 From 4830b30c823abaea8ea46dcece65c4681877b38d Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Thu, 1 Oct 2020 11:52:38 +0930 Subject: [RS6000] function for linux64 SUBSUBTARGET_OVERRIDE_OPTIONS * config/rs6000/freebsd64.h (SUBSUBTARGET_OVERRIDE_OPTIONS): Use rs6000_linux64_override_options. * config/rs6000/linux64.h (SUBSUBTARGET_OVERRIDE_OPTIONS): Break out to.. * config/rs6000/rs6000.c (rs6000_linux64_override_options): ..this, new function. 
Tweak non-biarch test and clearing of profile_kernel to work with freebsd64.h. --- gcc/config/rs6000/freebsd64.h | 60 +-------------------------- gcc/config/rs6000/linux64.h | 94 +----------------------------------------- gcc/config/rs6000/rs6000.c | 96 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 152 deletions(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/freebsd64.h b/gcc/config/rs6000/freebsd64.h index c991363..6984ca5 100644 --- a/gcc/config/rs6000/freebsd64.h +++ b/gcc/config/rs6000/freebsd64.h @@ -78,65 +78,7 @@ extern int dot_symbols; #undef SUBSUBTARGET_OVERRIDE_OPTIONS #define SUBSUBTARGET_OVERRIDE_OPTIONS \ - do \ - { \ - if (!global_options_set.x_rs6000_alignment_flags) \ - rs6000_alignment_flags = MASK_ALIGN_NATURAL; \ - if (TARGET_64BIT) \ - { \ - if (DEFAULT_ABI != ABI_AIX) \ - { \ - rs6000_current_abi = ABI_AIX; \ - error (INVALID_64BIT, "call"); \ - } \ - dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \ - if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \ - { \ - rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \ - error (INVALID_64BIT, "relocatable"); \ - } \ - if (ELFv2_ABI_CHECK) \ - { \ - rs6000_current_abi = ABI_ELFv2; \ - if (dot_symbols) \ - error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>"); \ - } \ - if (rs6000_isa_flags & OPTION_MASK_EABI) \ - { \ - rs6000_isa_flags &= ~OPTION_MASK_EABI; \ - error (INVALID_64BIT, "eabi"); \ - } \ - if (TARGET_PROTOTYPE) \ - { \ - target_prototype = 0; \ - error (INVALID_64BIT, "prototype"); \ - } \ - if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) \ - { \ - rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ - error ("%<-m64%> requires a PowerPC64 cpu"); \ - } \ - if ((rs6000_isa_flags_explicit \ - & OPTION_MASK_MINIMAL_TOC) != 0) \ - { \ - if (global_options_set.x_rs6000_current_cmodel \ - && rs6000_current_cmodel != CMODEL_SMALL) \ - error ("%<-mcmodel%> incompatible with other toc options"); \ - SET_CMODEL (CMODEL_SMALL); \ - } \ - else \ - { \ - if 
(!global_options_set.x_rs6000_current_cmodel) \ - SET_CMODEL (CMODEL_MEDIUM); \ - if (rs6000_current_cmodel != CMODEL_SMALL) \ - { \ - TARGET_NO_FP_IN_TOC = 0; \ - TARGET_NO_SUM_IN_TOC = 0; \ - } \ - } \ - } \ - } \ - while (0) + do rs6000_linux64_override_options (); while (0) #undef ASM_SPEC #undef LINK_OS_FREEBSD_SPEC diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h index 5c9f8e3..73b6c01 100644 --- a/gcc/config/rs6000/linux64.h +++ b/gcc/config/rs6000/linux64.h @@ -96,99 +96,7 @@ extern int dot_symbols; #undef SUBSUBTARGET_OVERRIDE_OPTIONS #define SUBSUBTARGET_OVERRIDE_OPTIONS \ - do \ - { \ - if (!global_options_set.x_rs6000_alignment_flags) \ - rs6000_alignment_flags = MASK_ALIGN_NATURAL; \ - if (rs6000_isa_flags & OPTION_MASK_64BIT) \ - { \ - if (DEFAULT_ABI != ABI_AIX) \ - { \ - rs6000_current_abi = ABI_AIX; \ - error (INVALID_64BIT, "call"); \ - } \ - dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \ - if (ELFv2_ABI_CHECK) \ - { \ - rs6000_current_abi = ABI_ELFv2; \ - if (dot_symbols) \ - error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>"); \ - } \ - if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \ - { \ - rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \ - error (INVALID_64BIT, "relocatable"); \ - } \ - if (rs6000_isa_flags & OPTION_MASK_EABI) \ - { \ - rs6000_isa_flags &= ~OPTION_MASK_EABI; \ - error (INVALID_64BIT, "eabi"); \ - } \ - if (TARGET_PROTOTYPE) \ - { \ - target_prototype = 0; \ - error (INVALID_64BIT, "prototype"); \ - } \ - if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) \ - { \ - rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ - error ("%<-m64%> requires a PowerPC64 cpu"); \ - } \ - if (!global_options_set.x_rs6000_current_cmodel) \ - SET_CMODEL (CMODEL_MEDIUM); \ - if ((rs6000_isa_flags_explicit \ - & OPTION_MASK_MINIMAL_TOC) != 0) \ - { \ - if (global_options_set.x_rs6000_current_cmodel \ - && rs6000_current_cmodel != CMODEL_SMALL) \ - error ("%<-mcmodel incompatible with other toc options%>"); \ - if 
(TARGET_MINIMAL_TOC) \ - SET_CMODEL (CMODEL_SMALL); \ - else if (TARGET_PCREL \ - || (PCREL_SUPPORTED_BY_OS \ - && (rs6000_isa_flags_explicit \ - & OPTION_MASK_PCREL) == 0)) \ - /* Ignore -mno-minimal-toc. */ \ - ; \ - else \ - SET_CMODEL (CMODEL_SMALL); \ - } \ - else \ - { \ - if (rs6000_current_cmodel != CMODEL_SMALL) \ - { \ - if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \ - TARGET_NO_FP_IN_TOC \ - = rs6000_current_cmodel == CMODEL_MEDIUM; \ - if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) \ - TARGET_NO_SUM_IN_TOC = 0; \ - } \ - } \ - if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2) \ - { \ - if (global_options_set.x_rs6000_pltseq) \ - warning (0, "%qs unsupported for this ABI", \ - "-mpltseq"); \ - rs6000_pltseq = false; \ - } \ - } \ - else \ - { \ - if (!RS6000_BI_ARCH_P) \ - error (INVALID_32BIT, "32"); \ - if (TARGET_PROFILE_KERNEL) \ - { \ - TARGET_PROFILE_KERNEL = 0; \ - error (INVALID_32BIT, "profile-kernel"); \ - } \ - if (global_options_set.x_rs6000_current_cmodel) \ - { \ - SET_CMODEL (CMODEL_SMALL); \ - error (INVALID_32BIT, "cmodel"); \ - } \ - } \ - } \ - while (0) + do rs6000_linux64_override_options (); while (0) #undef ASM_SPEC #undef LINK_OS_LINUX_SPEC diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6a05f84..f5f927f 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -3452,6 +3452,102 @@ rs6000_override_options_after_change (void) flag_cunroll_grow_size = flag_peel_loops || optimize >= 3; } +#ifdef TARGET_USES_LINUX64_OPT +static void +rs6000_linux64_override_options () +{ + if (!global_options_set.x_rs6000_alignment_flags) + rs6000_alignment_flags = MASK_ALIGN_NATURAL; + if (rs6000_isa_flags & OPTION_MASK_64BIT) + { + if (DEFAULT_ABI != ABI_AIX) + { + rs6000_current_abi = ABI_AIX; + error (INVALID_64BIT, "call"); + } + dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); + if (ELFv2_ABI_CHECK) + { + rs6000_current_abi = ABI_ELFv2; + if (dot_symbols) + error ("%<-mcall-aixdesc%> incompatible 
with %<-mabi=elfv2%>"); + } + if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) + { + rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; + error (INVALID_64BIT, "relocatable"); + } + if (rs6000_isa_flags & OPTION_MASK_EABI) + { + rs6000_isa_flags &= ~OPTION_MASK_EABI; + error (INVALID_64BIT, "eabi"); + } + if (TARGET_PROTOTYPE) + { + target_prototype = 0; + error (INVALID_64BIT, "prototype"); + } + if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) + { + rs6000_isa_flags |= OPTION_MASK_POWERPC64; + error ("%<-m64%> requires a PowerPC64 cpu"); + } + if (!global_options_set.x_rs6000_current_cmodel) + SET_CMODEL (CMODEL_MEDIUM); + if ((rs6000_isa_flags_explicit + & OPTION_MASK_MINIMAL_TOC) != 0) + { + if (global_options_set.x_rs6000_current_cmodel + && rs6000_current_cmodel != CMODEL_SMALL) + error ("%<-mcmodel incompatible with other toc options%>"); + if (TARGET_MINIMAL_TOC) + SET_CMODEL (CMODEL_SMALL); + else if (TARGET_PCREL + || (PCREL_SUPPORTED_BY_OS + && (rs6000_isa_flags_explicit + & OPTION_MASK_PCREL) == 0)) + /* Ignore -mno-minimal-toc. */ + ; + else + SET_CMODEL (CMODEL_SMALL); + } + else + { + if (rs6000_current_cmodel != CMODEL_SMALL) + { + if (!global_options_set.x_TARGET_NO_FP_IN_TOC) + TARGET_NO_FP_IN_TOC + = rs6000_current_cmodel == CMODEL_MEDIUM; + if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) + TARGET_NO_SUM_IN_TOC = 0; + } + } + if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2) + { + if (global_options_set.x_rs6000_pltseq) + warning (0, "%qs unsupported for this ABI", + "-mpltseq"); + rs6000_pltseq = false; + } + } + else if (TARGET_64BIT) + error (INVALID_32BIT, "32"); + else + { + if (TARGET_PROFILE_KERNEL) + { + profile_kernel = 0; + error (INVALID_32BIT, "profile-kernel"); + } + if (global_options_set.x_rs6000_current_cmodel) + { + SET_CMODEL (CMODEL_SMALL); + error (INVALID_32BIT, "cmodel"); + } + } +} +#endif + /* Override command line options. 
Combine build-specific configuration information with options -- cgit v1.1 From d26cc5885a491dedad0bdf3468a7b91c1f75a868 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Thu, 1 Oct 2020 18:41:37 +0930 Subject: [RS6000] rs6000_linux64_override_options fix Commit c6be439b37 wrongly left a block of code inside an "else" block, which changed the default for power10 TARGET_NO_FP_IN_TOC accidentally. We don't want FP constants in the TOC when -mcmodel=medium can address them just as efficiently outside the TOC. * config/rs6000/rs6000.c (rs6000_linux64_override_options): Formatting. Correct setting of TARGET_NO_FP_IN_TOC and TARGET_NO_SUM_IN_TOC. --- gcc/config/rs6000/rs6000.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index f5f927f..1b5b8e2 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -3494,8 +3494,7 @@ rs6000_linux64_override_options () } if (!global_options_set.x_rs6000_current_cmodel) SET_CMODEL (CMODEL_MEDIUM); - if ((rs6000_isa_flags_explicit - & OPTION_MASK_MINIMAL_TOC) != 0) + if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0) { if (global_options_set.x_rs6000_current_cmodel && rs6000_current_cmodel != CMODEL_SMALL) @@ -3504,23 +3503,18 @@ rs6000_linux64_override_options () SET_CMODEL (CMODEL_SMALL); else if (TARGET_PCREL || (PCREL_SUPPORTED_BY_OS - && (rs6000_isa_flags_explicit - & OPTION_MASK_PCREL) == 0)) + && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)) /* Ignore -mno-minimal-toc. 
*/ ; else SET_CMODEL (CMODEL_SMALL); } - else + if (rs6000_current_cmodel != CMODEL_SMALL) { - if (rs6000_current_cmodel != CMODEL_SMALL) - { - if (!global_options_set.x_TARGET_NO_FP_IN_TOC) - TARGET_NO_FP_IN_TOC - = rs6000_current_cmodel == CMODEL_MEDIUM; - if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) - TARGET_NO_SUM_IN_TOC = 0; - } + if (!global_options_set.x_TARGET_NO_FP_IN_TOC) + TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM; + if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) + TARGET_NO_SUM_IN_TOC = 0; } if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2) { -- cgit v1.1 From 4c69e61f4307865b95151006e480ae2022b30454 Mon Sep 17 00:00:00 2001 From: Alan Modra Date: Thu, 1 Oct 2020 19:14:09 +0930 Subject: [RS6000] ICE in decompose, at rtl.h:2282 during RTL pass: fwprop1 gcc.dg/pr82596.c: In function 'test_cststring': gcc.dg/pr82596.c:27:1: internal compiler error: in decompose, at rtl.h:2282 -m32 gcc/testsuite/gcc.dg/pr82596.c fails along with other tests after applying rtx_cost patches, which exposed a backend bug. legitimize_address when presented with the following address (plus (reg) (const_int 0x7ffffffff)) attempts to rewrite it as a high/low sum. The low part is 0xffff, or -1, making the high part 0x80000000. But this is no longer canonical for SImode. * config/rs6000/rs6000.c (rs6000_legitimize_address): Use gen_int_mode for high part of address constant. 
--- gcc/config/rs6000/rs6000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 1b5b8e2..69c4f36 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -8455,7 +8455,7 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, low_int = 0; high_int = INTVAL (XEXP (x, 1)) - low_int; sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0), - GEN_INT (high_int)), 0); + gen_int_mode (high_int, Pmode)), 0); return plus_constant (Pmode, sum, low_int); } else if (GET_CODE (x) == PLUS -- cgit v1.1 From 3e52eaab8c57ad06bcd553f140923a34e5749991 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 1 Oct 2020 15:11:22 -0700 Subject: compiler: set varargs correctly for type of method expression Fixes golang/go#41737 Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/258977 --- gcc/go/gofrontend/MERGE | 2 +- gcc/go/gofrontend/types.cc | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 8d9fda5..9482740 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -c9c084bce713e258721e12041a351ec8ad33ad17 +801c458a562d22260ff176c26d65639dd32c8a90 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. 
diff --git a/gcc/go/gofrontend/types.cc b/gcc/go/gofrontend/types.cc index 7f65b4a..e7a742f 100644 --- a/gcc/go/gofrontend/types.cc +++ b/gcc/go/gofrontend/types.cc @@ -5350,8 +5350,12 @@ Function_type::copy_with_receiver_as_param(bool want_pointer_receiver) const ++p) new_params->push_back(*p); } - return Type::make_function_type(NULL, new_params, this->results_, - this->location_); + Function_type* ret = Type::make_function_type(NULL, new_params, + this->results_, + this->location_); + if (this->is_varargs_) + ret->set_is_varargs(); + return ret; } // Make a copy of a function type ignoring any receiver and adding a -- cgit v1.1 From 6c2675fa2bbcfe37308af593edb18e2c1c8eabf0 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Fri, 2 Oct 2020 00:16:27 +0000 Subject: Daily bump. --- gcc/ChangeLog | 253 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/cp/ChangeLog | 39 ++++++++ gcc/testsuite/ChangeLog | 80 +++++++++++++++ 4 files changed, 373 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 988351b..dd0710e 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,256 @@ +2020-10-01 Alan Modra + + * config/rs6000/rs6000.c (rs6000_legitimize_address): Use + gen_int_mode for high part of address constant. + +2020-10-01 Alan Modra + + * config/rs6000/rs6000.c (rs6000_linux64_override_options): + Formatting. Correct setting of TARGET_NO_FP_IN_TOC and + TARGET_NO_SUM_IN_TOC. + +2020-10-01 Alan Modra + + * config/rs6000/freebsd64.h (SUBSUBTARGET_OVERRIDE_OPTIONS): Use + rs6000_linux64_override_options. + * config/rs6000/linux64.h (SUBSUBTARGET_OVERRIDE_OPTIONS): Break + out to.. + * config/rs6000/rs6000.c (rs6000_linux64_override_options): ..this, + new function. Tweak non-biarch test and clearing of + profile_kernel to work with freebsd64.h. + +2020-10-01 Martin Liska + + * config/rs6000/rs6000-call.c: Include value-range.h. + * config/rs6000/rs6000.c: Likewise. 
+ +2020-10-01 Tom de Vries + + PR target/80845 + * config/nvptx/nvptx.md (define_insn "truncsi2"): Emit mov.u32 + instead of cvt.u32.u32. + +2020-10-01 Richard Sandiford + + PR target/96528 + PR target/97288 + * config/arm/arm-protos.h (arm_expand_vector_compare): Declare. + (arm_expand_vcond): Likewise. + * config/arm/arm.c (arm_expand_vector_compare): New function. + (arm_expand_vcond): Likewise. + * config/arm/neon.md (vec_cmp): New pattern. + (vec_cmpu): Likewise. + (vcond): Require operand 5 to be a register + or zero. Use arm_expand_vcond. + (vcond): New pattern. + (vcondu): Generalize to... + (vcondu): New pattern. + (neon_vc, neon_vc_insn): Add "@" marker. + (neon_vbsl): Likewise. + (neon_vcu): Reexpress as... + (@neon_vc): ...this. + +2020-10-01 Michael Davidsaver + + * config/i386/t-rtems: Change from mtune to march when building + multilibs. The mtune argument tunes or optimizes for a specific + CPU model but does not ensure the generated code is appropriate + for the CPU model. Prior to this patch, i386 compatible code + was always generated but tuned for later models. + +2020-10-01 Aldy Hernandez + + * builtins.c (compute_objsize): Replace vr_values with range_query. + (get_range): Same. + (gimple_call_alloc_size): Same. + * builtins.h (class vr_values): Remove. + (gimple_call_alloc_size): Replace vr_values with range_query. + * gimple-ssa-sprintf.c (get_int_range): Same. + (struct directive): Pass gimple context to fmtfunc callback. + (directive::set_width): Replace inline with out-of-line version. + (directive::set_precision): Same. + (format_none): New gimple argument. + (format_percent): New gimple argument. + (format_integer): New gimple argument. + (format_floating): New gimple argument. + (get_string_length): Use range_query API. + (format_character): New gimple argument. + (format_string): New gimple argument. + (format_plain): New gimple argument. + (format_directive): New gimple argument. 
+ (parse_directive): Replace vr_values with range_query. + (compute_format_length): Same. + (handle_printf_call): Same. Adjust for range_query API. + * tree-ssa-strlen.c (get_range): Same. + (compare_nonzero_chars): Same. + (get_addr_stridx) Replace vr_values with range_query. + (get_stridx): Same. + (dump_strlen_info): Same. + (get_range_strlen_dynamic): Adjust for range_query API. + (set_strlen_range): Same + (maybe_warn_overflow): Replace vr_values with range_query. + (handle_builtin_strcpy): Same. + (maybe_diag_stxncpy_trunc): Add FIXME comment. + (handle_builtin_memcpy): Replace vr_values with range_query. + (handle_builtin_memset): Same. + (get_len_or_size): Same. + (strxcmp_eqz_result): Same. + (handle_builtin_string_cmp): Same. + (count_nonzero_bytes_addr): Same, plus adjust for range_query API. + (count_nonzero_bytes): Replace vr_values with range_query. + (handle_store): Same. + (strlen_check_and_optimize_call): Same. + (handle_integral_assign): Same. + (check_and_optimize_stmt): Same. + * tree-ssa-strlen.h (class vr_values): Remove. + (get_range): Replace vr_values with range_query. + (get_range_strlen_dynamic): Same. + (handle_printf_call): Same. + +2020-10-01 Aldy Hernandez + + * gimple-loop-versioning.cc (lv_dom_walker::before_dom_children): + Pass m_range_analyzer instead of get_vr_values. + (loop_versioning::name_prop::get_value): Rename to... + (loop_versioning::name_prop::value_of_expr): ...this. + * gimple-ssa-evrp-analyze.c (evrp_range_analyzer::evrp_range_analyzer): + Adjust for evrp_range_analyzer + inheriting from vr_values. + (evrp_range_analyzer::try_find_new_range): Same. + (evrp_range_analyzer::record_ranges_from_incoming_edge): Same. + (evrp_range_analyzer::record_ranges_from_phis): Same. + (evrp_range_analyzer::record_ranges_from_stmt): Same. + (evrp_range_analyzer::push_value_range): Same. + (evrp_range_analyzer::pop_value_range): Same. + * gimple-ssa-evrp-analyze.h (class evrp_range_analyzer): Inherit from + vr_values. 
Adjust accordingly. + * gimple-ssa-evrp.c: Adjust for evrp_range_analyzer inheriting from + vr_values. + (evrp_folder::value_of_evrp): Rename from get_value. + * tree-ssa-ccp.c (class ccp_folder): Rename get_value to + value_of_expr. + (ccp_folder::get_value): Rename to... + (ccp_folder::value_of_expr): ...this. + * tree-ssa-copy.c (class copy_folder): Rename get_value to + value_of_expr. + (copy_folder::get_value): Rename to... + (copy_folder::value_of_expr): ...this. + * tree-ssa-dom.c (dom_opt_dom_walker::after_dom_children): Adjust + for evrp_range_analyzer inheriting from vr_values. + (dom_opt_dom_walker::optimize_stmt): Same. + * tree-ssa-propagate.c (substitute_and_fold_engine::replace_uses_in): + Call value_of_* instead of get_value. + (substitute_and_fold_engine::replace_phi_args_in): Same. + (substitute_and_fold_engine::propagate_into_phi_args): Same. + (substitute_and_fold_dom_walker::before_dom_children): Same. + * tree-ssa-propagate.h: Include value-query.h. + (class substitute_and_fold_engine): Inherit from value_query. + * tree-ssa-strlen.c (strlen_dom_walker::before_dom_children): + Adjust for evrp_range_analyzer inheriting from vr_values. + * tree-ssa-threadedge.c (record_temporary_equivalences_from_phis): + Same. + * tree-vrp.c (class vrp_folder): Same. + (vrp_folder::get_value): Rename to value_of_expr. + * vr-values.c (vr_values::get_lattice_entry): Adjust for + vr_values inheriting from range_query. + (vr_values::range_of_expr): New. + (vr_values::value_of_expr): New. + (vr_values::value_on_edge): New. + (vr_values::value_of_stmt): New. + (simplify_using_ranges::op_with_boolean_value_range_p): Call + get_value_range through query. + (check_for_binary_op_overflow): Rename store to query. + (vr_values::vr_values): Remove vrp_value_range_pool. + (vr_values::~vr_values): Same. + (simplify_using_ranges::get_vr_for_comparison): Call get_value_range + through query. + (simplify_using_ranges::compare_names): Same. 
+ (simplify_using_ranges::vrp_evaluate_conditional): Same. + (simplify_using_ranges::vrp_visit_cond_stmt): Same. + (simplify_using_ranges::simplify_abs_using_ranges): Same. + (simplify_using_ranges::simplify_cond_using_ranges_1): Same. + (simplify_cond_using_ranges_2): Same. + (simplify_using_ranges::simplify_switch_using_ranges): Same. + (simplify_using_ranges::two_valued_val_range_p): Same. + (simplify_using_ranges::simplify_using_ranges): Rename store to query. + (simplify_using_ranges::simplify): Assert that we have a query. + * vr-values.h (class range_query): Remove. + (class simplify_using_ranges): Remove inheritance of range_query. + (class vr_values): Add virtuals for range_of_expr, value_of_expr, + value_on_edge, value_of_stmt, and get_value_range. + Call range_query allocator instead of using vrp_value_range_pool. + Remove vrp_value_range_pool. + (simplify_using_ranges::get_value_range): Remove. + +2020-10-01 Richard Biener + + PR tree-optimization/97236 + * tree-vect-stmts.c (get_group_load_store_type): Keep + VMAT_ELEMENTWISE for single-element vectors. + +2020-10-01 Jan Hubicka + + * ipa-modref.c (compute_parm_map): Be ready for callee_pi to be NULL. + +2020-10-01 Jan Hubicka + + PR ipa/97244 + * ipa-fnsummary.c (pass_free_fnsummary::execute): Free + also indirect inlining datastructure. + * ipa-modref.c (pass_ipa_modref::execute): Do not free them here. + * ipa-prop.c (ipa_free_all_node_params): Do not crash when info does + not exist. + (ipa_unregister_cgraph_hooks): Likewise. + +2020-10-01 Jan Hubicka + + * internal-fn.c (DEF_INTERNAL_FN): Fix handling of fnspec + +2020-10-01 Aldy Hernandez + + * Makefile.in: Add value-query.o. + * value-query.cc: New file. + * value-query.h: New file. + +2020-10-01 Alex Coplan + + * config/arm/arm-cpus.in: Fix ordering, move Neoverse N2 down. + * config/arm/arm-tables.opt: Regenerate. + * config/arm/arm-tune.md: Regenerate. 
+ +2020-10-01 Jakub Jelinek + + * config/s390/s390.c (s390_atomic_assign_expand_fenv): Use + TARGET_EXPR instead of MODIFY_EXPR for the first assignments to + fenv_var and old_fpc. Formatting fixes. + +2020-10-01 Richard Biener + + * tree-vect-patterns.c (vect_recog_bool_pattern): Also handle + VIEW_CONVERT_EXPR. + +2020-10-01 Florian Weimer + + PR target/97250 + * config/i386/i386.h (PTA_NO_TUNE, PTA_X86_64_BASELINE) + (PTA_X86_64_V2, PTA_X86_64_V3, PTA_X86_64_V4): New. + * common/config/i386/i386-common.c (processor_alias_table): + Add "x86-64-v2", "x86-64-v3", "x86-64-v4". + * config/i386/i386-options.c (ix86_option_override_internal): + Handle new PTA_NO_TUNE processor table entries. + * doc/invoke.texi (x86 Options): Document new -march values. + +2020-10-01 Alan Modra + + * config/rs6000/ppc-asm.h: Support __PCREL__ code. + +2020-10-01 Alan Modra + + * config/rs6000/linux64.h (SUBSUBTARGET_OVERRIDE_OPTIONS): Don't + set -mcmodel=small for -mno-minimal-toc when pcrel. + 2020-09-30 Martin Sebor PR middle-end/97189 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index c5ffab1..4d58d2f 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20201001 +20201002 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index eeb6f8a..ed416cc 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,42 @@ +2020-10-01 Nathan Sidwell + + * cp-tree.h (DECL_ANTICIPATED): Adjust comment. + (DECL_HIDDEN_P, TYPE_HIDDEN_P): Delete. + * tree.c (ovl_insert): Delete DECL_HIDDEN_P assert. + (ovl_skip_hidden): Likewise. + +2020-10-01 Nathan Sidwell + + * name-lookup.c (pushdecl_top_level): Assert incoming context is + null, add global_namespace context. + (pushdecl_top_level_and_finish): Likewise. + * pt.c (get_template_parm_object): Clear decl context before + pushing. + * semantics.c (finish_compound_literal): Likewise. + +2020-10-01 Nathan Sidwell + + * decl.c (lookup_and_check_tag): Refactor. 
+ +2020-10-01 Jakub Jelinek + + PR c++/96994 + * call.c (build_over_call): If obj_arg is non-NULL, return INIT_EXPR + setting obj_arg to call. + +2020-10-01 Jakub Jelinek + + PR c++/97195 + * constexpr.c (cxx_eval_call_expression): Don't VERIFY_CONSTANT the + second argument. + +2020-10-01 Marek Polacek + + PR c++/90210 + * pt.c (do_class_deduction): Don't prune explicit deduction guides + in copy-list-initialization. In copy-list-initialization, if an + explicit deduction guide was selected, give an error. + 2020-09-30 Nathan Sidwell * cp-tree.h (struct lang_decl_fn): Remove hidden_friend_p. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 72508ab..3f802a2 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,83 @@ +2020-10-01 Richard Sandiford + + * lib/target-supports.exp (check_effective_target_vect_cond_mixed): Add + arm neon targets. + * gcc.target/arm/neon-compare-1.c: New test. + * gcc.target/arm/neon-compare-2.c: Likewise. + * gcc.target/arm/neon-compare-3.c: Likewise. + * gcc.target/arm/neon-compare-4.c: Likewise. + * gcc.target/arm/neon-compare-5.c: Likewise. + * gcc.target/arm/neon-vcond-gt.c: Expect comparisons with zero. + * gcc.target/arm/neon-vcond-ltgt.c: Likewise. + * gcc.target/arm/neon-vcond-unordered.c: Likewise. + +2020-10-01 Richard Sandiford + + * gcc.target/aarch64/movtf_1.c: Restrict the asm matching to lp64. + * gcc.target/aarch64/movti_1.c: Likewise. + +2020-10-01 Andrea Corallo + + PR target/96375 + * gcc.target/arm/lob1.c: Fix missing flag. + * gcc.target/arm/lob2.c: Likewise. + * gcc.target/arm/lob3.c: Likewise. + * gcc.target/arm/lob4.c: Likewise. + * gcc.target/arm/lob5.c: Likewise. + * gcc.target/arm/lob6.c: Likewise. + * lib/target-supports.exp + (check_effective_target_arm_v8_1_lob_ok): Return 1 only for + cortex-m targets, add '-mthumb' flag. + +2020-10-01 Richard Biener + + PR tree-optimization/97236 + * gcc.dg/vect/pr97236.c: New testcase. 
+ +2020-10-01 Jan Hubicka + + PR ipa/97243 + * gcc.c-torture/compile/pr97243.c: New test. + +2020-10-01 Jan Hubicka + + PR ipa/97244 + * gcc.dg/ipa/remref-2a.c: Add -fno-ipa-modref + +2020-10-01 Tom de Vries + + * gcc.dg/pr94600-1.c: Force 32-bit alignment for a0 for !non_strict_align + targets. Remove target clauses from scan tests. + * gcc.dg/pr94600-3.c: Same. + +2020-10-01 Jakub Jelinek + + PR c++/96994 + * g++.dg/cpp2a/consteval18.C: New test. + +2020-10-01 Jakub Jelinek + + PR c++/97195 + * g++.dg/cpp2a/constexpr-new14.C: New test. + +2020-10-01 Richard Biener + + * g++.dg/vect/pr97255.cc: New testcase. + +2020-10-01 Florian Weimer + + PR target/97250 + * gcc.target/i386/x86-64-v2.c: New test. + * gcc.target/i386/x86-64-v3.c: New test. + * gcc.target/i386/x86-64-v3-haswell.c: New test. + * gcc.target/i386/x86-64-v3-skylake.c: New test. + * gcc.target/i386/x86-64-v4.c: New test. + +2020-10-01 Marek Polacek + + PR c++/90210 + * g++.dg/cpp1z/class-deduction73.C: New test. + 2020-09-30 Martin Sebor PR middle-end/97189 -- cgit v1.1 From 6a0423c52ef56d6fc2e0392b91bf22941fdeb0db Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Fri, 2 Oct 2020 10:36:17 +0200 Subject: Implement irange::fits_p. This should have been included in the irange_allocator patch, as a method to see if the current object can hold a passed range without truncation. gcc/ChangeLog: * value-range.h (irange::fits_p): New. --- gcc/value-range.h | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/value-range.h b/gcc/value-range.h index c875e71..94b48e5 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -81,6 +81,7 @@ public: bool operator!= (const irange &r) const { return !(*this == r); } // Misc methods. + bool fits_p (const irange &r) { return m_max_ranges >= r.num_pairs (); } void dump (FILE * = stderr) const; // Deprecated legacy public methods. 
-- cgit v1.1 From b6158faacbfb7d24a1d25b3774bc4338dd849480 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Thu, 1 Oct 2020 10:08:58 +0200 Subject: c++: Move CALL_FROM_NEW_OR_DELETE_P to tree.h As discussed with richi, we should be able to use TREE_PROTECTED for this flag, since CALL_FROM_THUNK_P will never be set on a call to an operator new or delete. 2020-10-01 Jason Merrill gcc/cp/ChangeLog: * lambda.c (call_from_lambda_thunk_p): New. * cp-gimplify.c (cp_genericize_r): Use it. * pt.c (tsubst_copy_and_build): Use it. * typeck.c (check_return_expr): Use it. * cp-tree.h: Declare it. (CALL_FROM_NEW_OR_DELETE_P): Move to gcc/tree.h. gcc/ChangeLog: * tree.h (CALL_FROM_NEW_OR_DELETE_P): Move from cp-tree.h. * tree-core.h: Document new usage of protected_flag. --- gcc/cp/cp-gimplify.c | 2 +- gcc/cp/cp-tree.h | 7 +------ gcc/cp/lambda.c | 7 +++++++ gcc/cp/pt.c | 2 +- gcc/cp/typeck.c | 2 +- gcc/tree-core.h | 3 ++- gcc/tree.h | 9 ++++++++- 7 files changed, 21 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/cp-gimplify.c b/gcc/cp/cp-gimplify.c index bc8a03c..0754982 100644 --- a/gcc/cp/cp-gimplify.c +++ b/gcc/cp/cp-gimplify.c @@ -962,7 +962,7 @@ cp_genericize_r (tree *stmt_p, int *walk_subtrees, void *data) omp_cxx_notice_variable (wtd->omp_ctx, stmt); /* Don't dereference parms in a thunk, pass the references through. 
*/ - if ((TREE_CODE (stmt) == CALL_EXPR && CALL_FROM_THUNK_P (stmt)) + if ((TREE_CODE (stmt) == CALL_EXPR && call_from_lambda_thunk_p (stmt)) || (TREE_CODE (stmt) == AGGR_INIT_EXPR && AGGR_INIT_FROM_THUNK_P (stmt))) { *walk_subtrees = 0; diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 3ccd54c..fda5ffa 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -464,7 +464,6 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX]; SWITCH_STMT_NO_BREAK_P (in SWITCH_STMT) LAMBDA_EXPR_CAPTURE_OPTIMIZED (in LAMBDA_EXPR) IMPLICIT_CONV_EXPR_BRACED_INIT (in IMPLICIT_CONV_EXPR) - CALL_FROM_NEW_OR_DELETE_P (in CALL_EXPR) 3: IMPLICIT_RVALUE_P (in NON_LVALUE_EXPR or STATIC_CAST_EXPR) ICS_BAD_FLAG (in _CONV) FN_TRY_BLOCK_P (in TRY_BLOCK) @@ -3839,11 +3838,6 @@ struct GTY(()) lang_decl { should be performed at instantiation time. */ #define KOENIG_LOOKUP_P(NODE) TREE_LANG_FLAG_0 (CALL_EXPR_CHECK (NODE)) -/* In a CALL_EXPR, true for allocator calls from new or delete - expressions. */ -#define CALL_FROM_NEW_OR_DELETE_P(NODE) \ - TREE_LANG_FLAG_2 (CALL_EXPR_CHECK (NODE)) - /* True if the arguments to NODE should be evaluated in left-to-right order regardless of PUSH_ARGS_REVERSED. 
*/ #define CALL_EXPR_ORDERED_ARGS(NODE) \ @@ -7268,6 +7262,7 @@ extern bool lambda_fn_in_template_p (tree); extern void maybe_add_lambda_conv_op (tree); extern bool is_lambda_ignored_entity (tree); extern bool lambda_static_thunk_p (tree); +extern bool call_from_lambda_thunk_p (tree); extern tree finish_builtin_launder (location_t, tree, tsubst_flags_t); extern tree cp_build_vec_convert (tree, location_t, tree, diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c index 07a5401..1a1647f 100644 --- a/gcc/cp/lambda.c +++ b/gcc/cp/lambda.c @@ -1325,6 +1325,13 @@ lambda_static_thunk_p (tree fn) && LAMBDA_TYPE_P (CP_DECL_CONTEXT (fn))); } +bool +call_from_lambda_thunk_p (tree call) +{ + return (CALL_FROM_THUNK_P (call) + && lambda_static_thunk_p (current_function_decl)); +} + /* Returns true iff VAL is a lambda-related declaration which should be ignored by unqualified lookup. */ diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 45b18f6..72efecf 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -19955,7 +19955,7 @@ tsubst_copy_and_build (tree t, /* Stripped-down processing for a call in a thunk. Specifically, in the thunk template for a generic lambda. */ - if (CALL_FROM_THUNK_P (t)) + if (call_from_lambda_thunk_p (t)) { /* Now that we've expanded any packs, the number of call args might be different. */ diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c index 9166156..95b36a9 100644 --- a/gcc/cp/typeck.c +++ b/gcc/cp/typeck.c @@ -10171,7 +10171,7 @@ check_return_expr (tree retval, bool *no_warning) /* The call in a (lambda) thunk needs no conversions. 
*/ if (TREE_CODE (retval) == CALL_EXPR - && CALL_FROM_THUNK_P (retval)) + && call_from_lambda_thunk_p (retval)) converted = true; /* First convert the value to the function's return type, then diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 0e15878..752bec3 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -1220,7 +1220,8 @@ struct GTY(()) tree_base { all decls CALL_FROM_THUNK_P and - CALL_ALLOCA_FOR_VAR_P in + CALL_ALLOCA_FOR_VAR_P and + CALL_FROM_NEW_OR_DELETE_P in CALL_EXPR OMP_CLAUSE_LINEAR_VARIABLE_STRIDE in diff --git a/gcc/tree.h b/gcc/tree.h index 5bb6e7b..f27a739 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -921,7 +921,8 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int, (TREE_CHECK (NODE, PARM_DECL)->decl_common.decl_nonshareable_flag) /* In a CALL_EXPR, means that the call is the jump from a thunk to the - thunked-to function. */ + thunked-to function. Be careful to avoid using this macro when one of the + next two applies instead. */ #define CALL_FROM_THUNK_P(NODE) (CALL_EXPR_CHECK (NODE)->base.protected_flag) /* In a CALL_EXPR, if the function being called is BUILT_IN_ALLOCA, means that @@ -931,6 +932,12 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int, #define CALL_ALLOCA_FOR_VAR_P(NODE) \ (CALL_EXPR_CHECK (NODE)->base.protected_flag) +/* In a CALL_EXPR, if the function being called is DECL_IS_OPERATOR_NEW_P or + DECL_IS_OPERATOR_DELETE_P, true for allocator calls from C++ new or delete + expressions. */ +#define CALL_FROM_NEW_OR_DELETE_P(NODE) \ + (CALL_EXPR_CHECK (NODE)->base.protected_flag) + /* Used in classes in C++. */ #define TREE_PRIVATE(NODE) ((NODE)->base.private_flag) /* Used in classes in C++. 
*/ -- cgit v1.1 From 0b945f959f03a6185a3130f30bfd524d01d4142c Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 1 Oct 2020 10:44:27 +0200 Subject: make use of CALL_FROM_NEW_OR_DELETE_P This fixes points-to analysis and DCE to only consider new/delete operator calls from new or delete expressions and not direct calls. 2020-10-01 Richard Biener * gimple.h (GF_CALL_FROM_NEW_OR_DELETE): New call flag. (gimple_call_set_from_new_or_delete): New. (gimple_call_from_new_or_delete): Likewise. * gimple.c (gimple_build_call_from_tree): Set GF_CALL_FROM_NEW_OR_DELETE appropriately. * ipa-icf-gimple.c (func_checker::compare_gimple_call): Compare gimple_call_from_new_or_delete. * tree-ssa-dce.c (mark_all_reaching_defs_necessary_1): Make sure to only consider new/delete calls from new or delete expressions. (propagate_necessity): Likewise. (eliminate_unnecessary_stmts): Likewise. * tree-ssa-structalias.c (find_func_aliases_for_call): Likewise. * g++.dg/tree-ssa/pta-delete-1.C: New testcase. --- gcc/gimple.c | 4 ++++ gcc/gimple.h | 24 +++++++++++++++++++++ gcc/ipa-icf-gimple.c | 1 + gcc/testsuite/g++.dg/tree-ssa/pta-delete-1.C | 24 +++++++++++++++++++++ gcc/tree-ssa-dce.c | 31 ++++++++++++++++------------ gcc/tree-ssa-structalias.c | 8 ++++++- 6 files changed, 78 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pta-delete-1.C (limited to 'gcc') diff --git a/gcc/gimple.c b/gcc/gimple.c index fd4e0fa..f07ddab 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -387,6 +387,10 @@ gimple_build_call_from_tree (tree t, tree fnptrtype) && fndecl_built_in_p (fndecl, BUILT_IN_NORMAL) && ALLOCA_FUNCTION_CODE_P (DECL_FUNCTION_CODE (fndecl))) gimple_call_set_alloca_for_var (call, CALL_ALLOCA_FOR_VAR_P (t)); + else if (fndecl + && (DECL_IS_OPERATOR_NEW_P (fndecl) + || DECL_IS_OPERATOR_DELETE_P (fndecl))) + gimple_call_set_from_new_or_delete (call, CALL_FROM_NEW_OR_DELETE_P (t)); else gimple_call_set_from_thunk (call, CALL_FROM_THUNK_P (t)); 
gimple_call_set_va_arg_pack (call, CALL_EXPR_VA_ARG_PACK (t)); diff --git a/gcc/gimple.h b/gcc/gimple.h index 6cc7e66..108ae84 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -149,6 +149,7 @@ enum gf_mask { GF_CALL_MUST_TAIL_CALL = 1 << 9, GF_CALL_BY_DESCRIPTOR = 1 << 10, GF_CALL_NOCF_CHECK = 1 << 11, + GF_CALL_FROM_NEW_OR_DELETE = 1 << 12, GF_OMP_PARALLEL_COMBINED = 1 << 0, GF_OMP_TASK_TASKLOOP = 1 << 0, GF_OMP_TASK_TASKWAIT = 1 << 1, @@ -3387,6 +3388,29 @@ gimple_call_from_thunk_p (gcall *s) } +/* If FROM_NEW_OR_DELETE_P is true, mark GIMPLE_CALL S as being a call + to operator new or delete created from a new or delete expression. */ + +static inline void +gimple_call_set_from_new_or_delete (gcall *s, bool from_new_or_delete_p) +{ + if (from_new_or_delete_p) + s->subcode |= GF_CALL_FROM_NEW_OR_DELETE; + else + s->subcode &= ~GF_CALL_FROM_NEW_OR_DELETE; +} + + +/* Return true if GIMPLE_CALL S is a call to operator new or delete + from a new or delete expression. */ + +static inline bool +gimple_call_from_new_or_delete (gcall *s) +{ + return (s->subcode & GF_CALL_FROM_NEW_OR_DELETE) != 0; +} + + /* If PASS_ARG_PACK_P is true, GIMPLE_CALL S is a stdarg call that needs the argument pack in its argument list. 
*/ diff --git a/gcc/ipa-icf-gimple.c b/gcc/ipa-icf-gimple.c index 1cd5872..d5423a7 100644 --- a/gcc/ipa-icf-gimple.c +++ b/gcc/ipa-icf-gimple.c @@ -556,6 +556,7 @@ func_checker::compare_gimple_call (gcall *s1, gcall *s2) || gimple_call_tail_p (s1) != gimple_call_tail_p (s2) || gimple_call_return_slot_opt_p (s1) != gimple_call_return_slot_opt_p (s2) || gimple_call_from_thunk_p (s1) != gimple_call_from_thunk_p (s2) + || gimple_call_from_new_or_delete (s1) != gimple_call_from_new_or_delete (s2) || gimple_call_va_arg_pack_p (s1) != gimple_call_va_arg_pack_p (s2) || gimple_call_alloca_for_var_p (s1) != gimple_call_alloca_for_var_p (s2)) return false; diff --git a/gcc/testsuite/g++.dg/tree-ssa/pta-delete-1.C b/gcc/testsuite/g++.dg/tree-ssa/pta-delete-1.C new file mode 100644 index 0000000..5e1e322 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pta-delete-1.C @@ -0,0 +1,24 @@ +// { dg-do run } +// { dg-options "-O2" } + +struct X { + static struct X saved; + int *p; + X() { __builtin_memcpy (this, &saved, sizeof (X)); } +}; +X X::saved; +void __attribute__((noinline)) operator delete (void *p) +{ + __builtin_memcpy (&X::saved, p, sizeof (X)); +} +int main() +{ + int y = 1; + X *p = new X; + p->p = &y; + ::operator delete (p); + X *q = new X; + *(q->p) = 2; + if (y != 2) + __builtin_abort (); +} diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c index fae5ae7..c9e0c8f 100644 --- a/gcc/tree-ssa-dce.c +++ b/gcc/tree-ssa-dce.c @@ -593,9 +593,9 @@ mark_all_reaching_defs_necessary_1 (ao_ref *ref ATTRIBUTE_UNUSED, /* We want to skip statments that do not constitute stores but have a virtual definition. 
*/ - if (is_gimple_call (def_stmt)) + if (gcall *call = dyn_cast (def_stmt)) { - tree callee = gimple_call_fndecl (def_stmt); + tree callee = gimple_call_fndecl (call); if (callee != NULL_TREE && fndecl_built_in_p (callee, BUILT_IN_NORMAL)) switch (DECL_FUNCTION_CODE (callee)) @@ -612,7 +612,8 @@ mark_all_reaching_defs_necessary_1 (ao_ref *ref ATTRIBUTE_UNUSED, if (callee != NULL_TREE && (DECL_IS_REPLACEABLE_OPERATOR_NEW_P (callee) - || DECL_IS_REPLACEABLE_OPERATOR_DELETE_P (callee))) + || DECL_IS_REPLACEABLE_OPERATOR_DELETE_P (callee)) + && gimple_call_from_new_or_delete (call)) return false; } @@ -875,23 +876,25 @@ propagate_necessity (bool aggressive) processing the argument. */ bool is_delete_operator = (is_gimple_call (stmt) + && gimple_call_from_new_or_delete (as_a (stmt)) && gimple_call_replaceable_operator_delete_p (as_a (stmt))); if (is_delete_operator || gimple_call_builtin_p (stmt, BUILT_IN_FREE)) { tree ptr = gimple_call_arg (stmt, 0); - gimple *def_stmt; + gcall *def_stmt; tree def_callee; /* If the pointer we free is defined by an allocation function do not add the call to the worklist. */ if (TREE_CODE (ptr) == SSA_NAME - && is_gimple_call (def_stmt = SSA_NAME_DEF_STMT (ptr)) + && (def_stmt = dyn_cast (SSA_NAME_DEF_STMT (ptr))) && (def_callee = gimple_call_fndecl (def_stmt)) && ((DECL_BUILT_IN_CLASS (def_callee) == BUILT_IN_NORMAL && (DECL_FUNCTION_CODE (def_callee) == BUILT_IN_ALIGNED_ALLOC || DECL_FUNCTION_CODE (def_callee) == BUILT_IN_MALLOC || DECL_FUNCTION_CODE (def_callee) == BUILT_IN_CALLOC)) - || DECL_IS_REPLACEABLE_OPERATOR_NEW_P (def_callee))) + || (DECL_IS_REPLACEABLE_OPERATOR_NEW_P (def_callee) + && gimple_call_from_new_or_delete (def_stmt)))) { if (is_delete_operator) { @@ -947,9 +950,9 @@ propagate_necessity (bool aggressive) in 1). By keeping a global visited bitmap for references we walk for 2) we avoid quadratic behavior for those. 
*/ - if (is_gimple_call (stmt)) + if (gcall *call = dyn_cast (stmt)) { - tree callee = gimple_call_fndecl (stmt); + tree callee = gimple_call_fndecl (call); unsigned i; /* Calls to functions that are merely acting as barriers @@ -972,22 +975,23 @@ propagate_necessity (bool aggressive) if (callee != NULL_TREE && (DECL_IS_REPLACEABLE_OPERATOR_NEW_P (callee) - || DECL_IS_REPLACEABLE_OPERATOR_DELETE_P (callee))) + || DECL_IS_REPLACEABLE_OPERATOR_DELETE_P (callee)) + && gimple_call_from_new_or_delete (call)) continue; /* Calls implicitly load from memory, their arguments in addition may explicitly perform memory loads. */ - mark_all_reaching_defs_necessary (stmt); - for (i = 0; i < gimple_call_num_args (stmt); ++i) + mark_all_reaching_defs_necessary (call); + for (i = 0; i < gimple_call_num_args (call); ++i) { - tree arg = gimple_call_arg (stmt, i); + tree arg = gimple_call_arg (call, i); if (TREE_CODE (arg) == SSA_NAME || is_gimple_min_invariant (arg)) continue; if (TREE_CODE (arg) == WITH_SIZE_EXPR) arg = TREE_OPERAND (arg, 0); if (!ref_may_be_aliased (arg)) - mark_aliased_reaching_defs_necessary (stmt, arg); + mark_aliased_reaching_defs_necessary (call, arg); } } else if (gimple_assign_single_p (stmt)) @@ -1397,6 +1401,7 @@ eliminate_unnecessary_stmts (void) if (gimple_plf (stmt, STMT_NECESSARY) && (gimple_call_builtin_p (stmt, BUILT_IN_FREE) || (is_gimple_call (stmt) + && gimple_call_from_new_or_delete (as_a (stmt)) && gimple_call_replaceable_operator_delete_p (as_a (stmt))))) { tree ptr = gimple_call_arg (stmt, 0); diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index f676bf9..69de932 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -4857,7 +4857,13 @@ find_func_aliases_for_call (struct function *fn, gcall *t) point for reachable memory of their arguments. 
*/ else if (flags & (ECF_PURE|ECF_LOOPING_CONST_OR_PURE)) handle_pure_call (t, &rhsc); - else if (fndecl && DECL_IS_REPLACEABLE_OPERATOR_DELETE_P (fndecl)) + /* If the call is to a replaceable operator delete and results + from a delete expression as opposed to a direct call to + such operator, then the effects for PTA (in particular + the escaping of the pointer) can be ignored. */ + else if (fndecl + && DECL_IS_REPLACEABLE_OPERATOR_DELETE_P (fndecl) + && gimple_call_from_new_or_delete (t)) ; else handle_rhs_call (t, &rhsc); -- cgit v1.1 From 4f4ced28826ece7b7b76649522ee2a9601a63b90 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Fri, 2 Oct 2020 09:00:49 +0200 Subject: c++: Set CALL_FROM_NEW_OR_DELETE_P on more calls. We were failing to set the flag on a delete call in a new expression, in a deleting destructor, and in a coroutine. Fixed by setting it in the function that builds the call. 2020-10-02 Jason Merrill gcc/cp/ChangeLog: * call.c (build_operator_new_call): Set CALL_FROM_NEW_OR_DELETE_P. (build_op_delete_call): Likewise. * init.c (build_new_1, build_vec_delete_1, build_delete): Not here. (build_delete): gcc/ChangeLog: * gimple.h (gimple_call_operator_delete_p): Rename from gimple_call_replaceable_operator_delete_p. * gimple.c (gimple_call_operator_delete_p): Likewise. * tree.h (DECL_IS_REPLACEABLE_OPERATOR_DELETE_P): Remove. * tree-ssa-dce.c (mark_all_reaching_defs_necessary_1): Adjust. (propagate_necessity): Likewise. (eliminate_unnecessary_stmts): Likewise. * tree-ssa-structalias.c (find_func_aliases_for_call): Likewise. gcc/testsuite/ChangeLog: * g++.dg/pr94314.C: new/delete no longer omitted. 
--- gcc/cp/call.c | 29 ++++++++++++++++++++++++----- gcc/cp/init.c | 14 -------------- gcc/gimple.c | 4 ++-- gcc/gimple.h | 2 +- gcc/testsuite/g++.dg/pr94314.C | 2 +- gcc/tree-ssa-dce.c | 8 ++++---- gcc/tree-ssa-structalias.c | 2 +- gcc/tree.h | 3 --- 8 files changed, 33 insertions(+), 31 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/call.c b/gcc/cp/call.c index d67e8fe..bd66251 100644 --- a/gcc/cp/call.c +++ b/gcc/cp/call.c @@ -4769,7 +4769,16 @@ build_operator_new_call (tree fnname, vec **args, *fn = cand->fn; /* Build the CALL_EXPR. */ - return build_over_call (cand, LOOKUP_NORMAL, complain); + tree ret = build_over_call (cand, LOOKUP_NORMAL, complain); + + /* Set this flag for all callers of this function. In addition to + new-expressions, this is called for allocating coroutine state; treat + that as an implicit new-expression. */ + tree call = extract_call_expr (ret); + if (TREE_CODE (call) == CALL_EXPR) + CALL_FROM_NEW_OR_DELETE_P (call) = 1; + + return ret; } /* Build a new call to operator(). This may change ARGS. */ @@ -6146,7 +6155,7 @@ build_new_op_1 (const op_location_t &loc, enum tree_code code, int flags, case VEC_NEW_EXPR: case VEC_DELETE_EXPR: case DELETE_EXPR: - /* Use build_op_new_call and build_op_delete_call instead. */ + /* Use build_operator_new_call and build_op_delete_call instead. 
*/ gcc_unreachable (); case CALL_EXPR: @@ -6983,6 +6992,7 @@ build_op_delete_call (enum tree_code code, tree addr, tree size, if (DECL_DELETED_FN (fn) && alloc_fn) return NULL_TREE; + tree ret; if (placement) { /* The placement args might not be suitable for overload @@ -6995,7 +7005,7 @@ build_op_delete_call (enum tree_code code, tree addr, tree size, argarray[i] = CALL_EXPR_ARG (placement, i); if (!mark_used (fn, complain) && !(complain & tf_error)) return error_mark_node; - return build_cxx_call (fn, nargs, argarray, complain); + ret = build_cxx_call (fn, nargs, argarray, complain); } else { @@ -7013,7 +7023,6 @@ build_op_delete_call (enum tree_code code, tree addr, tree size, complain); } - tree ret; releasing_vec args; args->quick_push (addr); if (destroying) @@ -7026,8 +7035,18 @@ build_op_delete_call (enum tree_code code, tree addr, tree size, args->quick_push (al); } ret = cp_build_function_call_vec (fn, &args, complain); - return ret; } + + /* Set this flag for all callers of this function. In addition to + delete-expressions, this is called for deallocating coroutine state; + treat that as an implicit delete-expression. This is also called for + the delete if the constructor throws in a new-expression, and for a + deleting destructor (which implements a delete-expression). 
*/ + tree call = extract_call_expr (ret); + if (TREE_CODE (call) == CALL_EXPR) + CALL_FROM_NEW_OR_DELETE_P (call) = 1; + + return ret; } /* [expr.new] diff --git a/gcc/cp/init.c b/gcc/cp/init.c index e84e985..00fff3f 100644 --- a/gcc/cp/init.c +++ b/gcc/cp/init.c @@ -3433,10 +3433,6 @@ build_new_1 (vec **placement, tree type, tree nelts, } } - tree alloc_call_expr = extract_call_expr (alloc_call); - if (TREE_CODE (alloc_call_expr) == CALL_EXPR) - CALL_FROM_NEW_OR_DELETE_P (alloc_call_expr) = 1; - if (cookie_size) alloc_call = maybe_wrap_new_for_constexpr (alloc_call, elt_type, cookie_size); @@ -4145,10 +4141,6 @@ build_vec_delete_1 (location_t loc, tree base, tree maxindex, tree type, /*placement=*/NULL_TREE, /*alloc_fn=*/NULL_TREE, complain); - - tree deallocate_call_expr = extract_call_expr (deallocate_expr); - if (TREE_CODE (deallocate_call_expr) == CALL_EXPR) - CALL_FROM_NEW_OR_DELETE_P (deallocate_call_expr) = 1; } body = loop; @@ -5073,12 +5065,6 @@ build_delete (location_t loc, tree otype, tree addr, if (do_delete == error_mark_node) return error_mark_node; - else if (do_delete) - { - tree do_delete_call_expr = extract_call_expr (do_delete); - if (TREE_CODE (do_delete_call_expr) == CALL_EXPR) - CALL_FROM_NEW_OR_DELETE_P (do_delete_call_expr) = 1; - } if (do_delete && !TREE_SIDE_EFFECTS (expr)) expr = do_delete; diff --git a/gcc/gimple.c b/gcc/gimple.c index f07ddab..523d845 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -2717,12 +2717,12 @@ gimple_builtin_call_types_compatible_p (const gimple *stmt, tree fndecl) /* Return true when STMT is operator a replaceable delete call. 
*/ bool -gimple_call_replaceable_operator_delete_p (const gcall *stmt) +gimple_call_operator_delete_p (const gcall *stmt) { tree fndecl; if ((fndecl = gimple_call_fndecl (stmt)) != NULL_TREE) - return DECL_IS_REPLACEABLE_OPERATOR_DELETE_P (fndecl); + return DECL_IS_OPERATOR_DELETE_P (fndecl); return false; } diff --git a/gcc/gimple.h b/gcc/gimple.h index 108ae84..3c9b996 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -1605,7 +1605,7 @@ extern alias_set_type gimple_get_alias_set (tree); extern bool gimple_ior_addresses_taken (bitmap, gimple *); extern bool gimple_builtin_call_types_compatible_p (const gimple *, tree); extern combined_fn gimple_call_combined_fn (const gimple *); -extern bool gimple_call_replaceable_operator_delete_p (const gcall *); +extern bool gimple_call_operator_delete_p (const gcall *); extern bool gimple_call_builtin_p (const gimple *); extern bool gimple_call_builtin_p (const gimple *, enum built_in_class); extern bool gimple_call_builtin_p (const gimple *, enum built_in_function); diff --git a/gcc/testsuite/g++.dg/pr94314.C b/gcc/testsuite/g++.dg/pr94314.C index 4e5ae12..7246712 100644 --- a/gcc/testsuite/g++.dg/pr94314.C +++ b/gcc/testsuite/g++.dg/pr94314.C @@ -78,5 +78,5 @@ int main(){ return 0; } -/* { dg-final { scan-tree-dump-times "Deleting : operator delete" 1 "cddce1"} } */ +/* { dg-final { scan-tree-dump-not "Deleting : operator delete" "cddce1"} } */ /* { dg-final { scan-tree-dump-not "Deleting : B::operator delete" "cddce1"} } */ diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c index c9e0c8f..a046612 100644 --- a/gcc/tree-ssa-dce.c +++ b/gcc/tree-ssa-dce.c @@ -612,7 +612,7 @@ mark_all_reaching_defs_necessary_1 (ao_ref *ref ATTRIBUTE_UNUSED, if (callee != NULL_TREE && (DECL_IS_REPLACEABLE_OPERATOR_NEW_P (callee) - || DECL_IS_REPLACEABLE_OPERATOR_DELETE_P (callee)) + || DECL_IS_OPERATOR_DELETE_P (callee)) && gimple_call_from_new_or_delete (call)) return false; } @@ -877,7 +877,7 @@ propagate_necessity (bool aggressive) bool 
is_delete_operator = (is_gimple_call (stmt) && gimple_call_from_new_or_delete (as_a <gcall *> (stmt)) - && gimple_call_replaceable_operator_delete_p (as_a <gcall *> (stmt))); + && gimple_call_operator_delete_p (as_a <gcall *> (stmt))); if (is_delete_operator || gimple_call_builtin_p (stmt, BUILT_IN_FREE)) { @@ -975,7 +975,7 @@ propagate_necessity (bool aggressive) if (callee != NULL_TREE && (DECL_IS_REPLACEABLE_OPERATOR_NEW_P (callee) - || DECL_IS_REPLACEABLE_OPERATOR_DELETE_P (callee)) + || DECL_IS_OPERATOR_DELETE_P (callee)) && gimple_call_from_new_or_delete (call)) continue; @@ -1402,7 +1402,7 @@ eliminate_unnecessary_stmts (void) && (gimple_call_builtin_p (stmt, BUILT_IN_FREE) || (is_gimple_call (stmt) && gimple_call_from_new_or_delete (as_a <gcall *> (stmt)) - && gimple_call_replaceable_operator_delete_p (as_a <gcall *> (stmt))))) + && gimple_call_operator_delete_p (as_a <gcall *> (stmt))))) { tree ptr = gimple_call_arg (stmt, 0); if (TREE_CODE (ptr) == SSA_NAME) diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index 69de932..30a8c93 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -4862,7 +4862,7 @@ find_func_aliases_for_call (struct function *fn, gcall *t) such operator, then the effects for PTA (in particular the escaping of the pointer) can be ignored.
*/ else if (fndecl - && DECL_IS_REPLACEABLE_OPERATOR_DELETE_P (fndecl) + && DECL_IS_OPERATOR_DELETE_P (fndecl) && gimple_call_from_new_or_delete (t)) ; else diff --git a/gcc/tree.h b/gcc/tree.h index f27a739..c0a027a 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -3074,9 +3074,6 @@ set_function_decl_type (tree decl, function_decl_type t, bool set) #define DECL_IS_OPERATOR_DELETE_P(NODE) \ (FUNCTION_DECL_CHECK (NODE)->function_decl.decl_type == OPERATOR_DELETE) -#define DECL_IS_REPLACEABLE_OPERATOR_DELETE_P(NODE) \ - (DECL_IS_OPERATOR_DELETE_P (NODE) && DECL_IS_REPLACEABLE_OPERATOR (NODE)) - #define DECL_SET_IS_OPERATOR_DELETE(NODE, VAL) \ set_function_decl_type (FUNCTION_DECL_CHECK (NODE), OPERATOR_DELETE, VAL) -- cgit v1.1 From f8dcbea5d2fb17dca3a7de97f15fc49997222365 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Fri, 25 Sep 2020 10:53:26 +0200 Subject: GCOV: do not mangle .gcno files. gcc/ChangeLog: PR gcov-profile/97193 * coverage.c (coverage_init): GCNO note files should not be mangled and should end up in the output directory.
--- gcc/coverage.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/coverage.c b/gcc/coverage.c index f353c9c..7711412 100644 --- a/gcc/coverage.c +++ b/gcc/coverage.c @@ -1206,6 +1206,8 @@ coverage_obj_finish (vec *ctor) void coverage_init (const char *filename) { + const char *original_filename = filename; + int original_len = strlen (original_filename); #if HAVE_DOS_BASED_FILE_SYSTEM const char *separator = "\\"; #else @@ -1277,9 +1279,9 @@ coverage_init (const char *filename) bbg_file_name = xstrdup (profile_note_location); else { - bbg_file_name = XNEWVEC (char, len + strlen (GCOV_NOTE_SUFFIX) + 1); - memcpy (bbg_file_name, filename, len); - strcpy (bbg_file_name + len, GCOV_NOTE_SUFFIX); + bbg_file_name = XNEWVEC (char, original_len + strlen (GCOV_NOTE_SUFFIX) + 1); + memcpy (bbg_file_name, original_filename, original_len); + strcpy (bbg_file_name + original_len, GCOV_NOTE_SUFFIX); } if (!gcov_open (bbg_file_name, -1)) -- cgit v1.1 From 01c288035aa960631dd0ffd9131ed0a824a95f30 Mon Sep 17 00:00:00 2001 From: Alex Coplan Date: Fri, 2 Oct 2020 11:16:31 +0100 Subject: aarch64: ilp32 testsuite fixes This fixes test failures on ilp32 introduced in r11-3032-gd4febc75e8dfab23bd3132d5747eded918f85107. The assembler checks in extend-syntax.c simply needed adjusting for 32-bit pointers. It appears the subsp.c test has never passed on ILP32 due to a missed optimisation there. Since this isn't a code quality regression, disable that check on ILP32. gcc/testsuite/ChangeLog: * gcc.target/aarch64/extend-syntax.c: Fix assembler checks for ilp32, disable check-function-bodies on ilp32. * gcc.target/aarch64/subsp.c: Only check second scan-assembler on lp64 since the code on ilp32 is missing the optimization needed for this test to pass. 
--- gcc/testsuite/gcc.target/aarch64/extend-syntax.c | 13 +++++++++++-- gcc/testsuite/gcc.target/aarch64/subsp.c | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/aarch64/extend-syntax.c b/gcc/testsuite/gcc.target/aarch64/extend-syntax.c index 23fa9f4..1bfcdb5 100644 --- a/gcc/testsuite/gcc.target/aarch64/extend-syntax.c +++ b/gcc/testsuite/gcc.target/aarch64/extend-syntax.c @@ -20,6 +20,7 @@ unsigned long long *add1(unsigned long long *p, unsigned x) */ unsigned long long add2(unsigned long long x, unsigned y) { + /* { dg-final { scan-assembler-times "add\tx0, x0, w1, uxtw" 1 { target ilp32 } } } */ return x + y; } @@ -34,6 +35,9 @@ double *add3(double *p, int x) return p + x; } +// add1 and add3 should both generate this on ILP32: +/* { dg-final { scan-assembler-times "add\tw0, w0, w1, lsl 3" 2 { target ilp32 } } } */ + // Hits *sub_zero_extendsi_di (*sub__). /* ** sub1: @@ -42,6 +46,7 @@ double *add3(double *p, int x) */ unsigned long long sub1(unsigned long long x, unsigned n) { + /* { dg-final { scan-assembler-times "sub\tx0, x0, w1, uxtw" 1 { target ilp32 } } } */ return x - n; } @@ -67,6 +72,9 @@ double *sub3(double *p, int n) return p - n; } +// sub2 and sub3 should both generate this on ILP32: +/* { dg-final { scan-assembler-times "sub\tw0, w0, w1, lsl 3" 2 { target ilp32 } } } */ + // Hits *adds_zero_extendsi_di (*adds__). 
int adds1(unsigned long long x, unsigned y) { @@ -97,7 +105,8 @@ int subs1(unsigned long long x, unsigned y) unsigned long long *w; int subs2(unsigned long long *x, int y) { - /* { dg-final { scan-assembler-times "subs\tx\[0-9\]+, x\[0-9\]+, w\[0-9\]+, sxtw 3" 1 } } */ + /* { dg-final { scan-assembler-times "subs\tx\[0-9\]+, x\[0-9\]+, w\[0-9\]+, sxtw 3" 1 { target lp64 } } } */ + /* { dg-final { scan-assembler-times "subs\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 3" 1 { target ilp32 } } } */ unsigned long long *t = x - y; w = t; return !!t; @@ -117,4 +126,4 @@ int cmp2(unsigned long long x, int y) return x == ((unsigned long long)y << 3); } -/* { dg-final { check-function-bodies "**" "" "" } } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/subsp.c b/gcc/testsuite/gcc.target/aarch64/subsp.c index 341b83d..e7f61e0 100644 --- a/gcc/testsuite/gcc.target/aarch64/subsp.c +++ b/gcc/testsuite/gcc.target/aarch64/subsp.c @@ -16,4 +16,4 @@ f2 (int *x, int y) } /* { dg-final { scan-assembler "sub\tsp, sp, x\[0-9\]*\n" } } */ -/* { dg-final { scan-assembler "sub\tsp, sp, w\[0-9\]*, sxtw 4\n" } } */ +/* { dg-final { scan-assembler "sub\tsp, sp, w\[0-9\]*, sxtw 4\n" { target lp64 } } } */ -- cgit v1.1 From bb78e5876aa6a6b4a8158cdc0f6c8511eb2be75f Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 2 Oct 2020 11:53:05 +0100 Subject: arm: Make more use of the new mode macros As Christophe pointed out, my r11-3522 patch didn't in fact fix all of the armv8_2-fp16-arith-2.c failures introduced by allowing FP16 vectorisation without -funsafe-math-optimizations. I must have only tested the final patch on my usual arm-linux-gnueabihf bootstrap, which it turns out treats the test as unsupported. The focus of the original patch was to use mode macros for patterns that are shared between Advanced SIMD, iwMMXt and MVE. This patch uses the mode macros for general neon.md patterns too. 
gcc/ * config/arm/neon.md (*sub3_neon): Use the new mode macros for the insn condition. (sub3, *mul3_neon): Likewise. (mul3add_neon): Likewise. (mul3add_neon): Likewise. (mul3negadd_neon): Likewise. (fma4, fma4, *fmsub4): Likewise. (quad_halves_v4sf, reduc_plus_scal_): Likewise. (reduc_plus_scal_, reduc_smin_scal_): Likewise. (reduc_smin_scal_, reduc_smax_scal_): Likewise. (reduc_smax_scal_, mul3): Likewise. (neon_vabd_2, neon_vabd_3): Likewise. (fma4_intrinsic): Delete. (neon_vadd): Use the new mode macros to decide which form of instruction to generate. (neon_vmla, neon_vmls): Likewise. (neon_vsub): Likewise. (neon_vfma): Generate the main fma4 form instead of using fma4_intrinsic. gcc/testsuite/ * gcc.target/arm/armv8_2-fp16-arith-2.c (float16_t): Use _Float16 rather than __fp16. (float16x4_t, float16x8_t): Likewise. (fp16_abs): Use __builtin_fabsf16. --- gcc/config/arm/neon.md | 64 +++++++++------------- .../gcc.target/arm/armv8_2-fp16-arith-2.c | 8 +-- 2 files changed, 29 insertions(+), 43 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 58832cb..85e424e 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -513,7 +513,7 @@ [(set (match_operand:VDQ 0 "s_register_operand" "=w") (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] - "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON__ARITH" "vsub.\t%0, %1, %2" [(set (attr "type") (if_then_else (match_test "") @@ -527,7 +527,7 @@ (minus:VH (match_operand:VH 1 "s_register_operand" "w") (match_operand:VH 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON__ARITH" "vsub.\t%0, %1, %2" [(set_attr "type" "neon_sub")] ) @@ -547,7 +547,7 @@ [(set (match_operand:VDQW 0 "s_register_operand" "=w") (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w") (match_operand:VDQW 2 "s_register_operand" "w")))] - "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON__ARITH" "vmul.\t%0, %1, %2" [(set (attr "type") (if_then_else (match_test "") @@ -592,7 +592,7 @@ (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") (match_operand:VDQW 3 "s_register_operand" "w")) (match_operand:VDQW 1 "s_register_operand" "0")))] - "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON__ARITH" "vmla.\t%0, %2, %3" [(set (attr "type") (if_then_else (match_test "") @@ -605,7 +605,7 @@ (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w") (match_operand:VH 3 "s_register_operand" "w")) (match_operand:VH 1 "s_register_operand" "0")))] - "TARGET_NEON_FP16INST && (! || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON__ARITH" "vmla.f16\t%0, %2, %3" [(set_attr "type" "neon_fp_mla_s")] ) @@ -615,7 +615,7 @@ (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0") (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") (match_operand:VDQW 3 "s_register_operand" "w"))))] - "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON__ARITH" "vmls.\t%0, %2, %3" [(set (attr "type") (if_then_else (match_test "") @@ -633,7 +633,7 @@ (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") (match_operand:VCVTF 2 "register_operand" "w") (match_operand:VCVTF 3 "register_operand" "0")))] - "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON__ARITH && TARGET_FMA" "vfma.\\t%0, %1, %2" [(set_attr "type" "neon_fp_mla_s")] ) @@ -654,18 +654,7 @@ (match_operand:VH 1 "register_operand" "w") (match_operand:VH 2 "register_operand" "w") (match_operand:VH 3 "register_operand" "0")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" - "vfma.\\t%0, %1, %2" - [(set_attr "type" "neon_fp_mla_s")] -) - -(define_insn "fma4_intrinsic" - [(set (match_operand:VH 0 "register_operand" "=w") - (fma:VH - (match_operand:VH 1 "register_operand" "w") - (match_operand:VH 2 "register_operand" "w") - (match_operand:VH 3 "register_operand" "0")))] - "TARGET_NEON_FP16INST" + "ARM_HAVE_NEON__ARITH" "vfma.\\t%0, %1, %2" [(set_attr "type" "neon_fp_mla_s")] ) @@ -675,7 +664,7 @@ (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) (match_operand:VCVTF 2 "register_operand" "w") (match_operand:VCVTF 3 "register_operand" "0")))] - "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON__ARITH && TARGET_FMA" "vfms.\\t%0, %1, %2" [(set_attr "type" "neon_fp_mla_s")] ) @@ -1195,7 +1184,7 @@ (parallel [(const_int 0) (const_int 1)])) (vec_select:V2SF (match_dup 1) (parallel [(const_int 2) (const_int 3)]))))] - "TARGET_NEON && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON_V4SF_ARITH" ".f32\t%P0, %e1, %f1" [(set_attr "vqh_mnem" "") (set_attr "type" "neon_fp_reduc__s_q")] @@ -1262,7 +1251,7 @@ (define_expand "reduc_plus_scal_" [(match_operand: 0 "nonimmediate_operand") (match_operand:VD 1 "s_register_operand")] - "TARGET_NEON && (! 
|| flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON__ARITH" { rtx vec = gen_reg_rtx (mode); neon_pairwise_reduce (vec, operands[1], mode, @@ -1275,8 +1264,7 @@ (define_expand "reduc_plus_scal_" [(match_operand: 0 "nonimmediate_operand") (match_operand:VQ 1 "s_register_operand")] - "TARGET_NEON && (! || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" + "ARM_HAVE_NEON__ARITH && !BYTES_BIG_ENDIAN" { rtx step1 = gen_reg_rtx (mode); @@ -1311,7 +1299,7 @@ (define_expand "reduc_smin_scal_" [(match_operand: 0 "nonimmediate_operand") (match_operand:VD 1 "s_register_operand")] - "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON__ARITH" { rtx vec = gen_reg_rtx (mode); @@ -1325,8 +1313,7 @@ (define_expand "reduc_smin_scal_" [(match_operand: 0 "nonimmediate_operand") (match_operand:VQ 1 "s_register_operand")] - "TARGET_NEON && (! || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" + "ARM_HAVE_NEON__ARITH && !BYTES_BIG_ENDIAN" { rtx step1 = gen_reg_rtx (mode); @@ -1339,7 +1326,7 @@ (define_expand "reduc_smax_scal_" [(match_operand: 0 "nonimmediate_operand") (match_operand:VD 1 "s_register_operand")] - "TARGET_NEON && (! || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON__ARITH" { rtx vec = gen_reg_rtx (mode); neon_pairwise_reduce (vec, operands[1], mode, @@ -1352,8 +1339,7 @@ (define_expand "reduc_smax_scal_" [(match_operand: 0 "nonimmediate_operand") (match_operand:VQ 1 "s_register_operand")] - "TARGET_NEON && (! || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" + "ARM_HAVE_NEON__ARITH && !BYTES_BIG_ENDIAN" { rtx step1 = gen_reg_rtx (mode); @@ -1627,7 +1613,7 @@ (match_operand:VCVTF 2 "s_register_operand")] "TARGET_NEON" { - if (! 
|| flag_unsafe_math_optimizations) + if (ARM_HAVE_NEON__ARITH) emit_insn (gen_add3 (operands[0], operands[1], operands[2])); else emit_insn (gen_neon_vadd_unspec (operands[0], operands[1], @@ -1752,7 +1738,7 @@ (mult:VH (match_operand:VH 1 "s_register_operand" "w") (match_operand:VH 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON__ARITH" "vmul.f16\t%0, %1, %2" [(set_attr "type" "neon_mul_")] ) @@ -1775,7 +1761,7 @@ (match_operand:VDQW 3 "s_register_operand")] "TARGET_NEON" { - if (! || flag_unsafe_math_optimizations) + if (ARM_HAVE_NEON__ARITH) emit_insn (gen_mul3add_neon (operands[0], operands[1], operands[2], operands[3])); else @@ -1803,8 +1789,8 @@ (match_operand:VH 3 "s_register_operand")] "TARGET_NEON_FP16INST" { - emit_insn (gen_fma4_intrinsic (operands[0], operands[2], operands[3], - operands[1])); + emit_insn (gen_fma4 (operands[0], operands[2], operands[3], + operands[1])); DONE; }) @@ -2266,7 +2252,7 @@ (match_operand:VDQW 3 "s_register_operand")] "TARGET_NEON" { - if (! || flag_unsafe_math_optimizations) + if (ARM_HAVE_NEON__ARITH) emit_insn (gen_mul3negadd_neon (operands[0], operands[1], operands[2], operands[3])); else @@ -2373,7 +2359,7 @@ (match_operand:VCVTF 2 "s_register_operand")] "TARGET_NEON" { - if (! || flag_unsafe_math_optimizations) + if (ARM_HAVE_NEON__ARITH) emit_insn (gen_sub3 (operands[0], operands[1], operands[2])); else emit_insn (gen_neon_vsub_unspec (operands[0], operands[1], @@ -6462,7 +6448,7 @@ if (BYTES_BIG_ENDIAN) [(set (match_operand:VF 0 "s_register_operand" "=w") (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w") (match_operand:VF 2 "s_register_operand" "w"))))] - "TARGET_NEON && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON__ARITH" "vabd. 
%0, %1, %2" [(set_attr "type" "neon_fp_abd_s")] ) @@ -6472,7 +6458,7 @@ if (BYTES_BIG_ENDIAN) (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w") (match_operand:VF 2 "s_register_operand" "w")] UNSPEC_VSUB)))] - "TARGET_NEON && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON__ARITH" "vabd. %0, %1, %2" [(set_attr "type" "neon_fp_abd_s")] ) diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c index 81bad22..f94109c 100644 --- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c +++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-2.c @@ -6,9 +6,9 @@ /* Test instructions generated for half-precision arithmetic without unsafe-math-optimizations. */ -typedef __fp16 float16_t; -typedef __simd64_float16_t float16x4_t; -typedef __simd128_float16_t float16x8_t; +typedef _Float16 float16_t; +typedef _Float16 float16x4_t __attribute__ ((vector_size (8))); +typedef _Float16 float16x8_t __attribute__ ((vector_size (16))); typedef short int16x4_t __attribute__ ((vector_size (8))); typedef short int int16x8_t __attribute__ ((vector_size (16))); @@ -16,7 +16,7 @@ typedef short int int16x8_t __attribute__ ((vector_size (16))); float16_t fp16_abs (float16_t a) { - return (a < 0) ? -a : a; + return __builtin_fabsf16 (a); } #define TEST_UNOP(NAME, OPERATOR, TY) \ -- cgit v1.1 From 0eb5e901f6e25a7b8a9790a7a8c209147fb649ec Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 2 Oct 2020 11:53:06 +0100 Subject: aarch64: Remove aarch64_sve_pred_dominates_p MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In r11-2922, Przemek fixed a post-RA instruction match failure caused by the SVE FP subtraction patterns. This patch applies the same fix to the other patterns. To recap, the issue is around the handling of predication. We want to do two things: - Optimise cases in which a predicate is known to be all-true.
- Differentiate cases in which the predicate on an _x ACLE function has to be kept as-is from cases in which we can make more lanes active. The former is true by default, the latter is true for certain combinations of flags in the -ffast-math group. This is handled by a boolean flag in the unspecs to say whether the predicate is “strict” or “relaxed”. When combining multiple strict operations, the predicates used in the operations generally need to match. When combining multiple relaxed operations, we can ignore the predicates on nested operations and just use the predicate on the “outermost” operation. Originally I'd tried to reduce combinatorial explosion by using aarch64_sve_pred_dominates_p. This required matching predicates for strict operations but allowed more combinations for relaxed operations. The problem (as I should have remembered) is that C conditions on insn patterns can't reliably enforce matching operands. If the same register is used in two different input operands, the RA is allowed to use different hard registers for those input operands (and sometimes it has to). So operands that match before RA might not match afterwards. The only sure way to force a match is via match_dup. This patch splits the cases into two. I cry bitter tears at having to do this, but I think it's the only backportable fix. There might be some way of using define_subst to generate the cond_* patterns from the pred_* patterns, with some alternatives strategically disabled in each case, but that's future work and might not be an improvement. Since so many patterns now do this, I moved the comments from the subtraction pattern to a new banner comment at the head of the file. gcc/ * config/aarch64/aarch64-protos.h (aarch64_sve_pred_dominates_p): Delete. * config/aarch64/aarch64.c (aarch64_sve_pred_dominates_p): Likewise. * config/aarch64/aarch64-sve.md: Add banner comment describing how merging predicated FP operations are represented. (*cond__2): Split into... 
(*cond__2_relaxed): ...this and... (*cond__2_strict): ...this. (*cond__any): Split into... (*cond__any_relaxed): ...this and... (*cond__any_strict): ...this. (*cond__2): Split into... (*cond__2_relaxed): ...this and... (*cond__2_strict): ...this. (*cond__any): Split into... (*cond__any_relaxed): ...this and... (*cond__any_strict): ...this. (*cond__2): Split into... (*cond__2_relaxed): ...this and... (*cond__2_strict): ...this. (*cond__2_const): Split into... (*cond__2_const_relaxed): ...this and... (*cond__2_const_strict): ...this. (*cond__3): Split into... (*cond__3_relaxed): ...this and... (*cond__3_strict): ...this. (*cond__any): Split into... (*cond__any_relaxed): ...this and... (*cond__any_strict): ...this. (*cond__any_const): Split into... (*cond__any_const_relaxed): ...this and... (*cond__any_const_strict): ...this. (*cond_add_2_const): Split into... (*cond_add_2_const_relaxed): ...this and... (*cond_add_2_const_strict): ...this. (*cond_add_any_const): Split into... (*cond_add_any_const_relaxed): ...this and... (*cond_add_any_const_strict): ...this. (*cond__2): Split into... (*cond__2_relaxed): ...this and... (*cond__2_strict): ...this. (*cond__any): Split into... (*cond__any_relaxed): ...this and... (*cond__any_strict): ...this. (*cond_sub_3_const): Split into... (*cond_sub_3_const_relaxed): ...this and... (*cond_sub_3_const_strict): ...this. (*aarch64_pred_abd): Split into... (*aarch64_pred_abd_relaxed): ...this and... (*aarch64_pred_abd_strict): ...this. (*aarch64_cond_abd_2): Split into... (*aarch64_cond_abd_2_relaxed): ...this and... (*aarch64_cond_abd_2_strict): ...this. (*aarch64_cond_abd_3): Split into... (*aarch64_cond_abd_3_relaxed): ...this and... (*aarch64_cond_abd_3_strict): ...this. (*aarch64_cond_abd_any): Split into... (*aarch64_cond_abd_any_relaxed): ...this and... (*aarch64_cond_abd_any_strict): ...this. (*cond__2): Split into... (*cond__2_relaxed): ...this and... (*cond__2_strict): ...this. (*cond__4): Split into... 
(*cond__4_relaxed): ...this and... (*cond__4_strict): ...this. (*cond__any): Split into... (*cond__any_relaxed): ...this and... (*cond__any_strict): ...this. (*cond__4): Split into... (*cond__4_relaxed): ...this and... (*cond__4_strict): ...this. (*cond__any): Split into... (*cond__any_relaxed): ...this and... (*cond__any_strict): ...this. (*aarch64_pred_fac): Split into... (*aarch64_pred_fac_relaxed): ...this and... (*aarch64_pred_fac_strict): ...this. (*cond__nontrunc): Split into... (*cond__nontrunc_relaxed): ...this and... (*cond__nontrunc_strict): ...this. (*cond__nonextend): Split into... (*cond__nonextend_relaxed): ...this and... (*cond__nonextend_strict): ...this. * config/aarch64/aarch64-sve2.md (*cond_): Split into... (*cond__relaxed): ...this and... (*cond__strict): ...this. (*cond__any): Split into... (*cond__any_relaxed): ...this and... (*cond__any_strict): ...this. (*cond_): Split into... (*cond__relaxed): ...this and... (*cond__strict): ...this. --- gcc/config/aarch64/aarch64-protos.h | 1 - gcc/config/aarch64/aarch64-sve.md | 923 +++++++++++++++++++++++++++++++----- gcc/config/aarch64/aarch64-sve2.md | 73 ++- gcc/config/aarch64/aarch64.c | 18 - 4 files changed, 853 insertions(+), 162 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 302e09b..7a34c84 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -630,7 +630,6 @@ void aarch64_expand_mov_immediate (rtx, rtx); rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type); rtx aarch64_ptrue_reg (machine_mode); rtx aarch64_pfalse_reg (machine_mode); -bool aarch64_sve_pred_dominates_p (rtx *, rtx); bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); void aarch64_emit_sve_pred_move (rtx, rtx, rtx); void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index cd79aba..31a8c5a 100644 --- 
a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -464,6 +464,95 @@ ;; ;; - MNEMONIC is the mnemonic of the associated SVE instruction. ;; +;; For (3) and (4), we combine these operations with an UNSPEC_SEL +;; that selects between the result of the FP operation and the "else" +;; value. (This else value is a merge input for _m ACLE functions +;; and zero for _z ACLE functions.) The outer pattern then has the form: +;; +;; (unspec [pred fp_operation else_value] UNSPEC_SEL) +;; +;; This means that the patterns for (3) and (4) have two predicates: +;; one for the FP operation itself and one for the UNSPEC_SEL. +;; This pattern is equivalent to the result of combining an instance +;; of (1) or (2) with a separate vcond instruction, so these patterns +;; are useful as combine targets too. +;; +;; However, in the combine case, the instructions that we want to +;; combine might use different predicates. Then: +;; +;; - Some of the active lanes of the FP operation might be discarded +;; by the UNSPEC_SEL. It's OK to drop the FP operation on those lanes, +;; even for SVE_STRICT_GP, since the operations on those lanes are +;; effectively dead code. +;; +;; - Some of the inactive lanes of the FP operation might be selected +;; by the UNSPEC_SEL, giving unspecified values for those lanes. +;; SVE_RELAXED_GP lets us extend the FP operation to cover these +;; extra lanes, but SVE_STRICT_GP does not. +;; +;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation +;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate. +;; This typically leads to patterns like: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (unspec [(match_operand N) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_) +;; ...]) +;; +;; where operand N is allowed to be anything. 
These instructions then +;; have rewrite rules to replace operand N with operand 1, which gives the +;; instructions a canonical form and means that the original operand N is +;; not kept live unnecessarily. +;; +;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be +;; a subset of the FP operation predicate. This case isn't interesting +;; for FP operations that have an all-true predicate, since such operations +;; use SVE_RELAXED_GP instead. And it is not possible for instruction +;; conditions to track the subset relationship for arbitrary registers. +;; So in practice, the only useful case for SVE_STRICT_GP is the one +;; in which the predicates match: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (unspec [(match_dup 1) +;; (const_int SVE_STRICT_GP) +;; ...] +;; UNSPEC_COND_) +;; ...]) +;; +;; This pattern would also be correct for SVE_RELAXED_GP, but it would +;; be redundant with the one above. However, if the combine pattern +;; has multiple FP operations, using a match_operand allows combinations +;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided +;; that the predicates are the same: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (... +;; (unspec [(match_dup 1) +;; (match_operand:SI N "aarch64_sve_gp_strictness") +;; ...] +;; UNSPEC_COND_) +;; (unspec [(match_dup 1) +;; (match_operand:SI M "aarch64_sve_gp_strictness") +;; ...] +;; UNSPEC_COND_) ...) +;; ...]) +;; +;; The fully-relaxed version of this pattern is: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (... +;; (unspec [(match_operand:SI N) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_) +;; (unspec [(match_operand:SI M) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_) ...) 
+;; ...]) +;; ;; ------------------------------------------------------------------------- ;; ---- Note on FFR handling ;; ------------------------------------------------------------------------- @@ -3304,18 +3393,18 @@ ) ;; Predicated floating-point unary arithmetic, merging with the first input. -(define_insn_and_rewrite "*cond__2" +(define_insn_and_rewrite "*cond__2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 3) - (match_operand:SI 4 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] SVE_COND_FP_UNARY) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %0. movprfx\t%0, %2\;\t%0., %1/m, %2." @@ -3326,6 +3415,24 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] + SVE_COND_FP_UNARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0. + movprfx\t%0, %2\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point unary arithmetic, merging with an independent ;; value. ;; @@ -3334,20 +3441,18 @@ ;; which is handled above rather than here. Marking all the alternatives ;; as earlyclobber helps to make the instruction more regular to the ;; register allocator. 
-(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] SVE_COND_FP_UNARY) (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[3]) - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" "@ \t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. @@ -3359,6 +3464,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE_COND_FP_UNARY) + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Square root ;; ------------------------------------------------------------------------- @@ -4649,19 +4773,19 @@ ;; Predicated floating-point binary operations that take an integer as their ;; second operand, with inactive lanes coming from the first operand. 
-(define_insn_and_rewrite "*cond__2" +(define_insn_and_rewrite "*cond__2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand: 3 "register_operand" "w, w")] SVE_COND_FP_BINARY_INT) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %0., %3. movprfx\t%0, %2\;\t%0., %1/m, %0., %3." @@ -4672,24 +4796,41 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand: 3 "register_operand" "w, w")] + SVE_COND_FP_BINARY_INT) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point binary operations that take an integer as ;; their second operand, with the values of inactive lanes being distinct ;; from the other inputs. 
-(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") (match_operand: 3 "register_operand" "w, w, w, w")] SVE_COND_FP_BINARY_INT) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. @@ -4713,6 +4854,35 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") + (match_operand: 3 "register_operand" "w, w, w, w")] + SVE_COND_FP_BINARY_INT) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. 
+ #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] General binary arithmetic corresponding to rtx codes ;; ------------------------------------------------------------------------- @@ -4813,19 +4983,19 @@ ) ;; Predicated floating-point operations, merging with the first input. -(define_insn_and_rewrite "*cond__2" +(define_insn_and_rewrite "*cond__2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] SVE_COND_FP_BINARY) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %0., %3. movprfx\t%0, %2\;\t%0., %1/m, %0., %3." @@ -4836,20 +5006,39 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE_COND_FP_BINARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %3. + movprfx\t%0, %2\;\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + ;; Same for operations that take a 1-bit constant. 
-(define_insn_and_rewrite "*cond__2_const" +(define_insn_and_rewrite "*cond__2_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "")] SVE_COND_FP_BINARY_I1) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %0., #%3 movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" @@ -4860,20 +5049,39 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "")] + SVE_COND_FP_BINARY_I1) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point operations, merging with the second input. -(define_insn_and_rewrite "*cond__3" +(define_insn_and_rewrite "*cond__3_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] SVE_COND_FP_BINARY) (match_dup 3)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %0., %2. movprfx\t%0, %3\;\t%0., %1/m, %0., %2." 
@@ -4884,14 +5092,33 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__3_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + SVE_COND_FP_BINARY) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %0., %2. + movprfx\t%0, %3\;\t%0., %1/m, %0., %2." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point operations, merging with an independent value. -(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] SVE_COND_FP_BINARY) @@ -4899,8 +5126,7 @@ UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4]) - && !rtx_equal_p (operands[3], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + && !rtx_equal_p (operands[3], operands[4])" "@ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. 
@@ -4925,22 +5151,52 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] + SVE_COND_FP_BINARY) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Same for operations that take a 1-bit constant. 
-(define_insn_and_rewrite "*cond__any_const" +(define_insn_and_rewrite "*cond__any_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") (match_operand:SVE_FULL_F 3 "")] SVE_COND_FP_BINARY_I1) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 @@ -4963,6 +5219,34 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") + (match_operand:SVE_FULL_F 3 "")] + SVE_COND_FP_BINARY_I1) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Addition ;; ------------------------------------------------------------------------- @@ -5001,19 +5285,19 
@@ ;; Predicated floating-point addition of a constant, merging with the ;; first input. -(define_insn_and_rewrite "*cond_add_2_const" +(define_insn_and_rewrite "*cond_add_2_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] UNSPEC_COND_FADD) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ fadd\t%0., %1/m, %0., #%3 fsub\t%0., %1/m, %0., #%N3 @@ -5026,23 +5310,42 @@ [(set_attr "movprfx" "*,*,yes,yes")] ) +(define_insn "*cond_add_2_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] + UNSPEC_COND_FADD) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fadd\t%0., %1/m, %0., #%3 + fsub\t%0., %1/m, %0., #%N3 + movprfx\t%0, %2\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0, %2\;fsub\t%0., %1/m, %0., #%N3" + [(set_attr "movprfx" "*,*,yes,yes")] +) + ;; Predicated floating-point addition of a constant, merging with an ;; independent value. 
-(define_insn_and_rewrite "*cond_add_any_const" +(define_insn_and_rewrite "*cond_add_any_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] UNSPEC_COND_FADD) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0., %1/z, %2.\;fadd\t%0., %1/m, %0., #%3 movprfx\t%0., %1/z, %2.\;fsub\t%0., %1/m, %0., #%N3 @@ -5068,6 +5371,37 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond_add_any_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] + UNSPEC_COND_FADD) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/z, %2.\;fsub\t%0., %1/m, %0., #%N3 + movprfx\t%0., %1/m, %2.\;fadd\t%0., %1/m, %0., #%3 + movprfx\t%0., %1/m, %2.\;fsub\t%0., %1/m, %0., #%N3 + # + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + 
emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Register merging forms are handled through SVE_COND_FP_BINARY. ;; ------------------------------------------------------------------------- @@ -5110,19 +5444,19 @@ ) ;; Predicated FCADD, merging with the first input. -(define_insn_and_rewrite "*cond__2" +(define_insn_and_rewrite "*cond__2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] SVE_COND_FCADD) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ fcadd\t%0., %1/m, %0., %3., # movprfx\t%0, %2\;fcadd\t%0., %1/m, %0., %3., #" @@ -5133,22 +5467,39 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE_COND_FCADD) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0, %2\;fcadd\t%0., %1/m, %0., %3., #" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated FCADD, merging with an independent value. 
-(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] SVE_COND_FCADD) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0., %1/z, %2.\;fcadd\t%0., %1/m, %0., %3., # movprfx\t%0., %1/z, %0.\;fcadd\t%0., %1/m, %0., %3., # @@ -5172,6 +5523,35 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] + SVE_COND_FCADD) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0., %1/z, %2.\;fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0., %1/z, %0.\;fcadd\t%0., %1/m, %0., %3., # + movprfx\t%0., %1/m, %2.\;fcadd\t%0., %1/m, %0., %3., # + #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; 
------------------------------------------------------------------------- ;; ---- [FP] Subtraction ;; ------------------------------------------------------------------------- @@ -5209,19 +5589,19 @@ ;; Predicated floating-point subtraction from a constant, merging with the ;; second input. -(define_insn_and_rewrite "*cond_sub_3_const" +(define_insn_and_rewrite "*cond_sub_3_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] UNSPEC_COND_FSUB) (match_dup 3)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ fsubr\t%0., %1/m, %0., #%2 movprfx\t%0, %3\;fsubr\t%0., %1/m, %0., #%2" @@ -5232,12 +5612,28 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_sub_3_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + UNSPEC_COND_FSUB) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fsubr\t%0., %1/m, %0., #%2 + movprfx\t%0, %3\;fsubr\t%0., %1/m, %0., #%2" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point subtraction from a constant, merging with an ;; independent value. -;; -;; The subtraction predicate and the merge predicate are allowed to be -;; different. 
-(define_insn_and_rewrite "*cond_sub_relaxed_const" +(define_insn_and_rewrite "*cond_sub_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") @@ -5272,11 +5668,7 @@ [(set_attr "movprfx" "yes")] ) -;; Predicated floating-point subtraction from a constant, merging with an -;; independent value. -;; -;; The subtraction predicate and the merge predicate must be the same. -(define_insn_and_rewrite "*cond_sub_strict_const" +(define_insn_and_rewrite "*cond_sub_const_strict" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") @@ -5329,19 +5721,19 @@ ) ;; Predicated floating-point absolute difference. -(define_insn_and_rewrite "*aarch64_pred_abd" +(define_insn_and_rewrite "*aarch64_pred_abd_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (match_operand:SI 4 "aarch64_sve_gp_strictness") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] UNSPEC_COND_FSUB)] UNSPEC_COND_FABS))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ fabd\t%0., %1/m, %0., %3. movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." 
@@ -5352,6 +5744,25 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*aarch64_pred_abd_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + (define_expand "@aarch64_cond_abd" [(set (match_operand:SVE_FULL_F 0 "register_operand") (unspec:SVE_FULL_F @@ -5376,82 +5787,124 @@ ;; Predicated floating-point absolute difference, merging with the first ;; input. -(define_insn_and_rewrite "*aarch64_cond_abd_2" +(define_insn_and_rewrite "*aarch64_cond_abd_2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (unspec:SVE_FULL_F - [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + [(match_operand 5) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] UNSPEC_COND_FSUB)] UNSPEC_COND_FABS) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE - && aarch64_sve_pred_dominates_p (&operands[4], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + "TARGET_SVE" "@ fabd\t%0., %1/m, %0., %3. movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." 
"&& (!rtx_equal_p (operands[1], operands[4]) - || !rtx_equal_p (operands[1], operands[6]))" + || !rtx_equal_p (operands[1], operands[5]))" { operands[4] = copy_rtx (operands[1]); - operands[6] = copy_rtx (operands[1]); + operands[5] = copy_rtx (operands[1]); } [(set_attr "movprfx" "*,yes")] ) +(define_insn "*aarch64_cond_abd_2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %3. + movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point absolute difference, merging with the second ;; input. -(define_insn_and_rewrite "*aarch64_cond_abd_3" +(define_insn_and_rewrite "*aarch64_cond_abd_3_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (unspec:SVE_FULL_F - [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + [(match_operand 5) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] UNSPEC_COND_FSUB)] UNSPEC_COND_FABS) (match_dup 3)] UNSPEC_SEL))] - "TARGET_SVE - && aarch64_sve_pred_dominates_p (&operands[4], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + "TARGET_SVE" "@ fabd\t%0., %1/m, %0., %2. movprfx\t%0, %3\;fabd\t%0., %1/m, %0., %2." 
"&& (!rtx_equal_p (operands[1], operands[4]) - || !rtx_equal_p (operands[1], operands[6]))" + || !rtx_equal_p (operands[1], operands[5]))" { operands[4] = copy_rtx (operands[1]); - operands[6] = copy_rtx (operands[1]); + operands[5] = copy_rtx (operands[1]); } [(set_attr "movprfx" "*,yes")] ) +(define_insn "*aarch64_cond_abd_3_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fabd\t%0., %1/m, %0., %2. + movprfx\t%0, %3\;fabd\t%0., %1/m, %0., %2." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point absolute difference, merging with an ;; independent value. 
-(define_insn_and_rewrite "*aarch64_cond_abd_any" +(define_insn_and_rewrite "*aarch64_cond_abd_any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (unspec:SVE_FULL_F - [(match_operand 7) - (match_operand:SI 8 "aarch64_sve_gp_strictness") + [(match_operand 6) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] UNSPEC_COND_FSUB)] @@ -5460,9 +5913,7 @@ UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4]) - && !rtx_equal_p (operands[3], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[7], operands[1])" + && !rtx_equal_p (operands[3], operands[4])" "@ movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %3. movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %2. 
@@ -5472,18 +5923,18 @@ "&& 1" { if (reload_completed - && register_operand (operands[4], mode) - && !rtx_equal_p (operands[0], operands[4])) + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])) { emit_insn (gen_vcond_mask_ (operands[0], operands[3], operands[4], operands[1])); operands[4] = operands[3] = operands[0]; } else if (!rtx_equal_p (operands[1], operands[5]) - || !rtx_equal_p (operands[1], operands[7])) + || !rtx_equal_p (operands[1], operands[6])) { operands[5] = copy_rtx (operands[1]); - operands[7] = copy_rtx (operands[1]); + operands[6] = copy_rtx (operands[1]); } else FAIL; @@ -5491,6 +5942,42 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*aarch64_cond_abd_any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 6 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %3. + movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %2. + movprfx\t%0., %1/z, %2.\;fabd\t%0., %1/m, %0., %3. + movprfx\t%0., %1/m, %2.\;fabd\t%0., %1/m, %0., %3. 
+ #" + "&& reload_completed + && register_operand (operands[4], mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[3], + operands[4], operands[1])); + operands[4] = operands[3] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Multiplication ;; ------------------------------------------------------------------------- @@ -6416,20 +6903,20 @@ ;; Predicated floating-point ternary operations, merging with the ;; first input. -(define_insn_and_rewrite "*cond__2" +(define_insn_and_rewrite "*cond__2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w") (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %3., %4. movprfx\t%0, %2\;\t%0., %1/m, %3., %4." @@ -6440,22 +6927,42 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] + SVE_COND_FP_TERNARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %3., %4. + movprfx\t%0, %2\;\t%0., %1/m, %3., %4." 
+ [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point ternary operations, merging with the ;; third input. -(define_insn_and_rewrite "*cond__4" +(define_insn_and_rewrite "*cond__4_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w") (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ \t%0., %1/m, %2., %3. movprfx\t%0, %4\;\t%0., %1/m, %2., %3." @@ -6466,15 +6973,35 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__4_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_COND_FP_TERNARY) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + \t%0., %1/m, %2., %3. + movprfx\t%0, %4\;\t%0., %1/m, %2., %3." + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point ternary operations, merging with an ;; independent value. 
-(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] @@ -6484,8 +7011,7 @@ "TARGET_SVE && !rtx_equal_p (operands[2], operands[5]) && !rtx_equal_p (operands[3], operands[5]) - && !rtx_equal_p (operands[4], operands[5]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + && !rtx_equal_p (operands[4], operands[5])" "@ movprfx\t%0., %1/z, %4.\;\t%0., %1/m, %2., %3. movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %3. @@ -6511,6 +7037,41 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] + SVE_COND_FP_TERNARY) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[5]) + && !rtx_equal_p (operands[3], operands[5]) + && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0., %1/z, %4.\;\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %3. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %3., %4. + movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %4. 
+ movprfx\t%0., %1/m, %4.\;\t%0., %1/m, %2., %3. + #" + "&& reload_completed + && register_operand (operands[5], mode) + && !rtx_equal_p (operands[0], operands[5])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using ;; (fma ...) since target-independent code won't understand the indexing. (define_insn "@aarch64__lane_" @@ -6572,20 +7133,20 @@ ) ;; Predicated FCMLA, merging with the third input. -(define_insn_and_rewrite "*cond__4" +(define_insn_and_rewrite "*cond__4_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w") (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] SVE_COND_FCMLA) (match_dup 4)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ fcmla\t%0., %1/m, %2., %3., # movprfx\t%0, %4\;fcmla\t%0., %1/m, %2., %3., #" @@ -6596,23 +7157,41 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond__4_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_COND_FCMLA) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0, %4\;fcmla\t%0., %1/m, %2., %3., #" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated FCMLA, merging 
with an independent value. -(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] SVE_COND_FCMLA) (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[4], operands[5]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" "@ movprfx\t%0., %1/z, %4.\;fcmla\t%0., %1/m, %2., %3., # movprfx\t%0., %1/z, %0.\;fcmla\t%0., %1/m, %2., %3., # @@ -6636,6 +7215,36 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond__any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] + SVE_COND_FCMLA) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0., %1/z, %4.\;fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0., %1/z, %0.\;fcmla\t%0., %1/m, %2., %3., # + movprfx\t%0., %1/m, %4.\;fcmla\t%0., %1/m, %2., %3., # + #" + "&& reload_completed + && register_operand (operands[5], mode) + && !rtx_equal_p (operands[0], operands[5])" + { + emit_insn (gen_vcond_mask_ (operands[0], operands[4], + 
operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Unpredicated FCMLA with indexing. (define_insn "@aarch64__lane_" [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") @@ -7328,34 +7937,52 @@ "TARGET_SVE" ) -(define_insn_and_rewrite "*aarch64_pred_fac" +(define_insn_and_rewrite "*aarch64_pred_fac_relaxed" [(set (match_operand: 0 "register_operand" "=Upa") (unspec: [(match_operand: 1 "register_operand" "Upl") (match_operand:SI 4 "aarch64_sve_ptrue_flag") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w")] UNSPEC_COND_FABS) (unspec:SVE_FULL_F - [(match_operand 7) - (match_operand:SI 8 "aarch64_sve_gp_strictness") + [(match_operand 6) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 3 "register_operand" "w")] UNSPEC_COND_FABS)] SVE_COND_FP_ABS_CMP))] - "TARGET_SVE - && aarch64_sve_pred_dominates_p (&operands[5], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[7], operands[1])" + "TARGET_SVE" "fac\t%0., %1/z, %2., %3." 
"&& (!rtx_equal_p (operands[1], operands[5]) - || !rtx_equal_p (operands[1], operands[7]))" + || !rtx_equal_p (operands[1], operands[6]))" { operands[5] = copy_rtx (operands[1]); - operands[7] = copy_rtx (operands[1]); + operands[6] = copy_rtx (operands[1]); } ) +(define_insn "*aarch64_pred_fac_strict" + [(set (match_operand: 0 "register_operand" "=Upa") + (unspec: + [(match_operand: 1 "register_operand" "Upl") + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 6 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 3 "register_operand" "w")] + UNSPEC_COND_FABS)] + SVE_COND_FP_ABS_CMP))] + "TARGET_SVE" + "fac\t%0., %1/z, %2., %3." +) + ;; ------------------------------------------------------------------------- ;; ---- [PRED] Select ;; ------------------------------------------------------------------------- @@ -7937,20 +8564,18 @@ ;; the same register (despite having different modes). Making all the ;; alternatives earlyclobber makes things more consistent for the ;; register allocator. -(define_insn_and_rewrite "*cond__nontrunc" +(define_insn_and_rewrite "*cond__nontrunc_relaxed" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") (unspec:SVE_FULL_HSDI [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_HSDI [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] SVE_COND_FCVTI) (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE - && >= - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE && >= " "@ fcvtz\t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;fcvtz\t%0., %1/m, %2. 
@@ -7962,6 +8587,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond__nontrunc_strict" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") + (unspec:SVE_FULL_HSDI + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_HSDI + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE_COND_FCVTI) + (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && >= " + "@ + fcvtz\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;fcvtz\t%0., %1/m, %2. + movprfx\t%0, %3\;fcvtz\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + ;; Predicated narrowing float-to-integer conversion with merging. (define_expand "@cond__trunc" [(set (match_operand:VNx4SI_ONLY 0 "register_operand") @@ -8101,20 +8745,18 @@ ;; the same register (despite having different modes). Making all the ;; alternatives earlyclobber makes things more consistent for the ;; register allocator. -(define_insn_and_rewrite "*cond__nonextend" +(define_insn_and_rewrite "*cond__nonextend_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] SVE_COND_ICVTF) (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE - && >= - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE && >= " "@ cvtf\t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. 
@@ -8126,6 +8768,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond__nonextend_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] + SVE_COND_ICVTF) + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && >= " + "@ + cvtf\t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. + movprfx\t%0, %3\;cvtf\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + ;; Predicated widening integer-to-float conversion with merging. (define_expand "@cond__extend" [(set (match_operand:VNx2DF_ONLY 0 "register_operand") diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index e18b9fe..0cafd0b 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1890,18 +1890,18 @@ ) ;; These instructions do not take MOVPRFX. -(define_insn_and_rewrite "*cond_" +(define_insn_and_rewrite "*cond__relaxed" [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") (unspec:SVE_FULL_SDF [(match_operand: 1 "register_operand" "Upl") (unspec:SVE_FULL_SDF [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand: 2 "register_operand" "w")] SVE2_COND_FP_UNARY_LONG) (match_operand:SVE_FULL_SDF 3 "register_operand" "0")] UNSPEC_SEL))] - "TARGET_SVE2 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE2" "\t%0., %1/m, %2." 
"&& !rtx_equal_p (operands[1], operands[4])" { @@ -1909,6 +1909,21 @@ } ) +(define_insn "*cond__strict" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") + (unspec:SVE_FULL_SDF + [(match_operand: 1 "register_operand" "Upl") + (unspec:SVE_FULL_SDF + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand: 2 "register_operand" "w")] + SVE2_COND_FP_UNARY_LONG) + (match_operand:SVE_FULL_SDF 3 "register_operand" "0")] + UNSPEC_SEL))] + "TARGET_SVE2" + "\t%0., %1/m, %2." +) + ;; ------------------------------------------------------------------------- ;; ---- [FP<-FP] Narrowing conversions ;; ------------------------------------------------------------------------- @@ -1963,20 +1978,18 @@ "TARGET_SVE2" ) -(define_insn_and_rewrite "*cond__any" +(define_insn_and_rewrite "*cond__any_relaxed" [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w") (unspec:VNx4SF_ONLY [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec:VNx4SF_ONLY [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand: 2 "register_operand" "w, w, w")] SVE2_COND_FP_UNARY_NARROWB) (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE2 - && !rtx_equal_p (operands[2], operands[3]) - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" "@ \t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. 
@@ -1988,6 +2001,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond__any_strict" + [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w") + (unspec:VNx4SF_ONLY + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec:VNx4SF_ONLY + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand: 2 "register_operand" "w, w, w")] + SVE2_COND_FP_UNARY_NARROWB) + (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" + "@ + \t%0., %1/m, %2. + movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + ;; Predicated FCVTXNT. This doesn't give a natural aarch64_pred_*/cond_* ;; pair because the even elements always have to be supplied for active ;; elements, even if the inactive elements don't matter. @@ -2113,14 +2145,12 @@ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") (unspec: [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] SVE2_COND_INT_UNARY_FP) (match_operand: 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE2 - && !rtx_equal_p (operands[2], operands[3]) - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" "@ \t%0., %1/m, %2. movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. @@ -2132,6 +2162,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond__strict" + [(set (match_operand: 0 "register_operand" "=&w, ?&w, ?&w") + (unspec: + [(match_operand: 1 "register_operand" "Upl, Upl, Upl") + (unspec: + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE2_COND_INT_UNARY_FP) + (match_operand: 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" + "@ + \t%0., %1/m, %2. 
+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. + movprfx\t%0, %3\;\t%0., %1/m, %2." + [(set_attr "movprfx" "*,yes,yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Polynomial multiplication ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 9e88438..a3408f4 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -3717,24 +3717,6 @@ aarch64_pfalse_reg (machine_mode mode) return gen_lowpart (mode, reg); } -/* Return true if predicate PRED1[0] is true whenever predicate PRED2 is - true, or alternatively if we know that the operation predicated by - PRED1[0] is safe to perform whenever PRED2 is true. PRED1[1] is a - aarch64_sve_gp_strictness operand that describes the operation - predicated by PRED1[0]. */ - -bool -aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2) -{ - machine_mode mode = GET_MODE (pred2); - gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL - && mode == GET_MODE (pred1[0]) - && aarch64_sve_gp_strictness (pred1[1], SImode)); - return (pred1[0] == CONSTM1_RTX (mode) - || INTVAL (pred1[1]) == SVE_RELAXED_GP - || rtx_equal_p (pred1[0], pred2)); -} - /* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag for it. PRED2[0] is the predicate for the instruction whose result is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag -- cgit v1.1 From 8d1cede1bb58dbb9ae1d24a7be5f111a07674363 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Fri, 2 Oct 2020 13:01:01 +0200 Subject: Add poly_int64 streaming support 2020-10-02 Jan Hubicka * data-streamer-in.c (streamer_read_poly_int64): New function. * data-streamer-out.c (streamer_write_poly_int64): New function. * data-streamer.h (streamer_write_poly_int64): Declare. (streamer_read_poly_int64): Declare. 
--- gcc/data-streamer-in.c | 11 +++++++++++ gcc/data-streamer-out.c | 9 +++++++++ gcc/data-streamer.h | 2 ++ 3 files changed, 22 insertions(+) (limited to 'gcc') diff --git a/gcc/data-streamer-in.c b/gcc/data-streamer-in.c index d3ba634..7abb48a 100644 --- a/gcc/data-streamer-in.c +++ b/gcc/data-streamer-in.c @@ -186,6 +186,17 @@ streamer_read_poly_uint64 (class lto_input_block *ib) return res; } +/* Read a poly_int64 from IB. */ + +poly_int64 +streamer_read_poly_int64 (class lto_input_block *ib) +{ + poly_int64 res; + for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i) + res.coeffs[i] = streamer_read_hwi (ib); + return res; +} + /* Read gcov_type value from IB. */ gcov_type diff --git a/gcc/data-streamer-out.c b/gcc/data-streamer-out.c index 6603839..fb8a90a 100644 --- a/gcc/data-streamer-out.c +++ b/gcc/data-streamer-out.c @@ -229,6 +229,15 @@ streamer_write_poly_uint64 (struct output_block *ob, poly_uint64 work) streamer_write_uhwi_stream (ob->main_stream, work.coeffs[i]); } +/* Write a poly_int64 value WORK to OB->main_stream. */ + +void +streamer_write_poly_int64 (struct output_block *ob, poly_int64 work) +{ + for (int i = 0; i < NUM_POLY_INT_COEFFS; ++i) + streamer_write_hwi_stream (ob->main_stream, work.coeffs[i]); +} + /* Write a gcov counter value WORK to OB->main_stream. 
*/ void diff --git a/gcc/data-streamer.h b/gcc/data-streamer.h index 61fa3f9..6ab586f 100644 --- a/gcc/data-streamer.h +++ b/gcc/data-streamer.h @@ -54,6 +54,7 @@ void streamer_write_zero (struct output_block *); void streamer_write_uhwi (struct output_block *, unsigned HOST_WIDE_INT); void streamer_write_hwi (struct output_block *, HOST_WIDE_INT); void streamer_write_poly_uint64 (struct output_block *, poly_uint64); +void streamer_write_poly_int64 (struct output_block *, poly_int64); void streamer_write_gcov_count (struct output_block *, gcov_type); void streamer_write_string (struct output_block *, struct lto_output_stream *, const char *, bool); @@ -84,6 +85,7 @@ const char *bp_unpack_string (class data_in *, struct bitpack_d *); unsigned HOST_WIDE_INT streamer_read_uhwi (class lto_input_block *); HOST_WIDE_INT streamer_read_hwi (class lto_input_block *); poly_uint64 streamer_read_poly_uint64 (class lto_input_block *); +poly_int64 streamer_read_poly_int64 (class lto_input_block *); gcov_type streamer_read_gcov_count (class lto_input_block *); wide_int streamer_read_wide_int (class lto_input_block *); widest_int streamer_read_widest_int (class lto_input_block *); -- cgit v1.1 From b8e773e9921904210cad1f396b2ab01ffdbc4b39 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Fri, 2 Oct 2020 13:14:57 +0200 Subject: Break out ao_ref_init_from_ptr_and_range from ao_ref_init_from_ptr_and_size * tree-ssa-alias.c (ao_ref_init_from_ptr_and_range): Break out from ... (ao_ref_init_from_ptr_and_size): ... here. --- gcc/tree-ssa-alias.c | 52 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 12 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c index fe390d4..9e5c3ee 100644 --- a/gcc/tree-ssa-alias.c +++ b/gcc/tree-ssa-alias.c @@ -735,14 +735,21 @@ ao_ref_alias_set (ao_ref *ref) } /* Init an alias-oracle reference representation from a gimple pointer - PTR and a gimple size SIZE in bytes. 
If SIZE is NULL_TREE then the - size is assumed to be unknown. The access is assumed to be only - to or after of the pointer target, not before it. */ + PTR a range specified by OFFSET, SIZE and MAX_SIZE under the assumption + that RANGE_KNOWN is set. -void -ao_ref_init_from_ptr_and_size (ao_ref *ref, tree ptr, tree size) + The access is assumed to be only to or after of the pointer target adjusted + by the offset, not before it (even in the case RANGE_KNOWN is false). */ + +static void +ao_ref_init_from_ptr_and_range (ao_ref *ref, tree ptr, + bool range_known, + poly_int64 offset, + poly_int64 size, + poly_int64 max_size) { - poly_int64 t, size_hwi, extra_offset = 0; + poly_int64 t, extra_offset = 0; + ref->ref = NULL_TREE; if (TREE_CODE (ptr) == SSA_NAME) { @@ -766,7 +773,7 @@ ao_ref_init_from_ptr_and_size (ao_ref *ref, tree ptr, tree size) ref->offset = BITS_PER_UNIT * t; else { - size = NULL_TREE; + range_known = false; ref->offset = 0; ref->base = get_base_address (TREE_OPERAND (ptr, 0)); } @@ -778,11 +785,12 @@ ao_ref_init_from_ptr_and_size (ao_ref *ref, tree ptr, tree size) ptr, null_pointer_node); ref->offset = 0; } - ref->offset += extra_offset; - if (size - && poly_int_tree_p (size, &size_hwi) - && coeffs_in_range_p (size_hwi, 0, HOST_WIDE_INT_MAX / BITS_PER_UNIT)) - ref->max_size = ref->size = size_hwi * BITS_PER_UNIT; + ref->offset += extra_offset + offset; + if (range_known) + { + ref->max_size = max_size; + ref->size = size; + } else ref->max_size = ref->size = -1; ref->ref_alias_set = 0; @@ -790,6 +798,26 @@ ao_ref_init_from_ptr_and_size (ao_ref *ref, tree ptr, tree size) ref->volatile_p = false; } +/* Init an alias-oracle reference representation from a gimple pointer + PTR and a gimple size SIZE in bytes. If SIZE is NULL_TREE then the + size is assumed to be unknown. The access is assumed to be only + to or after of the pointer target, not before it. 
*/ + +void +ao_ref_init_from_ptr_and_size (ao_ref *ref, tree ptr, tree size) +{ + poly_int64 size_hwi; + if (size + && poly_int_tree_p (size, &size_hwi) + && coeffs_in_range_p (size_hwi, 0, HOST_WIDE_INT_MAX / BITS_PER_UNIT)) + { + size_hwi = size_hwi * BITS_PER_UNIT; + ao_ref_init_from_ptr_and_range (ref, ptr, true, 0, size_hwi, size_hwi); + } + else + ao_ref_init_from_ptr_and_range (ref, ptr, false, 0, -1, -1); +} + /* S1 and S2 are TYPE_SIZE or DECL_SIZE. Compare them: Return -1 if S1 < S2 Return 1 if S1 > S2 -- cgit v1.1 From 05d39f0de9ee0455d7b2b60f314f4231bc9a87c1 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Fri, 2 Oct 2020 13:31:05 +0200 Subject: Commonize handling of attr-fnspec * attr-fnspec.h: New file. * calls.c (decl_return_flags): Use attr_fnspec. * gimple.c (gimple_call_arg_flags): Use attr_fnspec. (gimple_call_return_flags): Use attr_fnspec. * tree-into-ssa.c (pass_build_ssa::execute): Use attr_fnspec. * tree-ssa-alias.c (attr_fnspec::verify): New member function. --- gcc/attr-fnspec.h | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/calls.c | 25 +++------ gcc/gimple.c | 61 +++++++++------------- gcc/tree-into-ssa.c | 19 +++---- gcc/tree-ssa-alias.c | 41 +++++++++++++++ 5 files changed, 229 insertions(+), 62 deletions(-) create mode 100644 gcc/attr-fnspec.h (limited to 'gcc') diff --git a/gcc/attr-fnspec.h b/gcc/attr-fnspec.h new file mode 100644 index 0000000..607c0cf --- /dev/null +++ b/gcc/attr-fnspec.h @@ -0,0 +1,145 @@ +/* Handling of fnspec attribute specifiers + Copyright (C) 2008-2020 Free Software Foundation, Inc. + Contributed by Richard Guenther + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +/* Parse string of attribute "fn spec". This is an internal attribute + describing side effects of a function as follows: + + character 0 specifies properties of return values as follows: + '1'...'4' specifies number of argument function returns (as in memset) + 'm' specifies that returned value is noalias (as in malloc) + '.' specifies that nothing is known. + + character 1+i specifies properties of argument number i as follows: + 'x' or 'X' specifies that parameter is unused. + 'r' or 'R' specifies that parameter is only read and memory pointed to is + never dereferenced. + 'w' or 'W' specifies that parameter is only written to. + '.' specifies that nothing is known. + The uppercase letter in addition specifies that parameter + is non-escaping. */ + +#ifndef ATTR_FNSPEC_H +#define ATTR_FNSPEC_H + +class attr_fnspec +{ +private: + /* fn spec attribute string. */ + const char *str; + /* length of the fn spec string. */ + const unsigned len; + /* Number of characters specifying return value. */ + const unsigned int return_desc_size = 1; + /* Number of characters specifying size. */ + const unsigned int arg_desc_size = 1; + + /* Return start of specifier of arg i. */ + unsigned int arg_idx (int i) + { + return return_desc_size + arg_desc_size * i; + } + +public: + attr_fnspec (const char *str, unsigned len) + : str (str), len (len) + { + if (flag_checking) + verify (); + } + attr_fnspec (const_tree identifier) + : str (TREE_STRING_POINTER (identifier)), + len (TREE_STRING_LENGTH (identifier)) + { + if (flag_checking) + verify (); + } + + /* Return true if arg I is specified. 
*/ + bool + arg_specified_p (unsigned int i) + { + return len >= arg_idx (i + 1); + } + + /* True if the argument is not dereferenced recursively, thus only + directly reachable memory is read or written. */ + bool + arg_direct_p (unsigned int i) + { + unsigned int idx = arg_idx (i); + gcc_checking_assert (arg_specified_p (i)); + return str[idx] == 'R' || str[idx] == 'W'; + } + + /* True if argument is used. */ + bool + arg_used_p (unsigned int i) + { + unsigned int idx = arg_idx (i); + gcc_checking_assert (arg_specified_p (i)); + return str[idx] != 'x' && str[idx] != 'X'; + } + + /* True if memory reached by the argument is readonly (not clobbered). */ + bool + arg_readonly_p (unsigned int i) + { + unsigned int idx = arg_idx (i); + gcc_checking_assert (arg_specified_p (i)); + return str[idx] == 'r' || str[idx] == 'R'; + } + + /* True if the argument does not escape. */ + bool + arg_noescape_p (unsigned int i) + { + unsigned int idx = arg_idx (i); + gcc_checking_assert (arg_specified_p (i)); + return str[idx] == 'w' || str[idx] == 'W' + || str[idx] == 'R' || str[idx] == 'r'; + } + + /* Return true if function returns value of its parameter. If ARG_NO is + non-NULL return initialize it to the argument returned. */ + bool + returns_arg (unsigned int *arg_no) + { + if (str[0] >= '1' && str[0] <= '4') + { + if (arg_no) + *arg_no = str[0] - '1'; + return true; + } + return false; + } + + /* Nonzero if the return value does not alias with anything. Functions + with the malloc attribute have this set on their return value. */ + bool + returns_noalias_p () + { + return str[0] == 'm'; + } + + /* Check validity of the string. */ + void verify (); +}; + +#endif /* ATTR_FNSPEC_H */ diff --git a/gcc/calls.c b/gcc/calls.c index ed43638..93da3d6 100644 --- a/gcc/calls.c +++ b/gcc/calls.c @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. 
If not see #include "attribs.h" #include "builtins.h" #include "gimple-fold.h" +#include "attr-fnspec.h" #include "tree-pretty-print.h" @@ -642,25 +643,15 @@ decl_return_flags (tree fndecl) if (!attr) return 0; - attr = TREE_VALUE (TREE_VALUE (attr)); - if (!attr || TREE_STRING_LENGTH (attr) < 1) - return 0; - - switch (TREE_STRING_POINTER (attr)[0]) - { - case '1': - case '2': - case '3': - case '4': - return ERF_RETURNS_ARG | (TREE_STRING_POINTER (attr)[0] - '1'); + attr_fnspec fnspec (TREE_VALUE (TREE_VALUE (attr))); - case 'm': - return ERF_NOALIAS; + unsigned int arg; + if (fnspec.returns_arg (&arg)) + return ERF_RETURNS_ARG | arg; - case '.': - default: - return 0; - } + if (fnspec.returns_noalias_p ()) + return ERF_NOALIAS; + return 0; } /* Return nonzero when FNDECL represents a call to setjmp. */ diff --git a/gcc/gimple.c b/gcc/gimple.c index 523d845..f19e24d 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -45,6 +45,7 @@ along with GCC; see the file COPYING3. If not see #include "attribs.h" #include "asan.h" #include "langhooks.h" +#include "attr-fnspec.h" /* All the tuples have their operand vector (if present) at the very bottom @@ -1512,31 +1513,26 @@ gimple_call_arg_flags (const gcall *stmt, unsigned arg) { const_tree attr = gimple_call_fnspec (stmt); - if (!attr || 1 + arg >= (unsigned) TREE_STRING_LENGTH (attr)) + if (!attr) return 0; - switch (TREE_STRING_POINTER (attr)[1 + arg]) - { - case 'x': - case 'X': - return EAF_UNUSED; - - case 'R': - return EAF_DIRECT | EAF_NOCLOBBER | EAF_NOESCAPE; - - case 'r': - return EAF_NOCLOBBER | EAF_NOESCAPE; - - case 'W': - return EAF_DIRECT | EAF_NOESCAPE; - - case 'w': - return EAF_NOESCAPE; + int flags = 0; + attr_fnspec fnspec (attr); - case '.': - default: - return 0; + if (!fnspec.arg_specified_p (arg)) + ; + else if (!fnspec.arg_used_p (arg)) + flags = EAF_UNUSED; + else + { + if (fnspec.arg_direct_p (arg)) + flags |= EAF_DIRECT; + if (fnspec.arg_noescape_p (arg)) + flags |= EAF_NOESCAPE; + if 
(fnspec.arg_readonly_p (arg)) + flags |= EAF_NOCLOBBER; } + return flags; } /* Detects return flags for the call STMT. */ @@ -1550,24 +1546,17 @@ gimple_call_return_flags (const gcall *stmt) return ERF_NOALIAS; attr = gimple_call_fnspec (stmt); - if (!attr || TREE_STRING_LENGTH (attr) < 1) + if (!attr) return 0; + attr_fnspec fnspec (attr); - switch (TREE_STRING_POINTER (attr)[0]) - { - case '1': - case '2': - case '3': - case '4': - return ERF_RETURNS_ARG | (TREE_STRING_POINTER (attr)[0] - '1'); - - case 'm': - return ERF_NOALIAS; + unsigned int arg_no; + if (fnspec.returns_arg (&arg_no)) + return ERF_RETURNS_ARG | arg_no; - case '.': - default: - return 0; - } + if (fnspec.returns_noalias_p ()) + return ERF_NOALIAS; + return 0; } diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c index 0d01613..1493b32 100644 --- a/gcc/tree-into-ssa.c +++ b/gcc/tree-into-ssa.c @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see #include "stringpool.h" #include "attribs.h" #include "asan.h" +#include "attr-fnspec.h" #define PERCENT(x,y) ((float)(x) * 100.0 / (float)(y)) @@ -2492,19 +2493,19 @@ pass_build_ssa::execute (function *fun) } /* Initialize SSA_NAME_POINTS_TO_READONLY_MEMORY. 
*/ - tree fnspec = lookup_attribute ("fn spec", - TYPE_ATTRIBUTES (TREE_TYPE (fun->decl))); - if (fnspec) + tree fnspec_tree + = lookup_attribute ("fn spec", + TYPE_ATTRIBUTES (TREE_TYPE (fun->decl))); + if (fnspec_tree) { - fnspec = TREE_VALUE (TREE_VALUE (fnspec)); - unsigned i = 1; + attr_fnspec fnspec (TREE_VALUE (TREE_VALUE (fnspec_tree))); + unsigned i = 0; for (tree arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg), ++i) { - if (i >= (unsigned) TREE_STRING_LENGTH (fnspec)) - break; - if (TREE_STRING_POINTER (fnspec)[i] == 'R' - || TREE_STRING_POINTER (fnspec)[i] == 'r') + if (!fnspec.arg_specified_p (i)) + break; + if (fnspec.arg_readonly_p (i)) { tree name = ssa_default_def (fun, arg); if (name) diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c index 9e5c3ee..52aeaeb 100644 --- a/gcc/tree-ssa-alias.c +++ b/gcc/tree-ssa-alias.c @@ -40,6 +40,8 @@ along with GCC; see the file COPYING3. If not see #include "varasm.h" #include "ipa-modref-tree.h" #include "ipa-modref.h" +#include "attr-fnspec.h" +#include "errors.h" /* Broad overview of how alias analysis on gimple works: @@ -4012,3 +4014,42 @@ walk_aliased_vdefs (ao_ref *ref, tree vdef, return ret; } +/* Verify validity of the fnspec string. + See attr-fnspec.h for details. */ + +void +attr_fnspec::verify () +{ + /* FIXME: Fortran trans-decl.c contains multiple wrong fnspec strings. + re-enable verification after these are fixed. */ + return; + bool err = false; + + /* Check return value specifier. */ + if (len < return_desc_size) + err = true; + else if ((str[0] < '1' || str[0] > '4') + && str[0] != '.' && str[0] != 'm') + err = true; + + /* Now check all parameters. 
*/ + for (unsigned int i = 0; arg_specified_p (i); i++) + { + unsigned int idx = arg_idx (i); + switch (str[idx]) + { + case 'x': + case 'X': + case 'r': + case 'R': + case 'w': + case 'W': + case '.': + break; + default: + err = true; + } + } + if (err) + internal_error ("invalid fn spec attribute %s", str); +} -- cgit v1.1 From 1d3e12c469e5f5627c2e271232e1a3d8a88783be Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Fri, 2 Oct 2020 04:58:57 -0700 Subject: c++: Simplify __FUNCTION__ creation I had reason to wander into cp_make_fname, and noticed it's the only caller of cp_fname_init. Folding it in makes the code simpler. gcc/cp/ * cp-tree.h (cp_fname_init): Delete declaration. * decl.c (cp_fname_init): Merge into only caller ... (cp_make_fname): ... here & refactor. --- gcc/cp/cp-tree.h | 1 - gcc/cp/decl.c | 85 +++++++++++++++++++++----------------------------------- 2 files changed, 32 insertions(+), 54 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index fda5ffa..9f948ae 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6514,7 +6514,6 @@ extern tree create_implicit_typedef (tree, tree); extern int local_variable_p (const_tree); extern tree register_dtor_fn (tree); extern tmpl_spec_kind current_tmpl_spec_kind (int); -extern tree cp_fname_init (const char *, tree *); extern tree cxx_builtin_function (tree decl); extern tree cxx_builtin_function_ext_scope (tree decl); extern tree cxx_simulate_builtin_function_decl (tree); diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index d2a8d40..6b306ee 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -4592,38 +4592,6 @@ cxx_init_decl_processing (void) using_eh_for_cleanups (); } -/* Generate an initializer for a function naming variable from - NAME. NAME may be NULL, to indicate a dependent name. TYPE_P is - filled in with the type of the init. 
*/ - -tree -cp_fname_init (const char* name, tree *type_p) -{ - tree domain = NULL_TREE; - tree type; - tree init = NULL_TREE; - size_t length = 0; - - if (name) - { - length = strlen (name); - domain = build_index_type (size_int (length)); - init = build_string (length + 1, name); - } - - type = cp_build_qualified_type (char_type_node, TYPE_QUAL_CONST); - type = build_cplus_array_type (type, domain); - - *type_p = type; - - if (init) - TREE_TYPE (init) = type; - else - init = error_mark_node; - - return init; -} - /* Create the VAR_DECL for __FUNCTION__ etc. ID is the name to give the decl, LOC is the location to give the decl, NAME is the initialization string and TYPE_DEP indicates whether NAME depended @@ -4634,31 +4602,45 @@ cp_fname_init (const char* name, tree *type_p) static tree cp_make_fname_decl (location_t loc, tree id, int type_dep) { - const char * name = NULL; - bool release_name = false; + tree domain = NULL_TREE; + tree init = NULL_TREE; + if (!(type_dep && in_template_function ())) { + const char *name = NULL; + bool release_name = false; + if (current_function_decl == NULL_TREE) name = "top level"; - else if (type_dep == 1) /* __PRETTY_FUNCTION__ */ - name = cxx_printable_name (current_function_decl, 2); - else if (type_dep == 0) /* __FUNCTION__ */ + else if (type_dep == 0) { + /* __FUNCTION__ */ name = fname_as_string (type_dep); release_name = true; } else - gcc_unreachable (); + { + /* __PRETTY_FUNCTION__ */ + gcc_checking_assert (type_dep == 1); + name = cxx_printable_name (current_function_decl, 2); + } + + size_t length = strlen (name); + domain = build_index_type (size_int (length)); + init = build_string (length + 1, name); + if (release_name) + free (const_cast (name)); } - tree type; - tree init = cp_fname_init (name, &type); - tree decl = build_decl (loc, VAR_DECL, id, type); - if (release_name) - free (CONST_CAST (char *, name)); + tree type = cp_build_qualified_type (char_type_node, TYPE_QUAL_CONST); + type = build_cplus_array_type 
(type, domain); - /* As we're using pushdecl_with_scope, we must set the context. */ - DECL_CONTEXT (decl) = current_function_decl; + if (init) + TREE_TYPE (init) = type; + else + init = error_mark_node; + + tree decl = build_decl (loc, VAR_DECL, id, type); TREE_READONLY (decl) = 1; DECL_ARTIFICIAL (decl) = 1; @@ -4667,13 +4649,10 @@ cp_make_fname_decl (location_t loc, tree id, int type_dep) TREE_USED (decl) = 1; - if (init) - { - SET_DECL_VALUE_EXPR (decl, init); - DECL_HAS_VALUE_EXPR_P (decl) = 1; - /* For decl_constant_var_p. */ - DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (decl) = 1; - } + SET_DECL_VALUE_EXPR (decl, init); + DECL_HAS_VALUE_EXPR_P (decl) = 1; + /* For decl_constant_var_p. */ + DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (decl) = 1; if (current_function_decl) { @@ -4685,7 +4664,7 @@ cp_make_fname_decl (location_t loc, tree id, int type_dep) else { DECL_THIS_STATIC (decl) = true; - pushdecl_top_level_and_finish (decl, NULL_TREE); + decl = pushdecl_top_level_and_finish (decl, NULL_TREE); } return decl; -- cgit v1.1 From 762cca0023c9bdbd762c44f33a954845bbccd568 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Fri, 2 Oct 2020 15:56:12 +0200 Subject: Perforate fnspec strings gcc/ChangeLog: 2020-10-02 Jan Hubicka * attr-fnspec.h: Update documentation. (attr_fnspec::return_desc_size): Set to 2. (attr_fnspec::arg_desc_size): Set to 2. * builtin-attrs.def (STR1): Update fnspec. * internal-fn.def (UBSAN_NULL): Update fnspec. (UBSAN_VPTR): Update fnspec. (UBSAN_PTR): Update fnspec. (ASAN_CHECK): Update fnspec. (GOACC_DIM_SIZE): Remove fnspec. (GOACC_DIM_POS): Remove fnspec. * tree-ssa-alias.c (attr_fnspec::verify): Update verification. gcc/fortran/ChangeLog: 2020-10-02 Jan Hubicka * trans-decl.c (gfc_build_library_function_decl_with_spec): Verify fnspec. (gfc_build_intrinsic_function_decls): Update fnspecs. (gfc_build_builtin_function_decls): Update fnspecs. * trans-io.c (gfc_build_io_library_fndecls): Update fnspecs.
* trans-types.c (create_fn_spec): Update fnspecs. --- gcc/attr-fnspec.h | 15 ++-- gcc/builtin-attrs.def | 2 +- gcc/fortran/trans-decl.c | 176 +++++++++++++++++++++++++--------------------- gcc/fortran/trans-io.c | 72 +++++++++---------- gcc/fortran/trans-types.c | 38 +++++++--- gcc/internal-fn.def | 13 ++-- gcc/tree-ssa-alias.c | 17 +++-- 7 files changed, 191 insertions(+), 142 deletions(-) (limited to 'gcc') diff --git a/gcc/attr-fnspec.h b/gcc/attr-fnspec.h index 607c0cf..921bb48 100644 --- a/gcc/attr-fnspec.h +++ b/gcc/attr-fnspec.h @@ -25,15 +25,22 @@ '1'...'4' specifies number of argument function returns (as in memset) 'm' specifies that returned value is noalias (as in malloc) '.' specifies that nothing is known. + character 1 specifies additional function properties + ' ' specifies that nothing is known - character 1+i specifies properties of argument number i as follows: + character 2+2i specifies properties of argument number i as follows: 'x' or 'X' specifies that parameter is unused. 'r' or 'R' specifies that parameter is only read and memory pointed to is never dereferenced. 'w' or 'W' specifies that parameter is only written to. '.' specifies that nothing is known. The uppercase letter in addition specifies that parameter - is non-escaping. */ + is non-escaping. + + character 3+2i specifies additional properties of argument number i + as follows: + ' ' nothing is known + */ #ifndef ATTR_FNSPEC_H #define ATTR_FNSPEC_H @@ -46,9 +53,9 @@ private: /* length of the fn spec string. */ const unsigned len; /* Number of characters specifying return value. */ - const unsigned int return_desc_size = 1; + const unsigned int return_desc_size = 2; /* Number of characters specifying size. */ - const unsigned int arg_desc_size = 1; + const unsigned int arg_desc_size = 2; /* Return start of specifier of arg i. 
*/ unsigned int arg_idx (int i) diff --git a/gcc/builtin-attrs.def b/gcc/builtin-attrs.def index 3239311..778bc8a 100644 --- a/gcc/builtin-attrs.def +++ b/gcc/builtin-attrs.def @@ -66,7 +66,7 @@ DEF_ATTR_FOR_INT (6) DEF_ATTR_STRING (ATTR_##ENUM, VALUE) \ DEF_ATTR_TREE_LIST (ATTR_LIST_##ENUM, ATTR_NULL, \ ATTR_##ENUM, ATTR_NULL) -DEF_ATTR_FOR_STRING (STR1, "1") +DEF_ATTR_FOR_STRING (STR1, "1 ") #undef DEF_ATTR_FOR_STRING /* Construct a tree for a list of two integers. */ diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 2be9df4..5940a1f 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. If not see #include "gomp-constants.h" #include "gimplify.h" #include "omp-general.h" +#include "attr-fnspec.h" #define MAX_LABEL_VALUE 99999 @@ -3306,6 +3307,11 @@ gfc_build_library_function_decl_with_spec (tree name, const char *spec, tree ret; va_list args; va_start (args, nargs); + if (flag_checking) + { + attr_fnspec fnspec (spec, strlen (spec)); + fnspec.verify (); + } ret = build_library_function_decl_1 (name, spec, rettype, nargs, args); va_end (args); return ret; @@ -3325,144 +3331,144 @@ gfc_build_intrinsic_function_decls (void) /* String functions. */ gfor_fndecl_compare_string = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("compare_string")), "..R.R", + get_identifier (PREFIX("compare_string")), ". . R . R ", integer_type_node, 4, gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node, pchar1_type_node); DECL_PURE_P (gfor_fndecl_compare_string) = 1; TREE_NOTHROW (gfor_fndecl_compare_string) = 1; gfor_fndecl_concat_string = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("concat_string")), "..W.R.R", + get_identifier (PREFIX("concat_string")), ". . W . R . 
R ", void_type_node, 6, gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node, pchar1_type_node); TREE_NOTHROW (gfor_fndecl_concat_string) = 1; gfor_fndecl_string_len_trim = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_len_trim")), "..R", + get_identifier (PREFIX("string_len_trim")), ". . R ", gfc_charlen_type_node, 2, gfc_charlen_type_node, pchar1_type_node); DECL_PURE_P (gfor_fndecl_string_len_trim) = 1; TREE_NOTHROW (gfor_fndecl_string_len_trim) = 1; gfor_fndecl_string_index = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_index")), "..R.R.", + get_identifier (PREFIX("string_index")), ". . R . R . ", gfc_charlen_type_node, 5, gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node, pchar1_type_node, gfc_logical4_type_node); DECL_PURE_P (gfor_fndecl_string_index) = 1; TREE_NOTHROW (gfor_fndecl_string_index) = 1; gfor_fndecl_string_scan = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_scan")), "..R.R.", + get_identifier (PREFIX("string_scan")), ". . R . R . ", gfc_charlen_type_node, 5, gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node, pchar1_type_node, gfc_logical4_type_node); DECL_PURE_P (gfor_fndecl_string_scan) = 1; TREE_NOTHROW (gfor_fndecl_string_scan) = 1; gfor_fndecl_string_verify = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_verify")), "..R.R.", + get_identifier (PREFIX("string_verify")), ". . R . R . ", gfc_charlen_type_node, 5, gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node, pchar1_type_node, gfc_logical4_type_node); DECL_PURE_P (gfor_fndecl_string_verify) = 1; TREE_NOTHROW (gfor_fndecl_string_verify) = 1; gfor_fndecl_string_trim = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_trim")), ".Ww.R", + get_identifier (PREFIX("string_trim")), ". W w . 
R ", void_type_node, 4, build_pointer_type (gfc_charlen_type_node), build_pointer_type (pchar1_type_node), gfc_charlen_type_node, pchar1_type_node); gfor_fndecl_string_minmax = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_minmax")), ".Ww.R", + get_identifier (PREFIX("string_minmax")), ". W w . R ", void_type_node, -4, build_pointer_type (gfc_charlen_type_node), build_pointer_type (pchar1_type_node), integer_type_node, integer_type_node); gfor_fndecl_adjustl = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("adjustl")), ".W.R", + get_identifier (PREFIX("adjustl")), ". W . R ", void_type_node, 3, pchar1_type_node, gfc_charlen_type_node, pchar1_type_node); TREE_NOTHROW (gfor_fndecl_adjustl) = 1; gfor_fndecl_adjustr = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("adjustr")), ".W.R", + get_identifier (PREFIX("adjustr")), ". W . R ", void_type_node, 3, pchar1_type_node, gfc_charlen_type_node, pchar1_type_node); TREE_NOTHROW (gfor_fndecl_adjustr) = 1; gfor_fndecl_select_string = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("select_string")), ".R.R.", + get_identifier (PREFIX("select_string")), ". R . R . ", integer_type_node, 4, pvoid_type_node, integer_type_node, pchar1_type_node, gfc_charlen_type_node); DECL_PURE_P (gfor_fndecl_select_string) = 1; TREE_NOTHROW (gfor_fndecl_select_string) = 1; gfor_fndecl_compare_string_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("compare_string_char4")), "..R.R", + get_identifier (PREFIX("compare_string_char4")), ". . R . 
R ", integer_type_node, 4, gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node, pchar4_type_node); DECL_PURE_P (gfor_fndecl_compare_string_char4) = 1; TREE_NOTHROW (gfor_fndecl_compare_string_char4) = 1; gfor_fndecl_concat_string_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("concat_string_char4")), "..W.R.R", + get_identifier (PREFIX("concat_string_char4")), ". . W . R . R ", void_type_node, 6, gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node, pchar4_type_node); TREE_NOTHROW (gfor_fndecl_concat_string_char4) = 1; gfor_fndecl_string_len_trim_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_len_trim_char4")), "..R", + get_identifier (PREFIX("string_len_trim_char4")), ". . R ", gfc_charlen_type_node, 2, gfc_charlen_type_node, pchar4_type_node); DECL_PURE_P (gfor_fndecl_string_len_trim_char4) = 1; TREE_NOTHROW (gfor_fndecl_string_len_trim_char4) = 1; gfor_fndecl_string_index_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_index_char4")), "..R.R.", + get_identifier (PREFIX("string_index_char4")), ". . R . R . ", gfc_charlen_type_node, 5, gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node, pchar4_type_node, gfc_logical4_type_node); DECL_PURE_P (gfor_fndecl_string_index_char4) = 1; TREE_NOTHROW (gfor_fndecl_string_index_char4) = 1; gfor_fndecl_string_scan_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_scan_char4")), "..R.R.", + get_identifier (PREFIX("string_scan_char4")), ". . R . R . 
", gfc_charlen_type_node, 5, gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node, pchar4_type_node, gfc_logical4_type_node); DECL_PURE_P (gfor_fndecl_string_scan_char4) = 1; TREE_NOTHROW (gfor_fndecl_string_scan_char4) = 1; gfor_fndecl_string_verify_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_verify_char4")), "..R.R.", + get_identifier (PREFIX("string_verify_char4")), ". . R . R . ", gfc_charlen_type_node, 5, gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node, pchar4_type_node, gfc_logical4_type_node); DECL_PURE_P (gfor_fndecl_string_verify_char4) = 1; TREE_NOTHROW (gfor_fndecl_string_verify_char4) = 1; gfor_fndecl_string_trim_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_trim_char4")), ".Ww.R", + get_identifier (PREFIX("string_trim_char4")), ". W w . R ", void_type_node, 4, build_pointer_type (gfc_charlen_type_node), build_pointer_type (pchar4_type_node), gfc_charlen_type_node, pchar4_type_node); gfor_fndecl_string_minmax_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("string_minmax_char4")), ".Ww.R", + get_identifier (PREFIX("string_minmax_char4")), ". W w . R ", void_type_node, -4, build_pointer_type (gfc_charlen_type_node), build_pointer_type (pchar4_type_node), integer_type_node, integer_type_node); gfor_fndecl_adjustl_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("adjustl_char4")), ".W.R", + get_identifier (PREFIX("adjustl_char4")), ". W . R ", void_type_node, 3, pchar4_type_node, gfc_charlen_type_node, pchar4_type_node); TREE_NOTHROW (gfor_fndecl_adjustl_char4) = 1; gfor_fndecl_adjustr_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("adjustr_char4")), ".W.R", + get_identifier (PREFIX("adjustr_char4")), ". W . 
R ", void_type_node, 3, pchar4_type_node, gfc_charlen_type_node, pchar4_type_node); TREE_NOTHROW (gfor_fndecl_adjustr_char4) = 1; gfor_fndecl_select_string_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("select_string_char4")), ".R.R.", + get_identifier (PREFIX("select_string_char4")), ". R . R . ", integer_type_node, 4, pvoid_type_node, integer_type_node, pvoid_type_node, gfc_charlen_type_node); DECL_PURE_P (gfor_fndecl_select_string_char4) = 1; @@ -3472,28 +3478,28 @@ gfc_build_intrinsic_function_decls (void) /* Conversion between character kinds. */ gfor_fndecl_convert_char1_to_char4 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("convert_char1_to_char4")), ".w.R", + get_identifier (PREFIX("convert_char1_to_char4")), ". w . R ", void_type_node, 3, build_pointer_type (pchar4_type_node), gfc_charlen_type_node, pchar1_type_node); gfor_fndecl_convert_char4_to_char1 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("convert_char4_to_char1")), ".w.R", + get_identifier (PREFIX("convert_char4_to_char1")), ". w . R ", void_type_node, 3, build_pointer_type (pchar1_type_node), gfc_charlen_type_node, pchar4_type_node); /* Misc. functions. */ gfor_fndecl_ttynam = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("ttynam")), ".W..", + get_identifier (PREFIX("ttynam")), ". W . . ", void_type_node, 3, pchar_type_node, gfc_charlen_type_node, integer_type_node); gfor_fndecl_fdate = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("fdate")), ".W.", + get_identifier (PREFIX("fdate")), ". W . ", void_type_node, 2, pchar_type_node, gfc_charlen_type_node); gfor_fndecl_ctime = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("ctime")), ".W..", + get_identifier (PREFIX("ctime")), ". W . . 
", void_type_node, 3, pchar_type_node, gfc_charlen_type_node, gfc_int8_type_node); @@ -3503,19 +3509,19 @@ gfc_build_intrinsic_function_decls (void) gfc_int4_type_node); gfor_fndecl_sc_kind = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("selected_char_kind")), "..R", + get_identifier (PREFIX("selected_char_kind")), ". . R ", gfc_int4_type_node, 2, gfc_charlen_type_node, pchar_type_node); DECL_PURE_P (gfor_fndecl_sc_kind) = 1; TREE_NOTHROW (gfor_fndecl_sc_kind) = 1; gfor_fndecl_si_kind = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("selected_int_kind")), ".R", + get_identifier (PREFIX("selected_int_kind")), ". R ", gfc_int4_type_node, 1, pvoid_type_node); DECL_PURE_P (gfor_fndecl_si_kind) = 1; TREE_NOTHROW (gfor_fndecl_si_kind) = 1; gfor_fndecl_sr_kind = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("selected_real_kind2008")), ".RR", + get_identifier (PREFIX("selected_real_kind2008")), ". R R ", gfc_int4_type_node, 3, pvoid_type_node, pvoid_type_node, pvoid_type_node); DECL_PURE_P (gfor_fndecl_sr_kind) = 1; @@ -3656,13 +3662,13 @@ gfc_build_intrinsic_function_decls (void) /* Other functions. */ gfor_fndecl_size0 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("size0")), ".R", + get_identifier (PREFIX("size0")), ". R ", gfc_array_index_type, 1, pvoid_type_node); DECL_PURE_P (gfor_fndecl_size0) = 1; TREE_NOTHROW (gfor_fndecl_size0) = 1; gfor_fndecl_size1 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("size1")), ".R.", + get_identifier (PREFIX("size1")), ". R . 
", gfc_array_index_type, 2, pvoid_type_node, gfc_array_index_type); DECL_PURE_P (gfor_fndecl_size1) = 1; TREE_NOTHROW (gfor_fndecl_size1) = 1; @@ -3680,7 +3686,7 @@ gfc_build_intrinsic_function_decls (void) 2, gfc_int4_type_node, gfc_int4_type_node); gfor_fndecl_is_contiguous0 = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("is_contiguous0")), ".R", + get_identifier (PREFIX("is_contiguous0")), ". R ", gfc_int4_type_node, 1, pvoid_type_node); DECL_PURE_P (gfor_fndecl_is_contiguous0) = 1; TREE_NOTHROW (gfor_fndecl_is_contiguous0) = 1; @@ -3701,7 +3707,7 @@ gfc_build_builtin_function_decls (void) TREE_THIS_VOLATILE (gfor_fndecl_stop_numeric) = 1; gfor_fndecl_stop_string = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("stop_string")), ".R..", + get_identifier (PREFIX("stop_string")), ". R . . ", void_type_node, 3, pchar_type_node, size_type_node, boolean_type_node); /* STOP doesn't return. */ @@ -3714,7 +3720,7 @@ gfc_build_builtin_function_decls (void) TREE_THIS_VOLATILE (gfor_fndecl_error_stop_numeric) = 1; gfor_fndecl_error_stop_string = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("error_stop_string")), ".R..", + get_identifier (PREFIX("error_stop_string")), ". R . . ", void_type_node, 3, pchar_type_node, size_type_node, boolean_type_node); /* ERROR STOP doesn't return. */ @@ -3725,32 +3731,32 @@ gfc_build_builtin_function_decls (void) void_type_node, 1, gfc_int8_type_node); gfor_fndecl_pause_string = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("pause_string")), ".R.", + get_identifier (PREFIX("pause_string")), ". R . ", void_type_node, 2, pchar_type_node, size_type_node); gfor_fndecl_runtime_error = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("runtime_error")), ".R", + get_identifier (PREFIX("runtime_error")), ". R ", void_type_node, -1, pchar_type_node); /* The runtime_error function does not return. 
*/ TREE_THIS_VOLATILE (gfor_fndecl_runtime_error) = 1; gfor_fndecl_runtime_error_at = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("runtime_error_at")), ".RR", + get_identifier (PREFIX("runtime_error_at")), ". R R ", void_type_node, -2, pchar_type_node, pchar_type_node); /* The runtime_error_at function does not return. */ TREE_THIS_VOLATILE (gfor_fndecl_runtime_error_at) = 1; gfor_fndecl_runtime_warning_at = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("runtime_warning_at")), ".RR", + get_identifier (PREFIX("runtime_warning_at")), ". R R ", void_type_node, -2, pchar_type_node, pchar_type_node); gfor_fndecl_generate_error = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("generate_error")), ".R.R", + get_identifier (PREFIX("generate_error")), ". R . R ", void_type_node, 3, pvoid_type_node, integer_type_node, pchar_type_node); gfor_fndecl_os_error_at = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("os_error_at")), ".RR", + get_identifier (PREFIX("os_error_at")), ". R R ", void_type_node, -2, pchar_type_node, pchar_type_node); /* The os_error_at function does not return. */ TREE_THIS_VOLATILE (gfor_fndecl_os_error_at) = 1; @@ -3774,7 +3780,7 @@ gfc_build_builtin_function_decls (void) /* Keep the array dimension in sync with the call, later in this file. */ gfor_fndecl_set_options = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("set_options")), "..R", + get_identifier (PREFIX("set_options")), ". . R ", void_type_node, 2, integer_type_node, build_pointer_type (integer_type_node)); @@ -3791,11 +3797,11 @@ gfc_build_builtin_function_decls (void) void_type_node, 1, integer_type_node); gfor_fndecl_in_pack = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("internal_pack")), ".r", + get_identifier (PREFIX("internal_pack")), ". 
r ", pvoid_type_node, 1, pvoid_type_node); gfor_fndecl_in_unpack = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("internal_unpack")), ".wR", + get_identifier (PREFIX("internal_unpack")), ". w R ", void_type_node, 2, pvoid_type_node, pvoid_type_node); /* These two builtins write into what the first argument points to and @@ -3804,15 +3810,15 @@ gfc_build_builtin_function_decls (void) which is copied into the descriptor pointed by the first argument, effectively escaping that way. See PR92123. */ gfor_fndecl_cfi_to_gfc = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("cfi_desc_to_gfc_desc")), ".w.", + get_identifier (PREFIX("cfi_desc_to_gfc_desc")), ". w . ", void_type_node, 2, pvoid_type_node, ppvoid_type_node); gfor_fndecl_gfc_to_cfi = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("gfc_desc_to_cfi_desc")), ".w.", + get_identifier (PREFIX("gfc_desc_to_cfi_desc")), ". w . ", void_type_node, 2, ppvoid_type_node, pvoid_type_node); gfor_fndecl_associated = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("associated")), ".RR", + get_identifier (PREFIX("associated")), ". 
R R ", integer_type_node, 2, ppvoid_type_node, ppvoid_type_node); DECL_PURE_P (gfor_fndecl_associated) = 1; TREE_NOTHROW (gfor_fndecl_associated) = 1; @@ -3842,49 +3848,55 @@ gfc_build_builtin_function_decls (void) 2, integer_type_node, integer_type_node); gfor_fndecl_caf_register = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_register")), "RRWWWWR", void_type_node, 7, + get_identifier (PREFIX("caf_register")), "R R W W W W R ", + void_type_node, 7, size_type_node, integer_type_node, ppvoid_type_node, pvoid_type_node, pint_type, pchar_type_node, size_type_node); gfor_fndecl_caf_deregister = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_deregister")), "WRWWR", void_type_node, 5, + get_identifier (PREFIX("caf_deregister")), "W R W W R ", + void_type_node, 5, ppvoid_type_node, integer_type_node, pint_type, pchar_type_node, size_type_node); gfor_fndecl_caf_get = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_get")), ".R.RRWRRRW", void_type_node, 10, + get_identifier (PREFIX("caf_get")), ". R . R R W R R R W ", + void_type_node, 10, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, pvoid_type_node, pvoid_type_node, integer_type_node, integer_type_node, boolean_type_node, pint_type); gfor_fndecl_caf_send = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_send")), ".R.RRRRRRWR", void_type_node, 11, + get_identifier (PREFIX("caf_send")), ". R . R R R R R R W R ", + void_type_node, 11, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, pvoid_type_node, pvoid_type_node, integer_type_node, integer_type_node, boolean_type_node, pint_type, pvoid_type_node); gfor_fndecl_caf_sendget = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_sendget")), ".R.RRRR.RRRRRR", + get_identifier (PREFIX("caf_sendget")), ". R . R R R R . 
R R R R R R ", void_type_node, 14, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, pvoid_type_node, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, pvoid_type_node, integer_type_node, integer_type_node, boolean_type_node, integer_type_node); gfor_fndecl_caf_get_by_ref = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_get_by_ref")), ".RWRRRRRWR", void_type_node, + get_identifier (PREFIX("caf_get_by_ref")), ". R W R R R R R W R ", + void_type_node, 10, pvoid_type_node, integer_type_node, pvoid_type_node, pvoid_type_node, integer_type_node, integer_type_node, boolean_type_node, boolean_type_node, pint_type, integer_type_node); gfor_fndecl_caf_send_by_ref = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_send_by_ref")), ".RRRRRRRWR", + get_identifier (PREFIX("caf_send_by_ref")), ". R R R R R R R W R ", void_type_node, 10, pvoid_type_node, integer_type_node, pvoid_type_node, pvoid_type_node, integer_type_node, integer_type_node, boolean_type_node, boolean_type_node, pint_type, integer_type_node); gfor_fndecl_caf_sendget_by_ref = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_sendget_by_ref")), ".RR.RRRRRWWRR", + get_identifier (PREFIX("caf_sendget_by_ref")), + ". R R . R R R R R W W R R ", void_type_node, 13, pvoid_type_node, integer_type_node, pvoid_type_node, pvoid_type_node, integer_type_node, pvoid_type_node, integer_type_node, integer_type_node, @@ -3892,15 +3904,15 @@ gfc_build_builtin_function_decls (void) integer_type_node); gfor_fndecl_caf_sync_all = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_sync_all")), ".WW.", void_type_node, + get_identifier (PREFIX("caf_sync_all")), ". W W . 
", void_type_node, 3, pint_type, pchar_type_node, size_type_node); gfor_fndecl_caf_sync_memory = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_sync_memory")), ".WW.", void_type_node, + get_identifier (PREFIX("caf_sync_memory")), ". W W . ", void_type_node, 3, pint_type, pchar_type_node, size_type_node); gfor_fndecl_caf_sync_images = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_sync_images")), "..RWW.", void_type_node, + get_identifier (PREFIX("caf_sync_images")), ". . R W W . ", void_type_node, 5, integer_type_node, pint_type, pint_type, pchar_type_node, size_type_node); @@ -3911,7 +3923,7 @@ gfc_build_builtin_function_decls (void) TREE_THIS_VOLATILE (gfor_fndecl_caf_error_stop) = 1; gfor_fndecl_caf_error_stop_str = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_error_stop_str")), ".R.", + get_identifier (PREFIX("caf_error_stop_str")), ". R . ", void_type_node, 2, pchar_type_node, size_type_node); /* CAF's ERROR STOP doesn't return. */ TREE_THIS_VOLATILE (gfor_fndecl_caf_error_stop_str) = 1; @@ -3923,55 +3935,55 @@ gfc_build_builtin_function_decls (void) TREE_THIS_VOLATILE (gfor_fndecl_caf_stop_numeric) = 1; gfor_fndecl_caf_stop_str = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_stop_str")), ".R.", + get_identifier (PREFIX("caf_stop_str")), ". R . ", void_type_node, 2, pchar_type_node, size_type_node); /* CAF's STOP doesn't return. */ TREE_THIS_VOLATILE (gfor_fndecl_caf_stop_str) = 1; gfor_fndecl_caf_atomic_def = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_atomic_define")), "R..RW", + get_identifier (PREFIX("caf_atomic_define")), "R . . 
R W ", void_type_node, 7, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, pint_type, integer_type_node, integer_type_node); gfor_fndecl_caf_atomic_ref = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_atomic_ref")), "R..WW", + get_identifier (PREFIX("caf_atomic_ref")), "R . . W W ", void_type_node, 7, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, pint_type, integer_type_node, integer_type_node); gfor_fndecl_caf_atomic_cas = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_atomic_cas")), "R..WRRW", + get_identifier (PREFIX("caf_atomic_cas")), "R . . W R R W ", void_type_node, 9, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, pvoid_type_node, pvoid_type_node, pint_type, integer_type_node, integer_type_node); gfor_fndecl_caf_atomic_op = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_atomic_op")), ".R..RWW", + get_identifier (PREFIX("caf_atomic_op")), ". R . . R W W ", void_type_node, 9, integer_type_node, pvoid_type_node, size_type_node, integer_type_node, pvoid_type_node, pvoid_type_node, pint_type, integer_type_node, integer_type_node); gfor_fndecl_caf_lock = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_lock")), "R..WWW", + get_identifier (PREFIX("caf_lock")), "R . . W W W ", void_type_node, 7, pvoid_type_node, size_type_node, integer_type_node, pint_type, pint_type, pchar_type_node, size_type_node); gfor_fndecl_caf_unlock = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_unlock")), "R..WW", + get_identifier (PREFIX("caf_unlock")), "R . . W W ", void_type_node, 6, pvoid_type_node, size_type_node, integer_type_node, pint_type, pchar_type_node, size_type_node); gfor_fndecl_caf_event_post = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_event_post")), "R..WW", + get_identifier (PREFIX("caf_event_post")), "R . . 
W W ", void_type_node, 6, pvoid_type_node, size_type_node, integer_type_node, pint_type, pchar_type_node, size_type_node); gfor_fndecl_caf_event_wait = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_event_wait")), "R..WW", + get_identifier (PREFIX("caf_event_wait")), "R . . W W ", void_type_node, 6, pvoid_type_node, size_type_node, integer_type_node, pint_type, pchar_type_node, size_type_node); gfor_fndecl_caf_event_query = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_event_query")), "R..WW", + get_identifier (PREFIX("caf_event_query")), "R . . W W ", void_type_node, 5, pvoid_type_node, size_type_node, integer_type_node, pint_type, pint_type); @@ -3982,19 +3994,19 @@ gfc_build_builtin_function_decls (void) gfor_fndecl_caf_failed_images = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_failed_images")), "WRR", + get_identifier (PREFIX("caf_failed_images")), "W R R ", void_type_node, 3, pvoid_type_node, ppvoid_type_node, integer_type_node); gfor_fndecl_caf_form_team = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_form_team")), "RWR", + get_identifier (PREFIX("caf_form_team")), "R W R ", void_type_node, 3, integer_type_node, ppvoid_type_node, integer_type_node); gfor_fndecl_caf_change_team = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_change_team")), "RR", + get_identifier (PREFIX("caf_change_team")), "R R ", void_type_node, 2, ppvoid_type_node, integer_type_node); @@ -4009,43 +4021,43 @@ gfc_build_builtin_function_decls (void) gfor_fndecl_caf_sync_team = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_sync_team")), "RR", + get_identifier (PREFIX("caf_sync_team")), "R R ", void_type_node, 2, ppvoid_type_node, integer_type_node); gfor_fndecl_caf_team_number = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_team_number")), "R", + get_identifier (PREFIX("caf_team_number")), "R ", 
integer_type_node, 1, integer_type_node); gfor_fndecl_caf_image_status = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_image_status")), "RR", + get_identifier (PREFIX("caf_image_status")), "R R ", integer_type_node, 2, integer_type_node, ppvoid_type_node); gfor_fndecl_caf_stopped_images = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_stopped_images")), "WRR", + get_identifier (PREFIX("caf_stopped_images")), "W R R ", void_type_node, 3, pvoid_type_node, ppvoid_type_node, integer_type_node); gfor_fndecl_co_broadcast = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_co_broadcast")), "W.WW", + get_identifier (PREFIX("caf_co_broadcast")), "W . W W ", void_type_node, 5, pvoid_type_node, integer_type_node, pint_type, pchar_type_node, size_type_node); gfor_fndecl_co_max = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_co_max")), "W.WW", + get_identifier (PREFIX("caf_co_max")), "W . W W ", void_type_node, 6, pvoid_type_node, integer_type_node, pint_type, pchar_type_node, integer_type_node, size_type_node); gfor_fndecl_co_min = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_co_min")), "W.WW", + get_identifier (PREFIX("caf_co_min")), "W . W W ", void_type_node, 6, pvoid_type_node, integer_type_node, pint_type, pchar_type_node, integer_type_node, size_type_node); gfor_fndecl_co_reduce = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_co_reduce")), "W.R.WW", + get_identifier (PREFIX("caf_co_reduce")), "W . R . W W ", void_type_node, 8, pvoid_type_node, build_pointer_type (build_varargs_function_type_list (void_type_node, NULL_TREE)), @@ -4053,12 +4065,12 @@ gfc_build_builtin_function_decls (void) integer_type_node, size_type_node); gfor_fndecl_co_sum = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_co_sum")), "W.WW", + get_identifier (PREFIX("caf_co_sum")), "W . 
W W ", void_type_node, 5, pvoid_type_node, integer_type_node, pint_type, pchar_type_node, size_type_node); gfor_fndecl_caf_is_present = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("caf_is_present")), "RRR", + get_identifier (PREFIX("caf_is_present")), "R R R ", integer_type_node, 3, pvoid_type_node, integer_type_node, pvoid_type_node); } diff --git a/gcc/fortran/trans-io.c b/gcc/fortran/trans-io.c index 363cca5..666dc37 100644 --- a/gcc/fortran/trans-io.c +++ b/gcc/fortran/trans-io.c @@ -328,165 +328,165 @@ gfc_build_io_library_fndecls (void) dt_parm_type = build_pointer_type (st_parameter[IOPARM_ptype_dt].type); iocall[IOCALL_X_INTEGER] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_integer")), ".wW.", + get_identifier (PREFIX("transfer_integer")), ". w W . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_INTEGER_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_integer_write")), ".wR.", + get_identifier (PREFIX("transfer_integer_write")), ". w R . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_LOGICAL] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_logical")), ".wW.", + get_identifier (PREFIX("transfer_logical")), ". w W . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_LOGICAL_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_logical_write")), ".wR.", + get_identifier (PREFIX("transfer_logical_write")), ". w R . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_CHARACTER] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_character")), ".wW.", + get_identifier (PREFIX("transfer_character")), ". w W . 
", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_charlen_type_node); iocall[IOCALL_X_CHARACTER_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_character_write")), ".wR.", + get_identifier (PREFIX("transfer_character_write")), ". w R . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_charlen_type_node); iocall[IOCALL_X_CHARACTER_WIDE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_character_wide")), ".wW..", + get_identifier (PREFIX("transfer_character_wide")), ". w W . . ", void_type_node, 4, dt_parm_type, pvoid_type_node, gfc_charlen_type_node, gfc_int4_type_node); iocall[IOCALL_X_CHARACTER_WIDE_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_character_wide_write")), ".wR..", + get_identifier (PREFIX("transfer_character_wide_write")), ". w R . . ", void_type_node, 4, dt_parm_type, pvoid_type_node, gfc_charlen_type_node, gfc_int4_type_node); iocall[IOCALL_X_REAL] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_real")), ".wW.", + get_identifier (PREFIX("transfer_real")), ". w W . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_REAL_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_real_write")), ".wR.", + get_identifier (PREFIX("transfer_real_write")), ". w R . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_COMPLEX] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_complex")), ".wW.", + get_identifier (PREFIX("transfer_complex")), ". w W . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_COMPLEX_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_complex_write")), ".wR.", + get_identifier (PREFIX("transfer_complex_write")), ". w R . 
", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); /* Version for __float128. */ iocall[IOCALL_X_REAL128] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_real128")), ".wW.", + get_identifier (PREFIX("transfer_real128")), ". w W . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_REAL128_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_real128_write")), ".wR.", + get_identifier (PREFIX("transfer_real128_write")), ". w R . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_COMPLEX128] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_complex128")), ".wW.", + get_identifier (PREFIX("transfer_complex128")), ". w W . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_COMPLEX128_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_complex128_write")), ".wR.", + get_identifier (PREFIX("transfer_complex128_write")), ". w R . ", void_type_node, 3, dt_parm_type, pvoid_type_node, gfc_int4_type_node); iocall[IOCALL_X_ARRAY] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_array")), ".ww..", + get_identifier (PREFIX("transfer_array")), ". w w . . ", void_type_node, 4, dt_parm_type, pvoid_type_node, integer_type_node, gfc_charlen_type_node); iocall[IOCALL_X_ARRAY_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_array_write")), ".wr..", + get_identifier (PREFIX("transfer_array_write")), ". w r . . ", void_type_node, 4, dt_parm_type, pvoid_type_node, integer_type_node, gfc_charlen_type_node); iocall[IOCALL_X_DERIVED] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("transfer_derived")), ".wr", + get_identifier (PREFIX("transfer_derived")), ". 
w r ", void_type_node, 2, dt_parm_type, pvoid_type_node); /* Library entry points */ iocall[IOCALL_READ] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_read")), ".w", + get_identifier (PREFIX("st_read")), ". w ", void_type_node, 1, dt_parm_type); iocall[IOCALL_WRITE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_write")), ".w", + get_identifier (PREFIX("st_write")), ". w ", void_type_node, 1, dt_parm_type); parm_type = build_pointer_type (st_parameter[IOPARM_ptype_open].type); iocall[IOCALL_OPEN] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_open")), ".w", + get_identifier (PREFIX("st_open")), ". w ", void_type_node, 1, parm_type); parm_type = build_pointer_type (st_parameter[IOPARM_ptype_close].type); iocall[IOCALL_CLOSE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_close")), ".w", + get_identifier (PREFIX("st_close")), ". w ", void_type_node, 1, parm_type); parm_type = build_pointer_type (st_parameter[IOPARM_ptype_inquire].type); iocall[IOCALL_INQUIRE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_inquire")), ".w", + get_identifier (PREFIX("st_inquire")), ". w ", void_type_node, 1, parm_type); iocall[IOCALL_IOLENGTH] = gfc_build_library_function_decl_with_spec( - get_identifier (PREFIX("st_iolength")), ".w", + get_identifier (PREFIX("st_iolength")), ". w ", void_type_node, 1, dt_parm_type); parm_type = build_pointer_type (st_parameter[IOPARM_ptype_wait].type); iocall[IOCALL_WAIT] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_wait_async")), ".w", + get_identifier (PREFIX("st_wait_async")), ". w ", void_type_node, 1, parm_type); parm_type = build_pointer_type (st_parameter[IOPARM_ptype_filepos].type); iocall[IOCALL_REWIND] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_rewind")), ".w", + get_identifier (PREFIX("st_rewind")), ". 
w ", void_type_node, 1, parm_type); iocall[IOCALL_BACKSPACE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_backspace")), ".w", + get_identifier (PREFIX("st_backspace")), ". w ", void_type_node, 1, parm_type); iocall[IOCALL_ENDFILE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_endfile")), ".w", + get_identifier (PREFIX("st_endfile")), ". w ", void_type_node, 1, parm_type); iocall[IOCALL_FLUSH] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_flush")), ".w", + get_identifier (PREFIX("st_flush")), ". w ", void_type_node, 1, parm_type); /* Library helpers */ iocall[IOCALL_READ_DONE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_read_done")), ".w", + get_identifier (PREFIX("st_read_done")), ". w ", void_type_node, 1, dt_parm_type); iocall[IOCALL_WRITE_DONE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_write_done")), ".w", + get_identifier (PREFIX("st_write_done")), ". w ", void_type_node, 1, dt_parm_type); iocall[IOCALL_IOLENGTH_DONE] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_iolength_done")), ".w", + get_identifier (PREFIX("st_iolength_done")), ". w ", void_type_node, 1, dt_parm_type); iocall[IOCALL_SET_NML_VAL] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_set_nml_var")), ".w.R...", + get_identifier (PREFIX("st_set_nml_var")), ". w . R . . . ", void_type_node, 6, dt_parm_type, pvoid_type_node, pvoid_type_node, gfc_int4_type_node, gfc_charlen_type_node, get_dtype_type_node()); iocall[IOCALL_SET_NML_DTIO_VAL] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_set_nml_dtio_var")), ".w.R.....", + get_identifier (PREFIX("st_set_nml_dtio_var")), ". w . R . . . . . 
", void_type_node, 8, dt_parm_type, pvoid_type_node, pvoid_type_node, gfc_int4_type_node, gfc_charlen_type_node, get_dtype_type_node(), pvoid_type_node, pvoid_type_node); iocall[IOCALL_SET_NML_VAL_DIM] = gfc_build_library_function_decl_with_spec ( - get_identifier (PREFIX("st_set_nml_var_dim")), ".w....", + get_identifier (PREFIX("st_set_nml_var_dim")), ". w . . . . ", void_type_node, 5, dt_parm_type, gfc_int4_type_node, gfc_array_index_type, gfc_array_index_type, gfc_array_index_type); } diff --git a/gcc/fortran/trans-types.c b/gcc/fortran/trans-types.c index 26fdb28..17f3ccc 100644 --- a/gcc/fortran/trans-types.c +++ b/gcc/fortran/trans-types.c @@ -2940,20 +2940,33 @@ create_fn_spec (gfc_symbol *sym, tree fntype) memset (&spec, 0, sizeof (spec)); spec[0] = '.'; - spec_len = 1; + spec[1] = ' '; + spec_len = 2; if (sym->attr.entry_master) - spec[spec_len++] = 'R'; + { + spec[spec_len++] = 'R'; + spec[spec_len++] = ' '; + } if (gfc_return_by_reference (sym)) { gfc_symbol *result = sym->result ? 
sym->result : sym; if (result->attr.pointer || sym->attr.proc_pointer) - spec[spec_len++] = '.'; + { + spec[spec_len++] = '.'; + spec[spec_len++] = ' '; + } else - spec[spec_len++] = 'w'; + { + spec[spec_len++] = 'w'; + spec[spec_len++] = ' '; + } if (sym->ts.type == BT_CHARACTER) - spec[spec_len++] = 'R'; + { + spec[spec_len++] = 'R'; + spec[spec_len++] = ' '; + } } for (f = gfc_sym_get_dummy_args (sym); f; f = f->next) @@ -2968,11 +2981,20 @@ create_fn_spec (gfc_symbol *sym, tree fntype) && (CLASS_DATA (f->sym)->ts.u.derived->attr.proc_pointer_comp || CLASS_DATA (f->sym)->ts.u.derived->attr.pointer_comp)) || (f->sym->ts.type == BT_INTEGER && f->sym->ts.is_c_interop)) - spec[spec_len++] = '.'; + { + spec[spec_len++] = '.'; + spec[spec_len++] = ' '; + } else if (f->sym->attr.intent == INTENT_IN) - spec[spec_len++] = 'r'; + { + spec[spec_len++] = 'r'; + spec[spec_len++] = ' '; + } else if (f->sym) - spec[spec_len++] = 'w'; + { + spec[spec_len++] = 'w'; + spec[spec_len++] = ' '; + } } tmp = build_tree_list (NULL_TREE, build_string (spec_len, spec)); diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index e6cfe1b..310d37a 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -311,17 +311,18 @@ DEF_INTERNAL_FN (GOMP_SIMD_ORDERED_END, ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (LOOP_VECTORIZED, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (LOOP_DIST_ALIAS, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (ANNOTATE, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) -DEF_INTERNAL_FN (UBSAN_NULL, ECF_LEAF | ECF_NOTHROW, ".R.") +DEF_INTERNAL_FN (UBSAN_NULL, ECF_LEAF | ECF_NOTHROW, ". R . ") DEF_INTERNAL_FN (UBSAN_BOUNDS, ECF_LEAF | ECF_NOTHROW, NULL) -DEF_INTERNAL_FN (UBSAN_VPTR, ECF_LEAF | ECF_NOTHROW, ".RR..") +DEF_INTERNAL_FN (UBSAN_VPTR, ECF_LEAF | ECF_NOTHROW, ". R R . . 
") DEF_INTERNAL_FN (UBSAN_CHECK_ADD, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (UBSAN_CHECK_SUB, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (UBSAN_CHECK_MUL, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) -DEF_INTERNAL_FN (UBSAN_PTR, ECF_LEAF | ECF_NOTHROW, ".R.") +DEF_INTERNAL_FN (UBSAN_PTR, ECF_LEAF | ECF_NOTHROW, ". R . ") DEF_INTERNAL_FN (UBSAN_OBJECT_SIZE, ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (ABNORMAL_DISPATCHER, ECF_NORETURN, NULL) DEF_INTERNAL_FN (BUILTIN_EXPECT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL) -DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, "..R..") +DEF_INTERNAL_FN (ASAN_CHECK, ECF_TM_PURE | ECF_LEAF | ECF_NOTHROW, + ". . R . . ") DEF_INTERNAL_FN (ASAN_MARK, ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (ASAN_POISON, ECF_LEAF | ECF_NOTHROW | ECF_NOVOPS, NULL) DEF_INTERNAL_FN (ASAN_POISON_USE, ECF_LEAF | ECF_NOTHROW | ECF_NOVOPS, NULL) @@ -345,8 +346,8 @@ DEF_INTERNAL_FN (PHI, 0, NULL) thought to clobber memory and can be gcse'd within a single parallel region, but not across FORK/JOIN boundaries. They take a single INTEGER_CST argument. This might be overly conservative. */ -DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".") -DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_PURE | ECF_NOTHROW | ECF_LEAF, ".") +DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, NULL) +DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_PURE | ECF_NOTHROW | ECF_LEAF, NULL) /* OpenACC looping abstraction. See internal-fn.h for usage. */ DEF_INTERNAL_FN (GOACC_LOOP, ECF_PURE | ECF_NOTHROW, NULL) diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c index 52aeaeb..3d3a91c 100644 --- a/gcc/tree-ssa-alias.c +++ b/gcc/tree-ssa-alias.c @@ -4020,16 +4020,21 @@ walk_aliased_vdefs (ao_ref *ref, tree vdef, void attr_fnspec::verify () { - /* FIXME: Fortran trans-decl.c contains multiple wrong fnspec strings. - re-enable verification after these are fixed. 
*/ - return; bool err = false; /* Check return value specifier. */ if (len < return_desc_size) err = true; + else if ((len - return_desc_size) % arg_desc_size) + err = true; else if ((str[0] < '1' || str[0] > '4') - && str[0] != '.' && str[0] != 'm') + && str[0] != '.' && str[0] != 'm' + /* FIXME: Fortran trans-decl.c contains multiple wrong fnspec + strings. The following characters have no meaning. */ + && str[0] != 'R' && str[0] != 'W') + err = true; + + if (str[1] != ' ') err = true; /* Now check all parameters. */ @@ -4049,7 +4054,9 @@ attr_fnspec::verify () default: err = true; } + if (str[idx + 1] != ' ') + err = true; } if (err) - internal_error ("invalid fn spec attribute %s", str); + internal_error ("invalid fn spec attribute \"%s\"", str); } -- cgit v1.1 From c8c77ed747abb61a7f3cf34f71539bc87a5d6c3c Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Fri, 2 Oct 2020 15:23:19 +0100 Subject: AArch64: Add neoversev1_tunings struct This patch adds a Neoverse V1-specific tuning struct that currently is just a deduplication of the N1 struct it was using before and specifying the SVE width. This will allow us to tweak Neoverse V1 things in the future as needed. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ * config/aarch64/aarch64.c (neoversev1_tunings): Define. * config/aarch64/aarch64-cores.def (zeus): Use it. (neoverse-v1): Likewise. --- gcc/config/aarch64/aarch64-cores.def | 4 ++-- gcc/config/aarch64/aarch64.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index ac2a9b4..b2e1932 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -136,8 +136,8 @@ AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_ /* ARMv8.4-A Architecture Processors. */ /* Arm ('A') cores. 
*/ -AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversen1, 0x41, 0xd40, -1) -AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversen1, 0x41, 0xd40, -1) +AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) /* Qualcomm ('Q') cores. */ AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index a3408f4..3cf20ea 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1336,6 +1336,32 @@ static const struct tune_params neoversen1_tunings = &generic_prefetch_tune }; +static const struct tune_params neoversev1_tunings = +{ + &cortexa57_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &cortexa57_vector_cost, + &generic_branch_cost, + &generic_approx_modes, + SVE_256, /* sve_width */ + 4, /* memmov_cost */ + 3, /* issue_rate */ + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ + "32:16", /* function_align. */ + "4", /* jump_align. */ + "32:16", /* loop_align. */ + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 2, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. 
*/ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &generic_prefetch_tune +}; + static const struct tune_params a64fx_tunings = { &generic_extra_costs, -- cgit v1.1 From 251950d899bc3c18b5775fe9fe20bebbdc8d15cb Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Fri, 2 Oct 2020 15:28:29 +0100 Subject: arm: Remove coercion from scalar argument to vmin & vmax intrinsics This patch fixes an issue with vmin* and vmax* intrinsics which accept a scalar argument. Previously when the scalar was of different width to the vector elements this would generate __ARM_undef. This change allows the scalar argument to be implicitly converted to the correct width. Also tidied up the relevant unit tests, some of which would have passed even if only one of two or three intrinsic calls had compiled correctly. Bootstrapped and tested on arm-none-eabi, gcc and CMSIS_DSP testsuites are clean. OK for trunk? Thanks, Joe gcc/ChangeLog: 2020-08-10 Joe Ramsay * config/arm/arm_mve.h (__arm_vmaxnmavq): Remove coercion of scalar argument. (__arm_vmaxnmvq): Likewise. (__arm_vminnmavq): Likewise. (__arm_vminnmvq): Likewise. (__arm_vmaxnmavq_p): Likewise. (__arm_vmaxnmvq_p): Likewise (and delete duplicate definition). (__arm_vminnmavq_p): Likewise. (__arm_vminnmvq_p): Likewise. (__arm_vmaxavq): Likewise. (__arm_vmaxavq_p): Likewise. (__arm_vmaxvq): Likewise. (__arm_vmaxvq_p): Likewise. (__arm_vminavq): Likewise. (__arm_vminavq_p): Likewise. (__arm_vminvq): Likewise. (__arm_vminvq_p): Likewise. gcc/testsuite/ChangeLog: * gcc.target/arm/mve/intrinsics/vmaxavq_p_s16.c: Add test for mismatched width of scalar argument. * gcc.target/arm/mve/intrinsics/vmaxavq_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxavq_p_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxavq_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxavq_s32.c: Likewise. 
* gcc.target/arm/mve/intrinsics/vmaxavq_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmvq_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmvq_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_p_u8.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vmaxvq_u8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_p_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_p_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminavq_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_p_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmavq_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmvq_f16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmvq_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminnmvq_p_f16.c: Likewise. 
* gcc.target/arm/mve/intrinsics/vminnmvq_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_p_u8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vminvq_u8.c: Likewise. --- gcc/config/arm/arm_mve.h | 72 +++++++++++----------- .../gcc.target/arm/mve/intrinsics/vmaxavq_p_s16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxavq_p_s32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxavq_p_s8.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxavq_s16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxavq_s32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxavq_s8.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxnmavq_f16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxnmavq_f32.c | 11 +++- .../arm/mve/intrinsics/vmaxnmavq_p_f16.c | 11 +++- .../arm/mve/intrinsics/vmaxnmavq_p_f32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxnmvq_f16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxnmvq_f32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxvq_p_s16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxvq_p_s32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxvq_p_s8.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxvq_p_u16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxvq_p_u32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxvq_p_u8.c | 11 +++- 
.../gcc.target/arm/mve/intrinsics/vmaxvq_s16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxvq_s32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxvq_s8.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxvq_u16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxvq_u32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vmaxvq_u8.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminavq_p_s16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminavq_p_s32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminavq_p_s8.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminavq_s16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminavq_s32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminavq_s8.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminnmavq_f16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminnmavq_f32.c | 11 +++- .../arm/mve/intrinsics/vminnmavq_p_f16.c | 11 +++- .../arm/mve/intrinsics/vminnmavq_p_f32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminnmvq_f16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminnmvq_f32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminnmvq_p_f16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminnmvq_p_f32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminvq_p_s16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminvq_p_s32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminvq_p_s8.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminvq_p_u16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminvq_p_u32.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminvq_p_u8.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminvq_s16.c | 10 ++- .../gcc.target/arm/mve/intrinsics/vminvq_s32.c | 10 ++- .../gcc.target/arm/mve/intrinsics/vminvq_s8.c | 10 ++- .../gcc.target/arm/mve/intrinsics/vminvq_u16.c | 11 +++- .../gcc.target/arm/mve/intrinsics/vminvq_u32.c | 10 ++- .../gcc.target/arm/mve/intrinsics/vminvq_u8.c | 11 +++- 53 files changed, 500 insertions(+), 140 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index 
99cff41..26c83c7 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -41682,16 +41682,16 @@ extern void *__ARM_undef; #define __arm_vmaxavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)));}) #define __arm_vmaxavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_p_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_p_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_p_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t), p2));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \ 
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));}) #define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ @@ -41706,36 +41706,36 @@ extern void *__ARM_undef; #define __arm_vmaxvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vmaxvq_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: 
__arm_vmaxvq_u32 (__p0,__ARM_mve_coerce(__p1, uint32x4_t)));}) #define __arm_vmaxvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_p_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_p_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_p_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_p_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_p_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vmaxvq_p_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_p_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_p_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vmaxvq_p_u32 (__p0, __ARM_mve_coerce(__p1, uint32x4_t), p2));}) #define __arm_vminavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) 
__p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)));}) #define __arm_vminavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_p_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_p_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_p_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t), p2));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));}) #define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 
= (p2); \ @@ -41750,22 +41750,22 @@ extern void *__ARM_undef; #define __arm_vminvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_u32 (__p0, __ARM_mve_coerce(__p1, uint32x4_t)));}) #define __arm_vminvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int 
(*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_p_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_p_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_p_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_p_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_p_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_p_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_p_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_p_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_p_u32 (__p0, __ARM_mve_coerce(__p1, uint32x4_t), p2));}) #define __arm_vmladavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s16.c index 
02e0227..74ffad4 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s16.c @@ -10,7 +10,6 @@ foo (uint16_t a, int16x8_t b, mve_pred16_t p) return vmaxavq_p_s16 (a, b, p); } -/* { dg-final { scan-assembler "vmaxavt.s16" } } */ uint16_t foo1 (uint16_t a, int16x8_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint16_t a, int16x8_t b, mve_pred16_t p) return vmaxavq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxavt.s16" } } */ + +int16_t +foo2 (uint8_t a, int16x8_t b, mve_pred16_t p) +{ + return vmaxavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxavt.s16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s32.c index 7ecd94a..40800b0 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s32.c @@ -10,7 +10,6 @@ foo (uint32_t a, int32x4_t b, mve_pred16_t p) return vmaxavq_p_s32 (a, b, p); } -/* { dg-final { scan-assembler "vmaxavt.s32" } } */ uint32_t foo1 (uint32_t a, int32x4_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint32_t a, int32x4_t b, mve_pred16_t p) return vmaxavq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxavt.s32" } } */ + +int32_t +foo2 (uint16_t a, int32x4_t b, mve_pred16_t p) +{ + return vmaxavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxavt.s32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s8.c index 7a21de7..7638737 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_p_s8.c @@ -10,7 +10,6 @@ foo (uint8_t a, int8x16_t b, mve_pred16_t p) return vmaxavq_p_s8 (a, b, p); } -/* { dg-final { 
scan-assembler "vmaxavt.s8" } } */ uint8_t foo1 (uint8_t a, int8x16_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint8_t a, int8x16_t b, mve_pred16_t p) return vmaxavq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxavt.s8" } } */ + +int8_t +foo2 (uint32_t a, int8x16_t b, mve_pred16_t p) +{ + return vmaxavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxavt.s8" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s16.c index 4621eba..0dca149 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s16.c @@ -10,7 +10,6 @@ foo (uint16_t a, int16x8_t b) return vmaxavq_s16 (a, b); } -/* { dg-final { scan-assembler "vmaxav.s16" } } */ uint16_t foo1 (uint16_t a, int16x8_t b) @@ -18,4 +17,12 @@ foo1 (uint16_t a, int16x8_t b) return vmaxavq (a, b); } -/* { dg-final { scan-assembler "vmaxav.s16" } } */ + +int16_t +foo2 (uint8_t a, int16x8_t b) +{ + return vmaxavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxav.s16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s32.c index 8813d9d..f419a77 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s32.c @@ -10,7 +10,6 @@ foo (uint32_t a, int32x4_t b) return vmaxavq_s32 (a, b); } -/* { dg-final { scan-assembler "vmaxav.s32" } } */ uint32_t foo1 (uint32_t a, int32x4_t b) @@ -18,4 +17,12 @@ foo1 (uint32_t a, int32x4_t b) return vmaxavq (a, b); } -/* { dg-final { scan-assembler "vmaxav.s32" } } */ + +int32_t +foo2 (uint16_t a, int32x4_t b) +{ + return vmaxavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxav.s32" 3 } 
} */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s8.c index 961f1d2..214ad88 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxavq_s8.c @@ -10,7 +10,6 @@ foo (uint8_t a, int8x16_t b) return vmaxavq_s8 (a, b); } -/* { dg-final { scan-assembler "vmaxav.s8" } } */ uint8_t foo1 (uint8_t a, int8x16_t b) @@ -18,4 +17,12 @@ foo1 (uint8_t a, int8x16_t b) return vmaxavq (a, b); } -/* { dg-final { scan-assembler "vmaxav.s8" } } */ + +int8_t +foo2 (uint32_t a, int8x16_t b) +{ + return vmaxavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxav.s8" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f16.c index de48ea8..6d8cf19 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f16.c @@ -10,7 +10,6 @@ foo (float16_t a, float16x8_t b) return vmaxnmavq_f16 (a, b); } -/* { dg-final { scan-assembler "vmaxnmav.f16" } } */ float16_t foo1 (float16_t a, float16x8_t b) @@ -18,4 +17,12 @@ foo1 (float16_t a, float16x8_t b) return vmaxnmavq (a, b); } -/* { dg-final { scan-assembler "vmaxnmav.f16" } } */ + +float16_t +foo2 (float32_t a, float16x8_t b) +{ + return vmaxnmavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxnmav.f16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f32.c index b4c7f83..ef79030 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_f32.c @@ -10,7 +10,6 @@ foo (float32_t a, float32x4_t b) return vmaxnmavq_f32 (a, b); } -/* { dg-final { scan-assembler 
"vmaxnmav.f32" } } */ float32_t foo1 (float32_t a, float32x4_t b) @@ -18,4 +17,12 @@ foo1 (float32_t a, float32x4_t b) return vmaxnmavq (a, b); } -/* { dg-final { scan-assembler "vmaxnmav.f32" } } */ + +float32_t +foo2 (float16_t a, float32x4_t b) +{ + return vmaxnmavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxnmav.f32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16.c index 9c2eed0..f7f39f5 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16.c @@ -10,7 +10,6 @@ foo (float16_t a, float16x8_t b, mve_pred16_t p) return vmaxnmavq_p_f16 (a, b, p); } -/* { dg-final { scan-assembler "vmaxnmavt.f16" } } */ float16_t foo1 (float16_t a, float16x8_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (float16_t a, float16x8_t b, mve_pred16_t p) return vmaxnmavq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxnmavt.f16" } } */ + +float16_t +foo2 (float32_t a, float16x8_t b, mve_pred16_t p) +{ + return vmaxnmavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxnmavt.f16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32.c index 1cadccb..341f625 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32.c @@ -10,7 +10,6 @@ foo (float32_t a, float32x4_t b, mve_pred16_t p) return vmaxnmavq_p_f32 (a, b, p); } -/* { dg-final { scan-assembler "vmaxnmavt.f32" } } */ float32_t foo1 (float32_t a, float32x4_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (float32_t a, float32x4_t b, mve_pred16_t p) return vmaxnmavq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxnmavt.f32" } } */ + +float32_t 
+foo2 (float16_t a, float32x4_t b, mve_pred16_t p) +{ + return vmaxnmavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxnmavt.f32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f16.c index 81f4b9b..80bd1d4 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f16.c @@ -10,7 +10,6 @@ foo (float16_t a, float16x8_t b) return vmaxnmvq_f16 (a, b); } -/* { dg-final { scan-assembler "vmaxnmv.f16" } } */ float16_t foo1 (float16_t a, float16x8_t b) @@ -18,4 +17,12 @@ foo1 (float16_t a, float16x8_t b) return vmaxnmvq (a, b); } -/* { dg-final { scan-assembler "vmaxnmv.f16" } } */ + +float16_t +foo2 (float32_t a, float16x8_t b) +{ + return vmaxnmvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxnmv.f16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f32.c index ab06c2b..bb2fc46 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_f32.c @@ -10,7 +10,6 @@ foo (float32_t a, float32x4_t b) return vmaxnmvq_f32 (a, b); } -/* { dg-final { scan-assembler "vmaxnmv.f32" } } */ float32_t foo1 (float32_t a, float32x4_t b) @@ -18,4 +17,12 @@ foo1 (float32_t a, float32x4_t b) return vmaxnmvq (a, b); } -/* { dg-final { scan-assembler "vmaxnmv.f32" } } */ + +float32_t +foo2 (float16_t a, float32x4_t b) +{ + return vmaxnmvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxnmv.f32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16.c index e37c5a1..3efe203 100644 --- 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16.c @@ -10,7 +10,6 @@ foo (float16_t a, float16x8_t b, mve_pred16_t p) return vmaxnmvq_p_f16 (a, b, p); } -/* { dg-final { scan-assembler "vmaxnmvt.f16" } } */ float16_t foo1 (float16_t a, float16x8_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (float16_t a, float16x8_t b, mve_pred16_t p) return vmaxnmvq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxnmvt.f16" } } */ + +float16_t +foo2 (float32_t a, float16x8_t b, mve_pred16_t p) +{ + return vmaxnmvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxnmvt.f16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32.c index 884cd45..6c13247 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32.c @@ -10,7 +10,6 @@ foo (float32_t a, float32x4_t b, mve_pred16_t p) return vmaxnmvq_p_f32 (a, b, p); } -/* { dg-final { scan-assembler "vmaxnmvt.f32" } } */ float32_t foo1 (float32_t a, float32x4_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (float32_t a, float32x4_t b, mve_pred16_t p) return vmaxnmvq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxnmvt.f32" } } */ + +float32_t +foo2 (float16_t a, float32x4_t b, mve_pred16_t p) +{ + return vmaxnmvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxnmvt.f32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s16.c index 79de370..657efc5 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s16.c @@ -10,7 +10,6 @@ foo (int16_t a, int16x8_t b, mve_pred16_t p) return vmaxvq_p_s16 (a, b, 
p); } -/* { dg-final { scan-assembler "vmaxvt.s16" } } */ int16_t foo1 (int16_t a, int16x8_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (int16_t a, int16x8_t b, mve_pred16_t p) return vmaxvq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxvt.s16" } } */ + +int16_t +foo2 (int8_t a, int16x8_t b, mve_pred16_t p) +{ + return vmaxvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxvt.s16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s32.c index e526744..5882351 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s32.c @@ -10,7 +10,6 @@ foo (int32_t a, int32x4_t b, mve_pred16_t p) return vmaxvq_p_s32 (a, b, p); } -/* { dg-final { scan-assembler "vmaxvt.s32" } } */ int32_t foo1 (int32_t a, int32x4_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (int32_t a, int32x4_t b, mve_pred16_t p) return vmaxvq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxvt.s32" } } */ + +int32_t +foo2 (int16_t a, int32x4_t b, mve_pred16_t p) +{ + return vmaxvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxvt.s32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s8.c index d3cedd4..3737ecd 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_s8.c @@ -10,7 +10,6 @@ foo (int8_t a, int8x16_t b, mve_pred16_t p) return vmaxvq_p_s8 (a, b, p); } -/* { dg-final { scan-assembler "vmaxvt.s8" } } */ int8_t foo1 (int8_t a, int8x16_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (int8_t a, int8x16_t b, mve_pred16_t p) return vmaxvq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxvt.s8" } } */ + +int8_t +foo2 (int32_t a, int8x16_t b, 
mve_pred16_t p) +{ + return vmaxvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxvt.s8" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u16.c index 79572f7..348cf39 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u16.c @@ -10,7 +10,6 @@ foo (uint16_t a, uint16x8_t b, mve_pred16_t p) return vmaxvq_p_u16 (a, b, p); } -/* { dg-final { scan-assembler "vmaxvt.u16" } } */ uint16_t foo1 (uint16_t a, uint16x8_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint16_t a, uint16x8_t b, mve_pred16_t p) return vmaxvq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxvt.u16" } } */ + +uint16_t +foo2 (uint32_t a, uint16x8_t b, mve_pred16_t p) +{ + return vmaxvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxvt.u16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u32.c index e2f7a6f..f2e9762 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u32.c @@ -10,7 +10,6 @@ foo (uint32_t a, uint32x4_t b, mve_pred16_t p) return vmaxvq_p_u32 (a, b, p); } -/* { dg-final { scan-assembler "vmaxvt.u32" } } */ uint32_t foo1 (uint32_t a, uint32x4_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint32_t a, uint32x4_t b, mve_pred16_t p) return vmaxvq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxvt.u32" } } */ + +uint32_t +foo2 (uint8_t a, uint32x4_t b, mve_pred16_t p) +{ + return vmaxvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxvt.u32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u8.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u8.c index f977806..7df5b63 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_p_u8.c @@ -10,7 +10,6 @@ foo (uint8_t a, uint8x16_t b, mve_pred16_t p) return vmaxvq_p_u8 (a, b, p); } -/* { dg-final { scan-assembler "vmaxvt.u8" } } */ uint8_t foo1 (uint8_t a, uint8x16_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint8_t a, uint8x16_t b, mve_pred16_t p) return vmaxvq_p (a, b, p); } -/* { dg-final { scan-assembler "vmaxvt.u8" } } */ + +uint8_t +foo2 (uint16_t a, uint8x16_t b, mve_pred16_t p) +{ + return vmaxvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxvt.u8" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s16.c index 90f10b5..8412452 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s16.c @@ -10,7 +10,6 @@ foo (int16_t a, int16x8_t b) return vmaxvq_s16 (a, b); } -/* { dg-final { scan-assembler "vmaxv.s16" } } */ int16_t foo1 (int16_t a, int16x8_t b) @@ -18,4 +17,12 @@ foo1 (int16_t a, int16x8_t b) return vmaxvq (a, b); } -/* { dg-final { scan-assembler "vmaxv.s16" } } */ + +int16_t +foo2 (int8_t a, int16x8_t b) +{ + return vmaxvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxv.s16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s32.c index aa0e88b..09f4909 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s32.c @@ -10,7 +10,6 @@ foo (int32_t a, int32x4_t b) return vmaxvq_s32 (a, b); } -/* { dg-final { scan-assembler "vmaxv.s32" } } */ int32_t foo1 (int32_t a, int32x4_t b) @@ -18,4 
+17,12 @@ foo1 (int32_t a, int32x4_t b) return vmaxvq (a, b); } -/* { dg-final { scan-assembler "vmaxv.s32" } } */ + +int32_t +foo2 (int16_t a, int32x4_t b) +{ + return vmaxvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxv.s32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s8.c index 884b84d..a087bbc 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_s8.c @@ -10,7 +10,6 @@ foo (int8_t a, int8x16_t b) return vmaxvq_s8 (a, b); } -/* { dg-final { scan-assembler "vmaxv.s8" } } */ int8_t foo1 (int8_t a, int8x16_t b) @@ -18,4 +17,12 @@ foo1 (int8_t a, int8x16_t b) return vmaxvq (a, b); } -/* { dg-final { scan-assembler "vmaxv.s8" } } */ + +int8_t +foo2 (int32_t a, int8x16_t b) +{ + return vmaxvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxv.s8" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u16.c index 2813ebd..47fe0d1 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u16.c @@ -10,7 +10,6 @@ foo (uint16_t a, uint16x8_t b) return vmaxvq_u16 (a, b); } -/* { dg-final { scan-assembler "vmaxv.u16" } } */ uint16_t foo1 (uint16_t a, uint16x8_t b) @@ -18,4 +17,12 @@ foo1 (uint16_t a, uint16x8_t b) return vmaxvq (a, b); } -/* { dg-final { scan-assembler "vmaxv.u16" } } */ + +uint16_t +foo2 (uint32_t a, uint16x8_t b) +{ + return vmaxvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxv.u16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u32.c index ab51b1e..aa723da 100644 --- 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u32.c @@ -10,7 +10,6 @@ foo (uint32_t a, uint32x4_t b) return vmaxvq_u32 (a, b); } -/* { dg-final { scan-assembler "vmaxv.u32" } } */ uint32_t foo1 (uint32_t a, uint32x4_t b) @@ -18,4 +17,12 @@ foo1 (uint32_t a, uint32x4_t b) return vmaxvq (a, b); } -/* { dg-final { scan-assembler "vmaxv.u32" } } */ + +uint32_t +foo2 (uint8_t a, uint32x4_t b) +{ + return vmaxvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxv.u32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u8.c index 3326cfb..3aae785 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmaxvq_u8.c @@ -10,7 +10,6 @@ foo (uint8_t a, uint8x16_t b) return vmaxvq_u8 (a, b); } -/* { dg-final { scan-assembler "vmaxv.u8" } } */ uint8_t foo1 (uint8_t a, uint8x16_t b) @@ -18,4 +17,12 @@ foo1 (uint8_t a, uint8x16_t b) return vmaxvq (a, b); } -/* { dg-final { scan-assembler "vmaxv.u8" } } */ + +uint8_t +foo2 (uint16_t a, uint8x16_t b) +{ + return vmaxvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vmaxv.u8" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s16.c index 6b87648..9303ae0 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s16.c @@ -10,7 +10,6 @@ foo (uint16_t a, int16x8_t b, mve_pred16_t p) return vminavq_p_s16 (a, b, p); } -/* { dg-final { scan-assembler "vminavt.s16" } } */ uint16_t foo1 (uint16_t a, int16x8_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint16_t a, int16x8_t b, mve_pred16_t p) return vminavq_p (a, b, p); } -/* { dg-final { 
scan-assembler "vminavt.s16" } } */ + +int16_t +foo2 (uint8_t a, int16x8_t b, mve_pred16_t p) +{ + return vminavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminavt.s16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s32.c index 086ff56..36247f6 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s32.c @@ -10,7 +10,6 @@ foo (uint32_t a, int32x4_t b, mve_pred16_t p) return vminavq_p_s32 (a, b, p); } -/* { dg-final { scan-assembler "vminavt.s32" } } */ uint32_t foo1 (uint32_t a, int32x4_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint32_t a, int32x4_t b, mve_pred16_t p) return vminavq_p (a, b, p); } -/* { dg-final { scan-assembler "vminavt.s32" } } */ + +int32_t +foo2 (uint16_t a, int32x4_t b, mve_pred16_t p) +{ + return vminavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminavt.s32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s8.c index 999c11c..d336161 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_p_s8.c @@ -10,7 +10,6 @@ foo (uint8_t a, int8x16_t b, mve_pred16_t p) return vminavq_p_s8 (a, b, p); } -/* { dg-final { scan-assembler "vminavt.s8" } } */ uint8_t foo1 (uint8_t a, int8x16_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint8_t a, int8x16_t b, mve_pred16_t p) return vminavq_p (a, b, p); } -/* { dg-final { scan-assembler "vminavt.s8" } } */ + +int8_t +foo2 (uint32_t a, int8x16_t b, mve_pred16_t p) +{ + return vminavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminavt.s8" 3 } } */ diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s16.c index a626e31..17e4edc 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s16.c @@ -10,7 +10,6 @@ foo (uint16_t a, int16x8_t b) return vminavq_s16 (a, b); } -/* { dg-final { scan-assembler "vminav.s16" } } */ uint16_t foo1 (uint16_t a, int16x8_t b) @@ -18,4 +17,12 @@ foo1 (uint16_t a, int16x8_t b) return vminavq (a, b); } -/* { dg-final { scan-assembler "vminav.s16" } } */ + +int16_t +foo2 (uint8_t a, int16x8_t b) +{ + return vminavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminav.s16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s32.c index be575cb..032d02b 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s32.c @@ -10,7 +10,6 @@ foo (uint32_t a, int32x4_t b) return vminavq_s32 (a, b); } -/* { dg-final { scan-assembler "vminav.s32" } } */ uint32_t foo1 (uint32_t a, int32x4_t b) @@ -18,4 +17,12 @@ foo1 (uint32_t a, int32x4_t b) return vminavq (a, b); } -/* { dg-final { scan-assembler "vminav.s32" } } */ + +int32_t +foo2 (uint16_t a, int32x4_t b) +{ + return vminavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminav.s32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s8.c index c3dfe4b..2a2bb3d6 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminavq_s8.c @@ -10,7 +10,6 @@ foo (uint8_t a, int8x16_t b) return vminavq_s8 (a, b); } -/* { dg-final { scan-assembler "vminav.s8" } } */ uint8_t foo1 (uint8_t a, int8x16_t b) @@ 
-18,4 +17,12 @@ foo1 (uint8_t a, int8x16_t b) return vminavq (a, b); } -/* { dg-final { scan-assembler "vminav.s8" } } */ + +int8_t +foo2 (uint32_t a, int8x16_t b) +{ + return vminavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminav.s8" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f16.c index 2111681..fadb23e 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f16.c @@ -10,7 +10,6 @@ foo (float16_t a, float16x8_t b) return vminnmavq_f16 (a, b); } -/* { dg-final { scan-assembler "vminnmav.f16" } } */ float16_t foo1 (float16_t a, float16x8_t b) @@ -18,4 +17,12 @@ foo1 (float16_t a, float16x8_t b) return vminnmavq (a, b); } -/* { dg-final { scan-assembler "vminnmav.f16" } } */ + +float16_t +foo2 (float32_t a, float16x8_t b) +{ + return vminnmavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminnmav.f16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f32.c index bd87b85..84714a9 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_f32.c @@ -10,7 +10,6 @@ foo (float32_t a, float32x4_t b) return vminnmavq_f32 (a, b); } -/* { dg-final { scan-assembler "vminnmav.f32" } } */ float32_t foo1 (float32_t a, float32x4_t b) @@ -18,4 +17,12 @@ foo1 (float32_t a, float32x4_t b) return vminnmavq (a, b); } -/* { dg-final { scan-assembler "vminnmav.f32" } } */ + +float32_t +foo2 (float16_t a, float32x4_t b) +{ + return vminnmavq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminnmav.f32" 3 } } */ diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f16.c index e6d0bb5..c79fa30 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f16.c @@ -10,7 +10,6 @@ foo (float16_t a, float16x8_t b, mve_pred16_t p) return vminnmavq_p_f16 (a, b, p); } -/* { dg-final { scan-assembler "vminnmavt.f16" } } */ float16_t foo1 (float16_t a, float16x8_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (float16_t a, float16x8_t b, mve_pred16_t p) return vminnmavq_p (a, b, p); } -/* { dg-final { scan-assembler "vminnmavt.f16" } } */ + +float16_t +foo2 (float32_t a, float16x8_t b, mve_pred16_t p) +{ + return vminnmavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminnmavt.f16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f32.c index 6b56b67..bea04c7 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmavq_p_f32.c @@ -10,7 +10,6 @@ foo (float32_t a, float32x4_t b, mve_pred16_t p) return vminnmavq_p_f32 (a, b, p); } -/* { dg-final { scan-assembler "vminnmavt.f32" } } */ float32_t foo1 (float32_t a, float32x4_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (float32_t a, float32x4_t b, mve_pred16_t p) return vminnmavq_p (a, b, p); } -/* { dg-final { scan-assembler "vminnmavt.f32" } } */ + +float32_t +foo2 (float16_t a, float32x4_t b, mve_pred16_t p) +{ + return vminnmavq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminnmavt.f32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f16.c index 4d4caae..0eb3a4a 100644 --- 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f16.c @@ -10,7 +10,6 @@ foo (float16_t a, float16x8_t b) return vminnmvq_f16 (a, b); } -/* { dg-final { scan-assembler "vminnmv.f16" } } */ float16_t foo1 (float16_t a, float16x8_t b) @@ -18,4 +17,12 @@ foo1 (float16_t a, float16x8_t b) return vminnmvq (a, b); } -/* { dg-final { scan-assembler "vminnmv.f16" } } */ + +float16_t +foo2 (float32_t a, float16x8_t b) +{ + return vminnmvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminnmv.f16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f32.c index dab04d9..f318350 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_f32.c @@ -10,7 +10,6 @@ foo (float32_t a, float32x4_t b) return vminnmvq_f32 (a, b); } -/* { dg-final { scan-assembler "vminnmv.f32" } } */ float32_t foo1 (float32_t a, float32x4_t b) @@ -18,4 +17,12 @@ foo1 (float32_t a, float32x4_t b) return vminnmvq (a, b); } -/* { dg-final { scan-assembler "vminnmv.f32" } } */ + +float32_t +foo2 (float16_t a, float32x4_t b) +{ + return vminnmvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminnmv.f32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f16.c index f5eafb1..16f6ac5 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f16.c @@ -10,7 +10,6 @@ foo (float16_t a, float16x8_t b, mve_pred16_t p) return vminnmvq_p_f16 (a, b, p); } -/* { dg-final { scan-assembler "vminnmvt.f16" } } */ float16_t foo1 (float16_t a, float16x8_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (float16_t a, 
float16x8_t b, mve_pred16_t p) return vminnmvq_p (a, b, p); } -/* { dg-final { scan-assembler "vminnmvt.f16" } } */ + +float16_t +foo2 (float32_t a, float16x8_t b, mve_pred16_t p) +{ + return vminnmvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminnmvt.f16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f32.c index 5ac20bf..a8e4f9f 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminnmvq_p_f32.c @@ -10,7 +10,6 @@ foo (float32_t a, float32x4_t b, mve_pred16_t p) return vminnmvq_p_f32 (a, b, p); } -/* { dg-final { scan-assembler "vminnmvt.f32" } } */ float32_t foo1 (float32_t a, float32x4_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (float32_t a, float32x4_t b, mve_pred16_t p) return vminnmvq_p (a, b, p); } -/* { dg-final { scan-assembler "vminnmvt.f32" } } */ + +float32_t +foo2 (float16_t a, float32x4_t b, mve_pred16_t p) +{ + return vminnmvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminnmvt.f32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s16.c index c2edb62..91bb63f 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s16.c @@ -10,7 +10,6 @@ foo (int16_t a, int16x8_t b, mve_pred16_t p) return vminvq_p_s16 (a, b, p); } -/* { dg-final { scan-assembler "vminvt.s16" } } */ int16_t foo1 (int16_t a, int16x8_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (int16_t a, int16x8_t b, mve_pred16_t p) return vminvq_p (a, b, p); } -/* { dg-final { scan-assembler "vminvt.s16" } } */ + +int16_t +foo2 (int8_t a, int16x8_t b, mve_pred16_t p) +{ + return vminvq_p (a, b, p); +} + +/* { dg-final { 
scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminvt.s16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s32.c index ba89217..a846701 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s32.c @@ -10,7 +10,6 @@ foo (int32_t a, int32x4_t b, mve_pred16_t p) return vminvq_p_s32 (a, b, p); } -/* { dg-final { scan-assembler "vminvt.s32" } } */ int32_t foo1 (int32_t a, int32x4_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (int32_t a, int32x4_t b, mve_pred16_t p) return vminvq_p (a, b, p); } -/* { dg-final { scan-assembler "vminvt.s32" } } */ + +int32_t +foo2 (int16_t a, int32x4_t b, mve_pred16_t p) +{ + return vminvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminvt.s32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s8.c index 1665c53..716d414 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_s8.c @@ -10,7 +10,6 @@ foo (int8_t a, int8x16_t b, mve_pred16_t p) return vminvq_p_s8 (a, b, p); } -/* { dg-final { scan-assembler "vminvt.s8" } } */ int8_t foo1 (int8_t a, int8x16_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (int8_t a, int8x16_t b, mve_pred16_t p) return vminvq_p (a, b, p); } -/* { dg-final { scan-assembler "vminvt.s8" } } */ + +int8_t +foo2 (int32_t a, int8x16_t b, mve_pred16_t p) +{ + return vminvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminvt.s8" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u16.c index 5bade0a..cc7f8fe 100644 --- 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u16.c @@ -10,7 +10,6 @@ foo (uint16_t a, uint16x8_t b, mve_pred16_t p) return vminvq_p_u16 (a, b, p); } -/* { dg-final { scan-assembler "vminvt.u16" } } */ uint16_t foo1 (uint16_t a, uint16x8_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint16_t a, uint16x8_t b, mve_pred16_t p) return vminvq_p (a, b, p); } -/* { dg-final { scan-assembler "vminvt.u16" } } */ + +uint16_t +foo2 (uint32_t a, uint16x8_t b, mve_pred16_t p) +{ + return vminvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminvt.u16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u32.c index c4c5748..6bde0be 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u32.c @@ -10,7 +10,6 @@ foo (uint32_t a, uint32x4_t b, mve_pred16_t p) return vminvq_p_u32 (a, b, p); } -/* { dg-final { scan-assembler "vminvt.u32" } } */ uint32_t foo1 (uint32_t a, uint32x4_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint32_t a, uint32x4_t b, mve_pred16_t p) return vminvq_p (a, b, p); } -/* { dg-final { scan-assembler "vminvt.u32" } } */ + +uint32_t +foo2 (uint8_t a, uint32x4_t b, mve_pred16_t p) +{ + return vminvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminvt.u32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u8.c index dc890dc..bb89490 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_p_u8.c @@ -10,7 +10,6 @@ foo (uint8_t a, uint8x16_t b, mve_pred16_t p) return vminvq_p_u8 (a, b, p); } -/* { dg-final { scan-assembler "vminvt.u8" } } */ 
uint8_t foo1 (uint8_t a, uint8x16_t b, mve_pred16_t p) @@ -18,4 +17,12 @@ foo1 (uint8_t a, uint8x16_t b, mve_pred16_t p) return vminvq_p (a, b, p); } -/* { dg-final { scan-assembler "vminvt.u8" } } */ + +uint8_t +foo2 (uint16_t a, uint8x16_t b, mve_pred16_t p) +{ + return vminvq_p (a, b, p); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminvt.u8" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s16.c index f6eed63..6d589aa 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s16.c @@ -10,7 +10,6 @@ foo (int16_t a, int16x8_t b) return vminvq_s16 (a, b); } -/* { dg-final { scan-assembler "vminv.s16" } } */ int16_t foo1 (int16_t a, int16x8_t b) @@ -18,4 +17,11 @@ foo1 (int16_t a, int16x8_t b) return vminvq (a, b); } -/* { dg-final { scan-assembler "vminv.s16" } } */ +int16_t +foo2 (int8_t a, int16x8_t b) +{ + return vminvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminv.s16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s32.c index 4077c32..7c727d6 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s32.c @@ -10,7 +10,6 @@ foo (int32_t a, int32x4_t b) return vminvq_s32 (a, b); } -/* { dg-final { scan-assembler "vminv.s32" } } */ int32_t foo1 (int32_t a, int32x4_t b) @@ -18,4 +17,11 @@ foo1 (int32_t a, int32x4_t b) return vminvq (a, b); } -/* { dg-final { scan-assembler "vminv.s32" } } */ +int32_t +foo2 (int8_t a, int32x4_t b) +{ + return vminvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminv.s32" 3 } } */ diff --git 
a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s8.c index bdf15f4..7630948 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_s8.c @@ -10,7 +10,6 @@ foo (int8_t a, int8x16_t b) return vminvq_s8 (a, b); } -/* { dg-final { scan-assembler "vminv.s8" } } */ int8_t foo1 (int8_t a, int8x16_t b) @@ -18,4 +17,11 @@ foo1 (int8_t a, int8x16_t b) return vminvq (a, b); } -/* { dg-final { scan-assembler "vminv.s8" } } */ +int8_t +foo2 (int32_t a, int8x16_t b) +{ + return vminvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminv.s8" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u16.c index 5c0935c..698975f 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u16.c @@ -10,7 +10,6 @@ foo (uint16_t a, uint16x8_t b) return vminvq_u16 (a, b); } -/* { dg-final { scan-assembler "vminv.u16" } } */ uint16_t foo1 (uint16_t a, uint16x8_t b) @@ -18,4 +17,12 @@ foo1 (uint16_t a, uint16x8_t b) return vminvq (a, b); } -/* { dg-final { scan-assembler "vminv.u16" } } */ + +uint8_t +foo2 (uint32_t a, uint16x8_t b) +{ + return vminvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminv.u16" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u32.c index 1580c87..7489f81 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u32.c @@ -10,7 +10,6 @@ foo (uint32_t a, uint32x4_t b) return vminvq_u32 (a, b); } -/* { dg-final { scan-assembler "vminv.u32" } } */ uint32_t foo1 (uint32_t a, uint32x4_t b) @@ -18,4 +17,11 @@ foo1 (uint32_t a, 
uint32x4_t b) return vminvq (a, b); } -/* { dg-final { scan-assembler "vminv.u32" } } */ +uint32_t +foo2 (uint16_t a, uint32x4_t b) +{ + return vminvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminv.u32" 3 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u8.c index 95919b4..aa2b986 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vminvq_u8.c @@ -10,7 +10,6 @@ foo (uint8_t a, uint8x16_t b) return vminvq_u8 (a, b); } -/* { dg-final { scan-assembler "vminv.u8" } } */ uint8_t foo1 (uint8_t a, uint8x16_t b) @@ -18,4 +17,12 @@ foo1 (uint8_t a, uint8x16_t b) return vminvq (a, b); } -/* { dg-final { scan-assembler "vminv.u8" } } */ + +uint16_t +foo2 (uint32_t a, uint8x16_t b) +{ + return vminvq (a, b); +} + +/* { dg-final { scan-assembler-not "__ARM_undef" } } */ +/* { dg-final { scan-assembler-times "vminv.u8" 3 } } */ -- cgit v1.1 From 9d5af1db2d1e7ca4f47f16fc885e1d70fa0229fc Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Fri, 2 Oct 2020 18:41:34 +0200 Subject: ipa: Bundle vectors describing argument values Hi, this large patch is a mostly mechanical change which aims to replace uses of separate vectors about known scalar values (usually called known_vals or known_csts), known aggregate values (known_aggs), known virtual call contexts (known_contexts) and known value ranges (known_value_ranges) with uses of either new type ipa_call_arg_values or ipa_auto_call_arg_values, both of which simply contain these vectors inside them. The need for two distinct types comes from the fact that when the vectors are constructed from jump functions or lattices, we really should use auto_vecs with embedded storage allocated on the stack. On the other hand, the bundle in ipa_call_context can be allocated on the heap when in cache, one time for each call_graph node.
ipa_call_context is constructible from ipa_auto_call_arg_values but then its vectors must not be resized, otherwise the vectors will stop pointing to the stack ones. Unfortunately, I don't think the structure embedded in ipa_call_context can be made constant because we need to manipulate and deallocate it when in cache. gcc/ChangeLog: 2020-09-01 Martin Jambor * ipa-prop.h (ipa_auto_call_arg_values): New type. (class ipa_call_arg_values): Likewise. (ipa_get_indirect_edge_target): Replaced vector arguments with ipa_call_arg_values in declaration. Added an overload for ipa_auto_call_arg_values. * ipa-fnsummary.h (ipa_call_context): Removed members m_known_vals, m_known_contexts, m_known_aggs, duplicate_from, release and equal_to, new members m_avals, store_to_cache and equivalent_to_p. Adjusted constructor arguments. (estimate_ipcp_clone_size_and_time): Replaced vector arguments with ipa_auto_call_arg_values in declaration. (evaluate_properties_for_edge): Likewise. * ipa-cp.c (ipa_get_indirect_edge_target): Adjusted to work on ipa_call_arg_values rather than on separate vectors. Added an overload for ipa_auto_call_arg_values. (devirtualization_time_bonus): Adjusted to work on ipa_auto_call_arg_values rather than on separate vectors. (gather_context_independent_values): Adjusted to work on ipa_auto_call_arg_values rather than on separate vectors. (perform_estimation_of_a_value): Likewise. (estimate_local_effects): Likewise. (modify_known_vectors_with_val): Adjusted both variants to work on ipa_auto_call_arg_values and renamed them to copy_known_vectors_add_val. (decide_about_value): Adjusted to work on ipa_call_arg_values rather than on separate vectors. (decide_whether_version_node): Likewise. * ipa-fnsummary.c (evaluate_conditions_for_known_args): Likewise. (evaluate_properties_for_edge): Likewise. (ipa_fn_summary_t::duplicate): Likewise. (estimate_edge_devirt_benefit): Adjusted to work on ipa_call_arg_values rather than on separate vectors.
(estimate_edge_size_and_time): Likewise. (estimate_calls_size_and_time_1): Likewise. (summarize_calls_size_and_time): Adjusted calls to estimate_edge_size_and_time. (estimate_calls_size_and_time): Adjusted to work on ipa_call_arg_values rather than on separate vectors. (ipa_call_context::ipa_call_context): Construct from a pointer to ipa_auto_call_arg_values instead of individual vectors. (ipa_call_context::duplicate_from): Adjusted to access vectors within m_avals. (ipa_call_context::release): Likewise. (ipa_call_context::equal_to): Likewise. (ipa_call_context::estimate_size_and_time): Adjusted to work on ipa_call_arg_values rather than on separate vectors. (estimate_ipcp_clone_size_and_time): Adjusted to work with ipa_auto_call_arg_values rather than on separate vectors. (ipa_merge_fn_summary_after_inlining): Likewise. Adjusted call to estimate_edge_size_and_time. (ipa_update_overall_fn_summary): Adjusted call to estimate_edge_size_and_time. * ipa-inline-analysis.c (do_estimate_edge_time): Adjusted to work with ipa_auto_call_arg_values rather than with separate vectors. (do_estimate_edge_size): Likewise. (do_estimate_edge_hints): Likewise. * ipa-prop.c (ipa_auto_call_arg_values::~ipa_auto_call_arg_values): New destructor. --- gcc/ipa-cp.c | 245 ++++++++++++-------------- gcc/ipa-fnsummary.c | 440 +++++++++++++++++++++------------------------- gcc/ipa-fnsummary.h | 27 +-- gcc/ipa-inline-analysis.c | 41 ++--- gcc/ipa-prop.c | 10 ++ gcc/ipa-prop.h | 112 +++++++++++- 6 files changed, 449 insertions(+), 426 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index b3e7d41..292dd7e 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -3117,30 +3117,40 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie, return target; } +/* If an indirect edge IE can be turned into a direct one based on data in + AVALS, return the destination. Store into *SPECULATIVE a boolean determinig + whether the discovered target is only speculative guess.
*/ -/* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS, - KNOWN_CONTEXTS (which can be vNULL) or KNOWN_AGGS (which also can be vNULL) - return the destination. */ +tree +ipa_get_indirect_edge_target (struct cgraph_edge *ie, + ipa_call_arg_values *avals, + bool *speculative) +{ + return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals, + avals->m_known_contexts, + avals->m_known_aggs, + NULL, speculative); +} + +/* The same functionality as above overloaded for ipa_auto_call_arg_values. */ tree ipa_get_indirect_edge_target (struct cgraph_edge *ie, - vec known_csts, - vec known_contexts, - vec known_aggs, + ipa_auto_call_arg_values *avals, bool *speculative) { - return ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts, - known_aggs, NULL, speculative); + return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals, + avals->m_known_contexts, + avals->m_known_aggs, + NULL, speculative); } -/* Calculate devirtualization time bonus for NODE, assuming we know KNOWN_CSTS - and KNOWN_CONTEXTS. */ +/* Calculate devirtualization time bonus for NODE, assuming we know information + about arguments stored in AVALS. */ static int devirtualization_time_bonus (struct cgraph_node *node, - vec known_csts, - vec known_contexts, - vec known_aggs) + ipa_auto_call_arg_values *avals) { struct cgraph_edge *ie; int res = 0; @@ -3153,8 +3163,7 @@ devirtualization_time_bonus (struct cgraph_node *node, tree target; bool speculative; - target = ipa_get_indirect_edge_target (ie, known_csts, known_contexts, - known_aggs, &speculative); + target = ipa_get_indirect_edge_target (ie, avals, &speculative); if (!target) continue; @@ -3306,32 +3315,27 @@ context_independent_aggregate_values (class ipcp_param_lattices *plats) return res; } -/* Allocate KNOWN_CSTS, KNOWN_CONTEXTS and, if non-NULL, KNOWN_AGGS and - populate them with values of parameters that are known independent of the - context. INFO describes the function. 
If REMOVABLE_PARAMS_COST is - non-NULL, the movement cost of all removable parameters will be stored in - it. */ +/* Grow vectors in AVALS and fill them with information about values of + parameters that are known to be independent of the context. Only calculate + m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If + REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable + parameters will be stored in it. + + TODO: Also grow context independent value range vectors. */ static bool gather_context_independent_values (class ipa_node_params *info, - vec *known_csts, - vec - *known_contexts, - vec *known_aggs, + ipa_auto_call_arg_values *avals, + bool calculate_aggs, int *removable_params_cost) { int i, count = ipa_get_param_count (info); bool ret = false; - known_csts->create (0); - known_contexts->create (0); - known_csts->safe_grow_cleared (count, true); - known_contexts->safe_grow_cleared (count, true); - if (known_aggs) - { - known_aggs->create (0); - known_aggs->safe_grow_cleared (count, true); - } + avals->m_known_vals.safe_grow_cleared (count, true); + avals->m_known_contexts.safe_grow_cleared (count, true); + if (calculate_aggs) + avals->m_known_aggs.safe_grow_cleared (count, true); if (removable_params_cost) *removable_params_cost = 0; @@ -3345,7 +3349,7 @@ gather_context_independent_values (class ipa_node_params *info, { ipcp_value *val = lat->values; gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO); - (*known_csts)[i] = val->value; + avals->m_known_vals[i] = val->value; if (removable_params_cost) *removable_params_cost += estimate_move_cost (TREE_TYPE (val->value), false); @@ -3363,15 +3367,15 @@ gather_context_independent_values (class ipa_node_params *info, /* Do not account known context as reason for cloning. We can see if it permits devirtualization. 
*/ if (ctxlat->is_single_const ()) - (*known_contexts)[i] = ctxlat->values->value; + avals->m_known_contexts[i] = ctxlat->values->value; - if (known_aggs) + if (calculate_aggs) { vec agg_items; struct ipa_agg_value_set *agg; agg_items = context_independent_aggregate_values (plats); - agg = &(*known_aggs)[i]; + agg = &avals->m_known_aggs[i]; agg->items = agg_items; agg->by_ref = plats->aggs_by_ref; ret |= !agg_items.is_empty (); @@ -3381,25 +3385,23 @@ gather_context_independent_values (class ipa_node_params *info, return ret; } -/* Perform time and size measurement of NODE with the context given in - KNOWN_CSTS, KNOWN_CONTEXTS and KNOWN_AGGS, calculate the benefit and cost - given BASE_TIME of the node without specialization, REMOVABLE_PARAMS_COST of - all context-independent removable parameters and EST_MOVE_COST of estimated - movement of the considered parameter and store it into VAL. */ +/* Perform time and size measurement of NODE with the context given in AVALS, + calculate the benefit compared to the node without specialization and store + it into VAL. Take into account REMOVABLE_PARAMS_COST of all + context-independent or unused removable parameters and EST_MOVE_COST, the + estimated movement of the considered parameter. 
*/ static void -perform_estimation_of_a_value (cgraph_node *node, vec known_csts, - vec known_contexts, - vec known_aggs, - int removable_params_cost, - int est_move_cost, ipcp_value_base *val) +perform_estimation_of_a_value (cgraph_node *node, + ipa_auto_call_arg_values *avals, + int removable_params_cost, int est_move_cost, + ipcp_value_base *val) { int size, time_benefit; sreal time, base_time; ipa_hints hints; - estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts, - known_aggs, &size, &time, + estimate_ipcp_clone_size_and_time (node, avals, &size, &time, &base_time, &hints); base_time -= time; if (base_time > 65535) @@ -3412,8 +3414,7 @@ perform_estimation_of_a_value (cgraph_node *node, vec known_csts, time_benefit = 0; else time_benefit = base_time.to_int () - + devirtualization_time_bonus (node, known_csts, known_contexts, - known_aggs) + + devirtualization_time_bonus (node, avals) + hint_time_bonus (node, hints) + removable_params_cost + est_move_cost; @@ -3454,9 +3455,6 @@ estimate_local_effects (struct cgraph_node *node) { class ipa_node_params *info = IPA_NODE_REF (node); int i, count = ipa_get_param_count (info); - vec known_csts; - vec known_contexts; - vec known_aggs; bool always_const; int removable_params_cost; @@ -3466,11 +3464,10 @@ estimate_local_effects (struct cgraph_node *node) if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ()); - always_const = gather_context_independent_values (info, &known_csts, - &known_contexts, &known_aggs, + ipa_auto_call_arg_values avals; + always_const = gather_context_independent_values (info, &avals, true, &removable_params_cost); - int devirt_bonus = devirtualization_time_bonus (node, known_csts, - known_contexts, known_aggs); + int devirt_bonus = devirtualization_time_bonus (node, &avals); if (always_const || devirt_bonus || (removable_params_cost && node->can_change_signature)) { @@ -3482,8 +3479,7 @@ estimate_local_effects (struct 
cgraph_node *node) init_caller_stats (&stats); node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false); - estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts, - known_aggs, &size, &time, + estimate_ipcp_clone_size_and_time (node, &avals, &size, &time, &base_time, &hints); time -= devirt_bonus; time -= hint_time_bonus (node, hints); @@ -3536,18 +3532,17 @@ estimate_local_effects (struct cgraph_node *node) if (lat->bottom || !lat->values - || known_csts[i]) + || avals.m_known_vals[i]) continue; for (val = lat->values; val; val = val->next) { gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO); - known_csts[i] = val->value; + avals.m_known_vals[i] = val->value; int emc = estimate_move_cost (TREE_TYPE (val->value), true); - perform_estimation_of_a_value (node, known_csts, known_contexts, - known_aggs, - removable_params_cost, emc, val); + perform_estimation_of_a_value (node, &avals, removable_params_cost, + emc, val); if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -3559,7 +3554,7 @@ estimate_local_effects (struct cgraph_node *node) val->local_time_benefit, val->local_size_cost); } } - known_csts[i] = NULL_TREE; + avals.m_known_vals[i] = NULL_TREE; } for (i = 0; i < count; i++) @@ -3574,15 +3569,14 @@ estimate_local_effects (struct cgraph_node *node) if (ctxlat->bottom || !ctxlat->values - || !known_contexts[i].useless_p ()) + || !avals.m_known_contexts[i].useless_p ()) continue; for (val = ctxlat->values; val; val = val->next) { - known_contexts[i] = val->value; - perform_estimation_of_a_value (node, known_csts, known_contexts, - known_aggs, - removable_params_cost, 0, val); + avals.m_known_contexts[i] = val->value; + perform_estimation_of_a_value (node, &avals, removable_params_cost, + 0, val); if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -3594,20 +3588,18 @@ estimate_local_effects (struct cgraph_node *node) val->local_time_benefit, val->local_size_cost); } } - known_contexts[i] = ipa_polymorphic_call_context (); + 
avals.m_known_contexts[i] = ipa_polymorphic_call_context (); } for (i = 0; i < count; i++) { class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - struct ipa_agg_value_set *agg; - struct ipcp_agg_lattice *aglat; if (plats->aggs_bottom || !plats->aggs) continue; - agg = &known_aggs[i]; - for (aglat = plats->aggs; aglat; aglat = aglat->next) + ipa_agg_value_set *agg = &avals.m_known_aggs[i]; + for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next) { ipcp_value *val; if (aglat->bottom || !aglat->values @@ -3624,8 +3616,7 @@ estimate_local_effects (struct cgraph_node *node) item.value = val->value; agg->items.safe_push (item); - perform_estimation_of_a_value (node, known_csts, known_contexts, - known_aggs, + perform_estimation_of_a_value (node, &avals, removable_params_cost, 0, val); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -3645,10 +3636,6 @@ estimate_local_effects (struct cgraph_node *node) } } } - - known_csts.release (); - known_contexts.release (); - ipa_release_agg_values (known_aggs); } @@ -5372,31 +5359,34 @@ copy_useful_known_contexts (vec known_contexts) return vNULL; } -/* Copy KNOWN_CSTS and modify the copy according to VAL and INDEX. If - non-empty, replace KNOWN_CONTEXTS with its copy too. */ +/* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy + according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its + copy too. */ static void -modify_known_vectors_with_val (vec *known_csts, - vec *known_contexts, - ipcp_value *val, - int index) +copy_known_vectors_add_val (ipa_auto_call_arg_values *avals, + vec *known_csts, + vec *known_contexts, + ipcp_value *val, int index) { - *known_csts = known_csts->copy (); - *known_contexts = copy_useful_known_contexts (*known_contexts); + *known_csts = avals->m_known_vals.copy (); + *known_contexts = copy_useful_known_contexts (avals->m_known_contexts); (*known_csts)[index] = val->value; } -/* Replace KNOWN_CSTS with its copy. 
Also copy KNOWN_CONTEXTS and modify the - copy according to VAL and INDEX. */ +/* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy + contexts to KNOWN_CONTEXTS and modify the copy according to VAL and + INDEX. */ static void -modify_known_vectors_with_val (vec *known_csts, - vec *known_contexts, - ipcp_value *val, - int index) -{ - *known_csts = known_csts->copy (); - *known_contexts = known_contexts->copy (); +copy_known_vectors_add_val (ipa_auto_call_arg_values *avals, + vec *known_csts, + vec *known_contexts, + ipcp_value *val, + int index) +{ + *known_csts = avals->m_known_vals.copy (); + *known_contexts = avals->m_known_contexts.copy (); (*known_contexts)[index] = val->value; } @@ -5433,16 +5423,15 @@ ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *, return offset == -1; } -/* Decide whether to create a special version of NODE for value VAL of parameter - at the given INDEX. If OFFSET is -1, the value is for the parameter itself, - otherwise it is stored at the given OFFSET of the parameter. KNOWN_CSTS, - KNOWN_CONTEXTS and KNOWN_AGGS describe the other already known values. */ +/* Decide whether to create a special version of NODE for value VAL of + parameter at the given INDEX. If OFFSET is -1, the value is for the + parameter itself, otherwise it is stored at the given OFFSET of the + parameter. AVALS describes the other already known values. 
*/ template static bool decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset, - ipcp_value *val, vec known_csts, - vec known_contexts) + ipcp_value *val, ipa_auto_call_arg_values *avals) { struct ipa_agg_replacement_value *aggvals; int freq_sum, caller_count; @@ -5492,13 +5481,16 @@ decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset, fprintf (dump_file, " Creating a specialized node of %s.\n", node->dump_name ()); + vec known_csts; + vec known_contexts; + callers = gather_edges_for_value (val, node, caller_count); if (offset == -1) - modify_known_vectors_with_val (&known_csts, &known_contexts, val, index); + copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index); else { - known_csts = known_csts.copy (); - known_contexts = copy_useful_known_contexts (known_contexts); + known_csts = avals->m_known_vals.copy (); + known_contexts = copy_useful_known_contexts (avals->m_known_contexts); } find_more_scalar_values_for_callers_subset (node, known_csts, callers); find_more_contexts_for_caller_subset (node, &known_contexts, callers); @@ -5522,8 +5514,6 @@ decide_whether_version_node (struct cgraph_node *node) { class ipa_node_params *info = IPA_NODE_REF (node); int i, count = ipa_get_param_count (info); - vec known_csts; - vec known_contexts; bool ret = false; if (count == 0) @@ -5533,8 +5523,8 @@ decide_whether_version_node (struct cgraph_node *node) fprintf (dump_file, "\nEvaluating opportunities for %s.\n", node->dump_name ()); - gather_context_independent_values (info, &known_csts, &known_contexts, - NULL, NULL); + ipa_auto_call_arg_values avals; + gather_context_independent_values (info, &avals, false, NULL); for (i = 0; i < count;i++) { @@ -5543,12 +5533,11 @@ decide_whether_version_node (struct cgraph_node *node) ipcp_lattice *ctxlat = &plats->ctxlat; if (!lat->bottom - && !known_csts[i]) + && !avals.m_known_vals[i]) { ipcp_value *val; for (val = lat->values; val; val = val->next) - ret |= 
decide_about_value (node, i, -1, val, known_csts, - known_contexts); + ret |= decide_about_value (node, i, -1, val, &avals); } if (!plats->aggs_bottom) @@ -5557,22 +5546,20 @@ decide_whether_version_node (struct cgraph_node *node) ipcp_value *val; for (aglat = plats->aggs; aglat; aglat = aglat->next) if (!aglat->bottom && aglat->values - /* If the following is false, the one value is in - known_aggs. */ + /* If the following is false, the one value has been considered + for cloning for all contexts. */ && (plats->aggs_contain_variable || !aglat->is_single_const ())) for (val = aglat->values; val; val = val->next) - ret |= decide_about_value (node, i, aglat->offset, val, - known_csts, known_contexts); + ret |= decide_about_value (node, i, aglat->offset, val, &avals); } if (!ctxlat->bottom - && known_contexts[i].useless_p ()) + && avals.m_known_contexts[i].useless_p ()) { ipcp_value *val; for (val = ctxlat->values; val; val = val->next) - ret |= decide_about_value (node, i, -1, val, known_csts, - known_contexts); + ret |= decide_about_value (node, i, -1, val, &avals); } info = IPA_NODE_REF (node); @@ -5595,11 +5582,9 @@ decide_whether_version_node (struct cgraph_node *node) if (!adjust_callers_for_value_intersection (callers, node)) { /* If node is not called by anyone, or all its caller edges are - self-recursive, the node is not really be in use, no need to - do cloning. */ + self-recursive, the node is not really in use, no need to do + cloning. 
*/ callers.release (); - known_csts.release (); - known_contexts.release (); info->do_clone_for_all_contexts = false; return ret; } @@ -5608,6 +5593,9 @@ decide_whether_version_node (struct cgraph_node *node) fprintf (dump_file, " - Creating a specialized node of %s " "for all known contexts.\n", node->dump_name ()); + vec known_csts = avals.m_known_vals.copy (); + vec known_contexts + = copy_useful_known_contexts (avals.m_known_contexts); find_more_scalar_values_for_callers_subset (node, known_csts, callers); find_more_contexts_for_caller_subset (node, &known_contexts, callers); ipa_agg_replacement_value *aggvals @@ -5625,11 +5613,6 @@ decide_whether_version_node (struct cgraph_node *node) IPA_NODE_REF (clone)->is_all_contexts_clone = true; ret = true; } - else - { - known_csts.release (); - known_contexts.release (); - } return ret; } diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index 8285cc0..b525cfd 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -320,19 +320,18 @@ set_hint_predicate (predicate **p, predicate new_predicate) is always false in the second and also builtin_constant_p tests cannot use the fact that parameter is indeed a constant. - KNOWN_VALS is partial mapping of parameters of NODE to constant values. - KNOWN_AGGS is a vector of aggregate known offset/value set for each - parameter. Return clause of possible truths. When INLINE_P is true, assume - that we are inlining. + When INLINE_P is true, assume that we are inlining. AVAL contains known + information about argument values. The function does not modify its content + and so AVALs could also be of type ipa_call_arg_values but so far all + callers work with the auto version and so we avoid the conversion for + convenience. - ERROR_MARK means compile time invariant. */ + ERROR_MARK value of an argument means compile time invariant. 
*/ static void evaluate_conditions_for_known_args (struct cgraph_node *node, bool inline_p, - vec known_vals, - vec known_value_ranges, - vec known_aggs, + ipa_auto_call_arg_values *avals, clause_t *ret_clause, clause_t *ret_nonspec_clause) { @@ -351,38 +350,33 @@ evaluate_conditions_for_known_args (struct cgraph_node *node, /* We allow call stmt to have fewer arguments than the callee function (especially for K&R style programs). So bound check here (we assume - known_aggs vector, if non-NULL, has the same length as - known_vals). */ - gcc_checking_assert (!known_aggs.length () || !known_vals.length () - || (known_vals.length () == known_aggs.length ())); + m_known_aggs vector is either empty or has the same length as + m_known_vals). */ + gcc_checking_assert (!avals->m_known_aggs.length () + || !avals->m_known_vals.length () + || (avals->m_known_vals.length () + == avals->m_known_aggs.length ())); if (c->agg_contents) { - struct ipa_agg_value_set *agg; - if (c->code == predicate::changed && !c->by_ref - && c->operand_num < (int)known_vals.length () - && (known_vals[c->operand_num] == error_mark_node)) + && (avals->safe_sval_at(c->operand_num) == error_mark_node)) continue; - if (c->operand_num < (int)known_aggs.length ()) + if (ipa_agg_value_set *agg = avals->safe_aggval_at (c->operand_num)) { - agg = &known_aggs[c->operand_num]; - val = ipa_find_agg_cst_for_param (agg, - c->operand_num - < (int) known_vals.length () - ? 
known_vals[c->operand_num] - : NULL, - c->offset, c->by_ref); + tree sval = avals->safe_sval_at (c->operand_num); + val = ipa_find_agg_cst_for_param (agg, sval, c->offset, + c->by_ref); } else val = NULL_TREE; } - else if (c->operand_num < (int) known_vals.length ()) + else { - val = known_vals[c->operand_num]; - if (val == error_mark_node && c->code != predicate::changed) + val = avals->safe_sval_at (c->operand_num); + if (val && val == error_mark_node && c->code != predicate::changed) val = NULL_TREE; } @@ -446,53 +440,54 @@ evaluate_conditions_for_known_args (struct cgraph_node *node, continue; } } - if (c->operand_num < (int) known_value_ranges.length () + if (c->operand_num < (int) avals->m_known_value_ranges.length () && !c->agg_contents - && !known_value_ranges[c->operand_num].undefined_p () - && !known_value_ranges[c->operand_num].varying_p () - && TYPE_SIZE (c->type) - == TYPE_SIZE (known_value_ranges[c->operand_num].type ()) && (!val || TREE_CODE (val) != INTEGER_CST)) { - value_range vr = known_value_ranges[c->operand_num]; - if (!useless_type_conversion_p (c->type, vr.type ())) + value_range vr = avals->m_known_value_ranges[c->operand_num]; + if (!vr.undefined_p () + && !vr.varying_p () + && (TYPE_SIZE (c->type) == TYPE_SIZE (vr.type ()))) { - value_range res; - range_fold_unary_expr (&res, NOP_EXPR, + if (!useless_type_conversion_p (c->type, vr.type ())) + { + value_range res; + range_fold_unary_expr (&res, NOP_EXPR, c->type, &vr, vr.type ()); - vr = res; - } - tree type = c->type; + vr = res; + } + tree type = c->type; - for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) - { - if (vr.varying_p () || vr.undefined_p ()) - break; + for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) + { + if (vr.varying_p () || vr.undefined_p ()) + break; - value_range res; - if (!op->val[0]) - range_fold_unary_expr (&res, op->code, op->type, &vr, type); - else if (!op->val[1]) + value_range res; + if (!op->val[0]) + range_fold_unary_expr (&res, op->code, 
op->type, &vr, type); + else if (!op->val[1]) + { + value_range op0 (op->val[0], op->val[0]); + range_fold_binary_expr (&res, op->code, op->type, + op->index ? &op0 : &vr, + op->index ? &vr : &op0); + } + else + gcc_unreachable (); + type = op->type; + vr = res; + } + if (!vr.varying_p () && !vr.undefined_p ()) { - value_range op0 (op->val[0], op->val[0]); - range_fold_binary_expr (&res, op->code, op->type, - op->index ? &op0 : &vr, - op->index ? &vr : &op0); + value_range res; + value_range val_vr (c->val, c->val); + range_fold_binary_expr (&res, c->code, boolean_type_node, + &vr, + &val_vr); + if (res.zero_p ()) + continue; } - else - gcc_unreachable (); - type = op->type; - vr = res; - } - if (!vr.varying_p () && !vr.undefined_p ()) - { - value_range res; - value_range val_vr (c->val, c->val); - range_fold_binary_expr (&res, c->code, boolean_type_node, - &vr, - &val_vr); - if (res.zero_p ()) - continue; } } @@ -538,24 +533,20 @@ fre_will_run_p (struct cgraph_node *node) (if non-NULL) conditions evaluated for nonspecialized clone called in a given context. - KNOWN_VALS_PTR and KNOWN_AGGS_PTR must be non-NULL and will be filled by - known constant and aggregate values of parameters. - - KNOWN_CONTEXT_PTR, if non-NULL, will be filled by polymorphic call contexts - of parameter used by a polymorphic call. */ + Vectors in AVALS will be populated with useful known information about + argument values - information not known to have any uses will be omitted - + except for m_known_contexts which will only be calculated if + COMPUTE_CONTEXTS is true. 
*/ void evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, clause_t *clause_ptr, clause_t *nonspec_clause_ptr, - vec *known_vals_ptr, - vec - *known_contexts_ptr, - vec *known_aggs_ptr) + ipa_auto_call_arg_values *avals, + bool compute_contexts) { struct cgraph_node *callee = e->callee->ultimate_alias_target (); class ipa_fn_summary *info = ipa_fn_summaries->get (callee); - auto_vec known_value_ranges; class ipa_edge_args *args; if (clause_ptr) @@ -563,7 +554,7 @@ evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, if (ipa_node_params_sum && !e->call_stmt_cannot_inline_p - && (info->conds || known_contexts_ptr) + && (info->conds || compute_contexts) && (args = IPA_EDGE_REF (e)) != NULL) { struct cgraph_node *caller; @@ -608,15 +599,15 @@ evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, if (cst) { gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO); - if (!known_vals_ptr->length ()) - vec_safe_grow_cleared (known_vals_ptr, count, true); - (*known_vals_ptr)[i] = cst; + if (!avals->m_known_vals.length ()) + avals->m_known_vals.safe_grow_cleared (count, true); + avals->m_known_vals[i] = cst; } else if (inline_p && !es->param[i].change_prob) { - if (!known_vals_ptr->length ()) - vec_safe_grow_cleared (known_vals_ptr, count, true); - (*known_vals_ptr)[i] = error_mark_node; + if (!avals->m_known_vals.length ()) + avals->m_known_vals.safe_grow_cleared (count, true); + avals->m_known_vals[i] = error_mark_node; } /* If we failed to get simple constant, try value range. 
*/ @@ -624,19 +615,20 @@ evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, && vrp_will_run_p (caller) && ipa_is_param_used_by_ipa_predicates (callee_pi, i)) { - value_range vr + value_range vr = ipa_value_range_from_jfunc (caller_parms_info, e, jf, ipa_get_type (callee_pi, i)); if (!vr.undefined_p () && !vr.varying_p ()) { - if (!known_value_ranges.length ()) + if (!avals->m_known_value_ranges.length ()) { - known_value_ranges.safe_grow (count, true); + avals->m_known_value_ranges.safe_grow (count, true); for (int i = 0; i < count; ++i) - new (&known_value_ranges[i]) value_range (); + new (&avals->m_known_value_ranges[i]) + value_range (); } - known_value_ranges[i] = vr; + avals->m_known_value_ranges[i] = vr; } } @@ -648,25 +640,25 @@ evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, caller, &jf->agg); if (agg.items.length ()) { - if (!known_aggs_ptr->length ()) - vec_safe_grow_cleared (known_aggs_ptr, count, true); - (*known_aggs_ptr)[i] = agg; + if (!avals->m_known_aggs.length ()) + avals->m_known_aggs.safe_grow_cleared (count, true); + avals->m_known_aggs[i] = agg; } } } /* For calls used in polymorphic calls we further determine polymorphic call context. 
*/ - if (known_contexts_ptr + if (compute_contexts && ipa_is_param_used_by_polymorphic_call (callee_pi, i)) { ipa_polymorphic_call_context ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf); if (!ctx.useless_p ()) { - if (!known_contexts_ptr->length ()) - known_contexts_ptr->safe_grow_cleared (count, true); - (*known_contexts_ptr)[i] + if (!avals->m_known_contexts.length ()) + avals->m_known_contexts.safe_grow_cleared (count, true); + avals->m_known_contexts[i] = ipa_context_from_jfunc (caller_parms_info, e, i, jf); } } @@ -685,18 +677,14 @@ evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, cst = NULL; if (cst) { - if (!known_vals_ptr->length ()) - vec_safe_grow_cleared (known_vals_ptr, count, true); - (*known_vals_ptr)[i] = cst; + if (!avals->m_known_vals.length ()) + avals->m_known_vals.safe_grow_cleared (count, true); + avals->m_known_vals[i] = cst; } } } - evaluate_conditions_for_known_args (callee, inline_p, - *known_vals_ptr, - known_value_ranges, - *known_aggs_ptr, - clause_ptr, + evaluate_conditions_for_known_args (callee, inline_p, avals, clause_ptr, nonspec_clause_ptr); } @@ -781,7 +769,7 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, vec *entry = info->size_time_table; /* Use SRC parm info since it may not be copied yet. 
*/ class ipa_node_params *parms_info = IPA_NODE_REF (src); - vec known_vals = vNULL; + ipa_auto_call_arg_values avals; int count = ipa_get_param_count (parms_info); int i, j; clause_t possible_truths; @@ -792,7 +780,7 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, struct cgraph_edge *edge, *next; info->size_time_table = 0; - known_vals.safe_grow_cleared (count, true); + avals.m_known_vals.safe_grow_cleared (count, true); for (i = 0; i < count; i++) { struct ipa_replace_map *r; @@ -801,20 +789,17 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, { if (r->parm_num == i) { - known_vals[i] = r->new_tree; + avals.m_known_vals[i] = r->new_tree; break; } } } evaluate_conditions_for_known_args (dst, false, - known_vals, - vNULL, - vNULL, + &avals, &possible_truths, /* We are going to specialize, so ignore nonspec truths. */ NULL); - known_vals.release (); info->account_size_time (0, 0, true_pred, true_pred); @@ -3054,15 +3039,14 @@ compute_fn_summary_for_current (void) return 0; } -/* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS, - KNOWN_CONTEXTS and KNOWN_AGGS. */ +/* Estimate benefit devirtualizing indirect edge IE and return true if it can + be devirtualized and inlined, provided m_known_vals, m_known_contexts and + m_known_aggs in AVALS. Return false straight away if AVALS is NULL. 
*/ static bool estimate_edge_devirt_benefit (struct cgraph_edge *ie, int *size, int *time, - vec known_vals, - vec known_contexts, - vec known_aggs) + ipa_call_arg_values *avals) { tree target; struct cgraph_node *callee; @@ -3070,13 +3054,13 @@ estimate_edge_devirt_benefit (struct cgraph_edge *ie, enum availability avail; bool speculative; - if (!known_vals.length () && !known_contexts.length ()) + if (!avals + || (!avals->m_known_vals.length() && !avals->m_known_contexts.length ())) return false; if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining)) return false; - target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts, - known_aggs, &speculative); + target = ipa_get_indirect_edge_target (ie, avals, &speculative); if (!target || speculative) return false; @@ -3100,17 +3084,13 @@ estimate_edge_devirt_benefit (struct cgraph_edge *ie, } /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to - handle edge E with probability PROB. - Set HINTS if edge may be devirtualized. - KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call - site. */ + handle edge E with probability PROB. Set HINTS accordingly if edge may be + devirtualized. AVALS, if non-NULL, describes the context of the call site + as far as values of parameters are concerened. 
*/ static inline void estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size, - sreal *time, - vec known_vals, - vec known_contexts, - vec known_aggs, + sreal *time, ipa_call_arg_values *avals, ipa_hints *hints) { class ipa_call_summary *es = ipa_call_summaries->get (e); @@ -3119,8 +3099,7 @@ estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size, int cur_size; if (!e->callee && hints && e->maybe_hot_p () - && estimate_edge_devirt_benefit (e, &call_size, &call_time, - known_vals, known_contexts, known_aggs)) + && estimate_edge_devirt_benefit (e, &call_size, &call_time, avals)) *hints |= INLINE_HINT_indirect_call; cur_size = call_size * ipa_fn_summary::size_scale; *size += cur_size; @@ -3132,9 +3111,9 @@ estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size, /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all - calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS - describe context of the call site. - + calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call + site. + Helper for estimate_calls_size_and_time which does the same but (in most cases) faster. 
*/ @@ -3143,9 +3122,7 @@ estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size, int *min_size, sreal *time, ipa_hints *hints, clause_t possible_truths, - vec known_vals, - vec known_contexts, - vec known_aggs) + ipa_call_arg_values *avals) { struct cgraph_edge *e; for (e = node->callees; e; e = e->next_callee) @@ -3154,10 +3131,8 @@ estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size, { gcc_checking_assert (!ipa_call_summaries->get (e)); estimate_calls_size_and_time_1 (e->callee, size, min_size, time, - hints, - possible_truths, - known_vals, known_contexts, - known_aggs); + hints, possible_truths, avals); + continue; } class ipa_call_summary *es = ipa_call_summaries->get (e); @@ -3175,9 +3150,7 @@ estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size, so we do not need to compute probabilities. */ estimate_edge_size_and_time (e, size, es->predicate ? NULL : min_size, - time, - known_vals, known_contexts, - known_aggs, hints); + time, avals, hints); } } for (e = node->indirect_calls; e; e = e->next_callee) @@ -3187,9 +3160,7 @@ estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size, || es->predicate->evaluate (possible_truths)) estimate_edge_size_and_time (e, size, es->predicate ? 
NULL : min_size, - time, - known_vals, known_contexts, known_aggs, - hints); + time, avals, hints); } } @@ -3211,8 +3182,7 @@ summarize_calls_size_and_time (struct cgraph_node *node, int size = 0; sreal time = 0; - estimate_edge_size_and_time (e, &size, NULL, &time, - vNULL, vNULL, vNULL, NULL); + estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL); struct predicate pred = true; class ipa_call_summary *es = ipa_call_summaries->get (e); @@ -3226,8 +3196,7 @@ summarize_calls_size_and_time (struct cgraph_node *node, int size = 0; sreal time = 0; - estimate_edge_size_and_time (e, &size, NULL, &time, - vNULL, vNULL, vNULL, NULL); + estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL); struct predicate pred = true; class ipa_call_summary *es = ipa_call_summaries->get (e); @@ -3238,17 +3207,15 @@ summarize_calls_size_and_time (struct cgraph_node *node, } /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all - calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS - describe context of the call site. */ + calls in NODE. POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe + context of the call site. */ static void estimate_calls_size_and_time (struct cgraph_node *node, int *size, int *min_size, sreal *time, ipa_hints *hints, clause_t possible_truths, - vec known_vals, - vec known_contexts, - vec known_aggs) + ipa_call_arg_values *avals) { class ipa_fn_summary *sum = ipa_fn_summaries->get (node); bool use_table = true; @@ -3267,9 +3234,10 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, use_table = false; /* If there is an indirect edge that may be optimized, we need to go the slow way. 
*/ - else if ((known_vals.length () - || known_contexts.length () - || known_aggs.length ()) && hints) + else if (avals && hints + && (avals->m_known_vals.length () + || avals->m_known_contexts.length () + || avals->m_known_aggs.length ())) { class ipa_node_params *params_summary = IPA_NODE_REF (node); unsigned int nargs = params_summary @@ -3278,13 +3246,13 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, for (unsigned int i = 0; i < nargs && use_table; i++) { if (ipa_is_param_used_by_indirect_call (params_summary, i) - && ((known_vals.length () > i && known_vals[i]) - || (known_aggs.length () > i - && known_aggs[i].items.length ()))) + && (avals->safe_sval_at (i) + || (avals->m_known_aggs.length () > i + && avals->m_known_aggs[i].items.length ()))) use_table = false; else if (ipa_is_param_used_by_polymorphic_call (params_summary, i) - && (known_contexts.length () > i - && !known_contexts[i].useless_p ())) + && (avals->m_known_contexts.length () > i + && !avals->m_known_contexts[i].useless_p ())) use_table = false; } } @@ -3327,8 +3295,7 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, < ipa_fn_summary::max_size_time_table_size) { estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL, - possible_truths, known_vals, - known_contexts, known_aggs); + possible_truths, avals); gcc_assert (*size == old_size); if (time && (*time - old_time > 1 || *time - old_time < -1) && dump_file) @@ -3340,31 +3307,22 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, /* Slow path by walking all edges. */ else estimate_calls_size_and_time_1 (node, size, min_size, time, hints, - possible_truths, known_vals, known_contexts, - known_aggs); + possible_truths, avals); } -/* Default constructor for ipa call context. - Memory allocation of known_vals, known_contexts - and known_aggs vectors is owned by the caller, but can - be release by ipa_call_context::release. - - inline_param_summary is owned by the caller. 
*/ -ipa_call_context::ipa_call_context (cgraph_node *node, - clause_t possible_truths, +/* Main constructor for ipa call context. Memory allocation of ARG_VALUES + is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the + caller. */ + +ipa_call_context::ipa_call_context (cgraph_node *node, clause_t possible_truths, clause_t nonspec_possible_truths, - vec known_vals, - vec - known_contexts, - vec known_aggs, vec - inline_param_summary) + inline_param_summary, + ipa_auto_call_arg_values *arg_values) : m_node (node), m_possible_truths (possible_truths), m_nonspec_possible_truths (nonspec_possible_truths), m_inline_param_summary (inline_param_summary), - m_known_vals (known_vals), - m_known_contexts (known_contexts), - m_known_aggs (known_aggs) + m_avals (arg_values) { } @@ -3395,47 +3353,50 @@ ipa_call_context::duplicate_from (const ipa_call_context &ctx) break; } } - m_known_vals = vNULL; - if (ctx.m_known_vals.exists ()) + m_avals.m_known_vals = vNULL; + if (ctx.m_avals.m_known_vals.exists ()) { - unsigned int n = MIN (ctx.m_known_vals.length (), nargs); + unsigned int n = MIN (ctx.m_avals.m_known_vals.length (), nargs); for (unsigned int i = 0; i < n; i++) if (ipa_is_param_used_by_indirect_call (params_summary, i) - && ctx.m_known_vals[i]) + && ctx.m_avals.m_known_vals[i]) { - m_known_vals = ctx.m_known_vals.copy (); + m_avals.m_known_vals = ctx.m_avals.m_known_vals.copy (); break; } } - m_known_contexts = vNULL; - if (ctx.m_known_contexts.exists ()) + m_avals.m_known_contexts = vNULL; + if (ctx.m_avals.m_known_contexts.exists ()) { - unsigned int n = MIN (ctx.m_known_contexts.length (), nargs); + unsigned int n = MIN (ctx.m_avals.m_known_contexts.length (), nargs); for (unsigned int i = 0; i < n; i++) if (ipa_is_param_used_by_polymorphic_call (params_summary, i) - && !ctx.m_known_contexts[i].useless_p ()) + && !ctx.m_avals.m_known_contexts[i].useless_p ()) { - m_known_contexts = ctx.m_known_contexts.copy (); + m_avals.m_known_contexts = 
ctx.m_avals.m_known_contexts.copy (); break; } } - m_known_aggs = vNULL; - if (ctx.m_known_aggs.exists ()) + m_avals.m_known_aggs = vNULL; + if (ctx.m_avals.m_known_aggs.exists ()) { - unsigned int n = MIN (ctx.m_known_aggs.length (), nargs); + unsigned int n = MIN (ctx.m_avals.m_known_aggs.length (), nargs); for (unsigned int i = 0; i < n; i++) if (ipa_is_param_used_by_indirect_call (params_summary, i) - && !ctx.m_known_aggs[i].is_empty ()) + && !ctx.m_avals.m_known_aggs[i].is_empty ()) { - m_known_aggs = ipa_copy_agg_values (ctx.m_known_aggs); + m_avals.m_known_aggs + = ipa_copy_agg_values (ctx.m_avals.m_known_aggs); break; } } + + m_avals.m_known_value_ranges = vNULL; } /* Release memory used by known_vals/contexts/aggs vectors. @@ -3449,11 +3410,11 @@ ipa_call_context::release (bool all) /* See if context is initialized at first place. */ if (!m_node) return; - ipa_release_agg_values (m_known_aggs, all); + ipa_release_agg_values (m_avals.m_known_aggs, all); if (all) { - m_known_vals.release (); - m_known_contexts.release (); + m_avals.m_known_vals.release (); + m_avals.m_known_contexts.release (); m_inline_param_summary.release (); } } @@ -3499,77 +3460,81 @@ ipa_call_context::equal_to (const ipa_call_context &ctx) return false; } } - if (m_known_vals.exists () || ctx.m_known_vals.exists ()) + if (m_avals.m_known_vals.exists () || ctx.m_avals.m_known_vals.exists ()) { for (unsigned int i = 0; i < nargs; i++) { if (!ipa_is_param_used_by_indirect_call (params_summary, i)) continue; - if (i >= m_known_vals.length () || !m_known_vals[i]) + if (i >= m_avals.m_known_vals.length () || !m_avals.m_known_vals[i]) { - if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i]) + if (i < ctx.m_avals.m_known_vals.length () + && ctx.m_avals.m_known_vals[i]) return false; continue; } - if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i]) + if (i >= ctx.m_avals.m_known_vals.length () + || !ctx.m_avals.m_known_vals[i]) { - if (i < m_known_vals.length () && 
m_known_vals[i]) + if (i < m_avals.m_known_vals.length () && m_avals.m_known_vals[i]) return false; continue; } - if (m_known_vals[i] != ctx.m_known_vals[i]) + if (m_avals.m_known_vals[i] != ctx.m_avals.m_known_vals[i]) return false; } } - if (m_known_contexts.exists () || ctx.m_known_contexts.exists ()) + if (m_avals.m_known_contexts.exists () + || ctx.m_avals.m_known_contexts.exists ()) { for (unsigned int i = 0; i < nargs; i++) { if (!ipa_is_param_used_by_polymorphic_call (params_summary, i)) continue; - if (i >= m_known_contexts.length () - || m_known_contexts[i].useless_p ()) + if (i >= m_avals.m_known_contexts.length () + || m_avals.m_known_contexts[i].useless_p ()) { - if (i < ctx.m_known_contexts.length () - && !ctx.m_known_contexts[i].useless_p ()) + if (i < ctx.m_avals.m_known_contexts.length () + && !ctx.m_avals.m_known_contexts[i].useless_p ()) return false; continue; } - if (i >= ctx.m_known_contexts.length () - || ctx.m_known_contexts[i].useless_p ()) + if (i >= ctx.m_avals.m_known_contexts.length () + || ctx.m_avals.m_known_contexts[i].useless_p ()) { - if (i < m_known_contexts.length () - && !m_known_contexts[i].useless_p ()) + if (i < m_avals.m_known_contexts.length () + && !m_avals.m_known_contexts[i].useless_p ()) return false; continue; } - if (!m_known_contexts[i].equal_to - (ctx.m_known_contexts[i])) + if (!m_avals.m_known_contexts[i].equal_to + (ctx.m_avals.m_known_contexts[i])) return false; } } - if (m_known_aggs.exists () || ctx.m_known_aggs.exists ()) + if (m_avals.m_known_aggs.exists () || ctx.m_avals.m_known_aggs.exists ()) { for (unsigned int i = 0; i < nargs; i++) { if (!ipa_is_param_used_by_indirect_call (params_summary, i)) continue; - if (i >= m_known_aggs.length () || m_known_aggs[i].is_empty ()) + if (i >= m_avals.m_known_aggs.length () + || m_avals.m_known_aggs[i].is_empty ()) { - if (i < ctx.m_known_aggs.length () - && !ctx.m_known_aggs[i].is_empty ()) + if (i < ctx.m_avals.m_known_aggs.length () + && 
!ctx.m_avals.m_known_aggs[i].is_empty ()) return false; continue; } - if (i >= ctx.m_known_aggs.length () - || ctx.m_known_aggs[i].is_empty ()) + if (i >= ctx.m_avals.m_known_aggs.length () + || ctx.m_avals.m_known_aggs[i].is_empty ()) { - if (i < m_known_aggs.length () - && !m_known_aggs[i].is_empty ()) + if (i < m_avals.m_known_aggs.length () + && !m_avals.m_known_aggs[i].is_empty ()) return false; continue; } - if (!m_known_aggs[i].equal_to (ctx.m_known_aggs[i])) + if (!m_avals.m_known_aggs[i].equal_to (ctx.m_avals.m_known_aggs[i])) return false; } } @@ -3619,7 +3584,7 @@ ipa_call_context::estimate_size_and_time (int *ret_size, estimate_calls_size_and_time (m_node, &size, &min_size, ret_time ? &time : NULL, ret_hints ? &hints : NULL, m_possible_truths, - m_known_vals, m_known_contexts, m_known_aggs); + &m_avals); sreal nonspecialized_time = time; @@ -3726,22 +3691,16 @@ ipa_call_context::estimate_size_and_time (int *ret_size, void estimate_ipcp_clone_size_and_time (struct cgraph_node *node, - vec known_vals, - vec - known_contexts, - vec known_aggs, + ipa_auto_call_arg_values *avals, int *ret_size, sreal *ret_time, sreal *ret_nonspec_time, ipa_hints *hints) { clause_t clause, nonspec_clause; - /* TODO: Also pass known value ranges. 
*/ - evaluate_conditions_for_known_args (node, false, known_vals, vNULL, - known_aggs, &clause, &nonspec_clause); - ipa_call_context ctx (node, clause, nonspec_clause, - known_vals, known_contexts, - known_aggs, vNULL); + evaluate_conditions_for_known_args (node, false, avals, &clause, + &nonspec_clause); + ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals); ctx.estimate_size_and_time (ret_size, NULL, ret_time, ret_nonspec_time, hints); } @@ -3970,10 +3929,8 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) if (callee_info->conds) { - auto_vec known_vals; - auto_vec known_aggs; - evaluate_properties_for_edge (edge, true, &clause, NULL, - &known_vals, NULL, &known_aggs); + ipa_auto_call_arg_values avals; + evaluate_properties_for_edge (edge, true, &clause, NULL, &avals, false); } if (ipa_node_params_sum && callee_info->conds) { @@ -4067,8 +4024,7 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) int edge_size = 0; sreal edge_time = 0; - estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, vNULL, - vNULL, vNULL, 0); + estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, NULL, 0); /* Unaccount size and time of the optimized out call. */ info->account_size_time (-edge_size, -edge_time, es->predicate ? 
*es->predicate : true, @@ -4110,7 +4066,7 @@ ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset) estimate_calls_size_and_time (node, &size_info->size, &info->min_size, &info->time, NULL, ~(clause_t) (1 << predicate::false_condition), - vNULL, vNULL, vNULL); + NULL); size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale); info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale); } diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h index 4e1f841..6893858 100644 --- a/gcc/ipa-fnsummary.h +++ b/gcc/ipa-fnsummary.h @@ -297,10 +297,8 @@ public: ipa_call_context (cgraph_node *node, clause_t possible_truths, clause_t nonspec_possible_truths, - vec known_vals, - vec known_contexts, - vec known_aggs, - vec m_inline_param_summary); + vec inline_param_summary, + ipa_auto_call_arg_values *arg_values); ipa_call_context () : m_node(NULL) { @@ -328,14 +326,9 @@ private: /* Inline summary maintains info about change probabilities. */ vec m_inline_param_summary; - /* The following is used only to resolve indirect calls. */ - - /* Vector describing known values of parameters. */ - vec m_known_vals; - /* Vector describing known polymorphic call contexts. */ - vec m_known_contexts; - /* Vector describing known aggregate values. */ - vec m_known_aggs; + /* Even after having calculated clauses, the information about argument + values is used to resolve indirect calls. 
*/ + ipa_call_arg_values m_avals; }; extern fast_call_summary *ipa_call_summaries; @@ -349,9 +342,7 @@ void ipa_free_fn_summary (void); void ipa_free_size_summary (void); void inline_analyze_function (struct cgraph_node *node); void estimate_ipcp_clone_size_and_time (struct cgraph_node *, - vec, - vec, - vec, + ipa_auto_call_arg_values *, int *, sreal *, sreal *, ipa_hints *); void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge); @@ -365,10 +356,8 @@ void evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, clause_t *clause_ptr, clause_t *nonspec_clause_ptr, - vec *known_vals_ptr, - vec - *known_contexts_ptr, - vec *); + ipa_auto_call_arg_values *avals, + bool compute_contexts); void ipa_fnsummary_c_finalize (void); HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node); diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c index 148efbc..d2ae819 100644 --- a/gcc/ipa-inline-analysis.c +++ b/gcc/ipa-inline-analysis.c @@ -184,20 +184,16 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) ipa_hints hints; struct cgraph_node *callee; clause_t clause, nonspec_clause; - auto_vec known_vals; - auto_vec known_contexts; - auto_vec known_aggs; + ipa_auto_call_arg_values avals; class ipa_call_summary *es = ipa_call_summaries->get (edge); int min_size = -1; callee = edge->callee->ultimate_alias_target (); gcc_checking_assert (edge->inline_failed); - evaluate_properties_for_edge (edge, true, - &clause, &nonspec_clause, &known_vals, - &known_contexts, &known_aggs); - ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, - known_contexts, known_aggs, es->param); + evaluate_properties_for_edge (edge, true, &clause, &nonspec_clause, + &avals, true); + ipa_call_context ctx (callee, clause, nonspec_clause, es->param, &avals); if (node_context_cache != NULL) { node_context_summary *e = node_context_cache->get_create (callee); @@ -255,7 +251,6 @@ do_estimate_edge_time (struct cgraph_edge *edge, 
sreal *ret_nonspec_time) : edge->caller->count.ipa ()))) hints |= INLINE_HINT_known_hot; - ctx.release (); gcc_checking_assert (size >= 0); gcc_checking_assert (time >= 0); @@ -307,9 +302,6 @@ do_estimate_edge_size (struct cgraph_edge *edge) int size; struct cgraph_node *callee; clause_t clause, nonspec_clause; - auto_vec known_vals; - auto_vec known_contexts; - auto_vec known_aggs; /* When we do caching, use do_estimate_edge_time to populate the entry. */ @@ -325,14 +317,11 @@ do_estimate_edge_size (struct cgraph_edge *edge) /* Early inliner runs without caching, go ahead and do the dirty work. */ gcc_checking_assert (edge->inline_failed); - evaluate_properties_for_edge (edge, true, - &clause, &nonspec_clause, - &known_vals, &known_contexts, - &known_aggs); - ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, - known_contexts, known_aggs, vNULL); + ipa_auto_call_arg_values avals; + evaluate_properties_for_edge (edge, true, &clause, &nonspec_clause, + &avals, true); + ipa_call_context ctx (callee, clause, nonspec_clause, vNULL, &avals); ctx.estimate_size_and_time (&size, NULL, NULL, NULL, NULL); - ctx.release (); return size; } @@ -346,9 +335,6 @@ do_estimate_edge_hints (struct cgraph_edge *edge) ipa_hints hints; struct cgraph_node *callee; clause_t clause, nonspec_clause; - auto_vec known_vals; - auto_vec known_contexts; - auto_vec known_aggs; /* When we do caching, use do_estimate_edge_time to populate the entry. */ @@ -364,14 +350,11 @@ do_estimate_edge_hints (struct cgraph_edge *edge) /* Early inliner runs without caching, go ahead and do the dirty work. 
*/ gcc_checking_assert (edge->inline_failed); - evaluate_properties_for_edge (edge, true, - &clause, &nonspec_clause, - &known_vals, &known_contexts, - &known_aggs); - ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, - known_contexts, known_aggs, vNULL); + ipa_auto_call_arg_values avals; + evaluate_properties_for_edge (edge, true, &clause, &nonspec_clause, + &avals, true); + ipa_call_context ctx (callee, clause, nonspec_clause, vNULL, &avals); ctx.estimate_size_and_time (NULL, NULL, NULL, NULL, &hints); - ctx.release (); hints |= simple_edge_hints (edge); return hints; } diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index ea88fd3..dec6c73 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -5797,4 +5797,14 @@ ipa_agg_value::equal_to (const ipa_agg_value &other) return offset == other.offset && operand_equal_p (value, other.value, 0); } + +/* Destructor also removing individual aggregate values. */ + +ipa_auto_call_arg_values::~ipa_auto_call_arg_values () +{ + ipa_release_agg_values (m_known_aggs, false); +} + + + #include "gt-ipa-prop.h" diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h index 23fcf90..8b2edf6 100644 --- a/gcc/ipa-prop.h +++ b/gcc/ipa-prop.h @@ -433,6 +433,107 @@ ipa_get_jf_ancestor_type_preserved (struct ipa_jump_func *jfunc) return jfunc->value.ancestor.agg_preserved; } +/* Class for allocating a bundle of various potentially known properties about + actual arguments of a particular call on stack for the usual case and on + heap only if there are unusually many arguments. The data is deallocated + when the instance of this class goes out of scope or is otherwise + destructed. */ + +class ipa_auto_call_arg_values +{ +public: + ~ipa_auto_call_arg_values (); + + /* If m_known_vals (vector of known "scalar" values) is sufficiantly long, + return its element at INDEX, otherwise return NULL. */ + tree safe_sval_at (int index) + { + /* TODO: Assert non-negative index here and test. 
*/ + if ((unsigned) index < m_known_vals.length ()) + return m_known_vals[index]; + return NULL; + } + + /* If m_known_aggs is sufficiantly long, return the pointer rto its element + at INDEX, otherwise return NULL. */ + ipa_agg_value_set *safe_aggval_at (int index) + { + /* TODO: Assert non-negative index here and test. */ + if ((unsigned) index < m_known_aggs.length ()) + return &m_known_aggs[index]; + return NULL; + } + + /* Vector describing known values of parameters. */ + auto_vec m_known_vals; + + /* Vector describing known polymorphic call contexts. */ + auto_vec m_known_contexts; + + /* Vector describing known aggregate values. */ + auto_vec m_known_aggs; + + /* Vector describing known value ranges of arguments. */ + auto_vec m_known_value_ranges; +}; + +/* Class bundling the various potentially known properties about actual + arguments of a particular call. This variant does not deallocate the + bundled data in any way. */ + +class ipa_call_arg_values +{ +public: + /* Default constructor, setting the vectors to empty ones. */ + ipa_call_arg_values () + {} + + /* Construct this general variant of the bundle from the variant which uses + auto_vecs to hold the vectors. This means that vectors of objects + constructed with this constructor should not be changed because if they + get reallocated, the member vectors and the underlying auto_vecs would get + out of sync. */ + ipa_call_arg_values (ipa_auto_call_arg_values *aavals) + : m_known_vals (aavals->m_known_vals), + m_known_contexts (aavals->m_known_contexts), + m_known_aggs (aavals->m_known_aggs), + m_known_value_ranges (aavals->m_known_value_ranges) + {} + + /* If m_known_vals (vector of known "scalar" values) is sufficiantly long, + return its element at INDEX, otherwise return NULL. */ + tree safe_sval_at (int index) + { + /* TODO: Assert non-negative index here and test. 
*/ + if ((unsigned) index < m_known_vals.length ()) + return m_known_vals[index]; + return NULL; + } + + /* If m_known_aggs is sufficiantly long, return the pointer rto its element + at INDEX, otherwise return NULL. */ + ipa_agg_value_set *safe_aggval_at (int index) + { + /* TODO: Assert non-negative index here and test. */ + if ((unsigned) index < m_known_aggs.length ()) + return &m_known_aggs[index]; + return NULL; + } + + /* Vector describing known values of parameters. */ + vec m_known_vals = vNULL; + + /* Vector describing known polymorphic call contexts. */ + vec m_known_contexts = vNULL; + + /* Vector describing known aggregate values. */ + vec m_known_aggs = vNULL; + + /* Vector describing known value ranges of arguments. */ + vec m_known_value_ranges = vNULL; +}; + + /* Summary describing a single formal parameter. */ struct GTY(()) ipa_param_descriptor @@ -970,12 +1071,13 @@ void ipa_initialize_node_params (struct cgraph_node *node); bool ipa_propagate_indirect_call_infos (struct cgraph_edge *cs, vec *new_edges); -/* Indirect edge and binfo processing. */ +/* Indirect edge processing and target discovery. */ +tree ipa_get_indirect_edge_target (struct cgraph_edge *ie, + ipa_call_arg_values *avals, + bool *speculative); tree ipa_get_indirect_edge_target (struct cgraph_edge *ie, - vec, - vec, - vec, - bool *); + ipa_auto_call_arg_values *avals, + bool *speculative); struct cgraph_edge *ipa_make_edge_direct_to_target (struct cgraph_edge *, tree, bool speculative = false); tree ipa_impossible_devirt_target (struct cgraph_edge *, tree); -- cgit v1.1 From 7d2cb2755a14b95b1d71c3e52895d75954b4ec47 Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Fri, 2 Oct 2020 18:41:34 +0200 Subject: ipa: Introduce ipa_cached_call_context Hi, as we discussed with Honza on the mailing list last week, making cached call context structure distinct from the normal one may make it clearer that the cached data need to be explicitly deallocated. This patch does that division. 
It is not mandatory for the overall main goals of the patch set and can be dropped if deemed superfluous. gcc/ChangeLog: 2020-09-02 Martin Jambor * ipa-fnsummary.h (ipa_cached_call_context): New forward declaration and class. (class ipa_call_context): Make friend ipa_cached_call_context. Moved methods duplicate_from and release to it too. * ipa-fnsummary.c (ipa_call_context::duplicate_from): Moved to class ipa_cached_call_context. (ipa_call_context::release): Likewise, removed the parameter. * ipa-inline-analysis.c (node_context_cache_entry): Change the type of ctx to ipa_cached_call_context. (do_estimate_edge_time): Remove parameter from the call to ipa_cached_call_context::release. --- gcc/ipa-fnsummary.c | 21 ++++++++------------- gcc/ipa-fnsummary.h | 16 ++++++++++++++-- gcc/ipa-inline-analysis.c | 4 ++-- 3 files changed, 24 insertions(+), 17 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index b525cfd..fe23696 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -3329,7 +3329,7 @@ ipa_call_context::ipa_call_context (cgraph_node *node, clause_t possible_truths, /* Set THIS to be a duplicate of CTX. Copy all relevant info. */ void -ipa_call_context::duplicate_from (const ipa_call_context &ctx) +ipa_cached_call_context::duplicate_from (const ipa_call_context &ctx) { m_node = ctx.m_node; m_possible_truths = ctx.m_possible_truths; @@ -3399,24 +3399,19 @@ ipa_call_context::duplicate_from (const ipa_call_context &ctx) m_avals.m_known_value_ranges = vNULL; } -/* Release memory used by known_vals/contexts/aggs vectors. - If ALL is true release also inline_param_summary. - This happens when context was previously duplicated to be stored - into cache. */ +/* Release memory used by known_vals/contexts/aggs vectors. and + inline_param_summary. */ void -ipa_call_context::release (bool all) +ipa_cached_call_context::release () { /* See if context is initialized at first place. 
*/ if (!m_node) return; - ipa_release_agg_values (m_avals.m_known_aggs, all); - if (all) - { - m_avals.m_known_vals.release (); - m_avals.m_known_contexts.release (); - m_inline_param_summary.release (); - } + ipa_release_agg_values (m_avals.m_known_aggs, true); + m_avals.m_known_vals.release (); + m_avals.m_known_contexts.release (); + m_inline_param_summary.release (); } /* Return true if CTX describes the same call context as THIS. */ diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h index 6893858..020a6f0 100644 --- a/gcc/ipa-fnsummary.h +++ b/gcc/ipa-fnsummary.h @@ -287,6 +287,8 @@ public: ipa_call_summary *dst_data); }; +class ipa_cached_call_context; + /* This object describe a context of call. That is a summary of known information about its parameters. Main purpose of this context is to give more realistic estimations of function runtime, size and @@ -307,8 +309,6 @@ public: sreal *ret_time, sreal *ret_nonspecialized_time, ipa_hints *ret_hints); - void duplicate_from (const ipa_call_context &ctx); - void release (bool all = false); bool equal_to (const ipa_call_context &); bool exists_p () { @@ -329,6 +329,18 @@ private: /* Even after having calculated clauses, the information about argument values is used to resolve indirect calls. */ ipa_call_arg_values m_avals; + + friend ipa_cached_call_context; +}; + +/* Variant of ipa_call_context that is stored in a cache over a longer period + of time. 
*/ + +class ipa_cached_call_context : public ipa_call_context +{ +public: + void duplicate_from (const ipa_call_context &ctx); + void release (); }; extern fast_call_summary *ipa_call_summaries; diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c index d2ae819..b7af77f 100644 --- a/gcc/ipa-inline-analysis.c +++ b/gcc/ipa-inline-analysis.c @@ -57,7 +57,7 @@ fast_call_summary *edge_growth_cache = NULL; class node_context_cache_entry { public: - ipa_call_context ctx; + ipa_cached_call_context ctx; sreal time, nonspec_time; int size; ipa_hints hints; @@ -226,7 +226,7 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) node_context_cache_miss++; else node_context_cache_clear++; - e->entry.ctx.release (true); + e->entry.ctx.release (); ctx.estimate_size_and_time (&size, &min_size, &time, &nonspec_time, &hints); e->entry.size = size; -- cgit v1.1 From 1e7fdc02cba43e646fb2389e3c79e7c4e5ff772e Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Fri, 2 Oct 2020 18:41:34 +0200 Subject: ipa: Bundle estimates of ipa_call_context::estimate_size_and_time A subsequent patch adds another two estimates that the code in ipa_call_context::estimate_size_and_time computes, and the fact that the function has a special output parameter for each thing it computes would make it have just too many. Therefore, this patch collapses all those output parameters into one output structure. gcc/ChangeLog: 2020-09-02 Martin Jambor * ipa-inline-analysis.c (do_estimate_edge_time): Adjusted to use ipa_call_estimates. (do_estimate_edge_size): Likewise. (do_estimate_edge_hints): Likewise. * ipa-fnsummary.h (struct ipa_call_estimates): New type. (ipa_call_context::estimate_size_and_time): Adjusted declaration. (estimate_ipcp_clone_size_and_time): Likewise. * ipa-cp.c (hint_time_bonus): Changed the type of the second argument to ipa_call_estimates. (perform_estimation_of_a_value): Adjusted to use ipa_call_estimates. (estimate_local_effects): Likewise. 
* ipa-fnsummary.c (ipa_call_context::estimate_size_and_time): Adjusted to return estimates in a single ipa_call_estimates parameter. (estimate_ipcp_clone_size_and_time): Likewise. --- gcc/ipa-cp.c | 45 +++++++++++++++++------------------ gcc/ipa-fnsummary.c | 60 +++++++++++++++++++++++------------------------ gcc/ipa-fnsummary.h | 36 +++++++++++++++++++++------- gcc/ipa-inline-analysis.c | 47 +++++++++++++++++++++---------------- 4 files changed, 105 insertions(+), 83 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 292dd7e..77c84a6 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -3196,12 +3196,13 @@ devirtualization_time_bonus (struct cgraph_node *node, return res; } -/* Return time bonus incurred because of HINTS. */ +/* Return time bonus incurred because of hints stored in ESTIMATES. */ static int -hint_time_bonus (cgraph_node *node, ipa_hints hints) +hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates) { int result = 0; + ipa_hints hints = estimates.hints; if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride)) result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus); return result; @@ -3397,15 +3398,13 @@ perform_estimation_of_a_value (cgraph_node *node, int removable_params_cost, int est_move_cost, ipcp_value_base *val) { - int size, time_benefit; - sreal time, base_time; - ipa_hints hints; + int time_benefit; + ipa_call_estimates estimates; - estimate_ipcp_clone_size_and_time (node, avals, &size, &time, - &base_time, &hints); - base_time -= time; - if (base_time > 65535) - base_time = 65535; + estimate_ipcp_clone_size_and_time (node, avals, &estimates); + sreal time_delta = estimates.nonspecialized_time - estimates.time; + if (time_delta > 65535) + time_delta = 65535; /* Extern inline functions have no cloning local time benefits because they will be inlined anyway. 
The only reason to clone them is if it enables @@ -3413,11 +3412,12 @@ perform_estimation_of_a_value (cgraph_node *node, if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl)) time_benefit = 0; else - time_benefit = base_time.to_int () + time_benefit = time_delta.to_int () + devirtualization_time_bonus (node, avals) - + hint_time_bonus (node, hints) + + hint_time_bonus (node, estimates) + removable_params_cost + est_move_cost; + int size = estimates.size; gcc_checking_assert (size >=0); /* The inliner-heuristics based estimates may think that in certain contexts some functions do not have any size at all but we want @@ -3472,23 +3472,21 @@ estimate_local_effects (struct cgraph_node *node) || (removable_params_cost && node->can_change_signature)) { struct caller_statistics stats; - ipa_hints hints; - sreal time, base_time; - int size; + ipa_call_estimates estimates; init_caller_stats (&stats); node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false); - estimate_ipcp_clone_size_and_time (node, &avals, &size, &time, - &base_time, &hints); - time -= devirt_bonus; - time -= hint_time_bonus (node, hints); - time -= removable_params_cost; - size -= stats.n_calls * removable_params_cost; + estimate_ipcp_clone_size_and_time (node, &avals, &estimates); + sreal time = estimates.nonspecialized_time - estimates.time; + time += devirt_bonus; + time += hint_time_bonus (node, estimates); + time += removable_params_cost; + int size = estimates.size - stats.n_calls * removable_params_cost; if (dump_file) fprintf (dump_file, " - context independent values, size: %i, " - "time_benefit: %f\n", size, (base_time - time).to_double ()); + "time_benefit: %f\n", size, (time).to_double ()); if (size <= 0 || node->local) { @@ -3499,8 +3497,7 @@ estimate_local_effects (struct cgraph_node *node) "known contexts, code not going to grow.\n"); } else if (good_cloning_opportunity_p (node, - MIN ((base_time - time).to_int (), - 65536), + MIN ((time).to_int (), 
65536), stats.freq_sum, stats.count_sum, size)) { diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index fe23696..f89f158 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -3536,18 +3536,14 @@ ipa_call_context::equal_to (const ipa_call_context &ctx) return true; } -/* Estimate size and time needed to execute call in the given context. - Additionally determine hints determined by the context. Finally compute - minimal size needed for the call that is independent on the call context and - can be used for fast estimates. Return the values in RET_SIZE, - RET_MIN_SIZE, RET_TIME and RET_HINTS. */ +/* Fill in the selected fields in ESTIMATES with value estimated for call in + this context. Always compute size and min_size. Only compute time and + nonspecialized_time if EST_TIMES is true. Only compute hints if EST_HINTS + is true. */ void -ipa_call_context::estimate_size_and_time (int *ret_size, - int *ret_min_size, - sreal *ret_time, - sreal *ret_nonspecialized_time, - ipa_hints *ret_hints) +ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates, + bool est_times, bool est_hints) { class ipa_fn_summary *info = ipa_fn_summaries->get (m_node); size_time_entry *e; @@ -3577,8 +3573,8 @@ ipa_call_context::estimate_size_and_time (int *ret_size, if (m_node->callees || m_node->indirect_calls) estimate_calls_size_and_time (m_node, &size, &min_size, - ret_time ? &time : NULL, - ret_hints ? &hints : NULL, m_possible_truths, + est_times ? &time : NULL, + est_hints ? &hints : NULL, m_possible_truths, &m_avals); sreal nonspecialized_time = time; @@ -3605,7 +3601,7 @@ ipa_call_context::estimate_size_and_time (int *ret_size, known to be constant in a specialized setting. 
*/ if (nonconst) size += e->size; - if (!ret_time) + if (!est_times) continue; nonspecialized_time += e->time; if (!nonconst) @@ -3645,7 +3641,7 @@ ipa_call_context::estimate_size_and_time (int *ret_size, if (time > nonspecialized_time) time = nonspecialized_time; - if (ret_hints) + if (est_hints) { if (info->loop_iterations && !info->loop_iterations->evaluate (m_possible_truths)) @@ -3663,18 +3659,23 @@ ipa_call_context::estimate_size_and_time (int *ret_size, min_size = RDIV (min_size, ipa_fn_summary::size_scale); if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size, - time.to_double (), nonspecialized_time.to_double ()); - if (ret_time) - *ret_time = time; - if (ret_nonspecialized_time) - *ret_nonspecialized_time = nonspecialized_time; - if (ret_size) - *ret_size = size; - if (ret_min_size) - *ret_min_size = min_size; - if (ret_hints) - *ret_hints = hints; + { + if (est_times) + fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", + (int) size, time.to_double (), + nonspecialized_time.to_double ()); + else + fprintf (dump_file, "\n size:%i (time not estimated)\n", (int) size); + } + if (est_times) + { + estimates->time = time; + estimates->nonspecialized_time = nonspecialized_time; + } + estimates->size = size; + estimates->min_size = min_size; + if (est_hints) + estimates->hints = hints; return; } @@ -3687,17 +3688,14 @@ ipa_call_context::estimate_size_and_time (int *ret_size, void estimate_ipcp_clone_size_and_time (struct cgraph_node *node, ipa_auto_call_arg_values *avals, - int *ret_size, sreal *ret_time, - sreal *ret_nonspec_time, - ipa_hints *hints) + ipa_call_estimates *estimates) { clause_t clause, nonspec_clause; evaluate_conditions_for_known_args (node, false, avals, &clause, &nonspec_clause); ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals); - ctx.estimate_size_and_time (ret_size, NULL, ret_time, - ret_nonspec_time, hints); + ctx.estimate_size_and_time (estimates); 
} /* Return stack frame offset where frame of NODE is supposed to start inside diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h index 020a6f0..ccb6b43 100644 --- a/gcc/ipa-fnsummary.h +++ b/gcc/ipa-fnsummary.h @@ -287,6 +287,29 @@ public: ipa_call_summary *dst_data); }; +/* Estimated execution times, code sizes and other information about the + code executing a call described by ipa_call_context. */ + +struct ipa_call_estimates +{ + /* Estimated size needed to execute call in the given context. */ + int size; + + /* Minimal size needed for the call that is + independent on the call context + and can be used for fast estimates. */ + int min_size; + + /* Estimated time needed to execute call in the given context. */ + sreal time; + + /* Estimated time needed to execute the function when not ignoring + computations known to be constant in this context. */ + sreal nonspecialized_time; + + /* Further discovered reasons why to inline or specialize the give calls. */ + ipa_hints hints; +}; + class ipa_cached_call_context; /* This object describe a context of call. 
That is a summary of known @@ -305,10 +328,8 @@ public: : m_node(NULL) { } - void estimate_size_and_time (int *ret_size, int *ret_min_size, - sreal *ret_time, - sreal *ret_nonspecialized_time, - ipa_hints *ret_hints); + void estimate_size_and_time (ipa_call_estimates *estimates, + bool est_times = true, bool est_hints = true); bool equal_to (const ipa_call_context &); bool exists_p () { @@ -353,10 +374,9 @@ void ipa_dump_hints (FILE *f, ipa_hints); void ipa_free_fn_summary (void); void ipa_free_size_summary (void); void inline_analyze_function (struct cgraph_node *node); -void estimate_ipcp_clone_size_and_time (struct cgraph_node *, - ipa_auto_call_arg_values *, - int *, sreal *, sreal *, - ipa_hints *); +void estimate_ipcp_clone_size_and_time (struct cgraph_node *node, + ipa_auto_call_arg_values *avals, + ipa_call_estimates *estimates); void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge); void ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset = true); void compute_fn_summary (struct cgraph_node *, bool); diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c index b7af77f..acbf82e 100644 --- a/gcc/ipa-inline-analysis.c +++ b/gcc/ipa-inline-analysis.c @@ -208,16 +208,12 @@ do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) && !opt_for_fn (callee->decl, flag_profile_partial_training) && !callee->count.ipa_p ()) { - sreal chk_time, chk_nonspec_time; - int chk_size, chk_min_size; - - ipa_hints chk_hints; - ctx.estimate_size_and_time (&chk_size, &chk_min_size, - &chk_time, &chk_nonspec_time, - &chk_hints); - gcc_assert (chk_size == size && chk_time == time - && chk_nonspec_time == nonspec_time - && chk_hints == hints); + ipa_call_estimates chk_estimates; + ctx.estimate_size_and_time (&chk_estimates); + gcc_assert (chk_estimates.size == size + && chk_estimates.time == time + && chk_estimates.nonspecialized_time == nonspec_time + && chk_estimates.hints == hints); } } else @@ -227,18 +223,28 @@ 
do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time) else node_context_cache_clear++; e->entry.ctx.release (); - ctx.estimate_size_and_time (&size, &min_size, - &time, &nonspec_time, &hints); + ipa_call_estimates estimates; + ctx.estimate_size_and_time (&estimates); + size = estimates.size; e->entry.size = size; + time = estimates.time; e->entry.time = time; + nonspec_time = estimates.nonspecialized_time; e->entry.nonspec_time = nonspec_time; + hints = estimates.hints; e->entry.hints = hints; e->entry.ctx.duplicate_from (ctx); } } else - ctx.estimate_size_and_time (&size, &min_size, - &time, &nonspec_time, &hints); + { + ipa_call_estimates estimates; + ctx.estimate_size_and_time (&estimates); + size = estimates.size; + time = estimates.time; + nonspec_time = estimates.nonspecialized_time; + hints = estimates.hints; + } /* When we have profile feedback, we can quite safely identify hot edges and for those we disable size limits. Don't do that when @@ -321,8 +327,9 @@ do_estimate_edge_size (struct cgraph_edge *edge) evaluate_properties_for_edge (edge, true, &clause, &nonspec_clause, &avals, true); ipa_call_context ctx (callee, clause, nonspec_clause, vNULL, &avals); - ctx.estimate_size_and_time (&size, NULL, NULL, NULL, NULL); - return size; + ipa_call_estimates estimates; + ctx.estimate_size_and_time (&estimates, false, false); + return estimates.size; } @@ -332,7 +339,6 @@ do_estimate_edge_size (struct cgraph_edge *edge) ipa_hints do_estimate_edge_hints (struct cgraph_edge *edge) { - ipa_hints hints; struct cgraph_node *callee; clause_t clause, nonspec_clause; @@ -341,7 +347,7 @@ do_estimate_edge_hints (struct cgraph_edge *edge) if (edge_growth_cache != NULL) { do_estimate_edge_time (edge); - hints = edge_growth_cache->get (edge)->hints; + ipa_hints hints = edge_growth_cache->get (edge)->hints; gcc_checking_assert (hints); return hints - 1; } @@ -354,8 +360,9 @@ do_estimate_edge_hints (struct cgraph_edge *edge) evaluate_properties_for_edge 
(edge, true, &clause, &nonspec_clause, &avals, true); ipa_call_context ctx (callee, clause, nonspec_clause, vNULL, &avals); - ctx.estimate_size_and_time (NULL, NULL, NULL, NULL, &hints); - hints |= simple_edge_hints (edge); + ipa_call_estimates estimates; + ctx.estimate_size_and_time (&estimates, false, true); + ipa_hints hints = estimates.hints | simple_edge_hints (edge); return hints; } -- cgit v1.1 From 67ce9099bc953f22e6c9e9ee4c64329fdb08286a Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Fri, 2 Oct 2020 18:41:35 +0200 Subject: ipa: Multiple predicates for loop properties, with frequencies This patch enhances the ability of IPA to reason under what conditions loops in a function have known iteration counts or strides because it replaces single predicates which currently hold conjunction of predicates for all loops with vectors capable of holding multiple predicates, each with a cumulative frequency of loops with the property. This second property is then used by IPA-CP to much more aggressively boost its heuristic score for cloning opportunities which make iteration counts or strides of frequent loops compile time constant. gcc/ChangeLog: 2020-09-03 Martin Jambor * ipa-fnsummary.h (ipa_freqcounting_predicate): New type. (ipa_fn_summary): Change the type of loop_iterations and loop_strides to vectors of ipa_freqcounting_predicate. (ipa_fn_summary::ipa_fn_summary): Construct the new vectors. (ipa_call_estimates): New fields loops_with_known_iterations and loops_with_known_strides. * ipa-cp.c (hint_time_bonus): Multiply param_ipa_cp_loop_hint_bonus with the expected frequencies of loops with known iteration count or stride. * ipa-fnsummary.c (add_freqcounting_predicate): New function. (ipa_fn_summary::~ipa_fn_summary): Release the new vectors instead of just two predicates. (remap_hint_predicate_after_duplication): Replace with function remap_freqcounting_preds_after_dup. (ipa_fn_summary_t::duplicate): Use it or duplicate new vectors. 
(ipa_dump_fn_summary): Dump the new vectors. (analyze_function_body): Compute the loop property vectors. (ipa_call_context::estimate_size_and_time): Calculate also loops_with_known_iterations and loops_with_known_strides. Adjusted dumping accordingly. (remap_hint_predicate): Replace with function remap_freqcounting_predicate. (ipa_merge_fn_summary_after_inlining): Use it. (inline_read_section): Stream loopcounting vectors instead of two simple predicates. (ipa_fn_summary_write): Likewise. * params.opt (ipa-max-loop-predicates): New parameter. * doc/invoke.texi (ipa-max-loop-predicates): Document new param. gcc/testsuite/ChangeLog: 2020-09-03 Martin Jambor * gcc.dg/ipa/ipcp-loophint-1.c: New test. --- gcc/doc/invoke.texi | 4 + gcc/ipa-cp.c | 9 + gcc/ipa-fnsummary.c | 318 +++++++++++++++++++---------- gcc/ipa-fnsummary.h | 38 +++- gcc/params.opt | 4 + gcc/testsuite/gcc.dg/ipa/ipcp-loophint-1.c | 29 +++ 6 files changed, 288 insertions(+), 114 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/ipa/ipcp-loophint-1.c (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a5ecb1b..f623467 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -13453,6 +13453,10 @@ of iterations of a loop known, it adds a bonus of @option{ipa-cp-loop-hint-bonus} to the profitability score of the candidate. +@item ipa-max-loop-predicates +The maximum number of different predicates IPA will use to describe when +loops in a function have known properties. + @item ipa-max-aa-steps During its analysis of function bodies, IPA-CP employs alias analysis in order to track values pointed to by function parameters.
In order diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 77c84a6..f6320c7 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -3205,6 +3205,15 @@ hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates) ipa_hints hints = estimates.hints; if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride)) result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus); + + sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus); + + if (hints & INLINE_HINT_loop_iterations) + result += (estimates.loops_with_known_iterations * bonus_for_one).to_int (); + + if (hints & INLINE_HINT_loop_stride) + result += (estimates.loops_with_known_strides * bonus_for_one).to_int (); + return result; } diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c index f89f158..771f432 100644 --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c @@ -310,6 +310,36 @@ set_hint_predicate (predicate **p, predicate new_predicate) } } +/* Find if NEW_PREDICATE is already in V and if so, increment its freq. + Otherwise add a new item to the vector with this predicate and frerq equal + to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES + in which case the function does nothing. */ + +static void +add_freqcounting_predicate (vec **v, + const predicate &new_predicate, sreal add_freq, + unsigned max_num_predicates) +{ + if (new_predicate == false || new_predicate == true) + return; + ipa_freqcounting_predicate *f; + for (int i = 0; vec_safe_iterate (*v, i, &f); i++) + if (new_predicate == f->predicate) + { + f->freq += add_freq; + return; + } + if (vec_safe_length (*v) >= max_num_predicates) + /* Too many different predicates to account for. */ + return; + + ipa_freqcounting_predicate fcp; + fcp.predicate = NULL; + set_hint_predicate (&fcp.predicate, new_predicate); + fcp.freq = add_freq; + vec_safe_push (*v, fcp); + return; +} /* Compute what conditions may or may not hold given information about parameters. 
RET_CLAUSE returns truths that may hold in a specialized copy, @@ -710,13 +740,17 @@ ipa_call_summary::~ipa_call_summary () ipa_fn_summary::~ipa_fn_summary () { - if (loop_iterations) - edge_predicate_pool.remove (loop_iterations); - if (loop_stride) - edge_predicate_pool.remove (loop_stride); + unsigned len = vec_safe_length (loop_iterations); + for (unsigned i = 0; i < len; i++) + edge_predicate_pool.remove ((*loop_iterations)[i].predicate); + len = vec_safe_length (loop_strides); + for (unsigned i = 0; i < len; i++) + edge_predicate_pool.remove ((*loop_strides)[i].predicate); vec_free (conds); vec_free (size_time_table); vec_free (call_size_time_table); + vec_free (loop_iterations); + vec_free (loop_strides); } void @@ -729,24 +763,33 @@ ipa_fn_summary_t::remove_callees (cgraph_node *node) ipa_call_summaries->remove (e); } -/* Same as remap_predicate_after_duplication but handle hint predicate *P. - Additionally care about allocating new memory slot for updated predicate - and set it to NULL when it becomes true or false (and thus uninteresting). - */ +/* Duplicate predicates in loop hint vector, allocating memory for them and + remove and deallocate any uninteresting (true or false) ones. Return the + result. */ -static void -remap_hint_predicate_after_duplication (predicate **p, - clause_t possible_truths) +static vec * +remap_freqcounting_preds_after_dup (vec *v, + clause_t possible_truths) { - predicate new_predicate; + if (vec_safe_length (v) == 0) + return NULL; - if (!*p) - return; + vec *res = v->copy (); + int len = res->length(); + for (int i = len - 1; i >= 0; i--) + { + predicate new_predicate + = (*res)[i].predicate->remap_after_duplication (possible_truths); + /* We do not want to free previous predicate; it is used by node + origin. 
*/ + (*res)[i].predicate = NULL; + set_hint_predicate (&(*res)[i].predicate, new_predicate); + + if (!(*res)[i].predicate) + res->unordered_remove (i); + } - new_predicate = (*p)->remap_after_duplication (possible_truths); - /* We do not want to free previous predicate; it is used by node origin. */ - *p = NULL; - set_hint_predicate (p, new_predicate); + return res; } @@ -859,9 +902,11 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale; edge_set_predicate (edge, &new_predicate); } - remap_hint_predicate_after_duplication (&info->loop_iterations, + info->loop_iterations + = remap_freqcounting_preds_after_dup (info->loop_iterations, possible_truths); - remap_hint_predicate_after_duplication (&info->loop_stride, + info->loop_strides + = remap_freqcounting_preds_after_dup (info->loop_strides, possible_truths); /* If inliner or someone after inliner will ever start producing @@ -873,17 +918,21 @@ ipa_fn_summary_t::duplicate (cgraph_node *src, else { info->size_time_table = vec_safe_copy (info->size_time_table); - if (info->loop_iterations) + info->loop_iterations = vec_safe_copy (info->loop_iterations); + info->loop_strides = vec_safe_copy (info->loop_strides); + + ipa_freqcounting_predicate *f; + for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++) { - predicate p = *info->loop_iterations; - info->loop_iterations = NULL; - set_hint_predicate (&info->loop_iterations, p); + predicate p = *f->predicate; + f->predicate = NULL; + set_hint_predicate (&f->predicate, p); } - if (info->loop_stride) + for (int i = 0; vec_safe_iterate (info->loop_strides, i, &f); i++) { - predicate p = *info->loop_stride; - info->loop_stride = NULL; - set_hint_predicate (&info->loop_stride, p); + predicate p = *f->predicate; + f->predicate = NULL; + set_hint_predicate (&f->predicate, p); } } if (!dst->inlined_to) @@ -1045,15 +1094,28 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node) } fprintf (f, "\n"); 
} - if (s->loop_iterations) + ipa_freqcounting_predicate *fcp; + bool first_fcp = true; + for (int i = 0; vec_safe_iterate (s->loop_iterations, i, &fcp); i++) { - fprintf (f, " loop iterations:"); - s->loop_iterations->dump (f, s->conds); + if (first_fcp) + { + fprintf (f, " loop iterations:"); + first_fcp = false; + } + fprintf (f, " %3.2f for ", fcp->freq.to_double ()); + fcp->predicate->dump (f, s->conds); } - if (s->loop_stride) + first_fcp = true; + for (int i = 0; vec_safe_iterate (s->loop_strides, i, &fcp); i++) { - fprintf (f, " loop stride:"); - s->loop_stride->dump (f, s->conds); + if (first_fcp) + { + fprintf (f, " loop strides:"); + first_fcp = false; + } + fprintf (f, " %3.2f for :", fcp->freq.to_double ()); + fcp->predicate->dump (f, s->conds); } fprintf (f, " calls:\n"); dump_ipa_call_summary (f, 4, node, s); @@ -2543,12 +2605,13 @@ analyze_function_body (struct cgraph_node *node, bool early) if (fbi.info) compute_bb_predicates (&fbi, node, info, params_summary); + const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; order = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); nblocks = pre_and_rev_post_order_compute (NULL, order, false); for (n = 0; n < nblocks; n++) { bb = BASIC_BLOCK_FOR_FN (cfun, order[n]); - freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count); + freq = bb->count.to_sreal_scale (entry_count); if (clobber_only_eh_bb_p (bb)) { if (dump_file && (dump_flags & TDF_DETAILS)) @@ -2790,23 +2853,28 @@ analyze_function_body (struct cgraph_node *node, bool early) if (nonconstant_names.exists () && !early) { + ipa_fn_summary *s = ipa_fn_summaries->get (node); class loop *loop; - predicate loop_iterations = true; - predicate loop_stride = true; + unsigned max_loop_predicates = opt_for_fn (node->decl, + param_ipa_max_loop_predicates); if (dump_file && (dump_flags & TDF_DETAILS)) flow_loops_dump (dump_file, NULL, 0); scev_initialize (); FOR_EACH_LOOP (loop, 0) { + predicate loop_iterations = true; + sreal 
header_freq; edge ex; unsigned int j; class tree_niter_desc niter_desc; - if (loop->header->aux) - bb_predicate = *(predicate *) loop->header->aux; - else - bb_predicate = false; + if (!loop->header->aux) + continue; + profile_count phdr_count = loop_preheader_edge (loop)->count (); + sreal phdr_freq = phdr_count.to_sreal_scale (entry_count); + + bb_predicate = *(predicate *) loop->header->aux; auto_vec exits = get_loop_exit_edges (loop); FOR_EACH_VEC_ELT (exits, j, ex) if (number_of_iterations_exit (loop, ex, &niter_desc, false) @@ -2821,10 +2889,10 @@ analyze_function_body (struct cgraph_node *node, bool early) will_be_nonconstant = bb_predicate & will_be_nonconstant; if (will_be_nonconstant != true && will_be_nonconstant != false) - /* This is slightly inprecise. We may want to represent each - loop with independent predicate. */ loop_iterations &= will_be_nonconstant; } + add_freqcounting_predicate (&s->loop_iterations, loop_iterations, + phdr_freq, max_loop_predicates); } /* To avoid quadratic behavior we analyze stride predicates only @@ -2833,14 +2901,17 @@ analyze_function_body (struct cgraph_node *node, bool early) for (loop = loops_for_fn (cfun)->tree_root->inner; loop != NULL; loop = loop->next) { + predicate loop_stride = true; basic_block *body = get_loop_body (loop); + profile_count phdr_count = loop_preheader_edge (loop)->count (); + sreal phdr_freq = phdr_count.to_sreal_scale (entry_count); for (unsigned i = 0; i < loop->num_nodes; i++) { gimple_stmt_iterator gsi; - if (body[i]->aux) - bb_predicate = *(predicate *) body[i]->aux; - else - bb_predicate = false; + if (!body[i]->aux) + continue; + + bb_predicate = *(predicate *) body[i]->aux; for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) { @@ -2869,16 +2940,13 @@ analyze_function_body (struct cgraph_node *node, bool early) will_be_nonconstant = bb_predicate & will_be_nonconstant; if (will_be_nonconstant != true && will_be_nonconstant != false) - /* This is slightly inprecise. 
We may want to represent - each loop with independent predicate. */ loop_stride = loop_stride & will_be_nonconstant; } } + add_freqcounting_predicate (&s->loop_strides, loop_stride, + phdr_freq, max_loop_predicates); free (body); } - ipa_fn_summary *s = ipa_fn_summaries->get (node); - set_hint_predicate (&s->loop_iterations, loop_iterations); - set_hint_predicate (&s->loop_stride, loop_stride); scev_finalize (); } FOR_ALL_BB_FN (bb, my_function) @@ -3551,6 +3619,8 @@ ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates, sreal time = 0; int min_size = 0; ipa_hints hints = 0; + sreal loops_with_known_iterations = 0; + sreal loops_with_known_strides = 0; int i; if (dump_file && (dump_flags & TDF_DETAILS)) @@ -3643,16 +3713,27 @@ ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates, if (est_hints) { - if (info->loop_iterations - && !info->loop_iterations->evaluate (m_possible_truths)) - hints |= INLINE_HINT_loop_iterations; - if (info->loop_stride - && !info->loop_stride->evaluate (m_possible_truths)) - hints |= INLINE_HINT_loop_stride; if (info->scc_no) hints |= INLINE_HINT_in_scc; if (DECL_DECLARED_INLINE_P (m_node->decl)) hints |= INLINE_HINT_declared_inline; + + ipa_freqcounting_predicate *fcp; + for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++) + if (!fcp->predicate->evaluate (m_possible_truths)) + { + hints |= INLINE_HINT_loop_iterations; + loops_with_known_iterations += fcp->freq; + } + estimates->loops_with_known_iterations = loops_with_known_iterations; + + for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++) + if (!fcp->predicate->evaluate (m_possible_truths)) + { + hints |= INLINE_HINT_loop_stride; + loops_with_known_strides += fcp->freq; + } + estimates->loops_with_known_strides = loops_with_known_strides; } size = RDIV (size, ipa_fn_summary::size_scale); @@ -3660,12 +3741,15 @@ ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates, if (dump_file && (dump_flags & 
TDF_DETAILS)) { + fprintf (dump_file, "\n size:%i", (int) size); if (est_times) - fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", - (int) size, time.to_double (), - nonspecialized_time.to_double ()); - else - fprintf (dump_file, "\n size:%i (time not estimated)\n", (int) size); + fprintf (dump_file, " time:%f nonspec time:%f", + time.to_double (), nonspecialized_time.to_double ()); + if (est_hints) + fprintf (dump_file, " loops with known iterations:%f " + "known strides:%f", loops_with_known_iterations.to_double (), + loops_with_known_strides.to_double ()); + fprintf (dump_file, "\n"); } if (est_times) { @@ -3865,32 +3949,29 @@ remap_edge_summaries (struct cgraph_edge *inlined_edge, } } -/* Same as remap_predicate, but set result into hint *HINT. */ +/* Run remap_after_inlining on each predicate in V. */ static void -remap_hint_predicate (class ipa_fn_summary *info, - class ipa_node_params *params_summary, - class ipa_fn_summary *callee_info, - predicate **hint, - vec operand_map, - vec offset_map, - clause_t possible_truths, - predicate *toplev_predicate) -{ - predicate p; +remap_freqcounting_predicate (class ipa_fn_summary *info, + class ipa_node_params *params_summary, + class ipa_fn_summary *callee_info, + vec *v, + vec operand_map, + vec offset_map, + clause_t possible_truths, + predicate *toplev_predicate) - if (!*hint) - return; - p = (*hint)->remap_after_inlining - (info, params_summary, callee_info, - operand_map, offset_map, - possible_truths, *toplev_predicate); - if (p != false && p != true) +{ + ipa_freqcounting_predicate *fcp; + for (int i = 0; vec_safe_iterate (v, i, &fcp); i++) { - if (!*hint) - set_hint_predicate (hint, p); - else - **hint &= p; + predicate p + = fcp->predicate->remap_after_inlining (info, params_summary, + callee_info, operand_map, + offset_map, possible_truths, + *toplev_predicate); + if (p != false && p != true) + *fcp->predicate &= p; } } @@ -3998,12 +4079,12 @@ ipa_merge_fn_summary_after_inlining (struct 
cgraph_edge *edge) remap_edge_summaries (edge, edge->callee, info, params_summary, callee_info, operand_map, offset_map, clause, &toplev_predicate); - remap_hint_predicate (info, params_summary, callee_info, - &callee_info->loop_iterations, - operand_map, offset_map, clause, &toplev_predicate); - remap_hint_predicate (info, params_summary, callee_info, - &callee_info->loop_stride, - operand_map, offset_map, clause, &toplev_predicate); + remap_freqcounting_predicate (info, params_summary, callee_info, + info->loop_iterations, operand_map, + offset_map, clause, &toplev_predicate); + remap_freqcounting_predicate (info, params_summary, callee_info, + info->loop_strides, operand_map, + offset_map, clause, &toplev_predicate); HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee); HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size; @@ -4334,12 +4415,34 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data, info->size_time_table->quick_push (e); } - p.stream_in (&ib); - if (info) - set_hint_predicate (&info->loop_iterations, p); - p.stream_in (&ib); - if (info) - set_hint_predicate (&info->loop_stride, p); + count2 = streamer_read_uhwi (&ib); + for (j = 0; j < count2; j++) + { + p.stream_in (&ib); + sreal fcp_freq = sreal::stream_in (&ib); + if (info) + { + ipa_freqcounting_predicate fcp; + fcp.predicate = NULL; + set_hint_predicate (&fcp.predicate, p); + fcp.freq = fcp_freq; + vec_safe_push (info->loop_iterations, fcp); + } + } + count2 = streamer_read_uhwi (&ib); + for (j = 0; j < count2; j++) + { + p.stream_in (&ib); + sreal fcp_freq = sreal::stream_in (&ib); + if (info) + { + ipa_freqcounting_predicate fcp; + fcp.predicate = NULL; + set_hint_predicate (&fcp.predicate, p); + fcp.freq = fcp_freq; + vec_safe_push (info->loop_strides, fcp); + } + } for (e = node->callees; e; e = e->next_callee) read_ipa_call_summary (&ib, e, info != NULL); for (e = node->indirect_calls; e; e = e->next_callee) @@ 
-4502,14 +4605,19 @@ ipa_fn_summary_write (void) e->exec_predicate.stream_out (ob); e->nonconst_predicate.stream_out (ob); } - if (info->loop_iterations) - info->loop_iterations->stream_out (ob); - else - streamer_write_uhwi (ob, 0); - if (info->loop_stride) - info->loop_stride->stream_out (ob); - else - streamer_write_uhwi (ob, 0); + ipa_freqcounting_predicate *fcp; + streamer_write_uhwi (ob, vec_safe_length (info->loop_iterations)); + for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++) + { + fcp->predicate->stream_out (ob); + fcp->freq.stream_out (ob); + } + streamer_write_uhwi (ob, vec_safe_length (info->loop_strides)); + for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++) + { + fcp->predicate->stream_out (ob); + fcp->freq.stream_out (ob); + } for (edge = cnode->callees; edge; edge = edge->next_callee) write_ipa_call_summary (ob, edge); for (edge = cnode->indirect_calls; edge; edge = edge->next_callee) diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h index ccb6b43..f4dd5b8 100644 --- a/gcc/ipa-fnsummary.h +++ b/gcc/ipa-fnsummary.h @@ -101,6 +101,19 @@ public: } }; +/* Structure to capture how frequently some interesting events occur given a + particular predicate. The structure is used to estimate how often we + encounter loops with known iteration count or stride in various + contexts. */ + +struct GTY(()) ipa_freqcounting_predicate +{ + /* The described event happens with this frequency... */ + sreal freq; + /* ...when this predicate evaluates to false. */ + class predicate * GTY((skip)) predicate; +}; + /* Function inlining information. 
*/ class GTY(()) ipa_fn_summary { @@ -112,8 +125,9 @@ public: inlinable (false), single_caller (false), fp_expressions (false), estimated_stack_size (false), time (0), conds (NULL), - size_time_table (NULL), call_size_time_table (NULL), loop_iterations (NULL), - loop_stride (NULL), growth (0), scc_no (0) + size_time_table (NULL), call_size_time_table (NULL), + loop_iterations (NULL), loop_strides (NULL), + growth (0), scc_no (0) { } @@ -125,7 +139,7 @@ public: estimated_stack_size (s.estimated_stack_size), time (s.time), conds (s.conds), size_time_table (s.size_time_table), call_size_time_table (NULL), - loop_iterations (s.loop_iterations), loop_stride (s.loop_stride), + loop_iterations (s.loop_iterations), loop_strides (s.loop_strides), growth (s.growth), scc_no (s.scc_no) {} @@ -164,12 +178,10 @@ public: vec *size_time_table; vec *call_size_time_table; - /* Predicate on when some loop in the function becomes to have known - bounds. */ - predicate * GTY((skip)) loop_iterations; - /* Predicate on when some loop in the function becomes to have known - stride. */ - predicate * GTY((skip)) loop_stride; + /* Predicates on when some loops in the function can have known bounds. */ + vec *loop_iterations; + /* Predicates on when some loops in the function can have known strides. */ + vec *loop_strides; /* Estimated growth for inlining all copies of the function before start of small functions inlining. This value will get out of date as the callers are duplicated, but @@ -308,6 +320,14 @@ struct ipa_call_estimates /* Further discovered reasons why to inline or specialize the give calls. */ ipa_hints hints; + + /* Frequency how often a loop with known number of iterations is encountered. + Calculated with hints. */ + sreal loops_with_known_iterations; + + /* Frequency how often a loop with known strides is encountered. Calculated + with hints. 
*/ + sreal loops_with_known_strides; }; class ipa_cached_call_context; diff --git a/gcc/params.opt b/gcc/params.opt index a232161..c0b94bb 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -230,6 +230,10 @@ Maximum number of aggregate content items for a parameter in jump functions and Common Joined UInteger Var(param_ipa_max_param_expr_ops) Init(10) Param Optimization Maximum number of operations in a parameter expression that can be handled by IPA analysis. +-param=ipa-max-loop-predicates= +Common Joined UInteger Var(param_ipa_max_loop_predicates) Init(16) Param Optimization +Maximum number of different predicates used to track properties of loops in IPA analysis. + -param=ipa-max-switch-predicate-bounds= Common Joined UInteger Var(param_ipa_max_switch_predicate_bounds) Init(5) Param Optimization Maximal number of boundary endpoints of case ranges of switch statement used during IPA function summary generation. diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-loophint-1.c b/gcc/testsuite/gcc.dg/ipa/ipcp-loophint-1.c new file mode 100644 index 0000000..6d049af --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/ipcp-loophint-1.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fdump-ipa-cp-details" } */ + +extern int *o, *p, *q, *r; + +#define FUNCTIONS fa(), fb(), fc(), fd(), fe(), ff(), fg() + +extern void FUNCTIONS; + +void foo (int c) +{ + FUNCTIONS; + FUNCTIONS; + for (int i = 0; i < 100; i++) + { + for (int j = 0; j < c; j++) + o[i] = p[i] + q[i] * r[i]; + } + FUNCTIONS; + FUNCTIONS; +} + +void bar() +{ + foo (8); + p[4]++; +} + +/* { dg-final { scan-ipa-dump {with known iterations:[1-9]} "cp" } } */ -- cgit v1.1 From 91153e0af9a457a0085bb796613a81404480d5af Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Fri, 2 Oct 2020 18:41:35 +0200 Subject: ipa-cp: Add dumping of overall_size after cloning When experimenting with IPA-CP parameters, especially when looking into exchange2_r, it has been very useful to know what the value of overall_size is at 
different stages of the decision process. This patch therefore adds it to the generated dumps. gcc/ChangeLog: 2020-09-07 Martin Jambor * ipa-cp.c (estimate_local_effects): Add overall_size to dumped string. (decide_about_value): Add dumping new overall_size. --- gcc/ipa-cp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index f6320c7..12acf24 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -3517,7 +3517,8 @@ estimate_local_effects (struct cgraph_node *node) if (dump_file) fprintf (dump_file, " Decided to specialize for all " - "known contexts, growth deemed beneficial.\n"); + "known contexts, growth (to %li) deemed " + "beneficial.\n", overall_size); } else if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Not cloning for all contexts because " @@ -5506,6 +5507,9 @@ decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset, val->spec_node = create_specialized_node (node, known_csts, known_contexts, aggvals, callers); overall_size += val->local_size_cost; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " overall size reached %li\n", + overall_size); /* TODO: If for some lattice there is only one other known value left, make a special node for it too. */ -- cgit v1.1 From 315848246657607d5acd9bca29d64d98a4c5f042 Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Fri, 2 Oct 2020 18:41:35 +0200 Subject: ipa-cp: Separate and increase the large-unit parameter A previous patch in the series has taught IPA-CP to identify the important cloning opportunities in 548.exchange2_r as worthwhile on their own, but the optimization is still prevented from taking place because of the overall unit-growth limit. This patch raises that limit so that it takes place and the benchmark runs 30% faster (on AMD Zen2 CPU at least).
Before this patch, IPA-CP uses the following formulae to arrive at the overall_size limit: base = MAX(orig_size, param_large_unit_insns) unit_growth_limit = base + base * param_ipa_cp_unit_growth / 100 since param_ipa_cp_unit_growth has default 10, param_large_unit_insns has default value 10000. The problem with exchange2 (at least on zen2 but I have had a quick look on aarch64 too) is that the original estimated unit size is 10513 and so param_large_unit_insns does not apply and the default limit is therefore 11564 which is good enough only for one of the ideal 8 clonings, we need the limit to be at least 16291. I would like to raise param_ipa_cp_unit_growth a little bit more soon too, but most certainly not to 55. Therefore, the large_unit must be increased. In this patch, I decided to decouple the inlining and ipa-cp large-unit parameters. It also makes sense because IPA-CP uses it only at -O3 while inlining also at -O2 (IIUC). But if we agree we can try raising param_large_unit_insns to 13-14 thousand "instructions," perhaps it is not necessary. But then again, it may make sense to actually increase the IPA-CP limit further. I plan to experiment with IPA-CP tuning on a larger set of programs. Meanwhile, mainly to address the 548.exchange2_r regression, I'm suggesting this simple change. gcc/ChangeLog: 2020-09-07 Martin Jambor * params.opt (ipa-cp-large-unit-insns): New parameter. * ipa-cp.c (get_max_overall_size): Use the new parameter. 
--- gcc/ipa-cp.c | 2 +- gcc/params.opt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index 12acf24..2152f9e 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -3448,7 +3448,7 @@ static long get_max_overall_size (cgraph_node *node) { long max_new_size = orig_overall_size; - long large_unit = opt_for_fn (node->decl, param_large_unit_insns); + long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns); if (max_new_size < large_unit) max_new_size = large_unit; int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth); diff --git a/gcc/params.opt b/gcc/params.opt index c0b94bb..6f308a1 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -214,6 +214,10 @@ Percentage penalty functions containing a single call to another function will r Common Joined UInteger Var(param_ipa_cp_unit_growth) Init(10) Param Optimization How much can given compilation unit grow because of the interprocedural constant propagation (in percent). +-param=ipa-cp-large-unit-insns= +Common Joined UInteger Var(param_ipa_cp_large_unit_insns) Optimization Init(16000) Param +The size of translation unit that IPA-CP pass considers large. + -param=ipa-cp-value-list-size= Common Joined UInteger Var(param_ipa_cp_value_list_size) Init(8) Param Optimization Maximum size of a list of values associated with each parameter for interprocedural constant propagation. -- cgit v1.1 From 9340d1c97b8a7aa47aff677f9b6db4799670f47b Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Fri, 2 Oct 2020 09:47:00 -0700 Subject: c++: cleanup ctor_omit_inherited_parms [PR97268] ctor_omit_inherited_parms was being somewhat abused. What I'd missed is that it checks for a base-dtor name, before proceeding with the check. But we ended up passing it that during cloning before we'd completed the cloning. 
It was also using DECL_ORIGIN to get to the in-charge ctor, but we sometimes zap DECL_ABSTRACT_ORIGIN, and it ends up processing the incoming function -- which happens to work. so, this breaks out a predicate that expects to get the incharge ctor, and will tell you whether its base ctor will need to omit the parms. We call that directly during cloning. Then the original fn is essentially just a wrapper, but uses DECL_CLONED_FUNCTION to get to the in-charge ctor. That uncovered abuse in add_method, which was happily passing TEMPLATE_DECLs to it. Let's not do that. add_method itself contained a loop mostly containing an 'if (nomatch) continue' idiom, except for a final 'if (match) {...}' check, which itself contained instances of the former idiom. I refactored that to use the former idiom throughout. In doing that I found a place where we'd issue an error, but then not actually reject the new member. gcc/cp/ * cp-tree.h (base_ctor_omit_inherited_parms): Declare. * class.c (add_method): Refactor main loop, only pass fns to ctor_omit_inherited_parms. (build_cdtor_clones): Rename bool parms. (clone_cdtor): Call base_ctor_omit_inherited_parms. * method.c (base_ctor_omit_inherited_parms): New, broken out of ... (ctor_omit_inherited_parms): ... here, call it with DECL_CLONED_FUNCTION. gcc/testsuite/ * g++.dg/inherit/pr97268.C: New. 
--- gcc/cp/class.c | 259 +++++++++++++++++---------------- gcc/cp/cp-tree.h | 1 + gcc/cp/method.c | 36 ++++- gcc/testsuite/g++.dg/inherit/pr97268.C | 60 ++++++++ 4 files changed, 222 insertions(+), 134 deletions(-) create mode 100644 gcc/testsuite/g++.dg/inherit/pr97268.C (limited to 'gcc') diff --git a/gcc/cp/class.c b/gcc/cp/class.c index c9a1f75..01780fe 100644 --- a/gcc/cp/class.c +++ b/gcc/cp/class.c @@ -1006,10 +1006,6 @@ add_method (tree type, tree method, bool via_using) for (ovl_iterator iter (current_fns); iter; ++iter) { tree fn = *iter; - tree fn_type; - tree method_type; - tree parms1; - tree parms2; if (TREE_CODE (fn) != TREE_CODE (method)) continue; @@ -1037,10 +1033,8 @@ add_method (tree type, tree method, bool via_using) functions in the derived class override and/or hide member functions with the same name and parameter types in a base class (rather than conflicting). */ - fn_type = TREE_TYPE (fn); - method_type = TREE_TYPE (method); - parms1 = TYPE_ARG_TYPES (fn_type); - parms2 = TYPE_ARG_TYPES (method_type); + tree fn_type = TREE_TYPE (fn); + tree method_type = TREE_TYPE (method); /* Compare the quals on the 'this' parm. Don't compare the whole types, as used functions are treated as @@ -1055,137 +1049,149 @@ add_method (tree type, tree method, bool via_using) || type_memfn_rqual (fn_type) != type_memfn_rqual (method_type))) continue; - /* For templates, the return type and template parameters - must be identical. */ - if (TREE_CODE (fn) == TEMPLATE_DECL - && (!same_type_p (TREE_TYPE (fn_type), - TREE_TYPE (method_type)) - || !comp_template_parms (DECL_TEMPLATE_PARMS (fn), - DECL_TEMPLATE_PARMS (method)))) + tree real_fn = fn; + tree real_method = method; + + /* Templates and conversion ops must match return types. */ + if ((DECL_CONV_FN_P (fn) || TREE_CODE (fn) == TEMPLATE_DECL) + && !same_type_p (TREE_TYPE (fn_type), TREE_TYPE (method_type))) continue; + + /* For templates, the template parameters must be identical. 
*/ + if (TREE_CODE (fn) == TEMPLATE_DECL) + { + if (!comp_template_parms (DECL_TEMPLATE_PARMS (fn), + DECL_TEMPLATE_PARMS (method))) + continue; - if (! DECL_STATIC_FUNCTION_P (fn)) + real_fn = DECL_TEMPLATE_RESULT (fn); + real_method = DECL_TEMPLATE_RESULT (method); + } + + tree parms1 = TYPE_ARG_TYPES (fn_type); + tree parms2 = TYPE_ARG_TYPES (method_type); + if (! DECL_STATIC_FUNCTION_P (real_fn)) parms1 = TREE_CHAIN (parms1); - if (! DECL_STATIC_FUNCTION_P (method)) + if (! DECL_STATIC_FUNCTION_P (real_method)) parms2 = TREE_CHAIN (parms2); - /* Bring back parameters omitted from an inherited ctor. */ - if (ctor_omit_inherited_parms (fn)) - parms1 = FUNCTION_FIRST_USER_PARMTYPE (DECL_ORIGIN (fn)); - if (ctor_omit_inherited_parms (method)) - parms2 = FUNCTION_FIRST_USER_PARMTYPE (DECL_ORIGIN (method)); + /* Bring back parameters omitted from an inherited ctor. The + method and the function can have different omittedness. */ + if (ctor_omit_inherited_parms (real_fn)) + parms1 = FUNCTION_FIRST_USER_PARMTYPE (DECL_CLONED_FUNCTION (real_fn)); + if (ctor_omit_inherited_parms (real_method)) + parms2 = (FUNCTION_FIRST_USER_PARMTYPE + (DECL_CLONED_FUNCTION (real_method))); - if (compparms (parms1, parms2) - && (!DECL_CONV_FN_P (fn) - || same_type_p (TREE_TYPE (fn_type), - TREE_TYPE (method_type)))) - { - if (!equivalently_constrained (fn, method)) - { - if (processing_template_decl) - /* We can't check satisfaction in dependent context, wait until - the class is instantiated. */ - continue; - - special_function_kind sfk = special_memfn_p (method); + if (!compparms (parms1, parms2)) + continue; - if (sfk == sfk_none - || DECL_INHERITED_CTOR (fn) - || TREE_CODE (fn) == TEMPLATE_DECL) - /* Member function templates and non-special member functions - coexist if they are not equivalently constrained. A member - function is not hidden by an inherited constructor. 
*/ - continue; + if (!equivalently_constrained (fn, method)) + { + if (processing_template_decl) + /* We can't check satisfaction in dependent context, wait until + the class is instantiated. */ + continue; - /* P0848: For special member functions, deleted, unsatisfied, or - less constrained overloads are ineligible. We implement this - by removing them from CLASSTYPE_MEMBER_VEC. Destructors don't - use the notion of eligibility, and the selected destructor can - be deleted, but removing unsatisfied or less constrained - overloads has the same effect as overload resolution. */ - bool dtor = (sfk == sfk_destructor); - if (losem == -1) - losem = ((!dtor && DECL_DELETED_FN (method)) - || !constraints_satisfied_p (method)); - bool losef = ((!dtor && DECL_DELETED_FN (fn)) - || !constraints_satisfied_p (fn)); - int win; - if (losem || losef) - win = losem - losef; - else - win = more_constrained (fn, method); - if (win > 0) - /* Leave FN in the method vec, discard METHOD. */ - return false; - else if (win < 0) - { - /* Remove FN, add METHOD. */ - current_fns = iter.remove_node (current_fns); - continue; - } - else - /* Let them coexist for now. */ - continue; - } + special_function_kind sfk = special_memfn_p (method); - /* If these are versions of the same function, process and - move on. */ - if (TREE_CODE (fn) == FUNCTION_DECL - && maybe_version_functions (method, fn, true)) + if (sfk == sfk_none + || DECL_INHERITED_CTOR (fn) + || TREE_CODE (fn) == TEMPLATE_DECL) + /* Member function templates and non-special member functions + coexist if they are not equivalently constrained. A member + function is not hidden by an inherited constructor. */ continue; - if (DECL_INHERITED_CTOR (method)) - { - if (DECL_INHERITED_CTOR (fn)) - { - tree basem = DECL_INHERITED_CTOR_BASE (method); - tree basef = DECL_INHERITED_CTOR_BASE (fn); - if (flag_new_inheriting_ctors) - { - if (basem == basef) - { - /* Inheriting the same constructor along different - paths, combine them. 
*/ - SET_DECL_INHERITED_CTOR - (fn, ovl_make (DECL_INHERITED_CTOR (method), - DECL_INHERITED_CTOR (fn))); - /* And discard the new one. */ - return false; - } - else - /* Inherited ctors can coexist until overload - resolution. */ - continue; - } - error_at (DECL_SOURCE_LOCATION (method), - "%q#D conflicts with version inherited from %qT", - method, basef); - inform (DECL_SOURCE_LOCATION (fn), - "version inherited from %qT declared here", - basef); - } - /* Otherwise defer to the other function. */ - return false; - } - - if (via_using) - /* Defer to the local function. */ + /* P0848: For special member functions, deleted, unsatisfied, or + less constrained overloads are ineligible. We implement this + by removing them from CLASSTYPE_MEMBER_VEC. Destructors don't + use the notion of eligibility, and the selected destructor can + be deleted, but removing unsatisfied or less constrained + overloads has the same effect as overload resolution. */ + bool dtor = (sfk == sfk_destructor); + if (losem == -1) + losem = ((!dtor && DECL_DELETED_FN (method)) + || !constraints_satisfied_p (method)); + bool losef = ((!dtor && DECL_DELETED_FN (fn)) + || !constraints_satisfied_p (fn)); + int win; + if (losem || losef) + win = losem - losef; + else + win = more_constrained (fn, method); + if (win > 0) + /* Leave FN in the method vec, discard METHOD. */ return false; - else if (flag_new_inheriting_ctors - && DECL_INHERITED_CTOR (fn)) + else if (win < 0) { - /* Remove the inherited constructor. */ + /* Remove FN, add METHOD. */ current_fns = iter.remove_node (current_fns); continue; } else + /* Let them coexist for now. */ + continue; + } + + /* If these are versions of the same function, process and + move on. */ + if (TREE_CODE (fn) == FUNCTION_DECL + && maybe_version_functions (method, fn, true)) + continue; + + if (DECL_INHERITED_CTOR (method)) + { + if (!DECL_INHERITED_CTOR (fn)) + /* Defer to the other function. 
*/ + return false; + + tree basem = DECL_INHERITED_CTOR_BASE (method); + tree basef = DECL_INHERITED_CTOR_BASE (fn); + if (flag_new_inheriting_ctors) { - error_at (DECL_SOURCE_LOCATION (method), - "%q#D cannot be overloaded with %q#D", method, fn); - inform (DECL_SOURCE_LOCATION (fn), - "previous declaration %q#D", fn); - return false; + if (basem == basef) + { + /* Inheriting the same constructor along different + paths, combine them. */ + SET_DECL_INHERITED_CTOR + (fn, ovl_make (DECL_INHERITED_CTOR (method), + DECL_INHERITED_CTOR (fn))); + /* And discard the new one. */ + return false; + } + else + /* Inherited ctors can coexist until overload + resolution. */ + continue; } + + error_at (DECL_SOURCE_LOCATION (method), + "%q#D conflicts with version inherited from %qT", + method, basef); + inform (DECL_SOURCE_LOCATION (fn), + "version inherited from %qT declared here", + basef); + return false; + } + + if (via_using) + /* Defer to the local function. */ + return false; + else if (flag_new_inheriting_ctors + && DECL_INHERITED_CTOR (fn)) + { + /* Remove the inherited constructor. */ + current_fns = iter.remove_node (current_fns); + continue; + } + else + { + error_at (DECL_SOURCE_LOCATION (method), + "%q#D cannot be overloaded with %q#D", method, fn); + inform (DECL_SOURCE_LOCATION (fn), + "previous declaration %q#D", fn); + return false; } } @@ -4892,7 +4898,7 @@ build_clone (tree fn, tree name, bool need_vtt_parm_p, will be inserted onto DECL_CHAIN of FN. */ static unsigned -build_cdtor_clones (tree fn, bool needs_vtt_parm_p, bool omit_inherited_parms_p) +build_cdtor_clones (tree fn, bool needs_vtt_p, bool base_omits_inherited_p) { unsigned count = 0; @@ -4901,8 +4907,8 @@ build_cdtor_clones (tree fn, bool needs_vtt_parm_p, bool omit_inherited_parms_p) /* For each constructor, we need two variants: an in-charge version and a not-in-charge version. 
*/ build_clone (fn, complete_ctor_identifier, false, false); - build_clone (fn, base_ctor_identifier, needs_vtt_parm_p, - omit_inherited_parms_p); + build_clone (fn, base_ctor_identifier, needs_vtt_p, + base_omits_inherited_p); count += 2; } else @@ -4924,7 +4930,7 @@ build_cdtor_clones (tree fn, bool needs_vtt_parm_p, bool omit_inherited_parms_p) count++; } build_clone (fn, complete_dtor_identifier, false, false); - build_clone (fn, base_dtor_identifier, needs_vtt_parm_p, false); + build_clone (fn, base_dtor_identifier, needs_vtt_p, false); count += 2; } @@ -4948,9 +4954,10 @@ clone_cdtor (tree fn, bool update_methods) /* Base ctor omits inherited parms it needs a vttparm and inherited from a virtual nase ctor. */ - bool omit_inherited = ctor_omit_inherited_parms (fn); + bool base_omits_inherited = (DECL_MAYBE_IN_CHARGE_CONSTRUCTOR_P (fn) + && base_ctor_omit_inherited_parms (fn)); - unsigned count = build_cdtor_clones (fn, vtt, omit_inherited); + unsigned count = build_cdtor_clones (fn, vtt, base_omits_inherited); /* Note that this is an abstract function that is never emitted. 
*/ DECL_ABSTRACT_P (fn) = true; diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 9f948ae..43e0c18 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6764,6 +6764,7 @@ extern tree get_default_ctor (tree); extern tree get_dtor (tree, tsubst_flags_t); extern tree strip_inheriting_ctors (tree); extern tree inherited_ctor_binfo (tree); +extern bool base_ctor_omit_inherited_parms (tree); extern bool ctor_omit_inherited_parms (tree); extern tree locate_ctor (tree); extern tree implicitly_declare_fn (special_function_kind, tree, diff --git a/gcc/cp/method.c b/gcc/cp/method.c index 1058fd0..6e4c5f7 100644 --- a/gcc/cp/method.c +++ b/gcc/cp/method.c @@ -551,31 +551,51 @@ inherited_ctor_binfo (tree fndecl) return inherited_ctor_binfo (binfo, fndecl); } -/* True if we should omit all user-declared parameters from constructor FN, - because it is a base clone of a ctor inherited from a virtual base. */ + +/* True if we should omit all user-declared parameters from a base + construtor built from complete constructor FN. + That's when the ctor is inherited from a virtual base. */ bool -ctor_omit_inherited_parms (tree fn) +base_ctor_omit_inherited_parms (tree comp_ctor) { + gcc_checking_assert (DECL_MAYBE_IN_CHARGE_CONSTRUCTOR_P (comp_ctor)); + if (!flag_new_inheriting_ctors) /* We only optimize away the parameters in the new model. */ return false; - if (!DECL_BASE_CONSTRUCTOR_P (fn) - || !CLASSTYPE_VBASECLASSES (DECL_CONTEXT (fn))) + + if (!CLASSTYPE_VBASECLASSES (DECL_CONTEXT (comp_ctor))) return false; - if (FUNCTION_FIRST_USER_PARMTYPE (DECL_ORIGIN (fn)) == void_list_node) + if (FUNCTION_FIRST_USER_PARMTYPE (comp_ctor) == void_list_node) /* No user-declared parameters to omit. 
*/ return false; - tree binfo = inherited_ctor_binfo (fn); - for (; binfo; binfo = BINFO_INHERITANCE_CHAIN (binfo)) + for (tree binfo = inherited_ctor_binfo (comp_ctor); + binfo; + binfo = BINFO_INHERITANCE_CHAIN (binfo)) if (BINFO_VIRTUAL_P (binfo)) return true; return false; } + +/* True if we should omit all user-declared parameters from constructor FN, + because it is a base clone of a ctor inherited from a virtual base. */ + +bool +ctor_omit_inherited_parms (tree fn) +{ + gcc_checking_assert (TREE_CODE (fn) == FUNCTION_DECL); + + if (!DECL_BASE_CONSTRUCTOR_P (fn)) + return false; + + return base_ctor_omit_inherited_parms (DECL_CLONED_FUNCTION (fn)); +} + /* True iff constructor(s) INH inherited into BINFO initializes INIT_BINFO. This can be true for multiple virtual bases as well as one direct non-virtual base. */ diff --git a/gcc/testsuite/g++.dg/inherit/pr97268.C b/gcc/testsuite/g++.dg/inherit/pr97268.C new file mode 100644 index 0000000..79a809c --- /dev/null +++ b/gcc/testsuite/g++.dg/inherit/pr97268.C @@ -0,0 +1,60 @@ +// { dg-do compile { target c++11 } } +// { dg-additional-options -Wall } +// PR 97268, ICE due to broken inherited-from-virtual base-ctor +class Handle { +public: + explicit Handle(char const *const &) { } + ~Handle() {} + Handle(const Handle &) = delete; + Handle &operator=(const Handle &) = delete; + +protected: + int lasterr = 0; + +}; + +struct ObjectBase { + ~ObjectBase() {} + +protected: + explicit ObjectBase(const char *lc_, int ln_, Handle &h, unsigned) + : handle(h) { } + +protected: + + Handle &handle; +}; + +template +struct Object : virtual public ObjectBase { + explicit Object(const char *lc_, int ln_, Handle &env); + +protected: + using ObjectBase::ObjectBase; + +}; + +class BetterObjectBase : virtual public ObjectBase { +protected: + BetterObjectBase(const char *lc_, int ln_, Handle &env) + : ObjectBase("", 0, env, 0) {} + +}; + +template +class BetterObject : public Object, public BetterObjectBase { +public: + 
BetterObject(Handle &env) + : ObjectBase("", 0, env, 0) + , Object("", 0, env, 0) + , BetterObjectBase("", 0, env) {} // { dg-error "use of deleted function" } + +}; + +int main() { + Handle h("handle"); + + BetterObject B(h); + + return 0; +} -- cgit v1.1 From 7ee1c0413e251ff0b6a6d526209ef038b9835320 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Fri, 2 Oct 2020 11:13:26 -0700 Subject: c++: Hash table iteration for namespace-member spelling suggestions For 'no such binding' errors, we iterate over binding levels to find a close match. At the namespace level we were using DECL_ANTICIPATED to skip undeclared builtins. But (a) there are other unnameable things there and (b) decl-anticipated is about to go away. This changes the namespace scanning to iterate over the hash table, and look at non-hidden bindings. This does mean we look at fewer strings (hurrarh), but the order we meet them is somewhat 'random'. Our distance measure is not very fine grained, and a couple of testcases change their suggestion. I notice for the c/c++ common one, we now match the output of the C compiler. For the other one we think 'int' and 'int64_t' have the same distance from 'int64', and now meet the former first. That's a little unfortunate. If it's too problematic I suppose we could sort the strings via an intermediate array before measuring distance. gcc/cp/ * name-lookup.c (consider_decl): New, broken out of ... (consider_binding_level): ... here. Iterate the hash table for namespace bindings. gcc/testsuite/ * c-c++-common/spellcheck-reserved.c: Adjust diagnostic. * g++.dg/spellcheck-typenames.C: Adjust diagnostic. 
--- gcc/cp/name-lookup.c | 126 +++++++++++++++-------- gcc/testsuite/c-c++-common/spellcheck-reserved.c | 2 +- gcc/testsuite/g++.dg/spellcheck-typenames.C | 4 +- 3 files changed, 87 insertions(+), 45 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index 6204444..4024cea 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -6106,6 +6106,39 @@ qualified_namespace_lookup (tree scope, name_lookup *lookup) return found; } +static void +consider_decl (tree decl, best_match &bm, + bool consider_impl_names) +{ + /* Skip compiler-generated variables (e.g. __for_begin/__for_end + within range for). */ + if (TREE_CODE (decl) == VAR_DECL && DECL_ARTIFICIAL (decl)) + return; + + tree suggestion = DECL_NAME (decl); + if (!suggestion) + return; + + /* Don't suggest names that are for anonymous aggregate types, as + they are an implementation detail generated by the compiler. */ + if (IDENTIFIER_ANON_P (suggestion)) + return; + + const char *suggestion_str = IDENTIFIER_POINTER (suggestion); + + /* Ignore internal names with spaces in them. */ + if (strchr (suggestion_str, ' ')) + return; + + /* Don't suggest names that are reserved for use by the + implementation, unless NAME began with an underscore. */ + if (!consider_impl_names + && name_reserved_for_implementation_p (suggestion_str)) + return; + + bm.consider (suggestion_str); +} + /* Helper function for lookup_name_fuzzy. Traverse binding level LVL, looking for good name matches for NAME (and BM). */ @@ -6129,54 +6162,63 @@ consider_binding_level (tree name, best_match &bm, with an underscore. */ bool consider_implementation_names = (IDENTIFIER_POINTER (name)[0] == '_'); - for (tree t = lvl->names; t; t = TREE_CHAIN (t)) - { - tree d = t; - - /* OVERLOADs or decls from using declaration are wrapped into - TREE_LIST. 
*/ - if (TREE_CODE (d) == TREE_LIST) - d = OVL_FIRST (TREE_VALUE (d)); - - /* Don't use bindings from implicitly declared functions, - as they were likely misspellings themselves. */ - if (TREE_TYPE (d) == error_mark_node) - continue; - - /* Skip anticipated decls of builtin functions. */ - if (TREE_CODE (d) == FUNCTION_DECL - && fndecl_built_in_p (d) - && DECL_ANTICIPATED (d)) - continue; + if (lvl->kind != sk_namespace) + for (tree t = lvl->names; t; t = TREE_CHAIN (t)) + { + tree d = t; - /* Skip compiler-generated variables (e.g. __for_begin/__for_end - within range for). */ - if (TREE_CODE (d) == VAR_DECL - && DECL_ARTIFICIAL (d)) - continue; + /* OVERLOADs or decls from using declaration are wrapped into + TREE_LIST. */ + if (TREE_CODE (d) == TREE_LIST) + d = OVL_FIRST (TREE_VALUE (d)); - tree suggestion = DECL_NAME (d); - if (!suggestion) - continue; - - /* Don't suggest names that are for anonymous aggregate types, as - they are an implementation detail generated by the compiler. */ - if (IDENTIFIER_ANON_P (suggestion)) - continue; + /* Don't use bindings from implicitly declared functions, + as they were likely misspellings themselves. */ + if (TREE_TYPE (d) == error_mark_node) + continue; - const char *suggestion_str = IDENTIFIER_POINTER (suggestion); + /* If we want a typename, ignore non-types. */ + if (kind == FUZZY_LOOKUP_TYPENAME + && TREE_CODE (STRIP_TEMPLATE (d)) != TYPE_DECL) + continue; - /* Ignore internal names with spaces in them. */ - if (strchr (suggestion_str, ' ')) - continue; + consider_decl (d, bm, consider_implementation_names); + } + else + { + /* Iterate over the namespace hash table, that'll have fewer + entries than the decl list. */ + tree ns = lvl->this_entity; - /* Don't suggest names that are reserved for use by the - implementation, unless NAME began with an underscore. 
*/ - if (name_reserved_for_implementation_p (suggestion_str) - && !consider_implementation_names) - continue; + hash_table::iterator end + (DECL_NAMESPACE_BINDINGS (ns)->end ()); + for (hash_table::iterator iter + (DECL_NAMESPACE_BINDINGS (ns)->begin ()); iter != end; ++iter) + { + tree binding = *iter; + tree value = NULL_TREE; - bm.consider (suggestion_str); + if (STAT_HACK_P (binding)) + { + if (!STAT_TYPE_HIDDEN_P (binding) + && STAT_TYPE (binding)) + consider_decl (STAT_TYPE (binding), bm, + consider_implementation_names); + else if (!STAT_DECL_HIDDEN_P (binding)) + value = STAT_DECL (binding); + } + else + value = binding; + + value = ovl_skip_hidden (value); + if (value) + { + value = OVL_FIRST (value); + if (!(kind == FUZZY_LOOKUP_TYPENAME + && TREE_CODE (STRIP_TEMPLATE (value)) != TYPE_DECL)) + consider_decl (value, bm, consider_implementation_names); + } + } } } diff --git a/gcc/testsuite/c-c++-common/spellcheck-reserved.c b/gcc/testsuite/c-c++-common/spellcheck-reserved.c index ed292f2..175ba4a 100644 --- a/gcc/testsuite/c-c++-common/spellcheck-reserved.c +++ b/gcc/testsuite/c-c++-common/spellcheck-reserved.c @@ -30,7 +30,7 @@ void test (const char *buf, char ch) { __builtin_strtchr (buf, ch); /* { dg-line misspelled_reserved } */ /* { dg-warning "did you mean '__builtin_strchr'" "" { target c } misspelled_reserved } */ - /* { dg-error "'__builtin_strtchr' was not declared in this scope; did you mean '__builtin_strrchr'\\?" "" { target c++ } misspelled_reserved } */ + /* { dg-error "'__builtin_strtchr' was not declared in this scope; did you mean '__builtin_strchr'\\?" "" { target c++ } misspelled_reserved } */ } /* Similarly for a name that begins with a single underscore. 
*/ diff --git a/gcc/testsuite/g++.dg/spellcheck-typenames.C b/gcc/testsuite/g++.dg/spellcheck-typenames.C index 6adf724..ff53ecc 100644 --- a/gcc/testsuite/g++.dg/spellcheck-typenames.C +++ b/gcc/testsuite/g++.dg/spellcheck-typenames.C @@ -54,11 +54,11 @@ struct some_thing test_6; // { dg-error "aggregate 'some_thing test_6' has incom { dg-end-multiline-output "" } */ typedef long int64_t; -int64 i; // { dg-error "1: 'int64' does not name a type; did you mean 'int64_t'?" } +int64 i; // { dg-error "1: 'int64' does not name a type; did you mean 'int'?" } /* { dg-begin-multiline-output "" } int64 i; ^~~~~ - int64_t + int { dg-end-multiline-output "" } */ /* Verify that gcc doesn't offer nonsensical suggestions. */ -- cgit v1.1 From 679dbc9dcec0d1240951442947199ba155f494b6 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Fri, 2 Oct 2020 12:21:08 -0700 Subject: c++: Kill DECL_ANTICIPATED Here's the patch to remove DECL_ANTICIPATED, and with it hiddenness is managed entirely in the symbol table. Sadly I couldn't get rid of the actual field without more investigation -- it's repurposed for OMP_PRIVATIZED_MEMBER. It looks like a the VAR-related flags in lang_decl_base are not completely orthogonal, so perhaps some can be turned into an enumeration or something. But that's more than I want to do right now. DECL_FRIEND_P Is still slightly suspect as it appears to mean more than just in-class definition. However, I'm leaving that for now. gcc/cp/ * cp-tree.h (lang_decl_base): anticipated_p is not used for anticipatedness. (DECL_ANTICIPATED): Delete. * decl.c (duplicate_decls): Delete DECL_ANTICIPATED_management, use was_hidden. (cxx_builtin_function): Drop DECL_ANTICIPATED setting. (xref_tag_1): Drop DECL_ANTICIPATED assert. * name-lookup.c (name_lookup::adl_class_only): Drop DECL_ANTICIPATED check. (name_lookup::search_adl): Always dedup. (anticipated_builtin_p): Reimplement. (do_pushdecl): Drop DECL_ANTICIPATED asserts & update. 
(lookup_elaborated_type_1): Drop DECL_ANTICIPATED update. (do_pushtag): Drop DECL_ANTICIPATED setting. * pt.c (push_template_decl): Likewise. (tsubst_friend_class): Likewise. libcc1/ * libcp1plugin.cc (libcp1plugin.cc): Drop DECL_ANTICIPATED test. --- gcc/cp/cp-tree.h | 13 ++++--------- gcc/cp/decl.c | 31 ++++++++----------------------- gcc/cp/name-lookup.c | 49 ++++++++----------------------------------------- gcc/cp/pt.c | 10 +--------- 4 files changed, 21 insertions(+), 82 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 43e0c18..c9ad751 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -2657,8 +2657,10 @@ struct GTY(()) lang_decl_base { unsigned not_really_extern : 1; /* var or fn */ unsigned initialized_in_class : 1; /* var or fn */ unsigned threadprivate_or_deleted_p : 1; /* var or fn */ - unsigned anticipated_p : 1; /* fn, type or template */ - /* anticipated_p reused as DECL_OMP_PRIVATIZED_MEMBER in var */ + /* anticipated_p is no longer used for anticipated_decls (fn, type + or template). It is used as DECL_OMP_PRIVATIZED_MEMBER in + var. */ + unsigned anticipated_p : 1; unsigned friend_or_tls : 1; /* var, fn, type or template */ unsigned unknown_bound_p : 1; /* var */ unsigned odr_used : 1; /* var or fn */ @@ -4037,13 +4039,6 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter) #define DECL_BUILTIN_P(NODE) \ (DECL_SOURCE_LOCATION(NODE) == BUILTINS_LOCATION) -/* Nonzero if NODE is a DECL which we know about but which has not - been explicitly declared, such as a built-in function or a friend - declared inside a class. */ -#define DECL_ANTICIPATED(NODE) \ - (DECL_LANG_SPECIFIC (TYPE_FUNCTION_OR_TEMPLATE_DECL_CHECK (NODE)) \ - ->u.base.anticipated_p) - /* True for artificial decls added for OpenMP privatized non-static data members. 
*/ #define DECL_OMP_PRIVATIZED_MEMBER(NODE) \ diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 6b306ee..f333a36 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -1444,7 +1444,7 @@ tree duplicate_decls (tree newdecl, tree olddecl, bool hiding, bool was_hidden) { unsigned olddecl_uid = DECL_UID (olddecl); - int olddecl_friend = 0, types_match = 0, hidden_friend = 0; + int olddecl_friend = 0, types_match = 0; int olddecl_hidden_friend = 0; int new_defines_function = 0; tree new_template_info; @@ -1473,7 +1473,7 @@ duplicate_decls (tree newdecl, tree olddecl, bool hiding, bool was_hidden) { /* Avoid warnings redeclaring built-ins which have not been explicitly declared. */ - if (DECL_ANTICIPATED (olddecl)) + if (was_hidden) { if (TREE_PUBLIC (newdecl) && CP_DECL_CONTEXT (newdecl) == global_namespace) @@ -1645,7 +1645,7 @@ duplicate_decls (tree newdecl, tree olddecl, bool hiding, bool was_hidden) /* If a function is explicitly declared "throw ()", propagate that to the corresponding builtin. */ if (DECL_BUILT_IN_CLASS (olddecl) == BUILT_IN_NORMAL - && DECL_ANTICIPATED (olddecl) + && was_hidden && TREE_NOTHROW (newdecl) && !TREE_NOTHROW (olddecl)) { @@ -2139,9 +2139,6 @@ duplicate_decls (tree newdecl, tree olddecl, bool hiding, bool was_hidden) { olddecl_friend = DECL_FRIEND_P (STRIP_TEMPLATE (olddecl)); olddecl_hidden_friend = olddecl_friend && was_hidden; - hidden_friend = olddecl_hidden_friend && hiding; - if (!hidden_friend) - DECL_ANTICIPATED (olddecl) = false; } if (TREE_CODE (newdecl) == TEMPLATE_DECL) @@ -2890,8 +2887,6 @@ duplicate_decls (tree newdecl, tree olddecl, bool hiding, bool was_hidden) DECL_UID (olddecl) = olddecl_uid; if (olddecl_friend) DECL_FRIEND_P (olddecl) = true; - if (hidden_friend) - DECL_ANTICIPATED (olddecl) = true; /* NEWDECL contains the merged attribute lists. Update OLDDECL to be the same. 
*/ @@ -4690,21 +4685,15 @@ cxx_builtin_function (tree decl) const char *name = IDENTIFIER_POINTER (id); bool hiding = false; if (name[0] != '_' || name[1] != '_') - { - /* In the user's namespace, it must be declared before use. */ - DECL_ANTICIPATED (decl) = 1; - hiding = true; - } + /* In the user's namespace, it must be declared before use. */ + hiding = true; else if (IDENTIFIER_LENGTH (id) > strlen ("___chk") && 0 != strncmp (name + 2, "builtin_", strlen ("builtin_")) && 0 == memcmp (name + IDENTIFIER_LENGTH (id) - strlen ("_chk"), "_chk", strlen ("_chk") + 1)) - { - /* Treat __*_chk fortification functions as anticipated as well, - unless they are __builtin_*_chk. */ - DECL_ANTICIPATED (decl) = 1; - hiding = true; - } + /* Treat __*_chk fortification functions as anticipated as well, + unless they are __builtin_*_chk. */ + hiding = true; /* All builtins that don't begin with an '_' should additionally go in the 'std' namespace. */ @@ -15063,10 +15052,6 @@ xref_tag_1 (enum tag_types tag_code, tree name, inform (location_of (t), "previous declaration %qD", t); return error_mark_node; } - - gcc_checking_assert (how == TAG_how::HIDDEN_FRIEND - || !(DECL_LANG_SPECIFIC (TYPE_NAME (t)) - && DECL_ANTICIPATED (TYPE_NAME (t)))); } return t; diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index 4024cea..190b56b 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -842,12 +842,6 @@ name_lookup::adl_class_only (tree type) if (CP_DECL_CONTEXT (fn) != context) continue; - /* Only interested in anticipated friends. (Non-anticipated - ones will have been inserted during the namespace - adl.) */ - if (!DECL_ANTICIPATED (fn)) - continue; - /* Template specializations are never found by name lookup. (Templates themselves can be found, but not template specializations.) 
*/ @@ -1079,11 +1073,8 @@ name_lookup::adl_template_arg (tree arg) tree name_lookup::search_adl (tree fns, vec *args) { - if (fns) - { - deduping = true; - lookup_mark (fns, true); - } + deduping = true; + lookup_mark (fns, true); value = fns; unsigned ix; @@ -2136,19 +2127,9 @@ strip_using_decl (tree decl) static bool anticipated_builtin_p (tree ovl) { - if (TREE_CODE (ovl) != OVERLOAD) - return false; - - if (!OVL_HIDDEN_P (ovl)) - return false; - - tree fn = OVL_FUNCTION (ovl); - gcc_checking_assert (DECL_ANTICIPATED (fn)); - - if (DECL_BUILTIN_P (fn)) - return true; - - return false; + return (TREE_CODE (ovl) == OVERLOAD + && OVL_HIDDEN_P (ovl) + && DECL_BUILTIN_P (OVL_FUNCTION (ovl))); } /* BINDING records an existing declaration for a name in the current scope. @@ -3079,14 +3060,6 @@ do_pushdecl (tree decl, bool hiding) tree *slot = NULL; /* Binding slot in namespace. */ tree old = NULL_TREE; - if (!hiding) - /* We should never unknownly push an anticipated decl. */ - gcc_checking_assert (!((TREE_CODE (decl) == TYPE_DECL - || TREE_CODE (decl) == FUNCTION_DECL - || TREE_CODE (decl) == TEMPLATE_DECL) - && DECL_LANG_SPECIFIC (decl) - && DECL_ANTICIPATED (decl))); - if (level->kind == sk_namespace) { /* We look in the decl's namespace for an existing @@ -3171,8 +3144,6 @@ do_pushdecl (tree decl, bool hiding) /* Don't attempt to push it. */ return error_mark_node; } - /* Hide it from ordinary lookup. */ - DECL_ANTICIPATED (decl) = true; } } @@ -6730,7 +6701,6 @@ lookup_elaborated_type_1 (tree name, TAG_how how) HIDDEN_TYPE_BINDING_P (iter) = false; /* Unanticipate the decl itself. 
*/ - DECL_ANTICIPATED (found) = false; DECL_FRIEND_P (found) = false; gcc_checking_assert (TREE_CODE (found) != TEMPLATE_DECL); @@ -6738,7 +6708,6 @@ lookup_elaborated_type_1 (tree name, TAG_how how) if (tree ti = TYPE_TEMPLATE_INFO (TREE_TYPE (found))) { tree tmpl = TI_TEMPLATE (ti); - DECL_ANTICIPATED (tmpl) = false; DECL_FRIEND_P (tmpl) = false; } } @@ -6799,18 +6768,17 @@ lookup_elaborated_type_1 (tree name, TAG_how how) if (reveal) { /* Reveal the previously hidden thing. */ - DECL_ANTICIPATED (found) = false; DECL_FRIEND_P (found) = false; if (TREE_CODE (found) == TEMPLATE_DECL) { - DECL_ANTICIPATED (DECL_TEMPLATE_RESULT (found)) = false; - DECL_FRIEND_P (DECL_TEMPLATE_RESULT (found)) = false; + tree res = DECL_TEMPLATE_RESULT (found); + if (DECL_LANG_SPECIFIC (res)) + DECL_FRIEND_P (res) = false; } else if (tree ti = TYPE_TEMPLATE_INFO (TREE_TYPE (found))) { tree tmpl = TI_TEMPLATE (ti); - DECL_ANTICIPATED (tmpl) = false; DECL_FRIEND_P (tmpl) = false; } } @@ -7019,7 +6987,6 @@ do_pushtag (tree name, tree type, TAG_how how) ordinary name lookup. Its corresponding TEMPLATE_DECL will be marked in push_template_decl. */ retrofit_lang_decl (tdef); - DECL_ANTICIPATED (tdef) = 1; DECL_FRIEND_P (tdef) = 1; } diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 72efecf..1ab5435 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -6024,10 +6024,7 @@ push_template_decl (tree decl, bool is_friend) { /* Hide template friend classes that haven't been declared yet. */ if (is_friend && TREE_CODE (decl) == TYPE_DECL) - { - DECL_ANTICIPATED (tmpl) = 1; - DECL_FRIEND_P (tmpl) = 1; - } + DECL_FRIEND_P (tmpl) = 1; tmpl = pushdecl_namespace_level (tmpl, /*hiding=*/is_friend); if (tmpl == error_mark_node) @@ -11311,11 +11308,6 @@ tsubst_friend_class (tree friend_tmpl, tree args) CLASSTYPE_TI_ARGS (TREE_TYPE (tmpl)) = INNERMOST_TEMPLATE_ARGS (CLASSTYPE_TI_ARGS (TREE_TYPE (tmpl))); - /* It is hidden. 
*/ - retrofit_lang_decl (DECL_TEMPLATE_RESULT (tmpl)); - DECL_ANTICIPATED (tmpl) - = DECL_ANTICIPATED (DECL_TEMPLATE_RESULT (tmpl)) = true; - /* Substitute into and set the constraints on the new declaration. */ if (tree ci = get_constraints (friend_tmpl)) { -- cgit v1.1 From 47f09ec9717058ada97be33bcbb23ceb6322ba61 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Thu, 1 Oct 2020 16:40:17 -0400 Subject: c++: Fix printing of C++20 template parameter object [PR97014] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No one is interested in the mangled name of the C++20 template parameter object for a class NTTP. So instead of printing required for the satisfaction of ‘positive’ [with T = X<::_ZTAXtl5ratioLin1ELi2EEE>] let's print required for the satisfaction of ‘positive’ [with T = X<{-1, 2}>] I don't think adding a test is necessary for this. gcc/cp/ChangeLog: PR c++/97014 * cxx-pretty-print.c (pp_cxx_template_argument_list): If the argument is template_parm_object_p, print its DECL_INITIAL. --- gcc/cp/cxx-pretty-print.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'gcc') diff --git a/gcc/cp/cxx-pretty-print.c b/gcc/cp/cxx-pretty-print.c index d10c18d..8bea79b 100644 --- a/gcc/cp/cxx-pretty-print.c +++ b/gcc/cp/cxx-pretty-print.c @@ -1910,6 +1910,8 @@ pp_cxx_template_argument_list (cxx_pretty_printer *pp, tree t) if (TYPE_P (arg) || (TREE_CODE (arg) == TEMPLATE_DECL && TYPE_P (DECL_TEMPLATE_RESULT (arg)))) pp->type_id (arg); + else if (template_parm_object_p (arg)) + pp->expression (DECL_INITIAL (arg)); else pp->expression (arg); } -- cgit v1.1 From 9885183c08138752a954b68a3328e749976d8f93 Mon Sep 17 00:00:00 2001 From: David Edelsohn Date: Fri, 2 Oct 2020 12:09:52 -0400 Subject: rs6000: clean up headers in rs6000.c and rs6000-call.c When Andrew Macleod investigated the recent rs6000 bootstrap failure, he suggested a clean up of the headers in rs6000.c and rs6000-call.c. 
It now is recommended to include ssa.h instead of the individual headers. This also ensures that value-range.h is included and in the correct order so that the tree-ssa-propagate.h inclusion of value-query.h and its dependencies are satisfied. Bootstrapped on powerpc-ibm-aix7.2.0.0 and powerpc64le-linux. gcc/ChangeLog: 2020-10-02 David Edelsohn Andrew MacLeod * config/rs6000/rs6000.c: Include ssa.h. Reorder some headers. * config/rs6000/rs6000-call.c: Same. --- gcc/config/rs6000/rs6000-call.c | 7 ++----- gcc/config/rs6000/rs6000.c | 9 +++------ 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index d10119b..9fdf97b 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -57,17 +57,14 @@ #include "gimplify.h" #include "gimple-fold.h" #include "gimple-iterator.h" -#include "gimple-ssa.h" +#include "ssa.h" +#include "tree-ssa-propagate.h" #include "builtins.h" #include "tree-vector-builder.h" #if TARGET_XCOFF #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ #endif #include "ppc-auxv.h" -#include "value-range.h" -#include "tree-ssa-propagate.h" -#include "tree-vrp.h" -#include "tree-ssanames.h" #include "targhooks.h" #include "opts.h" diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 69c4f36..b58eeae 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -59,11 +59,12 @@ #include "gimplify.h" #include "gimple-fold.h" #include "gimple-iterator.h" -#include "gimple-ssa.h" #include "gimple-walk.h" +#include "ssa.h" +#include "tree-vectorizer.h" +#include "tree-ssa-propagate.h" #include "intl.h" #include "tm-constrs.h" -#include "tree-vectorizer.h" #include "target-globals.h" #include "builtins.h" #include "tree-vector-builder.h" @@ -75,10 +76,6 @@ #endif #include "case-cfn-macros.h" #include "ppc-auxv.h" -#include "value-range.h" -#include "tree-ssa-propagate.h" -#include 
"tree-vrp.h" -#include "tree-ssanames.h" #include "rs6000-internal.h" #include "opts.h" -- cgit v1.1 From b0b9b8f02a1cdf910e63b726db22de99d90b3259 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sat, 3 Oct 2020 00:16:25 +0000 Subject: Daily bump. --- gcc/ChangeLog | 372 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/cp/ChangeLog | 65 +++++++++ gcc/fortran/ChangeLog | 9 ++ gcc/testsuite/ChangeLog | 92 ++++++++++++ 5 files changed, 539 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index dd0710e..131b253b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,375 @@ +2020-10-02 David Edelsohn + Andrew MacLeod + + * config/rs6000/rs6000.c: Include ssa.h. Reorder some headers. + * config/rs6000/rs6000-call.c: Same. + +2020-10-02 Martin Jambor + + * params.opt (ipa-cp-large-unit-insns): New parameter. + * ipa-cp.c (get_max_overall_size): Use the new parameter. + +2020-10-02 Martin Jambor + + * ipa-cp.c (estimate_local_effects): Add overeall_size to dumped + string. + (decide_about_value): Add dumping new overall_size. + +2020-10-02 Martin Jambor + + * ipa-fnsummary.h (ipa_freqcounting_predicate): New type. + (ipa_fn_summary): Change the type of loop_iterations and loop_strides + to vectors of ipa_freqcounting_predicate. + (ipa_fn_summary::ipa_fn_summary): Construct the new vectors. + (ipa_call_estimates): New fields loops_with_known_iterations and + loops_with_known_strides. + * ipa-cp.c (hint_time_bonus): Multiply param_ipa_cp_loop_hint_bonus + with the expected frequencies of loops with known iteration count or + stride. + * ipa-fnsummary.c (add_freqcounting_predicate): New function. + (ipa_fn_summary::~ipa_fn_summary): Release the new vectors instead of + just two predicates. + (remap_hint_predicate_after_duplication): Replace with function + remap_freqcounting_preds_after_dup. + (ipa_fn_summary_t::duplicate): Use it or duplicate new vectors. 
+ (ipa_dump_fn_summary): Dump the new vectors. + (analyze_function_body): Compute the loop property vectors. + (ipa_call_context::estimate_size_and_time): Calculate also + loops_with_known_iterations and loops_with_known_strides. Adjusted + dumping accordinly. + (remap_hint_predicate): Replace with function + remap_freqcounting_predicate. + (ipa_merge_fn_summary_after_inlining): Use it. + (inline_read_section): Stream loopcounting vectors instead of two + simple predicates. + (ipa_fn_summary_write): Likewise. + * params.opt (ipa-max-loop-predicates): New parameter. + * doc/invoke.texi (ipa-max-loop-predicates): Document new param. + +2020-10-02 Martin Jambor + + * ipa-inline-analysis.c (do_estimate_edge_time): Adjusted to use + ipa_call_estimates. + (do_estimate_edge_size): Likewise. + (do_estimate_edge_hints): Likewise. + * ipa-fnsummary.h (struct ipa_call_estimates): New type. + (ipa_call_context::estimate_size_and_time): Adjusted declaration. + (estimate_ipcp_clone_size_and_time): Likewise. + * ipa-cp.c (hint_time_bonus): Changed the type of the second argument + to ipa_call_estimates. + (perform_estimation_of_a_value): Adjusted to use ipa_call_estimates. + (estimate_local_effects): Likewise. + * ipa-fnsummary.c (ipa_call_context::estimate_size_and_time): Adjusted + to return estimates in a single ipa_call_estimates parameter. + (estimate_ipcp_clone_size_and_time): Likewise. + +2020-10-02 Martin Jambor + + * ipa-fnsummary.h (ipa_cached_call_context): New forward declaration + and class. + (class ipa_call_context): Make friend ipa_cached_call_context. Moved + methods duplicate_from and release to it too. + * ipa-fnsummary.c (ipa_call_context::duplicate_from): Moved to class + ipa_cached_call_context. + (ipa_call_context::release): Likewise, removed the parameter. + * ipa-inline-analysis.c (node_context_cache_entry): Change the type of + ctx to ipa_cached_call_context. + (do_estimate_edge_time): Remove parameter from the call to + ipa_cached_call_context::release. 
+ +2020-10-02 Martin Jambor + + * ipa-prop.h (ipa_auto_call_arg_values): New type. + (class ipa_call_arg_values): Likewise. + (ipa_get_indirect_edge_target): Replaced vector arguments with + ipa_call_arg_values in declaration. Added an overload for + ipa_auto_call_arg_values. + * ipa-fnsummary.h (ipa_call_context): Removed members m_known_vals, + m_known_contexts, m_known_aggs, duplicate_from, release and equal_to, + new members m_avals, store_to_cache and equivalent_to_p. Adjusted + construcotr arguments. + (estimate_ipcp_clone_size_and_time): Replaced vector arguments + with ipa_auto_call_arg_values in declaration. + (evaluate_properties_for_edge): Likewise. + * ipa-cp.c (ipa_get_indirect_edge_target): Adjusted to work on + ipa_call_arg_values rather than on separate vectors. Added an + overload for ipa_auto_call_arg_values. + (devirtualization_time_bonus): Adjusted to work on + ipa_auto_call_arg_values rather than on separate vectors. + (gather_context_independent_values): Adjusted to work on + ipa_auto_call_arg_values rather than on separate vectors. + (perform_estimation_of_a_value): Likewise. + (estimate_local_effects): Likewise. + (modify_known_vectors_with_val): Adjusted both variants to work on + ipa_auto_call_arg_values and rename them to + copy_known_vectors_add_val. + (decide_about_value): Adjusted to work on ipa_call_arg_values rather + than on separate vectors. + (decide_whether_version_node): Likewise. + * ipa-fnsummary.c (evaluate_conditions_for_known_args): Likewise. + (evaluate_properties_for_edge): Likewise. + (ipa_fn_summary_t::duplicate): Likewise. + (estimate_edge_devirt_benefit): Adjusted to work on + ipa_call_arg_values rather than on separate vectors. + (estimate_edge_size_and_time): Likewise. + (estimate_calls_size_and_time_1): Likewise. + (summarize_calls_size_and_time): Adjusted calls to + estimate_edge_size_and_time. + (estimate_calls_size_and_time): Adjusted to work on + ipa_call_arg_values rather than on separate vectors. 
+ (ipa_call_context::ipa_call_context): Construct from a pointer to + ipa_auto_call_arg_values instead of inividual vectors. + (ipa_call_context::duplicate_from): Adjusted to access vectors within + m_avals. + (ipa_call_context::release): Likewise. + (ipa_call_context::equal_to): Likewise. + (ipa_call_context::estimate_size_and_time): Adjusted to work on + ipa_call_arg_values rather than on separate vectors. + (estimate_ipcp_clone_size_and_time): Adjusted to work with + ipa_auto_call_arg_values rather than on separate vectors. + (ipa_merge_fn_summary_after_inlining): Likewise. Adjusted call to + estimate_edge_size_and_time. + (ipa_update_overall_fn_summary): Adjusted call to + estimate_edge_size_and_time. + * ipa-inline-analysis.c (do_estimate_edge_time): Adjusted to work with + ipa_auto_call_arg_values rather than with separate vectors. + (do_estimate_edge_size): Likewise. + (do_estimate_edge_hints): Likewise. + * ipa-prop.c (ipa_auto_call_arg_values::~ipa_auto_call_arg_values): + New destructor. + +2020-10-02 Joe Ramsay + + * config/arm/arm_mve.h (__arm_vmaxnmavq): Remove coercion of scalar + argument. + (__arm_vmaxnmvq): Likewise. + (__arm_vminnmavq): Likewise. + (__arm_vminnmvq): Likewise. + (__arm_vmaxnmavq_p): Likewise. + (__arm_vmaxnmvq_p): Likewise (and delete duplicate definition). + (__arm_vminnmavq_p): Likewise. + (__arm_vminnmvq_p): Likewise. + (__arm_vmaxavq): Likewise. + (__arm_vmaxavq_p): Likewise. + (__arm_vmaxvq): Likewise. + (__arm_vmaxvq_p): Likewise. + (__arm_vminavq): Likewise. + (__arm_vminavq_p): Likewise. + (__arm_vminvq): Likewise. + (__arm_vminvq_p): Likewise. + +2020-10-02 Kyrylo Tkachov + + * config/aarch64/aarch64.c (neoversev1_tunings): Define. + * config/aarch64/aarch64-cores.def (zeus): Use it. + (neoverse-v1): Likewise. + +2020-10-02 Jan Hubicka + + * attr-fnspec.h: Update documentation. + (attr_fnsec::return_desc_size): Set to 2 + (attr_fnsec::arg_desc_size): Set to 2 + * builtin-attrs.def (STR1): Update fnspec. 
+ * internal-fn.def (UBSAN_NULL): Update fnspec. + (UBSAN_VPTR): Update fnspec. + (UBSAN_PTR): Update fnspec. + (ASAN_CHECK): Update fnspec. + (GOACC_DIM_SIZE): Remove fnspec. + (GOACC_DIM_POS): Remove fnspec. + * tree-ssa-alias.c (attr_fnspec::verify): Update verification. + +2020-10-02 Jan Hubicka + + * attr-fnspec.h: New file. + * calls.c (decl_return_flags): Use attr_fnspec. + * gimple.c (gimple_call_arg_flags): Use attr_fnspec. + (gimple_call_return_flags): Use attr_fnspec. + * tree-into-ssa.c (pass_build_ssa::execute): Use attr_fnspec. + * tree-ssa-alias.c (attr_fnspec::verify): New member fuction. + +2020-10-02 Jan Hubicka + + * tree-ssa-alias.c (ao_ref_init_from_ptr_and_range): Break out from ... + (ao_ref_init_from_ptr_and_size): ... here. + +2020-10-02 Jan Hubicka + + * data-streamer-in.c (streamer_read_poly_int64): New function. + * data-streamer-out.c (streamer_write_poly_int64): New function. + * data-streamer.h (streamer_write_poly_int64): Declare. + (streamer_read_poly_int64): Declare. + +2020-10-02 Richard Sandiford + + * config/aarch64/aarch64-protos.h (aarch64_sve_pred_dominates_p): + Delete. + * config/aarch64/aarch64.c (aarch64_sve_pred_dominates_p): Likewise. + * config/aarch64/aarch64-sve.md: Add banner comment describing + how merging predicated FP operations are represented. + (*cond__2): Split into... + (*cond__2_relaxed): ...this and... + (*cond__2_strict): ...this. + (*cond__any): Split into... + (*cond__any_relaxed): ...this and... + (*cond__any_strict): ...this. + (*cond__2): Split into... + (*cond__2_relaxed): ...this and... + (*cond__2_strict): ...this. + (*cond__any): Split into... + (*cond__any_relaxed): ...this + and... + (*cond__any_strict): ...this. + (*cond__2): Split into... + (*cond__2_relaxed): ...this and... + (*cond__2_strict): ...this. + (*cond__2_const): Split into... + (*cond__2_const_relaxed): ...this + and... + (*cond__2_const_strict): ...this. + (*cond__3): Split into... + (*cond__3_relaxed): ...this and... 
+ (*cond__3_strict): ...this. + (*cond__any): Split into... + (*cond__any_relaxed): ...this and... + (*cond__any_strict): ...this. + (*cond__any_const): Split into... + (*cond__any_const_relaxed): ...this + and... + (*cond__any_const_strict): ...this. + (*cond_add_2_const): Split into... + (*cond_add_2_const_relaxed): ...this and... + (*cond_add_2_const_strict): ...this. + (*cond_add_any_const): Split into... + (*cond_add_any_const_relaxed): ...this and... + (*cond_add_any_const_strict): ...this. + (*cond__2): Split into... + (*cond__2_relaxed): ...this and... + (*cond__2_strict): ...this. + (*cond__any): Split into... + (*cond__any_relaxed): ...this and... + (*cond__any_strict): ...this. + (*cond_sub_3_const): Split into... + (*cond_sub_3_const_relaxed): ...this and... + (*cond_sub_3_const_strict): ...this. + (*aarch64_pred_abd): Split into... + (*aarch64_pred_abd_relaxed): ...this and... + (*aarch64_pred_abd_strict): ...this. + (*aarch64_cond_abd_2): Split into... + (*aarch64_cond_abd_2_relaxed): ...this and... + (*aarch64_cond_abd_2_strict): ...this. + (*aarch64_cond_abd_3): Split into... + (*aarch64_cond_abd_3_relaxed): ...this and... + (*aarch64_cond_abd_3_strict): ...this. + (*aarch64_cond_abd_any): Split into... + (*aarch64_cond_abd_any_relaxed): ...this and... + (*aarch64_cond_abd_any_strict): ...this. + (*cond__2): Split into... + (*cond__2_relaxed): ...this and... + (*cond__2_strict): ...this. + (*cond__4): Split into... + (*cond__4_relaxed): ...this and... + (*cond__4_strict): ...this. + (*cond__any): Split into... + (*cond__any_relaxed): ...this and... + (*cond__any_strict): ...this. + (*cond__4): Split into... + (*cond__4_relaxed): ...this and... + (*cond__4_strict): ...this. + (*cond__any): Split into... + (*cond__any_relaxed): ...this and... + (*cond__any_strict): ...this. + (*aarch64_pred_fac): Split into... + (*aarch64_pred_fac_relaxed): ...this and... + (*aarch64_pred_fac_strict): ...this. + (*cond__nontrunc): Split + into... 
+ (*cond__nontrunc_relaxed): + ...this and... + (*cond__nontrunc_strict): + ...this. + (*cond__nonextend): Split + into... + (*cond__nonextend_relaxed): + ...this and... + (*cond__nonextend_strict): + ...this. + * config/aarch64/aarch64-sve2.md + (*cond_): Split into... + (*cond__relaxed): ...this and... + (*cond__strict): ...this. + (*cond__any): Split into... + (*cond__any_relaxed): ...this + and... + (*cond__any_strict): ...this. + (*cond_): Split into... + (*cond__relaxed): ...this and... + (*cond__strict): ...this. + +2020-10-02 Richard Sandiford + + * config/arm/neon.md (*sub3_neon): Use the new mode macros + for the insn condition. + (sub3, *mul3_neon): Likewise. + (mul3add_neon): Likewise. + (mul3add_neon): Likewise. + (mul3negadd_neon): Likewise. + (fma4, fma4, *fmsub4): Likewise. + (quad_halves_v4sf, reduc_plus_scal_): Likewise. + (reduc_plus_scal_, reduc_smin_scal_): Likewise. + (reduc_smin_scal_, reduc_smax_scal_): Likewise. + (reduc_smax_scal_, mul3): Likewise. + (neon_vabd_2, neon_vabd_3): Likewise. + (fma4_intrinsic): Delete. + (neon_vadd): Use the new mode macros to decide which + form of instruction to generate. + (neon_vmla, neon_vmls): Likewise. + (neon_vsub): Likewise. + (neon_vfma): Generate the main fma4 form instead + of using fma4_intrinsic. + +2020-10-02 Martin Liska + + PR gcov-profile/97193 + * coverage.c (coverage_init): GCDA note files should not be + mangled and should end in output directory. + +2020-10-02 Jason Merril + + * gimple.h (gimple_call_operator_delete_p): Rename from + gimple_call_replaceable_operator_delete_p. + * gimple.c (gimple_call_operator_delete_p): Likewise. + * tree.h (DECL_IS_REPLACEABLE_OPERATOR_DELETE_P): Remove. + * tree-ssa-dce.c (mark_all_reaching_defs_necessary_1): Adjust. + (propagate_necessity): Likewise. + (eliminate_unnecessary_stmts): Likewise. + * tree-ssa-structalias.c (find_func_aliases_for_call): Likewise. + +2020-10-02 Richard Biener + + * gimple.h (GF_CALL_FROM_NEW_OR_DELETE): New call flag. 
+ (gimple_call_set_from_new_or_delete): New. + (gimple_call_from_new_or_delete): Likewise. + * gimple.c (gimple_build_call_from_tree): Set + GF_CALL_FROM_NEW_OR_DELETE appropriately. + * ipa-icf-gimple.c (func_checker::compare_gimple_call): + Compare gimple_call_from_new_or_delete. + * tree-ssa-dce.c (mark_all_reaching_defs_necessary_1): Make + sure to only consider new/delete calls from new or delete + expressions. + (propagate_necessity): Likewise. + (eliminate_unnecessary_stmts): Likewise. + * tree-ssa-structalias.c (find_func_aliases_for_call): + Likewise. + +2020-10-02 Jason Merril + + * tree.h (CALL_FROM_NEW_OR_DELETE_P): Move from cp-tree.h. + * tree-core.h: Document new usage of protected_flag. + +2020-10-02 Aldy Hernandez + + * value-range.h (irange::fits_p): New. + 2020-10-01 Alan Modra * config/rs6000/rs6000.c (rs6000_legitimize_address): Use diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 4d58d2f..c28ca09 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20201002 +20201003 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index ed416cc..472fcbe 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,68 @@ +2020-10-02 Marek Polacek + + PR c++/97014 + * cxx-pretty-print.c (pp_cxx_template_argument_list): If the + argument is template_parm_object_p, print its DECL_INITIAL. + +2020-10-02 Nathan Sidwell + + * cp-tree.h (lang_decl_base): anticipated_p is not used for + anticipatedness. + (DECL_ANTICIPATED): Delete. + * decl.c (duplicate_decls): Delete DECL_ANTICIPATED_management, + use was_hidden. + (cxx_builtin_function): Drop DECL_ANTICIPATED setting. + (xref_tag_1): Drop DECL_ANTICIPATED assert. + * name-lookup.c (name_lookup::adl_class_only): Drop + DECL_ANTICIPATED check. + (name_lookup::search_adl): Always dedup. + (anticipated_builtin_p): Reimplement. + (do_pushdecl): Drop DECL_ANTICIPATED asserts & update. + (lookup_elaborated_type_1): Drop DECL_ANTICIPATED update. + (do_pushtag): Drop DECL_ANTICIPATED setting. 
+ * pt.c (push_template_decl): Likewise. + (tsubst_friend_class): Likewise. + +2020-10-02 Nathan Sidwell + + * name-lookup.c (consider_decl): New, broken out of ... + (consider_binding_level): ... here. Iterate the hash table for + namespace bindings. + +2020-10-02 Nathan Sidwell + + * cp-tree.h (base_ctor_omit_inherited_parms): Declare. + * class.c (add_method): Refactor main loop, only pass fns to + ctor_omit_inherited_parms. + (build_cdtor_clones): Rename bool parms. + (clone_cdtor): Call base_ctor_omit_inherited_parms. + * method.c (base_ctor_omit_inherited_parms): New, broken out of + ... + (ctor_omit_inherited_parms): ... here, call it with + DECL_CLONED_FUNCTION. + +2020-10-02 Nathan Sidwell + + * cp-tree.h (cp_fname_init): Delete declaration. + * decl.c (cp_fname_init): Merge into only caller ... + (cp_make_fname): ... here & refactor. + +2020-10-02 Jason Merril + + * call.c (build_operator_new_call): Set CALL_FROM_NEW_OR_DELETE_P. + (build_op_delete_call): Likewise. + * init.c (build_new_1, build_vec_delete_1, build_delete): Not here. + (build_delete): + +2020-10-02 Jason Merril + + * lambda.c (call_from_lambda_thunk_p): New. + * cp-gimplify.c (cp_genericize_r): Use it. + * pt.c (tsubst_copy_and_build): Use it. + * typeck.c (check_return_expr): Use it. + * cp-tree.h: Declare it. + (CALL_FROM_NEW_OR_DELETE_P): Move to gcc/tree.h. + 2020-10-01 Nathan Sidwell * cp-tree.h (DECL_ANTICIPATED): Adjust comment. diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index fc65592..af9cc74 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,12 @@ +2020-10-02 Jan Hubicka + + * trans-decl.c (gfc_build_library_function_decl_with_spec): Verify + fnspec. + (gfc_build_intrinsic_function_decls): Update fnspecs. + (gfc_build_builtin_function_decls): Update fnspecs. + * trans-io.c (gfc_build_io_library_fndecls): Update fnspecs. + * trans-types.c (create_fn_spec): Update fnspecs. 
+ 2020-09-30 Jan Hubicka * trans-decl.c (gfc_build_intrinsic_function_decls): Add traling dots diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3f802a2..e84bf8e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,95 @@ +2020-10-02 Nathan Sidwell + + * c-c++-common/spellcheck-reserved.c: Adjust diagnostic. + * g++.dg/spellcheck-typenames.C: Adjust diagnostic. + +2020-10-02 Nathan Sidwell + + * g++.dg/inherit/pr97268.C: New. + +2020-10-02 Martin Jambor + + * gcc.dg/ipa/ipcp-loophint-1.c: New test. + +2020-10-02 Joe Ramsay + + * gcc.target/arm/mve/intrinsics/vmaxavq_p_s16.c: Add test for mismatched + width of scalar argument. + * gcc.target/arm/mve/intrinsics/vmaxavq_p_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxavq_p_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxavq_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxavq_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxavq_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmavq_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmavq_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmavq_p_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmvq_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmvq_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxnmvq_p_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxvq_p_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxvq_p_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxvq_p_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxvq_p_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxvq_p_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxvq_p_u8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxvq_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxvq_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxvq_s8.c: Likewise. 
+ * gcc.target/arm/mve/intrinsics/vmaxvq_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxvq_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vmaxvq_u8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminavq_p_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminavq_p_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminavq_p_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminavq_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminavq_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminavq_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmavq_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmavq_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmavq_p_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmavq_p_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmvq_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmvq_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmvq_p_f16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminnmvq_p_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_p_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_p_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_p_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_p_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_p_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_p_u8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vminvq_u8.c: Likewise. + +2020-10-02 Richard Sandiford + + * gcc.target/arm/armv8_2-fp16-arith-2.c (float16_t): Use _Float16_t + rather than __fp16. + (float16x4_t, float16x4_t): Likewise. + (fp16_abs): Use __builtin_fabsf16. 
+ +2020-10-02 Alex Coplan + + * gcc.target/aarch64/extend-syntax.c: Fix assembler checks for + ilp32, disable check-function-bodies on ilp32. + * gcc.target/aarch64/subsp.c: Only check second scan-assembler + on lp64 since the code on ilp32 is missing the optimization + needed for this test to pass. + +2020-10-02 Jason Merril + + * g++.dg/pr94314.C: new/delete no longer omitted. + +2020-10-02 Richard Biener + + * g++.dg/tree-ssa/pta-delete-1.C: New testcase. + 2020-10-01 Richard Sandiford * lib/target-supports.exp (check_effective_target_vect_cond_mixed): Add -- cgit v1.1 From 8510e3301bd519352fc20876da8994f68a0c7e93 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Sat, 3 Oct 2020 07:20:48 -0700 Subject: doc: Replace roudnevenl with roundevenl PR other/97280 * doc/extend.texi: Replace roudnevenl with roundevenl --- gcc/doc/extend.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index c9f7299..b9684dc 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -12775,7 +12775,7 @@ Outside strict ISO C mode (@option{-ansi}, @option{-std=c90}, @code{j1f}, @code{j1l}, @code{j1}, @code{jnf}, @code{jnl}, @code{jn}, @code{lgammaf_r}, @code{lgammal_r}, @code{lgamma_r}, @code{mempcpy}, @code{pow10f}, @code{pow10l}, @code{pow10}, @code{printf_unlocked}, -@code{rindex}, @code{roundeven}, @code{roundevenf}, @code{roudnevenl}, +@code{rindex}, @code{roundeven}, @code{roundevenf}, @code{roundevenl}, @code{scalbf}, @code{scalbl}, @code{scalb}, @code{signbit}, @code{signbitf}, @code{signbitl}, @code{signbitd32}, @code{signbitd64}, @code{signbitd128}, @code{significandf}, -- cgit v1.1 From c34db4b6f8a5d80367c709309f9b00cb32630054 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 3 Oct 2020 17:20:16 +0200 Subject: Track access ranges in ipa-modref this patch implements tracking of access ranges. This is only applied when base pointer is an arugment. 
Incrementally I will extend it to also track the TBAA basetype so we can disambiguate ranges for accesses to the same basetype (which makes it quite a bit more effective). For this reason I track the access offset separately from the parameter offset (the second tracks combined adjustments to the parameter). This is, I think, the last feature I would like to add to the memory access summary this stage1. Further work will be needed to optimize the summary and merge adjacent ranges/make collapsing more intelligent (so we do not lose track that often), but I wanted to keep the basic patch simple. According to the cc1plus stats: Alias oracle query stats: refs_may_alias_p: 64108082 disambiguations, 74386675 queries ref_maybe_used_by_call_p: 142319 disambiguations, 65004781 queries call_may_clobber_ref_p: 23587 disambiguations, 29420 queries nonoverlapping_component_refs_p: 0 disambiguations, 38117 queries nonoverlapping_refs_since_match_p: 19489 disambiguations, 55748 must overlaps, 76044 queries aliasing_component_refs_p: 54763 disambiguations, 755876 queries TBAA oracle: 24184658 disambiguations 56823187 queries 16260329 are in alias set 0 10617146 queries asked about the same object 125 queries asked about the same alias set 0 access volatile 3960555 are dependent in the DAG 1800374 are aritificially in conflict with void * Modref stats: modref use: 10656 disambiguations, 47037 queries modref clobber: 1473322 disambiguations, 1961464 queries 5027242 tbaa queries (2.563005 per modref query) 649087 base compares (0.330920 per modref query) PTA query stats: pt_solution_includes: 977385 disambiguations, 13609749 queries pt_solutions_intersect: 1032703 disambiguations, 13187507 queries Which should still compare with https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554930.html: there are about 2% more load disambiguations and 3.6% more store disambiguations, which is not great, but the TBAA part helps noticeably more and also this should help with -fno-strict-aliasing.
I plan to work on improving param tracking too. Bootstrapped/regtested x86_64-linux with the other changes, OK? 2020-10-02 Jan Hubicka * ipa-modref-tree.c (test_insert_search_collapse): Update handling of accesses. (test_merge): Likewise. * ipa-modref-tree.h (struct modref_access_node): Add offset, size, max_size, parm_offset and parm_offset_known. (modref_access_node::useful_p): Constify. (modref_access_node::range_info_useful_p): New predicate. (modref_access_node::operator==): New. (struct modref_parm_map): New structure. (modref_tree::merge): Update for tracking parameters. * ipa-modref.c (dump_access): Dump new fields. (get_access): Fill in new fields. (merge_call_side_effects): Update handling of parm map. (write_modref_records): Stream new fields. (read_modref_records): Stream new fields. (compute_parm_map): Update for new parm map. (ipa_merge_modref_summary_after_inlining): Update. (modref_propagate_in_scc): Update. * tree-ssa-alias.c (modref_may_conflict): Handle known ranges. --- gcc/ipa-modref-tree.c | 4 +- gcc/ipa-modref-tree.h | 76 +++++++++++++++++++++++++++++++++---- gcc/ipa-modref.c | 102 ++++++++++++++++++++++++++++++++++++++++---------- gcc/tree-ssa-alias.c | 38 ++++++++++++++++--- 4 files changed, 186 insertions(+), 34 deletions(-) (limited to 'gcc') diff --git a/gcc/ipa-modref-tree.c b/gcc/ipa-modref-tree.c index 499dc60..1a59509 100644 --- a/gcc/ipa-modref-tree.c +++ b/gcc/ipa-modref-tree.c @@ -35,7 +35,7 @@ test_insert_search_collapse () { modref_base_node *base_node; modref_ref_node *ref_node; - modref_access_node a = { -1 }; + modref_access_node a = unspecified_modref_access_node; modref_tree *t = new modref_tree(1, 2, 2); ASSERT_FALSE (t->every_base); @@ -118,7 +118,7 @@ test_merge () { modref_tree *t1, *t2; modref_base_node *base_node; - modref_access_node a = { -1 }; + modref_access_node a = unspecified_modref_access_node; t1 = new modref_tree(3, 4, 1); t1->insert (1, 1, a); diff --git a/gcc/ipa-modref-tree.h b/gcc/ipa-modref-tree.h index
abf3fc1..b37280d 100644 --- a/gcc/ipa-modref-tree.h +++ b/gcc/ipa-modref-tree.h @@ -44,17 +44,56 @@ struct ipa_modref_summary; /* Memory access. */ struct GTY(()) modref_access_node { + + /* Access range information (in bits). */ + poly_int64 offset; + poly_int64 size; + poly_int64 max_size; + + /* Offset from parmeter pointer to the base of the access (in bytes). */ + poly_int64 parm_offset; + /* Index of parameter which specifies the base of access. -1 if base is not a function parameter. */ int parm_index; + bool parm_offset_known; /* Return true if access node holds no useful info. */ - bool useful_p () + bool useful_p () const { return parm_index != -1; } + /* Return true if range info is useful. */ + bool range_info_useful_p () const + { + return parm_index != -1 && parm_offset_known; + } + /* Return true if both accesses are the same. */ + bool operator == (modref_access_node &a) const + { + if (parm_index != a.parm_index) + return false; + if (parm_index >= 0) + { + if (parm_offset_known != a.parm_offset_known) + return false; + if (parm_offset_known + && !known_eq (parm_offset, a.parm_offset)) + return false; + } + if (range_info_useful_p () + && (!known_eq (a.offset, offset) + || !known_eq (a.size, size) + || !known_eq (a.max_size, max_size))) + return false; + return true; + } }; +/* Access node specifying no useful info. */ +const modref_access_node unspecified_modref_access_node + = {0, -1, -1, 0, -1, false}; + template struct GTY((user)) modref_ref_node { @@ -74,7 +113,7 @@ struct GTY((user)) modref_ref_node size_t i; modref_access_node *a; FOR_EACH_VEC_SAFE_ELT (accesses, i, a) - if (a->parm_index == access.parm_index) + if (*a == access) return a; return NULL; } @@ -195,6 +234,19 @@ struct GTY((user)) modref_base_node } }; +/* Map translating parameters across function call. */ + +struct modref_parm_map +{ + /* Index of parameter we translate to. 
+ -1 indicates that parameter is unknown + -2 indicates that parmaeter points to local memory and access can be + discarded. */ + int parm_index; + bool parm_offset_known; + poly_int64 parm_offset; +}; + /* Access tree for a single function. */ template struct GTY((user)) modref_tree @@ -363,7 +415,7 @@ struct GTY((user)) modref_tree PARM_MAP, if non-NULL, maps parm indexes of callee to caller. -2 is used to signalize that parameter is local and does not need to be tracked. Return true if something has changed. */ - bool merge (modref_tree *other, vec *parm_map) + bool merge (modref_tree *other, vec *parm_map) { if (!other || every_base) return false; @@ -406,21 +458,31 @@ struct GTY((user)) modref_tree { if (ref_node->every_access) { - modref_access_node a = {-1}; - changed |= insert (base_node->base, ref_node->ref, a); + changed |= insert (base_node->base, + ref_node->ref, + unspecified_modref_access_node); } else FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node) { modref_access_node a = *access_node; + if (a.parm_index != -1 && parm_map) { if (a.parm_index >= (int)parm_map->length ()) a.parm_index = -1; - else if ((*parm_map) [a.parm_index] == -2) + else if ((*parm_map) [a.parm_index].parm_index == -2) continue; else - a.parm_index = (*parm_map) [a.parm_index]; + { + a.parm_offset + += (*parm_map) [a.parm_index].parm_offset; + a.parm_offset_known + &= (*parm_map) + [a.parm_index].parm_offset_known; + a.parm_index + = (*parm_map) [a.parm_index].parm_index; + } } changed |= insert (base_node->base, ref_node->ref, a); } diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 71a7955..a5fa33a 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -143,7 +143,26 @@ modref_summary::useful_p (int ecf_flags) static void dump_access (modref_access_node *a, FILE *out) { - fprintf (out, " Parm %i\n", a->parm_index); + fprintf (out, " access:"); + if (a->parm_index != -1) + { + fprintf (out, " Parm %i", a->parm_index); + if (a->parm_offset_known) + { + fprintf 
(out, " param offset:"); + print_dec ((poly_int64_pod)a->parm_offset, out, SIGNED); + } + } + if (a->range_info_useful_p ()) + { + fprintf (out, " offset:"); + print_dec ((poly_int64_pod)a->offset, out, SIGNED); + fprintf (out, " size:"); + print_dec ((poly_int64_pod)a->size, out, SIGNED); + fprintf (out, " max_size:"); + print_dec ((poly_int64_pod)a->max_size, out, SIGNED); + } + fprintf (out, "\n"); } /* Dump records TT to OUT. */ @@ -292,14 +311,15 @@ get_modref_function_summary (cgraph_node *func) static modref_access_node get_access (ao_ref *ref) { - modref_access_node a; tree base; - base = ref->ref; - while (handled_component_p (base)) - base = TREE_OPERAND (base, 0); + base = ao_ref_base (ref); + modref_access_node a = {ref->offset, ref->size, ref->max_size, + 0, -1, false}; if (TREE_CODE (base) == MEM_REF || TREE_CODE (base) == TARGET_MEM_REF) { + tree offset = TREE_CODE (base) == MEM_REF + ? TREE_OPERAND (base, 1) : NULL_TREE; base = TREE_OPERAND (base, 0); if (TREE_CODE (base) == SSA_NAME && SSA_NAME_IS_DEFAULT_DEF (base) @@ -316,6 +336,8 @@ get_access (ao_ref *ref) } a.parm_index++; } + a.parm_offset_known + = offset && wi::to_poly_offset (offset).to_shwi (&a.parm_offset); } else a.parm_index = -1; @@ -446,7 +468,7 @@ merge_call_side_effects (modref_summary *cur_summary, gimple *stmt, modref_summary *callee_summary, bool ignore_stores) { - auto_vec parm_map; + auto_vec parm_map; bool changed = false; parm_map.safe_grow (gimple_call_num_args (stmt)); @@ -469,12 +491,14 @@ merge_call_side_effects (modref_summary *cur_summary, } index++; } - parm_map[i] = index; + parm_map[i].parm_index = index; + parm_map[i].parm_offset_known = true; + parm_map[i].parm_offset = 0; } else if (points_to_local_or_readonly_memory_p (op)) - parm_map[i] = -2; + parm_map[i].parm_index = -2; else - parm_map[i] = -1; + parm_map[i].parm_index = -1; } /* Merge with callee's summary. 
*/ @@ -970,7 +994,20 @@ write_modref_records (modref_records_lto *tt, struct output_block *ob) size_t k; modref_access_node *access_node; FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node) - streamer_write_uhwi (ob, access_node->parm_index); + { + streamer_write_uhwi (ob, access_node->parm_index); + if (access_node->parm_index != -1) + { + streamer_write_uhwi (ob, access_node->parm_offset_known); + if (access_node->parm_offset_known) + { + streamer_write_poly_int64 (ob, access_node->parm_offset); + streamer_write_poly_int64 (ob, access_node->offset); + streamer_write_poly_int64 (ob, access_node->size); + streamer_write_poly_int64 (ob, access_node->max_size); + } + } + } } } } @@ -1084,7 +1121,25 @@ read_modref_records (lto_input_block *ib, struct data_in *data_in, for (size_t k = 0; k < naccesses; k++) { int parm_index = streamer_read_uhwi (ib); - modref_access_node a = {parm_index}; + bool parm_offset_known = false; + poly_int64 parm_offset = 0; + poly_int64 offset = 0; + poly_int64 size = -1; + poly_int64 max_size = -1; + + if (parm_index != -1) + { + parm_offset_known = streamer_read_uhwi (ib); + if (parm_offset_known) + { + parm_offset = streamer_read_poly_int64 (ib); + offset = streamer_read_poly_int64 (ib); + size = streamer_read_poly_int64 (ib); + max_size = streamer_read_poly_int64 (ib); + } + } + modref_access_node a = {offset, size, max_size, parm_offset, + parm_index, parm_offset_known}; if (nolto_ref_node) nolto_ref_node->insert_access (a, max_accesses); if (lto_ref_node) @@ -1331,7 +1386,7 @@ ignore_edge (struct cgraph_edge *e) /* Compute parm_map for CALLE_EDGE. 
*/ static void -compute_parm_map (cgraph_edge *callee_edge, vec *parm_map) +compute_parm_map (cgraph_edge *callee_edge, vec *parm_map) { class ipa_edge_args *args; if (ipa_node_params_sum @@ -1357,7 +1412,7 @@ compute_parm_map (cgraph_edge *callee_edge, vec *parm_map) { if (es && es->param[i].points_to_local_or_readonly_memory) { - (*parm_map)[i] = -2; + (*parm_map)[i].parm_index = -2; continue; } @@ -1371,26 +1426,33 @@ compute_parm_map (cgraph_edge *callee_edge, vec *parm_map) (callee_pi, i)); if (cst && points_to_local_or_readonly_memory_p (cst)) { - (*parm_map)[i] = -2; + (*parm_map)[i].parm_index = -2; continue; } } if (jf && jf->type == IPA_JF_PASS_THROUGH) { - (*parm_map)[i] + (*parm_map)[i].parm_index = ipa_get_jf_pass_through_formal_id (jf); + (*parm_map)[i].parm_offset_known + = ipa_get_jf_pass_through_operation (jf) == NOP_EXPR; + (*parm_map)[i].parm_offset = 0; continue; } if (jf && jf->type == IPA_JF_ANCESTOR) - (*parm_map)[i] = ipa_get_jf_ancestor_formal_id (jf); + { + (*parm_map)[i].parm_index = ipa_get_jf_ancestor_formal_id (jf); + (*parm_map)[i].parm_offset_known = true; + (*parm_map)[i].parm_offset = ipa_get_jf_ancestor_offset (jf); + } else - (*parm_map)[i] = -1; + (*parm_map)[i].parm_index = -1; } if (dump_file) { fprintf (dump_file, " Parm map: "); for (i = 0; i < count; i++) - fprintf (dump_file, " %i", (*parm_map)[i]); + fprintf (dump_file, " %i", (*parm_map)[i].parm_index); fprintf (dump_file, "\n"); } } @@ -1432,7 +1494,7 @@ ipa_merge_modref_summary_after_inlining (cgraph_edge *edge) } else { - auto_vec parm_map; + auto_vec parm_map; compute_parm_map (edge, &parm_map); @@ -1598,7 +1660,7 @@ modref_propagate_in_scc (cgraph_node *component_node) } - auto_vec parm_map; + auto_vec parm_map; compute_parm_map (callee_edge, &parm_map); diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c index 3d3a91c..97dc4ac 100644 --- a/gcc/tree-ssa-alias.c +++ b/gcc/tree-ssa-alias.c @@ -2505,8 +2505,8 @@ modref_may_conflict (const gimple *stmt, } /* TBAA 
checks did not disambiguate, try to use base pointer, for - that we however need to have ref->ref. */ - if (ref_node->every_access || !ref->ref) + that we however need to have ref->ref or ref->base. */ + if (ref_node->every_access || (!ref->ref && !ref->base)) return true; modref_access_node *access_node; @@ -2520,12 +2520,40 @@ modref_may_conflict (const gimple *stmt, >= gimple_call_num_args (stmt)) return true; - alias_stats.modref_baseptr_tests++; - if (ptr_deref_may_alias_ref_p_1 - (gimple_call_arg (stmt, access_node->parm_index), ref)) + tree arg = gimple_call_arg (stmt, access_node->parm_index); + + if (integer_zerop (arg) && flag_delete_null_pointer_checks) + continue; + + if (!POINTER_TYPE_P (TREE_TYPE (arg))) return true; + + /* ao_ref_init_from_ptr_and_range assumes that memory access + starts by the pointed to location. If we did not track the + offset it is possible that it starts before the actual + pointer. */ + if (!access_node->parm_offset_known) + { + if (ptr_deref_may_alias_ref_p_1 (arg, ref)) + return true; + } + else + { + ao_ref ref2; + + ao_ref_init_from_ptr_and_range + (&ref2, arg, true, + access_node->offset + + (access_node->parm_offset + << LOG2_BITS_PER_UNIT), access_node->size, + access_node->max_size); + ref2.ref_alias_set = ref_set; + ref2.base_alias_set = base_set; + if (refs_may_alias_p_1 (&ref2, ref, tbaa_p)) + return true; + } num_tests++; } } -- cgit v1.1 From a1f77106ec3f0005f0cfd3e1d083837afeb432f6 Mon Sep 17 00:00:00 2001 From: Jan Hubicka Date: Sat, 3 Oct 2020 17:20:54 +0200 Subject: Add gcc.dg/tree-ssa/modref-3.c testcase * gcc.dg/tree-ssa/modref-3.c: New test. 
--- gcc/testsuite/gcc.dg/tree-ssa/modref-3.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/modref-3.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-3.c b/gcc/testsuite/gcc.dg/tree-ssa/modref-3.c new file mode 100644 index 0000000..668c6c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-3.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +struct a +{ + int b; + int c; +}; + +__attribute__ ((noclone, noinline)) +void +test (struct a *a) +{ + a->b = 2; +} +int +foo () +{ + struct a a = {113,114}; + test (&a); + return a.c; +} +int +foo2 (struct a *a) +{ + a->b = 123; + a->c = 124; + test (a); + return a->c; +} +/* { dg-final { scan-tree-dump "return 114" "optimized"} } */ +/* { dg-final { scan-tree-dump "return 124" "optimized"} } */ -- cgit v1.1 From ce531b14126bf5f1dcd70224a1131198ddf58875 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Sat, 3 Oct 2020 21:22:03 +0200 Subject: options: Fix up opts_set saving/restoring for underlying vars of Mask/InverseMask options Seems I've missed that set_option has special treatment for CLVC_BIT_CLEAR/CLVC_BIT_SET. Which means I'll need to change the generic handling, so that for global_options_set elements mentioned in CLVC_BIT_* options are treated differently, instead of using the accumulated bitmasks they'll need to use their specific bitmask variables during the option saving/restoring. Here is a patch that implements that. 2020-10-03 Jakub Jelinek * opth-gen.awk: For variables referenced in Mask and InverseMask, don't use the explicit_mask bitmask array, but add separate explicit_mask_* members with the same types as the variables. * optc-save-gen.awk: Save, restore, compare and hash the separate explicit_mask_* members. 
--- gcc/optc-save-gen.awk | 46 ++++++++++++++++++++++++++++++++++++++++++++++ gcc/opth-gen.awk | 17 +++++++++++++++-- 2 files changed, 61 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk index b5c27f7..56a5df3 100644 --- a/gcc/optc-save-gen.awk +++ b/gcc/optc-save-gen.awk @@ -516,6 +516,10 @@ if (have_save) { var_save_seen[name]++; otype = var_type_struct(flags[i]) + if (opt_args("Mask", flags[i]) != "" \ + || opt_args("InverseMask", flags[i])) + var_target_explicit_mask[name] = 1; + if (otype ~ "^((un)?signed +)?int *$") var_target_int[n_target_int++] = name; @@ -545,6 +549,7 @@ if (have_save) { } } else { var_target_int[n_target_int++] = "target_flags"; + var_target_explicit_mask["target_flags"] = 1; } have_assert = 0; @@ -608,6 +613,10 @@ for (i = 0; i < n_extra_target_vars; i++) { } for (i = 0; i < n_target_other; i++) { + if (var_target_other[i] in var_target_explicit_mask) { + print " ptr->explicit_mask_" var_target_other[i] " = opts_set->x_" var_target_other[i] ";"; + continue; + } print " if (opts_set->x_" var_target_other[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; j++; if (j == 64) { @@ -630,6 +639,10 @@ for (i = 0; i < n_target_enum; i++) { } for (i = 0; i < n_target_int; i++) { + if (var_target_int[i] in var_target_explicit_mask) { + print " ptr->explicit_mask_" var_target_int[i] " = opts_set->x_" var_target_int[i] ";"; + continue; + } print " if (opts_set->x_" var_target_int[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; j++; if (j == 64) { @@ -739,6 +752,10 @@ for (i = 0; i < n_extra_target_vars; i++) { } for (i = 0; i < n_target_other; i++) { + if (var_target_other[i] in var_target_explicit_mask) { + print " opts_set->x_" var_target_other[i] " = ptr->explicit_mask_" var_target_other[i] ";"; + continue; + } if (j == 64) { print " mask = ptr->explicit_mask[" k "];"; k++; @@ -761,6 +778,10 @@ for (i = 0; i < n_target_enum; i++) { } for (i = 0; i < n_target_int; i++) { + if (var_target_int[i] in 
var_target_explicit_mask) { + print " opts_set->x_" var_target_int[i] " = ptr->explicit_mask_" var_target_int[i] ";"; + continue; + } if (j == 64) { print " mask = ptr->explicit_mask[" k "];"; k++; @@ -1058,6 +1079,20 @@ print " for (size_t i = 0; i < sizeof (ptr1->explicit_mask) / sizeof (ptr1->exp print " if (ptr1->explicit_mask[i] != ptr2->explicit_mask[i])"; print " return false;" +for (i = 0; i < n_target_other; i++) { + if (var_target_other[i] in var_target_explicit_mask) { + print " if (ptr1->explicit_mask_" var_target_other[i] " != ptr2->explicit_mask_" var_target_other[i] ")"; + print " return false;"; + } +} + +for (i = 0; i < n_target_int; i++) { + if (var_target_int[i] in var_target_explicit_mask) { + print " if (ptr1->explicit_mask_" var_target_int[i] " != ptr2->explicit_mask_" var_target_int[i] ")"; + print " return false;"; + } +} + print " return true;"; print "}"; @@ -1088,6 +1123,17 @@ for (i = 0; i < n_target_val; i++) { } print " for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)"; print " hstate.add_hwi (ptr->explicit_mask[i]);"; + +for (i = 0; i < n_target_other; i++) { + if (var_target_other[i] in var_target_explicit_mask) + print " hstate.add_hwi (ptr->explicit_mask_" var_target_other[i] ");"; +} + +for (i = 0; i < n_target_int; i++) { + if (var_target_int[i] in var_target_explicit_mask) + print " hstate.add_hwi (ptr->explicit_mask_" var_target_int[i] ");"; +} + print " return hstate.end ();"; print "}"; diff --git a/gcc/opth-gen.awk b/gcc/opth-gen.awk index 8fec607..a1ca579 100644 --- a/gcc/opth-gen.awk +++ b/gcc/opth-gen.awk @@ -209,6 +209,7 @@ n_target_int = 0; n_target_enum = 0; n_target_other = 0; n_target_explicit = n_extra_target_vars; +n_target_explicit_mask = 0; for (i = 0; i < n_target_save; i++) { if (target_save_decl[i] ~ "^((un)?signed +)?int +[_" alnum "]+$") @@ -240,6 +241,12 @@ if (have_save) { var_save_seen[name]++; n_target_explicit++; otype = var_type_struct(flags[i]) + + if 
(opt_args("Mask", flags[i]) != "" \ + || opt_args("InverseMask", flags[i])) + var_target_explicit_mask[n_target_explicit_mask++] \ + = otype "explicit_mask_" name; + if (otype ~ "^((un)?signed +)?int *$") var_target_int[n_target_int++] = otype "x_" name; @@ -259,6 +266,8 @@ if (have_save) { } else { var_target_int[n_target_int++] = "int x_target_flags"; n_target_explicit++; + var_target_explicit_mask[n_target_explicit_mask++] \ + = "int explicit_mask_target_flags"; } for (i = 0; i < n_target_other; i++) { @@ -281,8 +290,12 @@ for (i = 0; i < n_target_char; i++) { print " " var_target_char[i] ";"; } -print " /* " n_target_explicit " members */"; -print " unsigned HOST_WIDE_INT explicit_mask[" int ((n_target_explicit + 63) / 64) "];"; +print " /* " n_target_explicit - n_target_explicit_mask " members */"; +print " unsigned HOST_WIDE_INT explicit_mask[" int ((n_target_explicit - n_target_explicit_mask + 63) / 64) "];"; + +for (i = 0; i < n_target_explicit_mask; i++) { + print " " var_target_explicit_mask[i] ";"; +} print "};"; print ""; -- cgit v1.1 From 11bd94806d488416dfad1b1ff2ff0f98001cd0ca Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sun, 4 Oct 2020 00:16:21 +0000 Subject: Daily bump. --- gcc/ChangeLog | 35 +++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/testsuite/ChangeLog | 4 ++++ 3 files changed, 40 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 131b253b..c9bd8d3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,38 @@ +2020-10-03 Jakub Jelinek + + * opth-gen.awk: For variables referenced in Mask and InverseMask, + don't use the explicit_mask bitmask array, but add separate + explicit_mask_* members with the same types as the variables. + * optc-save-gen.awk: Save, restore, compare and hash the separate + explicit_mask_* members. + +2020-10-03 Jan Hubicka + + * ipa-modref-tree.c (test_insert_search_collapse): Update andling + of accesses. + (test_merge): Likewise. 
+ * ipa-modref-tree.h (struct modref_access_node): Add offset, size, + max_size, parm_offset and parm_offset_known. + (modref_access_node::useful_p): Constify. + (modref_access_node::range_info_useful_p): New predicate. + (modref_access_node::operator==): New. + (struct modref_parm_map): New structure. + (modref_tree::merge): Update for tracking parameters. + * ipa-modref.c (dump_access): Dump new fields. + (get_access): Fill in new fields. + (merge_call_side_effects): Update handling of parm map. + (write_modref_records): Stream new fields. + (read_modref_records): Stream new fields. + (compute_parm_map): Update for new parm map. + (ipa_merge_modref_summary_after_inlining): Update. + (modref_propagate_in_scc): Update. + * tree-ssa-alias.c (modref_may_conflict): Handle known ranges. + +2020-10-03 H.J. Lu + + PR other/97280 + * doc/extend.texi: Replace roudnevenl with roundevenl + 2020-10-02 David Edelsohn Andrew MacLeod diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index c28ca09..db90199 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20201003 +20201004 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e84bf8e..93111bb 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2020-10-03 Jan Hubicka + + * gcc.dg/tree-ssa/modref-3.c: New test. + 2020-10-02 Nathan Sidwell * c-c++-common/spellcheck-reserved.c: Adjust diagnostic. -- cgit v1.1 From 35d2c6b6e8a7448a84abbf967feeb78a29117014 Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Sun, 4 Oct 2020 20:24:29 +0200 Subject: PR fortran/97272 - Wrong answer from MAXLOC with character arg The optional KIND argument to the MINLOC/MAXLOC intrinsic must not be passed to the library function, as the kind conversion of the result is treated explicitly elsewhere. gcc/fortran/ChangeLog: PR fortran/97272 * trans-intrinsic.c (strip_kind_from_actual): Helper function for removal of KIND argument.
(gfc_conv_intrinsic_minmaxloc): Ignore KIND argument here, as it is treated elsewhere. gcc/testsuite/ChangeLog: PR fortran/97272 * gfortran.dg/pr97272.f90: New test. --- gcc/fortran/trans-intrinsic.c | 19 +++++++++++++++++++ gcc/testsuite/gfortran.dg/pr97272.f90 | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 gcc/testsuite/gfortran.dg/pr97272.f90 (limited to 'gcc') diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c index 3b3bd86..8729bc1 100644 --- a/gcc/fortran/trans-intrinsic.c +++ b/gcc/fortran/trans-intrinsic.c @@ -5073,6 +5073,24 @@ gfc_conv_intrinsic_dot_product (gfc_se * se, gfc_expr * expr) } +/* Remove unneeded kind= argument from actual argument list when the + result conversion is dealt with in a different place. */ + +static void +strip_kind_from_actual (gfc_actual_arglist * actual) +{ + for (gfc_actual_arglist *a = actual; a; a = a->next) + { + gfc_actual_arglist *b = a->next; + if (b && b->name && strcmp (b->name, "kind") == 0) + { + a->next = b->next; + b->next = NULL; + gfc_free_actual_arglist (b); + } + } +} + /* Emit code for minloc or maxloc intrinsic. There are many different cases we need to handle. For performance reasons we sometimes create two loops instead of one, where the second one is much simpler. @@ -5208,6 +5226,7 @@ gfc_conv_intrinsic_minmaxloc (gfc_se * se, gfc_expr * expr, enum tree_code op) { gfc_actual_arglist *a, *b; a = actual; + strip_kind_from_actual (a); while (a->next) { b = a->next; diff --git a/gcc/testsuite/gfortran.dg/pr97272.f90 b/gcc/testsuite/gfortran.dg/pr97272.f90 new file mode 100644 index 0000000..e819038 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr97272.f90 @@ -0,0 +1,19 @@ +! { dg-do run } +! 
PR fortran/97272 - Wrong answer from MAXLOC with character arg + +program test + implicit none + integer :: i, j, k, l = 10 + character, allocatable :: a(:) + allocate (a(l)) + a(:) = 'a' + l = l - 1 + a(l) = 'b' + i = maxloc (a, dim=1) + j = maxloc (a, dim=1, kind=2) + k = maxloc (a, dim=1, kind=8, back=.true.) +! print *, 'i = ', i, 'a(i) = ', a(i) +! print *, 'j = ', j, 'a(j) = ', a(j) +! print *, 'k = ', k, 'a(k) = ', a(k) + if (i /= l .or. j /= l .or. k /= l) stop 1 +end -- cgit v1.1 From 4347d36f934ac6eeb807f73d48c70b29fc3fd8fb Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Mon, 5 Oct 2020 00:16:18 +0000 Subject: Daily bump. --- gcc/DATESTAMP | 2 +- gcc/fortran/ChangeLog | 8 ++++++++ gcc/testsuite/ChangeLog | 5 +++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index db90199..9d81fe1 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20201004 +20201005 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index af9cc74..19ad11b 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,11 @@ +2020-10-04 Harald Anlauf + + PR fortran/97272 + * trans-intrinsic.c (strip_kind_from_actual): Helper function for + removal of KIND argument. + (gfc_conv_intrinsic_minmaxloc): Ignore KIND argument here, as it + is treated elsewhere. + 2020-10-02 Jan Hubicka * trans-decl.c (gfc_build_library_function_decl_with_spec): Verify diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 93111bb..554563b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-10-04 Harald Anlauf + + PR fortran/97272 + * gfortran.dg/pr97272.f90: New test. + 2020-10-03 Jan Hubicka * gcc.dg/tree-ssa/modref-3.c: New test. 
-- cgit v1.1 From ab3f4b27abe8abc947e84ef84bfc9a18797c5868 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Tue, 22 Sep 2020 16:38:07 +0200 Subject: [omp, ftracer] Don't duplicate blocks in SIMT region When running the libgomp testsuite on x86_64-linux with nvptx accelerator on the test-case included in this patch, we run into: ... FAIL: libgomp.fortran/pr95654.f90 -O3 -fomit-frame-pointer -funroll-loops \ -fpeel-loops -ftracer -finline-functions execution test ... The test-case is a minimal version of this FAIL: ... FAIL: libgomp.fortran/pr66199-5.f90 -O3 -fomit-frame-pointer -funroll-loops \ -fpeel-loops -ftracer -finline-functions execution test ... but that one has stopped failing at commit c2ebf4f10de "openmp: Add support for non-rect simd and improve collapsed simd support". The problem is that ftracer duplicates a block containing GOMP_SIMT_VOTE_ANY. That is, before ftracer we have (dropping the GOMP_SIMT_ prefix): ... bb4(ENTER_ALLOC) *----------+ | \ | \ | v | * v bb8 *<------------* bb5(VOTE_ANY) *-------------+ | | | | | | | | | v | * v bb7(XCHG_IDX) *<------------* bb6(EXIT) ... The XCHG_IDX internal-fn does inter-SIMT-lane communication, which for nvptx maps onto shfl, an operator which has the requirement that the warp executing the operator is convergent. The warp diverges at bb4, and reconverges at bb5, and does not diverge by going to bb7, so the shfl is indeed executed by a convergent warp. After ftracer, we have: ... bb4(ENTER_ALLOC) *----------+ | \ | \ | \ | \ v v * * bb5(VOTE_ANY) bb8(VOTE_ANY) * * |\ /| | \ +--------+ | | \/ | | /\ | | / +----------v |/ * v bb7(XCHG_IDX) *<--------------* bb6(EXIT) ... The warp diverges again at bb5, but does not reconverge again before bb6, so the shfl is executed by a divergent warp, which causes the FAIL. Fix this by making ftracer ignore blocks containing ENTER_ALLOC, VOTE_ANY and EXIT, effectively treating the SIMT region conservatively. 
An argument can be made that the test needs to be added in a more generic place, like gimple_can_duplicate_bb_p or some such, and that ftracer then needs to use the generic test. But that's a discussion with a much broader scope, so I'm leaving that for another patch. Bootstrapped and reg-tested on x86_64-linux. Build on x86_64-linux with nvptx accelerator, tested with libgomp. gcc/ChangeLog: PR fortran/95654 * tracer.c (ignore_bb_p): Ignore GOMP_SIMT_ENTER_ALLOC, GOMP_SIMT_VOTE_ANY and GOMP_SIMT_EXIT. libgomp/ChangeLog: 2020-10-05 Tom de Vries PR fortran/95654 * testsuite/libgomp.fortran/pr95654.f90: New test. --- gcc/tracer.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'gcc') diff --git a/gcc/tracer.c b/gcc/tracer.c index 82ede72..5e51752 100644 --- a/gcc/tracer.c +++ b/gcc/tracer.c @@ -108,6 +108,24 @@ ignore_bb_p (const_basic_block bb) return true; } + for (gimple_stmt_iterator gsi = gsi_start_bb (CONST_CAST_BB (bb)); + !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *g = gsi_stmt (gsi); + + /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be + duplicated as part of its group, or not at all. + The IFN_GOMP_SIMT_VOTE_ANY is currently part of such a group, + so the same holds there, but it could be argued that the + IFN_GOMP_SIMT_VOTE_ANY could be generated after that group, + in which case it could be duplicated. */ + if (is_gimple_call (g) + && (gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC) + || gimple_call_internal_p (g, IFN_GOMP_SIMT_EXIT) + || gimple_call_internal_p (g, IFN_GOMP_SIMT_VOTE_ANY))) + return true; + } + return false; } -- cgit v1.1 From 21f65995e068963d4ceaec5b6730223213e98af7 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 5 Oct 2020 09:09:41 +0200 Subject: store-merging: Fix up -Wnarrowing warning I've noticed a -Wnarrowing warning on gimple-ssa-store-merging.c, this change fixes that up. 
2020-10-05 Jakub Jelinek * gimple-ssa-store-merging.c (imm_store_chain_info::output_merged_store): Use ~0U instead of ~0 in unsigned int array initializer. --- gcc/gimple-ssa-store-merging.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c index fa2609f..28fc2e2 100644 --- a/gcc/gimple-ssa-store-merging.c +++ b/gcc/gimple-ssa-store-merging.c @@ -3804,7 +3804,7 @@ imm_store_chain_info::output_merged_store (merged_store_group *group) Similarly, if there is a whole region clear first, prefer expanding it together compared to expanding clear first followed by merged further stores. */ - unsigned cnt[4] = { ~0, ~0, ~0, ~0 }; + unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U }; int pass_min = 0; for (int pass = 0; pass < 4; ++pass) { -- cgit v1.1 From 3c022a4c732163549ee999fb0b1846215cf0a671 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 5 Oct 2020 09:34:42 +0200 Subject: options: Save and restore opts_set for Optimization and Target options fallout > This breaks ia64: > > In file included from ./tm.h:23, > from ../../gcc/gencheck.c:23: > ./options.h:7816:40: error: ISO C++ forbids zero-size array 'explicit_mask' [-Werror=pedantic] > 7816 | unsigned HOST_WIDE_INT explicit_mask[0]; > | ^ > ./options.h:7816:26: error: zero-size array member 'cl_target_option::explicit_mask' not at end of 'struct cl_target_option' [-Werror=pedantic] > 7816 | unsigned HOST_WIDE_INT explicit_mask[0]; > | ^~~~~~~~~~~~~ > ./options.h:7812:16: note: in the definition of 'struct cl_target_option' > 7812 | struct GTY(()) cl_target_option > | ^~~~~~~~~~~~~~~~ Oops, sorry. The following patch should fix that and should also fix streaming of the new explicit_mask_* members. 2020-10-05 Jakub Jelinek * opth-gen.awk: Don't emit explicit_mask array if n_target_explicit is equal to n_target_explicit_mask. 
* optc-save-gen.awk: Compute has_target_explicit_mask and if false, don't emit code iterating over explicit_mask array elements. Stream also explicit_mask_* target members. --- gcc/optc-save-gen.awk | 54 ++++++++++++++++++++++++++++++++++++++++++--------- gcc/opth-gen.awk | 5 ++++- 2 files changed, 49 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk index 56a5df3..5169acd 100644 --- a/gcc/optc-save-gen.awk +++ b/gcc/optc-save-gen.awk @@ -689,6 +689,10 @@ for (i = 0; i < n_target_string; i++) { if (j != 0) { print " ptr->explicit_mask[" k "] = mask;"; } +has_target_explicit_mask = 0; +if (j != 0 || k != 0) { + has_target_explicit_mask = 1; +} print "}"; @@ -1075,9 +1079,11 @@ for (i = 0; i < n_target_val; i++) { print " return false;"; } -print " for (size_t i = 0; i < sizeof (ptr1->explicit_mask) / sizeof (ptr1->explicit_mask[0]); i++)"; -print " if (ptr1->explicit_mask[i] != ptr2->explicit_mask[i])"; -print " return false;" +if (has_target_explicit_mask) { + print " for (size_t i = 0; i < sizeof (ptr1->explicit_mask) / sizeof (ptr1->explicit_mask[0]); i++)"; + print " if (ptr1->explicit_mask[i] != ptr2->explicit_mask[i])"; + print " return false;" +} for (i = 0; i < n_target_other; i++) { if (var_target_other[i] in var_target_explicit_mask) { @@ -1121,8 +1127,10 @@ for (i = 0; i < n_target_val; i++) { name = var_target_val[i] print " hstate.add_hwi (ptr->" name");"; } -print " for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)"; -print " hstate.add_hwi (ptr->explicit_mask[i]);"; +if (has_target_explicit_mask) { + print " for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)"; + print " hstate.add_hwi (ptr->explicit_mask[i]);"; +} for (i = 0; i < n_target_other; i++) { if (var_target_other[i] in var_target_explicit_mask) @@ -1159,8 +1167,22 @@ for (i = 0; i < n_target_val; i++) { print " bp_pack_value (bp, ptr->" name", 64);"; } 
-print " for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)"; -print " bp_pack_value (bp, ptr->explicit_mask[i], 64);"; +if (has_target_explicit_mask) { + print " for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)"; + print " bp_pack_value (bp, ptr->explicit_mask[i], 64);"; +} + +for (i = 0; i < n_target_other; i++) { + if (var_target_other[i] in var_target_explicit_mask) { + print " bp_pack_value (bp, ptr->explicit_mask_" var_target_other[i] ", 64);"; + } +} + +for (i = 0; i < n_target_int; i++) { + if (var_target_int[i] in var_target_explicit_mask) { + print " bp_pack_value (bp, ptr->explicit_mask_" var_target_int[i] ", 64);"; + } +} print "}"; @@ -1188,8 +1210,22 @@ for (i = 0; i < n_target_val; i++) { print " ptr->" name" = (" var_target_val_type[i] ") bp_unpack_value (bp, 64);"; } -print " for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)"; -print " ptr->explicit_mask[i] = bp_unpack_value (bp, 64);"; +if (has_target_explicit_mask) { + print " for (size_t i = 0; i < sizeof (ptr->explicit_mask) / sizeof (ptr->explicit_mask[0]); i++)"; + print " ptr->explicit_mask[i] = bp_unpack_value (bp, 64);"; +} + +for (i = 0; i < n_target_other; i++) { + if (var_target_other[i] in var_target_explicit_mask) { + print " ptr->explicit_mask_" var_target_other[i] " = bp_unpack_value (bp, 64);"; + } +} + +for (i = 0; i < n_target_int; i++) { + if (var_target_int[i] in var_target_explicit_mask) { + print " ptr->explicit_mask_" var_target_int[i] " = bp_unpack_value (bp, 64);"; + } +} print "}"; diff --git a/gcc/opth-gen.awk b/gcc/opth-gen.awk index a1ca579..a456b51 100644 --- a/gcc/opth-gen.awk +++ b/gcc/opth-gen.awk @@ -291,7 +291,10 @@ for (i = 0; i < n_target_char; i++) { } print " /* " n_target_explicit - n_target_explicit_mask " members */"; -print " unsigned HOST_WIDE_INT explicit_mask[" int ((n_target_explicit - n_target_explicit_mask + 63) / 64) "];"; +if 
(n_target_explicit > n_target_explicit_mask) { + print " unsigned HOST_WIDE_INT explicit_mask[" \ + int ((n_target_explicit - n_target_explicit_mask + 63) / 64) "];"; +} for (i = 0; i < n_target_explicit_mask; i++) { print " " var_target_explicit_mask[i] ";"; -- cgit v1.1 From fc998c21c2c49e06cd4e548cd0d21ef25c880dbc Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Mon, 5 Oct 2020 14:03:34 +0200 Subject: [omp, ftracer] Remove incorrect suggestion in ignore_bb_p In commit ab3f4b27abe "[omp, ftracer] Don't duplicate blocks in SIMT region" I added a comment in ignore_bb_p suggesting a reordering of SIMT_VOTE_ANY and SIMT_EXIT, which is not possible since VOTE_ANY may have data dependencies to storage that is deallocated by SIMT_EXIT. I've now opened a PR (PR97291) to describe the problem the reordering was intended to fix. Remove the incorrect suggestion. gcc/ChangeLog: 2020-10-05 Tom de Vries * tracer.c (ignore_bb_p): Remove incorrect suggestion. --- gcc/tracer.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/tracer.c b/gcc/tracer.c index 5e51752..5ee6651 100644 --- a/gcc/tracer.c +++ b/gcc/tracer.c @@ -115,10 +115,8 @@ ignore_bb_p (const_basic_block bb) /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be duplicated as part of its group, or not at all. - The IFN_GOMP_SIMT_VOTE_ANY is currently part of such a group, - so the same holds there, but it could be argued that the - IFN_GOMP_SIMT_VOTE_ANY could be generated after that group, - in which case it could be duplicated. */ + The IFN_GOMP_SIMT_VOTE_ANY is part of such a group, so the same holds + there. 
*/ if (is_gimple_call (g) && (gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC) || gimple_call_internal_p (g, IFN_GOMP_SIMT_EXIT) -- cgit v1.1 From bf490f0636052040cfe1b3882475ac9a0e0337df Mon Sep 17 00:00:00 2001 From: Alex Coplan Date: Mon, 5 Oct 2020 13:45:24 +0100 Subject: arm: Add missing part number for Neoverse V1 This patch adds vendor and part numbers which were missing from the initial entry for Neoverse V1 in AArch32 GCC. gcc/ChangeLog: * config/arm/arm-cpus.in (neoverse-v1): Add missing vendor and part numbers. --- gcc/config/arm/arm-cpus.in | 2 ++ 1 file changed, 2 insertions(+) (limited to 'gcc') diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index 9abb59a..27ce000 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -1519,6 +1519,8 @@ begin cpu neoverse-v1 architecture armv8.4-a+fp16+bf16+i8mm option crypto add FP_ARMv8 CRYPTO costs cortex_a57 + vendor 41 + part 0xd40 end cpu neoverse-v1 # Armv8.5 A-profile Architecture Processors -- cgit v1.1 From 255aa06d40d7b151d1b26cb690e0545f834b3bea Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Mon, 5 Oct 2020 06:36:38 -0700 Subject: c++: Make spell corrections consistent My change to namespace-scope spell corrections ignored the issue that different targets might have different builtins, and therefore perturb iteration order. This fixes it by using an intermediate array of identifiers, which we sort before considering. gcc/cp/ * name-lookup.c (maybe_add_fuzzy_decl): New. (maybe_add_fuzzy_binding): New. (consider_binding_level): Use intermediate sortable vector for namespace bindings. gcc/testsuite/ * c-c++-common/spellcheck-reserved.c: Restore diagnostic.
--- gcc/cp/name-lookup.c | 116 ++++++++++++++++++----- gcc/testsuite/c-c++-common/spellcheck-reserved.c | 2 +- 2 files changed, 94 insertions(+), 24 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index 190b56b..774c447 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -6077,6 +6077,9 @@ qualified_namespace_lookup (tree scope, name_lookup *lookup) return found; } +/* If DECL is suitably visible to the user, consider its name for + spelling correction. */ + static void consider_decl (tree decl, best_match &bm, bool consider_impl_names) @@ -6110,6 +6113,65 @@ consider_decl (tree decl, best_match &bm, bm.consider (suggestion_str); } +/* If DECL is suitably visible to the user, add its name to VEC and + return true. Otherwise return false. */ + +static bool +maybe_add_fuzzy_decl (auto_vec &vec, tree decl) +{ + /* Skip compiler-generated variables (e.g. __for_begin/__for_end + within range for). */ + if (TREE_CODE (decl) == VAR_DECL && DECL_ARTIFICIAL (decl)) + return false; + + tree suggestion = DECL_NAME (decl); + if (!suggestion) + return false; + + /* Don't suggest names that are for anonymous aggregate types, as + they are an implementation detail generated by the compiler. */ + if (IDENTIFIER_ANON_P (suggestion)) + return false; + + vec.safe_push (suggestion); + + return true; +} + +/* Examine the namespace binding BINDING, and add at most one instance + of the name, if it contains a visible entity of interest.
*/ + +void +maybe_add_fuzzy_binding (auto_vec &vec, tree binding, + lookup_name_fuzzy_kind kind) +{ + tree value = NULL_TREE; + + if (STAT_HACK_P (binding)) + { + if (!STAT_TYPE_HIDDEN_P (binding) + && STAT_TYPE (binding)) + { + if (maybe_add_fuzzy_decl (vec, STAT_TYPE (binding))) + return; + } + else if (!STAT_DECL_HIDDEN_P (binding)) + value = STAT_DECL (binding); + } + else + value = binding; + + value = ovl_skip_hidden (value); + if (value) + { + value = OVL_FIRST (value); + if (kind != FUZZY_LOOKUP_TYPENAME + || TREE_CODE (STRIP_TEMPLATE (value)) == TYPE_DECL) + if (maybe_add_fuzzy_decl (vec, value)) + return; + } +} + /* Helper function for lookup_name_fuzzy. Traverse binding level LVL, looking for good name matches for NAME (and BM). */ @@ -6157,38 +6219,46 @@ consider_binding_level (tree name, best_match &bm, } else { - /* Iterate over the namespace hash table, that'll have fewer - entries than the decl list. */ + /* We need to iterate over the namespace hash table, in order to + not mention hidden entities. But hash table iteration is + (essentially) unpredictable, our correction-distance measure + is very granular, and we pick the first of equal distances. + Hence, we need to call the distance-measurer in a predictable + order. So, iterate over the namespace hash, inserting + visible names into a vector. Then sort the vector. Then + determine spelling distance. */ + tree ns = lvl->this_entity; + auto_vec vec; hash_table::iterator end (DECL_NAMESPACE_BINDINGS (ns)->end ()); for (hash_table::iterator iter (DECL_NAMESPACE_BINDINGS (ns)->begin ()); iter != end; ++iter) + maybe_add_fuzzy_binding (vec, *iter, kind); + + vec.qsort ([] (const void *a_, const void *b_) + { + return strcmp (IDENTIFIER_POINTER (*(const tree *)a_), + IDENTIFIER_POINTER (*(const tree *)b_)); + }); + + /* Examine longest to shortest. 
*/ + for (unsigned ix = vec.length (); ix--;) { - tree binding = *iter; - tree value = NULL_TREE; + const char *str = IDENTIFIER_POINTER (vec[ix]); - if (STAT_HACK_P (binding)) - { - if (!STAT_TYPE_HIDDEN_P (binding) - && STAT_TYPE (binding)) - consider_decl (STAT_TYPE (binding), bm, - consider_implementation_names); - else if (!STAT_DECL_HIDDEN_P (binding)) - value = STAT_DECL (binding); - } - else - value = binding; + /* Ignore internal names with spaces in them. */ + if (strchr (str, ' ')) + continue; - value = ovl_skip_hidden (value); - if (value) - { - value = OVL_FIRST (value); - if (!(kind == FUZZY_LOOKUP_TYPENAME - && TREE_CODE (STRIP_TEMPLATE (value)) != TYPE_DECL)) - consider_decl (value, bm, consider_implementation_names); - } + /* Don't suggest names that are reserved for use by the + implementation, unless NAME began with an underscore. */ + if (!consider_implementation_names + && name_reserved_for_implementation_p (str)) + continue; + + bm.consider (str); } } } diff --git a/gcc/testsuite/c-c++-common/spellcheck-reserved.c b/gcc/testsuite/c-c++-common/spellcheck-reserved.c index 175ba4a..ed292f2 100644 --- a/gcc/testsuite/c-c++-common/spellcheck-reserved.c +++ b/gcc/testsuite/c-c++-common/spellcheck-reserved.c @@ -30,7 +30,7 @@ void test (const char *buf, char ch) { __builtin_strtchr (buf, ch); /* { dg-line misspelled_reserved } */ /* { dg-warning "did you mean '__builtin_strchr'" "" { target c } misspelled_reserved } */ - /* { dg-error "'__builtin_strtchr' was not declared in this scope; did you mean '__builtin_strchr'\\?" "" { target c++ } misspelled_reserved } */ + /* { dg-error "'__builtin_strtchr' was not declared in this scope; did you mean '__builtin_strrchr'\\?" "" { target c++ } misspelled_reserved } */ } /* Similarly for a name that begins with a single underscore. 
*/ -- cgit v1.1 From 717e402dbf55e7da83d4fc87641ab9e60d4846cb Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Mon, 5 Oct 2020 14:26:04 +0200 Subject: [omp, ftracer] Ignore IFN_GOMP_SIMT_XCHG_* in ignore_bb_p As IFN_GOMP_SIMT_XCHG_* are part of the group marked by IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT, handle them conservatively in ignore_bb_p. Build on x86_64-linux with nvptx accelerator, tested with libgomp. gcc/ChangeLog: 2020-10-05 Tom de Vries * tracer.c (ignore_bb_p): Ignore GOMP_SIMT_XCHG_*. --- gcc/tracer.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/tracer.c b/gcc/tracer.c index 5ee6651..7f32ccb 100644 --- a/gcc/tracer.c +++ b/gcc/tracer.c @@ -115,12 +115,14 @@ ignore_bb_p (const_basic_block bb) /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be duplicated as part of its group, or not at all. - The IFN_GOMP_SIMT_VOTE_ANY is part of such a group, so the same holds - there. */ + The IFN_GOMP_SIMT_VOTE_ANY and IFN_GOMP_SIMT_XCHG_* are part of such a + group, so the same holds there. */ if (is_gimple_call (g) && (gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC) || gimple_call_internal_p (g, IFN_GOMP_SIMT_EXIT) - || gimple_call_internal_p (g, IFN_GOMP_SIMT_VOTE_ANY))) + || gimple_call_internal_p (g, IFN_GOMP_SIMT_VOTE_ANY) + || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_BFLY) + || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_IDX))) return true; } -- cgit v1.1 From ac1c65ad1a16d83ec63674efa07c00b062562f15 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 5 Oct 2020 18:33:17 +0200 Subject: support TARGET_MEM_REF in C/C++ error pretty-printing [PR97197] > See my comment above for Martins attempts to improve things. I don't > really want to try decide what to do with those late diagnostic IL > printing but my commit was blamed for showing target-mem-ref unsupported. 
> > I don't have much time to spend to think what to best print and what not, > but yes, printing only the MEM_REF part is certainly imprecise. Here is an updated version of the patch that prints TARGET_MEM_REF the way it should be printed - as C representation of what it actually means. Of course it would be better to have the original expressions, but with the late diagnostics we no longer have them. 2020-10-05 Richard Biener Jakub Jelinek PR c++/97197 gcc/cp/ * error.c (dump_expr): Handle TARGET_MEM_REF. gcc/c-family/ * c-pretty-print.c: Include langhooks.h. (c_pretty_printer::postfix_expression): Handle TARGET_MEM_REF as expression. (c_pretty_printer::expression): Handle TARGET_MEM_REF as unary_expression. (c_pretty_printer::unary_expression): Handle TARGET_MEM_REF. --- gcc/c-family/c-pretty-print.c | 59 +++++++++++++++++++++++++++++++++++++++++++ gcc/cp/error.c | 58 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) (limited to 'gcc') diff --git a/gcc/c-family/c-pretty-print.c b/gcc/c-family/c-pretty-print.c index acffd7b..8953e3b 100644 --- a/gcc/c-family/c-pretty-print.c +++ b/gcc/c-family/c-pretty-print.c @@ -29,6 +29,7 @@ along with GCC; see the file COPYING3. If not see #include "intl.h" #include "tree-pretty-print.h" #include "selftest.h" +#include "langhooks.h" /* The pretty-printer code is primarily designed to closely follow (GNU) C and C++ grammars. That is to be contrasted with spaghetti @@ -1693,6 +1694,7 @@ c_pretty_printer::postfix_expression (tree e) break; case MEM_REF: + case TARGET_MEM_REF: expression (e); break; @@ -1859,6 +1861,62 @@ c_pretty_printer::unary_expression (tree e) } break; + case TARGET_MEM_REF: + /* TARGET_MEM_REF can't appear directly from source, but can appear + during late GIMPLE optimizations and through late diagnostic we might + need to support it. 
Print it as dereferencing of a pointer after + cast to the TARGET_MEM_REF type, with pointer arithmetics on some + pointer to single byte types, so + *(type *)((char *) ptr + step * index + index2) if all the operands + are present and the casts are needed. */ + pp_c_star (this); + if (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (TMR_BASE (e)))) == NULL_TREE + || !integer_onep (TYPE_SIZE_UNIT + (TREE_TYPE (TREE_TYPE (TMR_BASE (e)))))) + { + if (TYPE_SIZE_UNIT (TREE_TYPE (e)) + && integer_onep (TYPE_SIZE_UNIT (TREE_TYPE (e)))) + { + pp_c_left_paren (this); + pp_c_type_cast (this, build_pointer_type (TREE_TYPE (e))); + } + else + { + pp_c_type_cast (this, build_pointer_type (TREE_TYPE (e))); + pp_c_left_paren (this); + pp_c_type_cast (this, build_pointer_type (char_type_node)); + } + } + else if (!lang_hooks.types_compatible_p + (TREE_TYPE (e), TREE_TYPE (TREE_TYPE (TMR_BASE (e))))) + { + pp_c_type_cast (this, build_pointer_type (TREE_TYPE (e))); + pp_c_left_paren (this); + } + else + pp_c_left_paren (this); + pp_c_cast_expression (this, TMR_BASE (e)); + if (TMR_STEP (e) && TMR_INDEX (e)) + { + pp_plus (this); + pp_c_cast_expression (this, TMR_INDEX (e)); + pp_c_star (this); + pp_c_cast_expression (this, TMR_STEP (e)); + } + if (TMR_INDEX2 (e)) + { + pp_plus (this); + pp_c_cast_expression (this, TMR_INDEX2 (e)); + } + if (!integer_zerop (TMR_OFFSET (e))) + { + pp_plus (this); + pp_c_integer_constant (this, + fold_convert (ssizetype, TMR_OFFSET (e))); + } + pp_c_right_paren (this); + break; + case REALPART_EXPR: case IMAGPART_EXPR: pp_c_ws_string (this, code == REALPART_EXPR ? 
"__real__" : "__imag__"); @@ -2295,6 +2353,7 @@ c_pretty_printer::expression (tree e) case ADDR_EXPR: case INDIRECT_REF: case MEM_REF: + case TARGET_MEM_REF: case NEGATE_EXPR: case BIT_NOT_EXPR: case TRUTH_NOT_EXPR: diff --git a/gcc/cp/error.c b/gcc/cp/error.c index ecb41e8..ad22b00 100644 --- a/gcc/cp/error.c +++ b/gcc/cp/error.c @@ -2400,6 +2400,64 @@ dump_expr (cxx_pretty_printer *pp, tree t, int flags) } break; + case TARGET_MEM_REF: + /* TARGET_MEM_REF can't appear directly from source, but can appear + during late GIMPLE optimizations and through late diagnostic we might + need to support it. Print it as dereferencing of a pointer after + cast to the TARGET_MEM_REF type, with pointer arithmetics on some + pointer to single byte types, so + *(type *)((char *) ptr + step * index + index2) if all the operands + are present and the casts are needed. */ + pp_cxx_star (pp); + pp_cxx_left_paren (pp); + if (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (TMR_BASE (t)))) == NULL_TREE + || !integer_onep (TYPE_SIZE_UNIT + (TREE_TYPE (TREE_TYPE (TMR_BASE (t)))))) + { + if (TYPE_SIZE_UNIT (TREE_TYPE (t)) + && integer_onep (TYPE_SIZE_UNIT (TREE_TYPE (t)))) + { + pp_cxx_left_paren (pp); + dump_type (pp, build_pointer_type (TREE_TYPE (t)), flags); + } + else + { + dump_type (pp, build_pointer_type (TREE_TYPE (t)), flags); + pp_cxx_right_paren (pp); + pp_cxx_left_paren (pp); + pp_cxx_left_paren (pp); + dump_type (pp, build_pointer_type (char_type_node), flags); + } + pp_cxx_right_paren (pp); + } + else if (!same_type_p (TREE_TYPE (t), + TREE_TYPE (TREE_TYPE (TMR_BASE (t))))) + { + dump_type (pp, build_pointer_type (TREE_TYPE (t)), flags); + pp_cxx_right_paren (pp); + pp_cxx_left_paren (pp); + } + dump_expr (pp, TMR_BASE (t), flags); + if (TMR_STEP (t) && TMR_INDEX (t)) + { + pp_cxx_ws_string (pp, "+"); + dump_expr (pp, TMR_INDEX (t), flags); + pp_cxx_ws_string (pp, "*"); + dump_expr (pp, TMR_STEP (t), flags); + } + if (TMR_INDEX2 (t)) + { + pp_cxx_ws_string (pp, "+"); + dump_expr (pp, 
TMR_INDEX2 (t), flags); + } + if (!integer_zerop (TMR_OFFSET (t))) + { + pp_cxx_ws_string (pp, "+"); + dump_expr (pp, fold_convert (ssizetype, TMR_OFFSET (t)), flags); + } + pp_cxx_right_paren (pp); + break; + case NEGATE_EXPR: case BIT_NOT_EXPR: case TRUTH_NOT_EXPR: -- cgit v1.1 From bd431d26de02180d7fac1a794e2b9d3aaa4df34d Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 5 Oct 2020 17:08:11 +0200 Subject: Import various range-op fixes from ranger branch. This patch imports three fixes from the ranger branch: 1. Fold division by zero into varying instead of undefined. This provides compatibility with existing stuff on trunk. 2. Solver changes for lshift. This should not affect anything on trunk, as it only involves the GORI solver which is yet to be contributed. 3. Preserve existing behavior for ABS([-MIN,-MIN]). This is actually unrepresentable, but trunk has traditionally treated this as [-MIN,-MIN] so this patch just syncs range-ops with the rest of trunk. gcc/ChangeLog: * range-op.cc (operator_div::wi_fold): Return varying for division by zero. (class operator_rshift): Move class up. (operator_abs::wi_fold): Return [-MIN,-MIN] for ABS([-MIN,-MIN]). (operator_tests): Adjust tests. --- gcc/range-op.cc | 164 +++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 114 insertions(+), 50 deletions(-) (limited to 'gcc') diff --git a/gcc/range-op.cc b/gcc/range-op.cc index 3ab268f..11e847f 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -1317,10 +1317,10 @@ operator_div::wi_fold (irange &r, tree type, const wide_int &lh_lb, const wide_int &lh_ub, const wide_int &rh_lb, const wide_int &rh_ub) const { - // If we know we will divide by zero, return undefined. + // If we know we will divide by zero... 
if (rh_lb == 0 && rh_ub == 0) { - r.set_undefined (); + r.set_varying (type); return; } @@ -1430,6 +1430,27 @@ public: const wide_int &) const; } op_lshift; +class operator_rshift : public cross_product_operator +{ +public: + virtual bool fold_range (irange &r, tree type, + const irange &op1, + const irange &op2) const; + virtual void wi_fold (irange &r, tree type, + const wide_int &lh_lb, + const wide_int &lh_ub, + const wide_int &rh_lb, + const wide_int &rh_ub) const; + virtual bool wi_op_overflows (wide_int &res, + tree type, + const wide_int &w0, + const wide_int &w1) const; + virtual bool op1_range (irange &, tree type, + const irange &lhs, + const irange &op2) const; +} op_rshift; + + bool operator_lshift::fold_range (irange &r, tree type, const irange &op1, @@ -1546,60 +1567,47 @@ operator_lshift::op1_range (irange &r, tree shift_amount; if (op2.singleton_p (&shift_amount)) { - int_range<1> shifted (shift_amount, shift_amount), ub, lb; - const range_operator *rshift_op = range_op_handler (RSHIFT_EXPR, type); - rshift_op->fold_range (ub, type, lhs, shifted); - if (TYPE_UNSIGNED (type)) + wide_int shift = wi::to_wide (shift_amount); + gcc_checking_assert (wi::gt_p (shift, 0, SIGNED)); + + // Work completely in unsigned mode to start. + tree utype = type; + if (TYPE_SIGN (type) == SIGNED) { - r = ub; - return true; + int_range_max tmp = lhs; + utype = unsigned_type_for (type); + range_cast (tmp, utype); + op_rshift.fold_range (r, utype, tmp, op2); } - // For signed types, we can't just do an arithmetic rshift, - // because that will propagate the sign bit. - // - // LHS - // 1110 = OP1 << 1 - // - // Assuming a 4-bit signed integer, a right shift will result in - // OP1=1111, but OP1 could have also been 0111. What we want is - // a range from 0111 to 1111. That is, a range from the logical - // rshift (0111) to the arithmetic rshift (1111). - // - // Perform a logical rshift by doing the rshift as unsigned. 
- tree unsigned_type = unsigned_type_for (type); - int_range_max unsigned_lhs = lhs; - range_cast (unsigned_lhs, unsigned_type); - rshift_op = range_op_handler (RSHIFT_EXPR, unsigned_type); - rshift_op->fold_range (lb, unsigned_type, unsigned_lhs, shifted); - range_cast (lb, type); - r = lb; - r.union_ (ub); + else + op_rshift.fold_range (r, utype, lhs, op2); + + // Start with ranges which can produce the LHS by right shifting the + // result by the shift amount. + // ie [0x08, 0xF0] = op1 << 2 will start with + // [00001000, 11110000] = op1 << 2 + // [0x02, 0x4C] aka [00000010, 00111100] + + // Then create a range from the LB with the least significant upper bit + // set, to the upper bound with all the bits set. + // This would be [0x42, 0xFC] aka [01000010, 11111100]. + + // Ideally we do this for each subrange, but just lump them all for now. + unsigned low_bits = TYPE_PRECISION (utype) + - TREE_INT_CST_LOW (shift_amount); + wide_int up_mask = wi::mask (low_bits, true, TYPE_PRECISION (utype)); + wide_int new_ub = wi::bit_or (up_mask, r.upper_bound ()); + wide_int new_lb = wi::set_bit (r.lower_bound (), low_bits); + int_range<2> fill_range (utype, new_lb, new_ub); + r.union_ (fill_range); + + if (utype != type) + range_cast (r, type); return true; } return false; } - -class operator_rshift : public cross_product_operator -{ -public: - virtual bool fold_range (irange &r, tree type, - const irange &op1, - const irange &op2) const; - virtual void wi_fold (irange &r, tree type, - const wide_int &lh_lb, - const wide_int &lh_ub, - const wide_int &rh_lb, - const wide_int &rh_ub) const; - virtual bool wi_op_overflows (wide_int &res, - tree type, - const wide_int &w0, - const wide_int &w1) const; - virtual bool op1_range (irange &, tree type, - const irange &lhs, - const irange &op2) const; -} op_rshift; - bool operator_rshift::op1_range (irange &r, tree type, @@ -2825,9 +2833,19 @@ operator_abs::wi_fold (irange &r, tree type, // ABS_EXPR may flip the range around, if 
the original range // included negative values. if (wi::eq_p (lh_lb, min_value)) - min = max_value; + { + // ABS ([-MIN, -MIN]) isn't representable, but we have traditionally + // returned [-MIN,-MIN] so this preserves that behaviour. PR37078 + if (wi::eq_p (lh_ub, min_value)) + { + r = int_range<1> (type, min_value, min_value); + return; + } + min = max_value; + } else min = wi::abs (lh_lb); + if (wi::eq_p (lh_ub, min_value)) max = max_value; else @@ -3552,6 +3570,52 @@ operator_tests () negatives.intersect (op1); ASSERT_TRUE (negatives.undefined_p ()); } + + if (TYPE_PRECISION (unsigned_type_node) > 31) + { + // unsigned VARYING = op1 << 1 should be VARYING. + int_range<2> lhs (unsigned_type_node); + int_range<2> shift (INT (1), INT (1)); + int_range_max op1; + op_lshift.op1_range (op1, unsigned_type_node, lhs, shift); + ASSERT_TRUE (op1.varying_p ()); + + // 0 = op1 << 1 should be [0,0], [0x8000000, 0x8000000]. + int_range<2> zero (UINT (0), UINT (0)); + op_lshift.op1_range (op1, unsigned_type_node, zero, shift); + ASSERT_TRUE (op1.num_pairs () == 2); + // Remove the [0,0] range. + op1.intersect (zero); + ASSERT_TRUE (op1.num_pairs () == 1); + // op1 << 1 should be [0x8000,0x8000] << 1, + // which should result in [0,0]. + int_range_max result; + op_lshift.fold_range (result, unsigned_type_node, op1, shift); + ASSERT_TRUE (result == zero); + } + // signed VARYING = op1 << 1 should be VARYING. + if (TYPE_PRECISION (integer_type_node) > 31) + { + // unsigned VARYING = op1 << 1 should be VARYING. + int_range<2> lhs (integer_type_node); + int_range<2> shift (INT (1), INT (1)); + int_range_max op1; + op_lshift.op1_range (op1, integer_type_node, lhs, shift); + ASSERT_TRUE (op1.varying_p ()); + + // 0 = op1 << 1 should be [0,0], [0x8000000, 0x8000000]. + int_range<2> zero (INT (0), INT (0)); + op_lshift.op1_range (op1, integer_type_node, zero, shift); + ASSERT_TRUE (op1.num_pairs () == 2); + // Remove the [0,0] range.
+ op1.intersect (zero); + ASSERT_TRUE (op1.num_pairs () == 1); + // op1 << 1 should be [0x8000,0x8000] << 1, + // which should result in [0,0]. + int_range_max result; + op_lshift.fold_range (result, unsigned_type_node, op1, shift); + ASSERT_TRUE (result == zero); + } } // Run all of the selftests within this file. -- cgit v1.1 From ea6da7f50fe2adc3a09fc10a3f437902c40ebff9 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 5 Oct 2020 17:36:13 +0200 Subject: Cleanup legacy_union and legacy intersect in value_range. These are cleanups so that multi-range union/intersect doesn't have to deal with legacy code. Instead, these should be done in legacy mode. gcc/ChangeLog: * value-range.cc (irange::legacy_intersect): Only handle legacy ranges. (irange::legacy_union): Same. (irange::union_): When unioning legacy with non-legacy, first convert to legacy and do everything in legacy mode. (irange::intersect): Same, but for intersect. * range-op.cc (range_tests): Adjust for above changes. --- gcc/range-op.cc | 21 +++++++++++++++++ gcc/value-range.cc | 67 ++++++++++++++++++++++-------------------------------- 2 files changed, 48 insertions(+), 40 deletions(-) (limited to 'gcc') diff --git a/gcc/range-op.cc b/gcc/range-op.cc index 11e847f..87c6d82 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -3901,6 +3901,27 @@ range_tests () r0.invert (); ASSERT_TRUE (r0.nonzero_p ()); + // test legacy interaction + // r0 = ~[1,1] + r0 = int_range<1> (UINT (1), UINT (1), VR_ANTI_RANGE); + // r1 = ~[3,3] + r1 = int_range<1> (UINT (3), UINT (3), VR_ANTI_RANGE); + + // vv = [0,0][2,2][4, MAX] + int_range<3> vv = r0; + vv.intersect (r1); + + ASSERT_TRUE (vv.contains_p (UINT (2))); + ASSERT_TRUE (vv.num_pairs () == 3); + + // create r0 as legacy [1,1] + r0 = int_range<1> (UINT (1), UINT (1)); + // And union it with [0,0][2,2][4,MAX] multi range + r0.union_ (vv); + // The result should be [0,2][4,MAX], or ~[3,3] but it must contain 2 + ASSERT_TRUE (r0.contains_p (UINT (2))); + +
multi_precision_range_tests (); int_range_max_tests (); operator_tests (); diff --git a/gcc/value-range.cc b/gcc/value-range.cc index ed2c322..cdcc6c6 100644 --- a/gcc/value-range.cc +++ b/gcc/value-range.cc @@ -1093,19 +1093,14 @@ intersect_ranges (enum value_range_kind *vr0type, void irange::legacy_intersect (irange *vr0, const irange *vr1) { + gcc_checking_assert (vr0->legacy_mode_p ()); + gcc_checking_assert (vr1->legacy_mode_p ()); /* If either range is VR_VARYING the other one wins. */ if (vr1->varying_p ()) return; if (vr0->varying_p ()) { - /* Avoid the full copy if we already know both sides are simple - and can be trivially copied. */ - if (vr1->legacy_mode_p ()) - { - vr0->set (vr1->min (), vr1->max (), vr1->kind ()); - return; - } - *vr0 = *vr1; + vr0->set (vr1->min (), vr1->max (), vr1->kind ()); return; } @@ -1122,17 +1117,9 @@ irange::legacy_intersect (irange *vr0, const irange *vr1) value_range_kind vr0kind = vr0->kind (); tree vr0min = vr0->min (); tree vr0max = vr0->max (); - /* Handle multi-ranges that can be represented as anti-ranges. */ - if (!vr1->legacy_mode_p () && vr1->maybe_anti_range ()) - { - int_range<3> tmp (*vr1); - tmp.invert (); - intersect_ranges (&vr0kind, &vr0min, &vr0max, - VR_ANTI_RANGE, tmp.min (), tmp.max ()); - } - else - intersect_ranges (&vr0kind, &vr0min, &vr0max, - vr1->kind (), vr1->min (), vr1->max ()); + + intersect_ranges (&vr0kind, &vr0min, &vr0max, + vr1->kind (), vr1->min (), vr1->max ()); /* Make sure to canonicalize the result though as the inversion of a VR_RANGE can still be a VR_RANGE. */ @@ -1427,6 +1414,9 @@ give_up: void irange::legacy_union (irange *vr0, const irange *vr1) { + gcc_checking_assert (vr0->legacy_mode_p ()); + gcc_checking_assert (vr1->legacy_mode_p ()); + /* VR0 has the resulting range if VR1 is undefined or VR0 is varying. 
*/ if (vr1->undefined_p () || vr0->varying_p ()) @@ -1435,16 +1425,10 @@ irange::legacy_union (irange *vr0, const irange *vr1) /* VR1 has the resulting range if VR0 is undefined or VR1 is varying. */ if (vr0->undefined_p ()) { - /* Avoid the full copy if we already know both sides are simple - and can be trivially copied. */ - if (vr1->legacy_mode_p ()) - { - vr0->set (vr1->min (), vr1->max (), vr1->kind ()); - return; - } - *vr0 = *vr1; + vr0->set (vr1->min (), vr1->max (), vr1->kind ()); return; } + if (vr1->varying_p ()) { vr0->set_varying (vr1->type ()); @@ -1454,17 +1438,9 @@ irange::legacy_union (irange *vr0, const irange *vr1) value_range_kind vr0kind = vr0->kind (); tree vr0min = vr0->min (); tree vr0max = vr0->max (); - /* Handle multi-ranges that can be represented as anti-ranges. */ - if (!vr1->legacy_mode_p () && vr1->maybe_anti_range ()) - { - int_range<3> tmp (*vr1); - tmp.invert (); - union_ranges (&vr0kind, &vr0min, &vr0max, - VR_ANTI_RANGE, tmp.min (), tmp.max ()); - } - else - union_ranges (&vr0kind, &vr0min, &vr0max, - vr1->kind (), vr1->min (), vr1->max ()); + + union_ranges (&vr0kind, &vr0min, &vr0max, + vr1->kind (), vr1->min (), vr1->max ()); if (vr0kind == VR_UNDEFINED) vr0->set_undefined (); @@ -1492,6 +1468,12 @@ irange::union_ (const irange *other) { if (legacy_mode_p ()) { + if (!other->legacy_mode_p ()) + { + int_range<1> tmp = *other; + legacy_union (this, &tmp); + return; + } if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Meeting\n "); @@ -1514,8 +1496,7 @@ irange::union_ (const irange *other) if (other->legacy_mode_p ()) { - int_range<2> wider; - wider = *other; + int_range<2> wider = *other; irange_union (wider); } else @@ -1527,6 +1508,12 @@ irange::intersect (const irange *other) { if (legacy_mode_p ()) { + if (!other->legacy_mode_p ()) + { + int_range<1> tmp = *other; + legacy_intersect (this, &tmp); + return; + } if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Intersecting\n "); -- cgit 
v1.1 From 66a032079309069fec085fff2a014ac217ce5781 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Mon, 5 Oct 2020 18:06:19 -0400 Subject: c++: Fix typo in NON_UNION_CLASS_TYPE_P. gcc/cp/ChangeLog: * cp-tree.h (NON_UNION_CLASS_TYPE_P): Fix typo in a comment. --- gcc/cp/cp-tree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index c9ad751..c7b5e791 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -2064,7 +2064,7 @@ enum languages { lang_c, lang_cplusplus }; #define CLASS_TYPE_P(T) \ (RECORD_OR_UNION_CODE_P (TREE_CODE (T)) && TYPE_LANG_FLAG_5 (T)) -/* Nonzero if T is a class type but not an union. */ +/* Nonzero if T is a class type but not a union. */ #define NON_UNION_CLASS_TYPE_P(T) \ (TREE_CODE (T) == RECORD_TYPE && TYPE_LANG_FLAG_5 (T)) -- cgit v1.1 From 7e9282ae62f5318686dcd58498337090531cd6fc Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 6 Oct 2020 00:16:25 +0000 Subject: Daily bump. --- gcc/ChangeLog | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/c-family/ChangeLog | 11 +++++++++++ gcc/cp/ChangeLog | 17 +++++++++++++++++ gcc/testsuite/ChangeLog | 4 ++++ 5 files changed, 84 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c9bd8d3..cd3901b 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,54 @@ +2020-10-05 Aldy Hernandez + + * value-range.cc (irange::legacy_intersect): Only handle + legacy ranges. + (irange::legacy_union): Same. + (irange::union_): When unioning legacy with non-legacy, + first convert to legacy and do everything in legacy mode. + (irange::intersect): Same, but for intersect. + * range-op.cc (range_tests): Adjust for above changes. + +2020-10-05 Aldy Hernandez + + * range-op.cc (operator_div::wi_fold): Return varying for + division by zero. + (class operator_rshift): Move class up. + (operator_abs::wi_fold): Return [-MIN,-MIN] for ABS([-MIN,-MIN]). 
+ (operator_tests): Adjust tests. + +2020-10-05 Tom de Vries + + * tracer.c (ignore_bb_p): Ignore GOMP_SIMT_XCHG_*. + +2020-10-05 Alex Coplan + + * config/arm/arm-cpus.in (neoverse-v1): Add missing vendor and + part numbers. + +2020-10-05 Tom de Vries + + * tracer.c (ignore_bb_p): Remove incorrect suggestion. + +2020-10-05 Jakub Jelinek + + * opth-gen.awk: Don't emit explicit_mask array if n_target_explicit + is equal to n_target_explicit_mask. + * optc-save-gen.awk: Compute has_target_explicit_mask and if false, + don't emit code iterating over explicit_mask array elements. Stream + also explicit_mask_* target members. + +2020-10-05 Jakub Jelinek + + * gimple-ssa-store-merging.c + (imm_store_chain_info::output_merged_store): Use ~0U instead of ~0 in + unsigned int array initializer. + +2020-10-05 Tom de Vries + + PR fortran/95654 + * tracer.c (ignore_bb_p): Ignore GOMP_SIMT_ENTER_ALLOC, + GOMP_SIMT_VOTE_ANY and GOMP_SIMT_EXIT. + 2020-10-03 Jakub Jelinek * opth-gen.awk: For variables referenced in Mask and InverseMask, diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 9d81fe1..684bf4b 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20201005 +20201006 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 1e36632..9889555 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,14 @@ +2020-10-05 Richard Biener + Jakub Jelinek + + PR c++/97197 + * c-pretty-print.c: Include langhooks.h. + (c_pretty_printer::postfix_expression): Handle TARGET_MEM_REF as + expression. + (c_pretty_printer::expression): Handle TARGET_MEM_REF as + unary_expression. + (c_pretty_printer::unary_expression): Handle TARGET_MEM_REF. + 2020-09-30 Martin Sebor PR middle-end/97189 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 472fcbe..a741e06 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,20 @@ +2020-10-05 Marek Polacek + + * cp-tree.h (NON_UNION_CLASS_TYPE_P): Fix typo in a comment. 
+ +2020-10-05 Richard Biener + Jakub Jelinek + + PR c++/97197 + * error.c (dump_expr): Handle TARGET_MEM_REF. + +2020-10-05 Nathan Sidwell + + * name-lookup.c (maybe_add_fuzzy_decl): New. + (maybe_add_fuzzy_binding): New. + (consider_binding_level): Use intermediate sortable vector for + namespace bindings. + 2020-10-02 Marek Polacek PR c++/97014 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 554563b..5b92a02 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2020-10-05 Nathan Sidwell + + * c-c++-common/spellcheck-reserved.c: Restore diagnostic. + 2020-10-04 Harald Anlauf PR fortran/97272 -- cgit v1.1 From 7164745e1c21ae85c52b727d61092ad0685b46e9 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Sun, 4 Oct 2020 02:03:36 -0400 Subject: gofrontend: correct file reading logic in Stream_from_file The implementation of Stream_from_file mishandled several cases: * It reversed the check for whether bytes were already available in the peek buffer. * It considered positive return values from lseek to be an error, when only a -1 return value indicates an error. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/259437 --- gcc/go/gofrontend/MERGE | 2 +- gcc/go/gofrontend/import.cc | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 9482740..701b2d4 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -801c458a562d22260ff176c26d65639dd32c8a90 +d00febdab0535546ccbf1ef634be1f23b09c8b77 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. 
diff --git a/gcc/go/gofrontend/import.cc b/gcc/go/gofrontend/import.cc index c63ae24..081afef 100644 --- a/gcc/go/gofrontend/import.cc +++ b/gcc/go/gofrontend/import.cc @@ -1487,7 +1487,7 @@ Stream_from_file::~Stream_from_file() bool Stream_from_file::do_peek(size_t length, const char** bytes) { - if (this->data_.length() <= length) + if (this->data_.length() >= length) { *bytes = this->data_.data(); return true; @@ -1504,7 +1504,7 @@ Stream_from_file::do_peek(size_t length, const char** bytes) return false; } - if (lseek(this->fd_, - got, SEEK_CUR) != 0) + if (lseek(this->fd_, - got, SEEK_CUR) < 0) { if (!this->saw_error()) go_fatal_error(Linemap::unknown_location(), "lseek failed: %m"); @@ -1524,7 +1524,7 @@ Stream_from_file::do_peek(size_t length, const char** bytes) void Stream_from_file::do_advance(size_t skip) { - if (lseek(this->fd_, skip, SEEK_CUR) != 0) + if (lseek(this->fd_, skip, SEEK_CUR) < 0) { if (!this->saw_error()) go_fatal_error(Linemap::unknown_location(), "lseek failed: %m"); @@ -1532,7 +1532,7 @@ Stream_from_file::do_advance(size_t skip) } if (!this->data_.empty()) { - if (this->data_.length() < skip) + if (this->data_.length() > skip) this->data_.erase(0, skip); else this->data_.clear(); -- cgit v1.1 From d2364fb436ff64a4c5293b697f37d53fd58a9f54 Mon Sep 17 00:00:00 2001 From: Andreas Krebbel Date: Tue, 6 Oct 2020 07:56:51 +0200 Subject: IBM Z: Doc: Add z15/arch13 to the list of -march/-mtune options gcc/ChangeLog: * doc/invoke.texi: Add z15/arch13 to the list of documented -march/-mtune options. --- gcc/doc/invoke.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index f623467..7c81d7f 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -27698,7 +27698,7 @@ system representing a certain processor type. 
Possible values for @var{cpu-type} are @samp{z900}/@samp{arch5}, @samp{z990}/@samp{arch6}, @samp{z9-109}, @samp{z9-ec}/@samp{arch7}, @samp{z10}/@samp{arch8}, @samp{z196}/@samp{arch9}, @samp{zEC12}, @samp{z13}/@samp{arch11}, -@samp{z14}/@samp{arch12}, and @samp{native}. +@samp{z14}/@samp{arch12}, @samp{z15}/@samp{arch13}, and @samp{native}. The default is @option{-march=z900}. -- cgit v1.1 From 3e8fb15a8cfd0e62dd474af9f536863392ed7572 Mon Sep 17 00:00:00 2001 From: Joe Ramsay Date: Tue, 6 Oct 2020 07:33:52 +0100 Subject: arm: Add +nomve and +nomve.fp options to -mcpu=cortex-m55 This patch rearranges feature bits for MVE and FP to implement the following flags for -mcpu=cortex-m55. - +nomve: equivalent to armv8.1-m.main+fp.dp+dsp. - +nomve.fp: equivalent to armv8.1-m.main+mve+fp.dp (+dsp is implied by +mve). - +nofp: equivalent to armv8.1-m.main+mve (+dsp is implied by +mve). - +nodsp: equivalent to armv8.1-m.main+fp.dp. Combinations of the above: - +nomve+nofp: equivalent to armv8.1-m.main+dsp. - +nodsp+nofp: equivalent to armv8.1-m.main. Due to MVE and FP sharing vfp_base, some new syntax was required in the CPU description to implement the concept of 'implied bits'. These are non-named features added to the ISA late, depending on whether one or more features which depend on them are present. This means vfp_base can be present when only one of MVE and FP is removed, but absent when both are removed. gcc/ChangeLog: 2020-07-31 Joe Ramsay * config/arm/arm-cpus.in: (ALL_FPU_INTERNAL): Remove vfp_base. (VFPv2): Remove vfp_base. (MVE): Remove vfp_base. (vfp_base): Redefine as implied bit dependent on MVE or FP (cortex-m55): Add flags to disable MVE, MVE FP, FP and DSP extensions. * config/arm/arm.c (arm_configure_build_target): Add implied bits to ISA. * config/arm/parsecpu.awk: (gen_isa): Print implied bits and their dependencies to ISA header. (gen_data): Add parsing for implied feature bits. 
gcc/testsuite/ChangeLog: * gcc.target/arm/cortex-m55-nodsp-flag-hard.c: New test. * gcc.target/arm/cortex-m55-nodsp-flag-softfp.c: New test. * gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c: New test. * gcc.target/arm/cortex-m55-nofp-flag-hard.c: New test. * gcc.target/arm/cortex-m55-nofp-flag-softfp.c: New test. * gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c: New test. * gcc.target/arm/cortex-m55-nomve-flag-hard.c: New test. * gcc.target/arm/cortex-m55-nomve-flag-softfp.c: New test. * gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c: New test. * gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c: New test. * gcc.target/arm/multilib.exp: Add tests for -mcpu=cortex-m55. --- gcc/config/arm/arm-cpus.in | 26 ++++++++--- gcc/config/arm/arm.c | 14 ++++++ gcc/config/arm/parsecpu.awk | 51 ++++++++++++++++++++++ .../gcc.target/arm/cortex-m55-nodsp-flag-hard.c | 15 +++++++ .../gcc.target/arm/cortex-m55-nodsp-flag-softfp.c | 15 +++++++ .../arm/cortex-m55-nodsp-nofp-flag-softfp.c | 15 +++++++ .../gcc.target/arm/cortex-m55-nofp-flag-hard.c | 15 +++++++ .../gcc.target/arm/cortex-m55-nofp-flag-softfp.c | 15 +++++++ .../arm/cortex-m55-nofp-nomve-flag-softfp.c | 15 +++++++ .../gcc.target/arm/cortex-m55-nomve-flag-hard.c | 15 +++++++ .../gcc.target/arm/cortex-m55-nomve-flag-softfp.c | 15 +++++++ .../gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c | 15 +++++++ .../arm/cortex-m55-nomve.fp-flag-softfp.c | 15 +++++++ gcc/testsuite/gcc.target/arm/multilib.exp | 16 +++++++ 14 files changed, 250 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-softfp.c create mode 100644 
gcc/testsuite/gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-softfp.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c create mode 100644 gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c (limited to 'gcc') diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index 27ce000..8c61ad0 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -135,10 +135,6 @@ define feature armv8_1m_main # Floating point and Neon extensions. # VFPv1 is not supported in GCC. -# This feature bit is enabled for all VFP, MVE and -# MVE with floating point extensions. -define feature vfp_base - # Vector floating point v2. define feature vfpv2 @@ -251,7 +247,7 @@ define fgroup ALL_SIMD ALL_SIMD_INTERNAL ALL_SIMD_EXTERNAL # List of all FPU bits to strip out if -mfpu is used to override the # default. fp16 is deliberately missing from this list. -define fgroup ALL_FPU_INTERNAL vfp_base vfpv2 vfpv3 vfpv4 fpv5 fp16conv fp_dbl ALL_SIMD_INTERNAL +define fgroup ALL_FPU_INTERNAL vfpv2 vfpv3 vfpv4 fpv5 fp16conv fp_dbl ALL_SIMD_INTERNAL # Similarly, but including fp16 and other extensions that aren't part of # -mfpu support. define fgroup ALL_FPU_EXTERNAL fp16 bf16 @@ -296,11 +292,11 @@ define fgroup ARMv8r ARMv8a define fgroup ARMv8_1m_main ARMv8m_main armv8_1m_main # Useful combinations. -define fgroup VFPv2 vfp_base vfpv2 +define fgroup VFPv2 vfpv2 define fgroup VFPv3 VFPv2 vfpv3 define fgroup VFPv4 VFPv3 vfpv4 fp16conv define fgroup FPv5 VFPv4 fpv5 -define fgroup MVE mve vfp_base armv7em +define fgroup MVE mve armv7em define fgroup MVE_FP MVE FPv5 fp16 mve_float define fgroup FP_DBL fp_dbl @@ -310,6 +306,18 @@ define fgroup NEON FP_D32 neon define fgroup CRYPTO NEON crypto define fgroup DOTPROD NEON dotprod +# Implied feature bits. 
These are for non-named features shared between fgroups. +# Shared feature f belonging to fgroups A and B will be erroneously removed if: +# A and B are enabled by default AND A is disabled by a removal flag. +# To ensure that f is retained, we must add such bits to the ISA after +# processing the removal flags. This is implemented by 'implied bits': +# define implied <name> [<feature-or-fgroup>]+ +# This indicates that, if any of the listed features are enabled, or if any +# member of a listed fgroup is enabled, then <name> will be implicitly enabled. + +# Enabled for all VFP, MVE and MVE with floating point extensions. +define implied vfp_base MVE MVE_FP ALL_FP + # List of all quirk bits to strip out when comparing CPU features with # architectures. # xscale isn't really a 'quirk', but it isn't an architecture either and we @@ -1565,6 +1573,10 @@ begin cpu cortex-m55 cname cortexm55 tune flags LDSCHED architecture armv8.1-m.main+mve.fp+fp.dp + option nomve.fp remove mve_float + option nomve remove mve mve_float + option nofp remove ALL_FP mve_float + option nodsp remove MVE mve_float isa quirk_no_asmcpu costs v7m vendor 41 diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 0e23246..bd7be8f 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -3391,6 +3391,20 @@ arm_configure_build_target (struct arm_build_target *target, bitmap_ior (target->isa, target->isa, fpu_bits); } + /* There may be implied bits which we still need to enable. These are + non-named features which are needed to complete other sets of features, + but cannot be enabled from arm-cpus.in due to being shared between + multiple fgroups. Each entry in all_implied_fbits is of the form + ante -> cons, meaning that if the feature "ante" is enabled, we should + implicitly enable "cons".
*/ + const struct fbit_implication *impl = all_implied_fbits; + while (impl->ante) + { + if (bitmap_bit_p (target->isa, impl->ante)) + bitmap_set_bit (target->isa, impl->cons); + impl++; + } + if (!arm_selected_tune) arm_selected_tune = arm_selected_cpu; else /* Validate the features passed to -mtune. */ diff --git a/gcc/config/arm/parsecpu.awk b/gcc/config/arm/parsecpu.awk index 7fc3754..9423e8a 100644 --- a/gcc/config/arm/parsecpu.awk +++ b/gcc/config/arm/parsecpu.awk @@ -190,6 +190,23 @@ function gen_isa () { ORS = z print "\n" } + + print "struct fbit_implication {" + print " /* Represents a feature implication, where:" + print " ante IMPLIES cons" + print " meaning that if ante is enabled then we should" + print " also implicitly enable cons. */" + print " enum isa_feature ante;" + print " enum isa_feature cons;" + print "};\n" + print "static const struct fbit_implication all_implied_fbits[] =" + print "{" + for (impl in implied_bits) { + split (impl, impl_parts, SUBSEP) + print " { isa_bit_" impl_parts[2] ", isa_bit_" impl_parts[1] " }," + } + print " { isa_nobit, isa_nobit }" + print "};\n" } function gen_data () { @@ -600,6 +617,40 @@ BEGIN { parse_ok = 1 } +/^define implied / { + if (NF < 4) fatal("syntax: define implied <name> [<feature-or-fgroup>]+\n" \ + "Implied bits must be defined with at least one antecedent.") + toplevel() + fbit = $3 + if (fbit in features) fatal("implied feature " fbit " aliases a real feature") + if (fbit in fgroup) fatal("implied feature " fbit " aliases a feature group") + fcount = NF + features[fbit] = 1 + for (n = 4; n <= fcount; n++) { + ante = $n + if (fbit == ante) fatal("feature cannot imply itself") + else if (ante in features) { + for (impl in implied_bits) { + split(impl, impl_sep, SUBSEP) + if (ante == impl_sep[1]) + fatal(ante " implies implied bit " fbit \ + ".
Chained implications not currently supported") + } + implied_bits[fbit, ante] = 1 + } else if (ante in fgroup) { + for (bitcomb in fgrp_bits) { + split(bitcomb, bitsep, SUBSEP) + if (bitsep[1] == ante) { + implied_bits[fbit, bitsep[2]] = 1 + } + } + } else { + fatal("implied bit antecedent " ante " unrecognized") + } + } + parse_ok = 1 +} + /^begin fpu / { if (NF != 3) fatal("syntax: begin fpu <name>") toplevel() diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c new file mode 100644 index 0000000..b3c7fd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-hard.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-additional-options "-mcpu=cortex-m55+nodsp -mfloat-abi=hard -mfpu=auto --save-temps" } */ +/* { dg-final { scan-assembler "\.arch_extension fp" } } */ +/* { dg-final { scan-assembler "\.arch_extension fp.dp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension dsp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve.fp" } } */ +/* { dg-final { scan-assembler "\.fpu fpv5-d16" } } */ + +int +f () +{ + return 1; +} diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c new file mode 100644 index 0000000..3806554 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-flag-softfp.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-additional-options "-mcpu=cortex-m55+nodsp -mfloat-abi=softfp -mfpu=auto --save-temps" } */ +/* { dg-final { scan-assembler "\.arch_extension fp" } } */ +/* { dg-final { scan-assembler "\.arch_extension fp.dp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension dsp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve" } } */
+/* { dg-final { scan-assembler-not "\.arch_extension mve.fp" } } */ +/* { dg-final { scan-assembler "\.fpu fpv5-d16" } } */ + +int +f () +{ + return 1; +} diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c new file mode 100644 index 0000000..d22eb4e --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-additional-options "-mcpu=cortex-m55+nodsp+nofp -mfloat-abi=softfp -mfpu=auto --save-temps" } */ +/* { dg-final { scan-assembler-not "\.arch_extension fp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension fp.dp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension dsp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve.fp" } } */ +/* { dg-final { scan-assembler "\.fpu softvfp" } } */ + +int +f () +{ + return 1; +} diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c b/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c new file mode 100644 index 0000000..da1cc25 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-hard.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-additional-options "-mcpu=cortex-m55+nofp -mfloat-abi=hard -mfpu=auto --save-temps" } */ +/* { dg-final { scan-assembler "\.arch_extension mve" } } */ +/* { dg-final { scan-assembler "\.arch_extension dsp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension fp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension fp.dp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve.fp" } } */ +/* { dg-final { scan-assembler-not "\.fpu" } } */ + +int +f () +{ + return 1; +} diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-softfp.c 
b/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-softfp.c new file mode 100644 index 0000000..0a4fb14 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-flag-softfp.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-additional-options "-mcpu=cortex-m55+nofp -mfloat-abi=softfp -mfpu=auto --save-temps" } */ +/* { dg-final { scan-assembler "\.arch_extension mve" } } */ +/* { dg-final { scan-assembler "\.arch_extension dsp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension fp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension fp.dp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve.fp" } } */ +/* { dg-final { scan-assembler-not "\.fpu" } } */ + +int +f () +{ + return 1; +} diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c b/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c new file mode 100644 index 0000000..2ae7f34 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-additional-options "-mcpu=cortex-m55+nomve+nofp -mfloat-abi=softfp -mfpu=auto --save-temps" } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve.fp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension fp" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension fp.dp" } } */ +/* { dg-final { scan-assembler "\.arch_extension dsp" } } */ +/* { dg-final { scan-assembler-not "\.fpu" } } */ + +int +f () +{ + return 1; +} diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-hard.c b/gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-hard.c new file mode 100644 index 0000000..a6ccd7b --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-hard.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ +/* { 
dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-additional-options "-mcpu=cortex-m55+nomve -mfloat-abi=hard -mfpu=auto --save-temps" } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve" } } */ +/* { dg-final { scan-assembler "\.arch_extension dsp" } } */ +/* { dg-final { scan-assembler "\.arch_extension fp" } } */ +/* { dg-final { scan-assembler "\.arch_extension fp.dp" } } */ +/* { dg-final { scan-assembler "\.fpu fpv5-d16" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve.fp" } } */ + +int +f () +{ + return 1; +} diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-softfp.c b/gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-softfp.c new file mode 100644 index 0000000..2ad976a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nomve-flag-softfp.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-additional-options "-mcpu=cortex-m55+nomve -mfloat-abi=softfp -mfpu=auto --save-temps" } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve" } } */ +/* { dg-final { scan-assembler "\.arch_extension dsp" } } */ +/* { dg-final { scan-assembler "\.arch_extension fp" } } */ +/* { dg-final { scan-assembler "\.arch_extension fp.dp" } } */ +/* { dg-final { scan-assembler "\.fpu fpv5-d16" } } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve.fp" } } */ + +int +f () +{ + return 1; +} diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c b/gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c new file mode 100644 index 0000000..40d54b8 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-additional-options "-mcpu=cortex-m55+nomve.fp -mfloat-abi=hard -mfpu=auto --save-temps" } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve.fp" } } */ +/* { dg-final { scan-assembler 
"\.arch_extension mve" } } */ +/* { dg-final { scan-assembler "\.arch_extension dsp" } } */ +/* { dg-final { scan-assembler "\.arch_extension fp" } } */ +/* { dg-final { scan-assembler "\.arch_extension fp.dp" } } */ +/* { dg-final { scan-assembler "\.fpu fpv5-d16" } } */ + +int +f () +{ + return 1; +} diff --git a/gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c b/gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c new file mode 100644 index 0000000..c726803 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c @@ -0,0 +1,15 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-additional-options "-mcpu=cortex-m55+nomve.fp -mfloat-abi=softfp -mfpu=auto --save-temps" } */ +/* { dg-final { scan-assembler-not "\.arch_extension mve.fp" } } */ +/* { dg-final { scan-assembler "\.arch_extension mve" } } */ +/* { dg-final { scan-assembler "\.arch_extension dsp" } } */ +/* { dg-final { scan-assembler "\.arch_extension fp" } } */ +/* { dg-final { scan-assembler "\.arch_extension fp.dp" } } */ +/* { dg-final { scan-assembler "\.fpu fpv5-d16" } } */ + +int +f () +{ + return 1; +} diff --git a/gcc/testsuite/gcc.target/arm/multilib.exp b/gcc/testsuite/gcc.target/arm/multilib.exp index c5f3c02..6aba29e 100644 --- a/gcc/testsuite/gcc.target/arm/multilib.exp +++ b/gcc/testsuite/gcc.target/arm/multilib.exp @@ -824,6 +824,22 @@ if {[multilib_config "rmprofile"] } { {-march=armv8.1-m.main+mve.fp+fp.dp -mfpu=auto -mfloat-abi=softfp} "thumb/v8-m.main+dp/softfp" {-march=armv8.1-m.main+mve+fp.dp -mfpu=auto -mfloat-abi=hard} "thumb/v8-m.main+dp/hard" {-march=armv8.1-m.main+mve.fp+fp.dp -mfpu=auto -mfloat-abi=hard} "thumb/v8-m.main+dp/hard" + {-mcpu=cortex-m55+nomve -mfpu=auto -mfloat-abi=soft} "thumb/v8-m.main/nofp" + {-mcpu=cortex-m55+nomve -mfpu=auto -mfloat-abi=softfp} "thumb/v8-m.main+dp/softfp" + {-mcpu=cortex-m55+nomve -mfpu=auto -mfloat-abi=hard} "thumb/v8-m.main+dp/hard" + 
{-mcpu=cortex-m55+nomve.fp -mfpu=auto -mfloat-abi=soft} "thumb/v8-m.main/nofp" + {-mcpu=cortex-m55+nomve.fp -mfpu=auto -mfloat-abi=softfp} "thumb/v8-m.main+dp/softfp" + {-mcpu=cortex-m55+nomve.fp -mfpu=auto -mfloat-abi=hard} "thumb/v8-m.main+dp/hard" + {-mcpu=cortex-m55+nofp -mfpu=auto -mfloat-abi=soft} "thumb/v8-m.main/nofp" + {-mcpu=cortex-m55+nofp -mfpu=auto -mfloat-abi=softfp} "thumb/v8-m.main/nofp" + {-mcpu=cortex-m55+nofp -mfpu=auto -mfloat-abi=hard} "thumb/v8.1-m.main+mve/hard" + {-mcpu=cortex-m55+nodsp -mfpu=auto -mfloat-abi=soft} "thumb/v8-m.main/nofp" + {-mcpu=cortex-m55+nodsp -mfpu=auto -mfloat-abi=softfp} "thumb/v8-m.main+dp/softfp" + {-mcpu=cortex-m55+nodsp -mfpu=auto -mfloat-abi=hard} "thumb/v8-m.main+dp/hard" + {-mcpu=cortex-m55+nomve+nofp -mfpu=auto -mfloat-abi=soft} "thumb/v8-m.main/nofp" + {-mcpu=cortex-m55+nomve+nofp -mfpu=auto -mfloat-abi=softfp} "thumb/v8-m.main/nofp" + {-mcpu=cortex-m55+nodsp+nofp -mfpu=auto -mfloat-abi=soft} "thumb/v8-m.main/nofp" + {-mcpu=cortex-m55+nodsp+nofp -mfpu=auto -mfloat-abi=softfp} "thumb/v8-m.main/nofp" } { check_multi_dir $opts $dir } -- cgit v1.1 From 44e20dce597328f3cb00e997fa90b95a2b710d4c Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 6 Oct 2020 09:25:00 +0200 Subject: openmp: Fix ICE in omp_discover_declare_target_tgt_fn_r This ICEs because node->alias_target is (not yet) a FUNCTION_DECL, but IDENTIFIER_NODE. I guess we should retry the discovery before LTO streaming out, the reason to do it this early is that it can affect the gimplification and omp lowering. 2020-10-06 Jakub Jelinek PR middle-end/97289 * omp-offload.c (omp_discover_declare_target_tgt_fn_r): Only follow node->alias_target if it is a FUNCTION_DECL. * c-c++-common/gomp/pr97289.c: New test. 
--- gcc/omp-offload.c | 3 ++- gcc/testsuite/c-c++-common/gomp/pr97289.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/c-c++-common/gomp/pr97289.c (limited to 'gcc') diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c index 7fb3a72..590007b 100644 --- a/gcc/omp-offload.c +++ b/gcc/omp-offload.c @@ -203,7 +203,8 @@ omp_discover_declare_target_tgt_fn_r (tree *tp, int *walk_subtrees, void *data) symtab_node *node = symtab_node::get (*tp); if (node != NULL) { - while (node->alias_target) + while (node->alias_target + && TREE_CODE (node->alias_target) == FUNCTION_DECL) { if (!omp_declare_target_fn_p (node->decl) && !lookup_attribute ("omp declare target host", diff --git a/gcc/testsuite/c-c++-common/gomp/pr97289.c b/gcc/testsuite/c-c++-common/gomp/pr97289.c new file mode 100644 index 0000000..8331b95 --- /dev/null +++ b/gcc/testsuite/c-c++-common/gomp/pr97289.c @@ -0,0 +1,14 @@ +/* PR middle-end/97289 */ +/* { dg-do compile } */ +/* { dg-require-weak "" } */ +/* { dg-skip-if "" { "hppa*-*-hpux*" "*-*-aix*" "nvptx-*-*" } } */ + +void foo (void); +static void bar (void) __attribute__ ((__weakref__ ("foo"))); + +void +baz (void) +{ +#pragma omp target + bar (); +} -- cgit v1.1 From 952adf021889b5e055085d0ed63942ff97d913de Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Tue, 6 Oct 2020 08:21:56 +0200 Subject: Fix off-by-one storage problem in irange_allocator. gcc/ChangeLog: * value-range.h (irange_allocator::allocate): Increase newir storage by one. 
--- gcc/value-range.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/value-range.h b/gcc/value-range.h index 94b48e5..7031a823 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -670,7 +670,7 @@ irange_allocator::allocate (unsigned num_pairs) struct newir { irange range; - tree mem[1]; + tree mem[2]; }; size_t nbytes = (sizeof (newir) + sizeof (tree) * 2 * (num_pairs - 1)); struct newir *r = (newir *) obstack_alloc (&m_obstack, nbytes); -- cgit v1.1 From bf510679bb3f9bfd6019666065016bb26a5b5466 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 6 Oct 2020 10:32:22 +0200 Subject: divmod: Match and expand DIVMOD even in some cases of constant divisor [PR97282] As written in the comment, tree-ssa-math-opts.c wouldn't create a DIVMOD ifn call for division + modulo by constant for the fear that during expansion we could generate better code for those cases. If the divisor is a power of two, that is certainly the case always, but otherwise expand_divmod can punt in many cases, e.g. if the division type's precision is above HOST_BITS_PER_WIDE_INT, we don't even call choose_multiplier, because it works on HOST_WIDE_INTs (true, something we should fix eventually now that we have wide_ints), or if pre/post shift is larger than BITS_PER_WORD. So, the following patch recognizes DIVMOD with constant last argument even when it is unclear if expand_divmod will be able to optimize it, and then during DIVMOD expansion if the divisor is constant attempts to expand it as division + modulo and if they actually don't contain any libcalls or division/modulo, they are kept as is, otherwise that sequence is thrown away and divmod optab or libcall is used. 2020-10-06 Jakub Jelinek PR rtl-optimization/97282 * tree-ssa-math-opts.c (divmod_candidate_p): Don't return false for constant op2 if it is not a power of two and the type has precision larger than HOST_BITS_PER_WIDE_INT or BITS_PER_WORD.
* internal-fn.c (contains_call_div_mod): New function. (expand_DIVMOD): If last argument is a constant, try to expand it as TRUNC_DIV_EXPR followed by TRUNC_MOD_EXPR, but if the sequence contains any calls or {,U}{DIV,MOD} rtxes, throw it away and use divmod optab or divmod libfunc. * gcc.target/i386/pr97282.c: New test. --- gcc/internal-fn.c | 67 +++++++++++++++++++++++++++++++-- gcc/testsuite/gcc.target/i386/pr97282.c | 25 ++++++++++++ gcc/tree-ssa-math-opts.c | 17 ++++++++- 3 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr97282.c (limited to 'gcc') diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index c897082..92cb3cd 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-phinodes.h" #include "ssa-iterators.h" #include "explow.h" +#include "rtl-iter.h" /* The names of each internal function, indexed by function number. */ const char *const internal_fn_name_array[] = { @@ -2985,6 +2986,32 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab) emit_move_insn (lhs_rtx, ops[0].value); } +/* Helper for expand_DIVMOD. Return true if the sequence starting with + INSN contains any call insns or insns with {,U}{DIV,MOD} rtxes. */ + +static bool +contains_call_div_mod (rtx_insn *insn) +{ + subrtx_iterator::array_type array; + for (; insn; insn = NEXT_INSN (insn)) + if (CALL_P (insn)) + return true; + else if (INSN_P (insn)) + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) + switch (GET_CODE (*iter)) + { + case CALL: + case DIV: + case UDIV: + case MOD: + case UMOD: + return true; + default: + break; + } + return false; + } + /* Expand DIVMOD() using: a) optab handler for udivmod/sdivmod if it is available. 
b) If optab_handler doesn't exist, generate call to @@ -3007,10 +3034,44 @@ expand_DIVMOD (internal_fn, gcall *call_stmt) rtx op1 = expand_normal (arg1); rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); - rtx quotient, remainder, libfunc; + rtx quotient = NULL_RTX, remainder = NULL_RTX; + rtx_insn *insns = NULL; + + if (TREE_CODE (arg1) == INTEGER_CST) + { + /* For DIVMOD by integral constants, there could be efficient code + expanded inline e.g. using shifts and plus/minus. Try to expand + the division and modulo and if it emits any library calls or any + {,U}{DIV,MOD} rtxes throw it away and use a divmod optab or + divmod libcall. */ + struct separate_ops ops; + ops.code = TRUNC_DIV_EXPR; + ops.type = type; + ops.op0 = make_tree (ops.type, op0); + ops.op1 = arg1; + ops.op2 = NULL_TREE; + ops.location = gimple_location (call_stmt); + start_sequence (); + quotient = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL); + if (contains_call_div_mod (get_insns ())) + quotient = NULL_RTX; + else + { + ops.code = TRUNC_MOD_EXPR; + remainder = expand_expr_real_2 (&ops, NULL_RTX, mode, EXPAND_NORMAL); + if (contains_call_div_mod (get_insns ())) + remainder = NULL_RTX; + } + if (remainder) + insns = get_insns (); + end_sequence (); + } + + if (remainder) + emit_insn (insns); /* Check if optab_handler exists for divmod_optab for given mode. */ - if (optab_handler (tab, mode) != CODE_FOR_nothing) + else if (optab_handler (tab, mode) != CODE_FOR_nothing) { quotient = gen_reg_rtx (mode); remainder = gen_reg_rtx (mode); @@ -3018,7 +3079,7 @@ expand_DIVMOD (internal_fn, gcall *call_stmt) } /* Generate call to divmod libfunc if it exists. 
*/ - else if ((libfunc = optab_libfunc (tab, mode)) != NULL_RTX) + else if (rtx libfunc = optab_libfunc (tab, mode)) targetm.expand_divmod_libfunc (libfunc, mode, op0, op1, &quotient, &remainder); diff --git a/gcc/testsuite/gcc.target/i386/pr97282.c b/gcc/testsuite/gcc.target/i386/pr97282.c new file mode 100644 index 0000000..6fb10c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr97282.c @@ -0,0 +1,25 @@ +/* PR rtl-optimization/97282 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler "call\[^\n\r]*__udivmod\[dt]i4" } } */ + +#ifdef __SIZEOF_INT128__ +typedef __uint128_t T; +#else +typedef unsigned long long T; +#endif + +unsigned long +foo (T x) +{ + if (x == 0) + return 0; + + unsigned long ret = 0; + while (x > 0) + { + ret = ret + x % 10; + x = x / 10; + } + return ret; +} diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index bdbb9d9..4927255 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -3567,9 +3567,24 @@ divmod_candidate_p (gassign *stmt) /* Disable the transform if either is a constant, since division-by-constant may have specialized expansion. */ - if (CONSTANT_CLASS_P (op1) || CONSTANT_CLASS_P (op2)) + if (CONSTANT_CLASS_P (op1)) return false; + if (CONSTANT_CLASS_P (op2)) + { + if (integer_pow2p (op2)) + return false; + + if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT + && TYPE_PRECISION (type) <= BITS_PER_WORD) + return false; + + /* If the divisor is not power of 2 and the precision wider than + HWI, expand_divmod punts on that, so in that case it is better + to use divmod optab or libfunc. Similarly if choose_multiplier + might need pre/post shifts of BITS_PER_WORD or more. */ + } + /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should expand using the [su]divv optabs.
*/ if (TYPE_OVERFLOW_TRAPS (type)) -- cgit v1.1 From 9d63e3ab40778a7dfd20605c8741becbb22b9014 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Sun, 4 Oct 2020 13:23:37 +0200 Subject: [ftracer] Factor out can_duplicate_bb_p Factor out can_duplicate_bb_p out of ignore_bb_p. Also factor out can_duplicate_insn_p and can_duplicate_bb_no_insn_iter_p to expose the parts of can_duplicate_bb_p that are per-bb and per-insn. Bootstrapped and reg-tested on x86_64-linux. gcc/ChangeLog: 2020-10-05 Tom de Vries * tracer.c (can_duplicate_insn_p, can_duplicate_bb_no_insn_iter_p) (can_duplicate_bb_p): New function, factored out of ... (ignore_bb_p): ... here. --- gcc/tracer.c | 72 +++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 23 deletions(-) (limited to 'gcc') diff --git a/gcc/tracer.c b/gcc/tracer.c index 7f32ccb..107b446 100644 --- a/gcc/tracer.c +++ b/gcc/tracer.c @@ -84,49 +84,75 @@ bb_seen_p (basic_block bb) return bitmap_bit_p (bb_seen, bb->index); } -/* Return true if we should ignore the basic block for purposes of tracing. */ -bool -ignore_bb_p (const_basic_block bb) +/* Return true if gimple stmt G can be duplicated. */ +static bool +can_duplicate_insn_p (gimple *g) +{ + /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be + duplicated as part of its group, or not at all. + The IFN_GOMP_SIMT_VOTE_ANY and IFN_GOMP_SIMT_XCHG_* are part of such a + group, so the same holds there. */ + if (is_gimple_call (g) + && (gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC) + || gimple_call_internal_p (g, IFN_GOMP_SIMT_EXIT) + || gimple_call_internal_p (g, IFN_GOMP_SIMT_VOTE_ANY) + || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_BFLY) + || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_IDX))) + return false; + + return true; +} + +/* Return true if BB can be duplicated. Avoid iterating over the insns. 
*/ +static bool +can_duplicate_bb_no_insn_iter_p (const_basic_block bb) { if (bb->index < NUM_FIXED_BLOCKS) - return true; - if (optimize_bb_for_size_p (bb)) - return true; + return false; if (gimple *g = last_stmt (CONST_CAST_BB (bb))) { /* A transaction is a single entry multiple exit region. It must be duplicated in its entirety or not at all. */ if (gimple_code (g) == GIMPLE_TRANSACTION) - return true; + return false; /* An IFN_UNIQUE call must be duplicated as part of its group, or not at all. */ if (is_gimple_call (g) && gimple_call_internal_p (g) && gimple_call_internal_unique_p (g)) - return true; + return false; } + return true; +} + +/* Return true if BB can be duplicated. */ +static bool +can_duplicate_bb_p (const_basic_block bb) +{ + if (!can_duplicate_bb_no_insn_iter_p (bb)) + return false; + for (gimple_stmt_iterator gsi = gsi_start_bb (CONST_CAST_BB (bb)); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *g = gsi_stmt (gsi); + if (!can_duplicate_insn_p (gsi_stmt (gsi))) + return false; - /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be - duplicated as part of its group, or not at all. - The IFN_GOMP_SIMT_VOTE_ANY and IFN_GOMP_SIMT_XCHG_* are part of such a - group, so the same holds there. */ - if (is_gimple_call (g) - && (gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC) - || gimple_call_internal_p (g, IFN_GOMP_SIMT_EXIT) - || gimple_call_internal_p (g, IFN_GOMP_SIMT_VOTE_ANY) - || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_BFLY) - || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_IDX))) - return true; - } + return true; +} + +/* Return true if we should ignore the basic block for purposes of tracing. */ +bool +ignore_bb_p (const_basic_block bb) +{ + if (bb->index < NUM_FIXED_BLOCKS) + return true; + if (optimize_bb_for_size_p (bb)) + return true; - return false; + return !can_duplicate_bb_p (bb); } /* Return number of instructions in the block. 
*/ -- cgit v1.1 From e6d995fddea8d5a6fb0a3bdeccf4191e652f6759 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Sun, 4 Oct 2020 12:01:34 +0200 Subject: [ftracer] Add caching of can_duplicate_bb_p The fix "[omp, ftracer] Don't duplicate blocks in SIMT region" adds iteration over insns in ignore_bb_p, which makes it more expensive. Counteract this by piggybacking the computation of can_duplicate_bb_p onto count_insns, which is called at the start of ftracer. Bootstrapped and reg-tested on x86_64-linux. gcc/ChangeLog: 2020-10-05 Tom de Vries * tracer.c (count_insns): Rename to ... (analyze_bb): ... this. (cache_can_duplicate_bb_p, cached_can_duplicate_bb_p): New function. (ignore_bb_p): Use cached_can_duplicate_bb_p. (tail_duplicate): Call cache_can_duplicate_bb_p. --- gcc/tracer.c | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/tracer.c b/gcc/tracer.c index 107b446..e1c2b95 100644 --- a/gcc/tracer.c +++ b/gcc/tracer.c @@ -53,7 +53,7 @@ #include "fibonacci_heap.h" #include "tracer.h" -static int count_insns (basic_block); +static void analyze_bb (basic_block, int *); static bool better_p (const_edge, const_edge); static edge find_best_successor (basic_block); static edge find_best_predecessor (basic_block); @@ -143,6 +143,33 @@ can_duplicate_bb_p (const_basic_block bb) return true; } +static sbitmap can_duplicate_bb; + +/* Cache VAL as value of can_duplicate_bb_p for BB. */ +static inline void +cache_can_duplicate_bb_p (const_basic_block bb, bool val) +{ + if (val) + bitmap_set_bit (can_duplicate_bb, bb->index); +} + +/* Return cached value of can_duplicate_bb_p for BB. */ +static bool +cached_can_duplicate_bb_p (const_basic_block bb) +{ + if (can_duplicate_bb) + { + unsigned int size = SBITMAP_SIZE (can_duplicate_bb); + if ((unsigned int)bb->index < size) + return bitmap_bit_p (can_duplicate_bb, bb->index); + + /* Assume added bb's should not be duplicated. 
*/ + return false; + } + + return can_duplicate_bb_p (bb); +} + /* Return true if we should ignore the basic block for purposes of tracing. */ bool ignore_bb_p (const_basic_block bb) @@ -152,24 +179,27 @@ ignore_bb_p (const_basic_block bb) if (optimize_bb_for_size_p (bb)) return true; - return !can_duplicate_bb_p (bb); + return !cached_can_duplicate_bb_p (bb); } /* Return number of instructions in the block. */ -static int -count_insns (basic_block bb) +static void +analyze_bb (basic_block bb, int *count) { gimple_stmt_iterator gsi; gimple *stmt; int n = 0; + bool can_duplicate = can_duplicate_bb_no_insn_iter_p (bb); for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { stmt = gsi_stmt (gsi); n += estimate_num_insns (stmt, &eni_size_weights); + can_duplicate = can_duplicate && can_duplicate_insn_p (stmt); } - return n; + *count = n; + cache_can_duplicate_bb_p (bb, can_duplicate); } /* Return true if E1 is more frequent than E2. */ @@ -317,6 +347,8 @@ tail_duplicate (void) resize it. */ bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2); bitmap_clear (bb_seen); + can_duplicate_bb = sbitmap_alloc (last_basic_block_for_fn (cfun)); + bitmap_clear (can_duplicate_bb); initialize_original_copy_tables (); if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ) @@ -330,7 +362,8 @@ tail_duplicate (void) FOR_EACH_BB_FN (bb, cfun) { - int n = count_insns (bb); + int n; + analyze_bb (bb, &n); if (!ignore_bb_p (bb)) blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb); @@ -420,6 +453,8 @@ tail_duplicate (void) free_original_copy_tables (); sbitmap_free (bb_seen); + sbitmap_free (can_duplicate_bb); + can_duplicate_bb = NULL; free (trace); free (counts); -- cgit v1.1 From 8988ec5b4232ba1d54a2737d2d03a3161b64300e Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Tue, 6 Oct 2020 10:49:47 +0200 Subject: dbgcnt: report upper limit when lower == upper gcc/ChangeLog: * dbgcnt.c (dbg_cnt): Report also upper limit. 
--- gcc/dbgcnt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/dbgcnt.c b/gcc/dbgcnt.c index ae98a28..01893ce 100644 --- a/gcc/dbgcnt.c +++ b/gcc/dbgcnt.c @@ -79,7 +79,10 @@ dbg_cnt (enum debug_counter index) { print_limit_reach (map[index].name, v, false); if (min == max) - limits[index].pop (); + { + print_limit_reach (map[index].name, v, true); + limits[index].pop (); + } return true; } else if (v < max) -- cgit v1.1 From a30d4fc5199ba16cec39fd3f9cca878a9699cf4e Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Tue, 6 Oct 2020 11:18:55 +0200 Subject: dbgcnt: print list after compilation gcc/ChangeLog: * common.opt: Remove -fdbg-cnt-list from deferred options. * dbgcnt.c (dbg_cnt_set_limit_by_index): Make a copy to original_limits. (dbg_cnt_list_all_counters): Print also current counter value and print to stderr. * opts-global.c (handle_common_deferred_options): Do not handle -fdbg-cnt-list. * opts.c (common_handle_option): Likewise. * toplev.c (finalize): Handle it after compilation here. --- gcc/common.opt | 2 +- gcc/dbgcnt.c | 25 +++++++++++++++---------- gcc/opts-global.c | 4 ---- gcc/opts.c | 5 ----- gcc/toplev.c | 4 ++++ 5 files changed, 20 insertions(+), 20 deletions(-) (limited to 'gcc') diff --git a/gcc/common.opt b/gcc/common.opt index 292c2de..7e789d1 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1202,7 +1202,7 @@ Common Report Var(flag_data_sections) Place data items into their own section. fdbg-cnt-list -Common Report Var(common_deferred_options) Defer +Common Report Var(flag_dbg_cnt_list) List all available debugging counters with their limits and counts. 
fdbg-cnt= diff --git a/gcc/dbgcnt.c b/gcc/dbgcnt.c index 01893ce..2a2dd57 100644 --- a/gcc/dbgcnt.c +++ b/gcc/dbgcnt.c @@ -45,6 +45,7 @@ static struct string2counter_map map[debug_counter_number_of_counters] = typedef std::pair limit_tuple; static vec limits[debug_counter_number_of_counters]; +static vec original_limits[debug_counter_number_of_counters]; static unsigned int count[debug_counter_number_of_counters]; @@ -134,6 +135,8 @@ dbg_cnt_set_limit_by_index (enum debug_counter index, const char *name, } } + original_limits[index] = limits[index].copy (); + return true; } @@ -226,25 +229,27 @@ void dbg_cnt_list_all_counters (void) { int i; - printf (" %-30s %s\n", G_("counter name"), G_("closed intervals")); - printf ("-----------------------------------------------------------------\n"); + fprintf (stderr, " %-30s%-15s %s\n", G_("counter name"), + G_("counter value"), G_("closed intervals")); + fprintf (stderr, "-----------------------------------------------------------------\n"); for (i = 0; i < debug_counter_number_of_counters; i++) { - printf (" %-30s ", map[i].name); - if (limits[i].exists ()) + fprintf (stderr, " %-30s%-15d ", map[i].name, count[i]); + if (original_limits[i].exists ()) { - for (int j = limits[i].length () - 1; j >= 0; j--) + for (int j = original_limits[i].length () - 1; j >= 0; j--) { - printf ("[%u, %u]", limits[i][j].first, limits[i][j].second); + fprintf (stderr, "[%u, %u]", original_limits[i][j].first, + original_limits[i][j].second); if (j > 0) - printf (", "); + fprintf (stderr, ", "); } - putchar ('\n'); + fprintf (stderr, "\n"); } else - printf ("unset\n"); + fprintf (stderr, "unset\n"); } - printf ("\n"); + fprintf (stderr, "\n"); } #if CHECKING_P diff --git a/gcc/opts-global.c b/gcc/opts-global.c index b024ab8..1816acf 100644 --- a/gcc/opts-global.c +++ b/gcc/opts-global.c @@ -378,10 +378,6 @@ handle_common_deferred_options (void) dbg_cnt_process_opt (opt->arg); break; - case OPT_fdbg_cnt_list: - dbg_cnt_list_all_counters (); - 
break; - case OPT_fdebug_prefix_map_: add_debug_prefix_map (opt->arg); break; diff --git a/gcc/opts.c b/gcc/opts.c index 3bda59a..da503c3 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -2361,11 +2361,6 @@ common_handle_option (struct gcc_options *opts, /* Deferred. */ break; - case OPT_fdbg_cnt_list: - /* Deferred. */ - opts->x_exit_after_options = true; - break; - case OPT_fdebug_prefix_map_: case OPT_ffile_prefix_map_: /* Deferred. */ diff --git a/gcc/toplev.c b/gcc/toplev.c index a4cb8bb..8c1e1e1 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -86,6 +86,7 @@ along with GCC; see the file COPYING3. If not see #include "optinfo-emit-json.h" #include "ipa-modref-tree.h" #include "ipa-modref.h" +#include "dbgcnt.h" #if defined(DBX_DEBUGGING_INFO) || defined(XCOFF_DEBUGGING_INFO) #include "dbxout.h" @@ -2213,6 +2214,9 @@ finalize (bool no_backend) if (profile_report) dump_profile_report (); + if (flag_dbg_cnt_list) + dbg_cnt_list_all_counters (); + /* Language-specific end of compilation actions. */ lang_hooks.finish (); } -- cgit v1.1 From a9a88a0a55e131172960fe56013965a7ab4eaf96 Mon Sep 17 00:00:00 2001 From: Srinath Parvathaneni Date: Tue, 6 Oct 2020 14:58:13 +0100 Subject: [PATCH][GCC] arm: Move iterators from mve.md to iterators.md to maintain consistency. To maintain consistency with other Arm Architectures backend, iterators and iterator attributes are moved from mve.md file to iterators.md. Also move enumerators for MVE unspecs from mve.md file to unspecs.md file. gcc/ChangeLog: 2020-10-06 Srinath Parvathaneni * config/arm/iterators.md (MVE_types): Move mode iterator from mve.md to iterators.md. (MVE_VLD_ST): Likewise. (MVE_0): Likewise. (MVE_1): Likewise. (MVE_3): Likewise. (MVE_2): Likewise. (MVE_5): Likewise. (MVE_6): Likewise. (MVE_CNVT): Move mode attribute iterator from mve.md to iterators.md. (MVE_LANES): Likewise. (MVE_constraint): Likewise. (MVE_constraint1): Likewise. (MVE_constraint2): Likewise. (MVE_constraint3): Likewise. (MVE_pred): Likewise. 
(MVE_pred1): Likewise. (MVE_pred2): Likewise. (MVE_pred3): Likewise. (MVE_B_ELEM): Likewise. (MVE_H_ELEM): Likewise. (V_sz_elem1): Likewise. (V_extr_elem): Likewise. (earlyclobber_32): Likewise. (supf): Move int attribute from mve.md to iterators.md. (mode1): Likewise. (VCVTQ_TO_F): Move int iterator from mve.md to iterators.md. (VMVNQ_N): Likewise. (VREV64Q): Likewise. (VCVTQ_FROM_F): Likewise. (VREV16Q): Likewise. (VCVTAQ): Likewise. (VMVNQ): Likewise. (VDUPQ_N): Likewise. (VCLZQ): Likewise. (VADDVQ): Likewise. (VREV32Q): Likewise. (VMOVLBQ): Likewise. (VMOVLTQ): Likewise. (VCVTPQ): Likewise. (VCVTNQ): Likewise. (VCVTMQ): Likewise. (VADDLVQ): Likewise. (VCTPQ): Likewise. (VCTPQ_M): Likewise. (VCVTQ_N_TO_F): Likewise. (VCREATEQ): Likewise. (VSHRQ_N): Likewise. (VCVTQ_N_FROM_F): Likewise. (VADDLVQ_P): Likewise. (VCMPNEQ): Likewise. (VSHLQ): Likewise. (VABDQ): Likewise. (VADDQ_N): Likewise. (VADDVAQ): Likewise. (VADDVQ_P): Likewise. (VANDQ): Likewise. (VBICQ): Likewise. (VBRSRQ_N): Likewise. (VCADDQ_ROT270): Likewise. (VCADDQ_ROT90): Likewise. (VCMPEQQ): Likewise. (VCMPEQQ_N): Likewise. (VCMPNEQ_N): Likewise. (VEORQ): Likewise. (VHADDQ): Likewise. (VHADDQ_N): Likewise. (VHSUBQ): Likewise. (VHSUBQ_N): Likewise. (VMAXQ): Likewise. (VMAXVQ): Likewise. (VMINQ): Likewise. (VMINVQ): Likewise. (VMLADAVQ): Likewise. (VMULHQ): Likewise. (VMULLBQ_INT): Likewise. (VMULLTQ_INT): Likewise. (VMULQ): Likewise. (VMULQ_N): Likewise. (VORNQ): Likewise. (VORRQ): Likewise. (VQADDQ): Likewise. (VQADDQ_N): Likewise. (VQRSHLQ): Likewise. (VQRSHLQ_N): Likewise. (VQSHLQ): Likewise. (VQSHLQ_N): Likewise. (VQSHLQ_R): Likewise. (VQSUBQ): Likewise. (VQSUBQ_N): Likewise. (VRHADDQ): Likewise. (VRMULHQ): Likewise. (VRSHLQ): Likewise. (VRSHLQ_N): Likewise. (VRSHRQ_N): Likewise. (VSHLQ_N): Likewise. (VSHLQ_R): Likewise. (VSUBQ): Likewise. (VSUBQ_N): Likewise. (VADDLVAQ): Likewise. (VBICQ_N): Likewise. (VMLALDAVQ): Likewise. (VMLALDAVXQ): Likewise. (VMOVNBQ): Likewise. (VMOVNTQ): Likewise. 
(VORRQ_N): Likewise. (VQMOVNBQ): Likewise. (VQMOVNTQ): Likewise. (VSHLLBQ_N): Likewise. (VSHLLTQ_N): Likewise. (VRMLALDAVHQ): Likewise. (VBICQ_M_N): Likewise. (VCVTAQ_M): Likewise. (VCVTQ_M_TO_F): Likewise. (VQRSHRNBQ_N): Likewise. (VABAVQ): Likewise. (VSHLCQ): Likewise. (VRMLALDAVHAQ): Likewise. (VADDVAQ_P): Likewise. (VCLZQ_M): Likewise. (VCMPEQQ_M_N): Likewise. (VCMPEQQ_M): Likewise. (VCMPNEQ_M_N): Likewise. (VCMPNEQ_M): Likewise. (VDUPQ_M_N): Likewise. (VMAXVQ_P): Likewise. (VMINVQ_P): Likewise. (VMLADAVAQ): Likewise. (VMLADAVQ_P): Likewise. (VMLAQ_N): Likewise. (VMLASQ_N): Likewise. (VMVNQ_M): Likewise. (VPSELQ): Likewise. (VQDMLAHQ_N): Likewise. (VQRDMLAHQ_N): Likewise. (VQRDMLASHQ_N): Likewise. (VQRSHLQ_M_N): Likewise. (VQSHLQ_M_R): Likewise. (VREV64Q_M): Likewise. (VRSHLQ_M_N): Likewise. (VSHLQ_M_R): Likewise. (VSLIQ_N): Likewise. (VSRIQ_N): Likewise. (VMLALDAVQ_P): Likewise. (VQMOVNBQ_M): Likewise. (VMOVLTQ_M): Likewise. (VMOVNBQ_M): Likewise. (VRSHRNTQ_N): Likewise. (VORRQ_M_N): Likewise. (VREV32Q_M): Likewise. (VREV16Q_M): Likewise. (VQRSHRNTQ_N): Likewise. (VMOVNTQ_M): Likewise. (VMOVLBQ_M): Likewise. (VMLALDAVAQ): Likewise. (VQSHRNBQ_N): Likewise. (VSHRNBQ_N): Likewise. (VRSHRNBQ_N): Likewise. (VMLALDAVXQ_P): Likewise. (VQMOVNTQ_M): Likewise. (VMVNQ_M_N): Likewise. (VQSHRNTQ_N): Likewise. (VMLALDAVAXQ): Likewise. (VSHRNTQ_N): Likewise. (VCVTMQ_M): Likewise. (VCVTNQ_M): Likewise. (VCVTPQ_M): Likewise. (VCVTQ_M_N_FROM_F): Likewise. (VCVTQ_M_FROM_F): Likewise. (VRMLALDAVHQ_P): Likewise. (VADDLVAQ_P): Likewise. (VABAVQ_P): Likewise. (VSHLQ_M): Likewise. (VSRIQ_M_N): Likewise. (VSUBQ_M): Likewise. (VCVTQ_M_N_TO_F): Likewise. (VHSUBQ_M): Likewise. (VSLIQ_M_N): Likewise. (VRSHLQ_M): Likewise. (VMINQ_M): Likewise. (VMULLBQ_INT_M): Likewise. (VMULHQ_M): Likewise. (VMULQ_M): Likewise. (VHSUBQ_M_N): Likewise. (VHADDQ_M_N): Likewise. (VORRQ_M): Likewise. (VRMULHQ_M): Likewise. (VQADDQ_M): Likewise. (VRSHRQ_M_N): Likewise. (VQSUBQ_M_N): Likewise. 
(VADDQ_M): Likewise. (VORNQ_M): Likewise. (VRHADDQ_M): Likewise. (VQSHLQ_M): Likewise. (VANDQ_M): Likewise. (VBICQ_M): Likewise. (VSHLQ_M_N): Likewise. (VCADDQ_ROT270_M): Likewise. (VQRSHLQ_M): Likewise. (VQADDQ_M_N): Likewise. (VADDQ_M_N): Likewise. (VMAXQ_M): Likewise. (VQSUBQ_M): Likewise. (VMLASQ_M_N): Likewise. (VMLADAVAQ_P): Likewise. (VBRSRQ_M_N): Likewise. (VMULQ_M_N): Likewise. (VCADDQ_ROT90_M): Likewise. (VMULLTQ_INT_M): Likewise. (VEORQ_M): Likewise. (VSHRQ_M_N): Likewise. (VSUBQ_M_N): Likewise. (VHADDQ_M): Likewise. (VABDQ_M): Likewise. (VMLAQ_M_N): Likewise. (VQSHLQ_M_N): Likewise. (VMLALDAVAQ_P): Likewise. (VMLALDAVAXQ_P): Likewise. (VQRSHRNBQ_M_N): Likewise. (VQRSHRNTQ_M_N): Likewise. (VQSHRNBQ_M_N): Likewise. (VQSHRNTQ_M_N): Likewise. (VRSHRNBQ_M_N): Likewise. (VRSHRNTQ_M_N): Likewise. (VSHLLBQ_M_N): Likewise. (VSHLLTQ_M_N): Likewise. (VSHRNBQ_M_N): Likewise. (VSHRNTQ_M_N): Likewise. (VSTRWSBQ): Likewise. (VSTRBSOQ): Likewise. (VSTRBQ): Likewise. (VLDRBGOQ): Likewise. (VLDRBQ): Likewise. (VLDRWGBQ): Likewise. (VLD1Q): Likewise. (VLDRHGOQ): Likewise. (VLDRHGSOQ): Likewise. (VLDRHQ): Likewise. (VLDRWQ): Likewise. (VLDRDGBQ): Likewise. (VLDRDGOQ): Likewise. (VLDRDGSOQ): Likewise. (VLDRWGOQ): Likewise. (VLDRWGSOQ): Likewise. (VST1Q): Likewise. (VSTRHSOQ): Likewise. (VSTRHSSOQ): Likewise. (VSTRHQ): Likewise. (VSTRWQ): Likewise. (VSTRDSBQ): Likewise. (VSTRDSOQ): Likewise. (VSTRDSSOQ): Likewise. (VSTRWSOQ): Likewise. (VSTRWSSOQ): Likewise. (VSTRWSBWBQ): Likewise. (VLDRWGBWBQ): Likewise. (VSTRDSBWBQ): Likewise. (VLDRDGBWBQ): Likewise. (VADCIQ): Likewise. (VADCIQ_M): Likewise. (VSBCQ): Likewise. (VSBCQ_M): Likewise. (VSBCIQ): Likewise. (VSBCIQ_M): Likewise. (VADCQ): Likewise. (VADCQ_M): Likewise. (UQRSHLLQ): Likewise. (SQRSHRLQ): Likewise. (VSHLCQ_M): Likewise. * config/arm/mve.md (MVE_types): Move mode iterator to iterators.md from mve.md. (MVE_VLD_ST): Likewise. (MVE_0): Likewise. (MVE_1): Likewise. (MVE_3): Likewise. (MVE_2): Likewise. (MVE_5): Likewise. 
(MVE_6): Likewise. (MVE_CNVT): Move mode attribute iterator to iterators.md from mve.md. (MVE_LANES): Likewise. (MVE_constraint): Likewise. (MVE_constraint1): Likewise. (MVE_constraint2): Likewise. (MVE_constraint3): Likewise. (MVE_pred): Likewise. (MVE_pred1): Likewise. (MVE_pred2): Likewise. (MVE_pred3): Likewise. (MVE_B_ELEM): Likewise. (MVE_H_ELEM): Likewise. (V_sz_elem1): Likewise. (V_extr_elem): Likewise. (earlyclobber_32): Likewise. (supf): Move int attribute to iterators.md from mve.md. (mode1): Likewise. (VCVTQ_TO_F): Move int iterator to iterators.md from mve.md. (VMVNQ_N): Likewise. (VREV64Q): Likewise. (VCVTQ_FROM_F): Likewise. (VREV16Q): Likewise. (VCVTAQ): Likewise. (VMVNQ): Likewise. (VDUPQ_N): Likewise. (VCLZQ): Likewise. (VADDVQ): Likewise. (VREV32Q): Likewise. (VMOVLBQ): Likewise. (VMOVLTQ): Likewise. (VCVTPQ): Likewise. (VCVTNQ): Likewise. (VCVTMQ): Likewise. (VADDLVQ): Likewise. (VCTPQ): Likewise. (VCTPQ_M): Likewise. (VCVTQ_N_TO_F): Likewise. (VCREATEQ): Likewise. (VSHRQ_N): Likewise. (VCVTQ_N_FROM_F): Likewise. (VADDLVQ_P): Likewise. (VCMPNEQ): Likewise. (VSHLQ): Likewise. (VABDQ): Likewise. (VADDQ_N): Likewise. (VADDVAQ): Likewise. (VADDVQ_P): Likewise. (VANDQ): Likewise. (VBICQ): Likewise. (VBRSRQ_N): Likewise. (VCADDQ_ROT270): Likewise. (VCADDQ_ROT90): Likewise. (VCMPEQQ): Likewise. (VCMPEQQ_N): Likewise. (VCMPNEQ_N): Likewise. (VEORQ): Likewise. (VHADDQ): Likewise. (VHADDQ_N): Likewise. (VHSUBQ): Likewise. (VHSUBQ_N): Likewise. (VMAXQ): Likewise. (VMAXVQ): Likewise. (VMINQ): Likewise. (VMINVQ): Likewise. (VMLADAVQ): Likewise. (VMULHQ): Likewise. (VMULLBQ_INT): Likewise. (VMULLTQ_INT): Likewise. (VMULQ): Likewise. (VMULQ_N): Likewise. (VORNQ): Likewise. (VORRQ): Likewise. (VQADDQ): Likewise. (VQADDQ_N): Likewise. (VQRSHLQ): Likewise. (VQRSHLQ_N): Likewise. (VQSHLQ): Likewise. (VQSHLQ_N): Likewise. (VQSHLQ_R): Likewise. (VQSUBQ): Likewise. (VQSUBQ_N): Likewise. (VRHADDQ): Likewise. (VRMULHQ): Likewise. (VRSHLQ): Likewise. 
(VRSHLQ_N): Likewise. (VRSHRQ_N): Likewise. (VSHLQ_N): Likewise. (VSHLQ_R): Likewise. (VSUBQ): Likewise. (VSUBQ_N): Likewise. (VADDLVAQ): Likewise. (VBICQ_N): Likewise. (VMLALDAVQ): Likewise. (VMLALDAVXQ): Likewise. (VMOVNBQ): Likewise. (VMOVNTQ): Likewise. (VORRQ_N): Likewise. (VQMOVNBQ): Likewise. (VQMOVNTQ): Likewise. (VSHLLBQ_N): Likewise. (VSHLLTQ_N): Likewise. (VRMLALDAVHQ): Likewise. (VBICQ_M_N): Likewise. (VCVTAQ_M): Likewise. (VCVTQ_M_TO_F): Likewise. (VQRSHRNBQ_N): Likewise. (VABAVQ): Likewise. (VSHLCQ): Likewise. (VRMLALDAVHAQ): Likewise. (VADDVAQ_P): Likewise. (VCLZQ_M): Likewise. (VCMPEQQ_M_N): Likewise. (VCMPEQQ_M): Likewise. (VCMPNEQ_M_N): Likewise. (VCMPNEQ_M): Likewise. (VDUPQ_M_N): Likewise. (VMAXVQ_P): Likewise. (VMINVQ_P): Likewise. (VMLADAVAQ): Likewise. (VMLADAVQ_P): Likewise. (VMLAQ_N): Likewise. (VMLASQ_N): Likewise. (VMVNQ_M): Likewise. (VPSELQ): Likewise. (VQDMLAHQ_N): Likewise. (VQRDMLAHQ_N): Likewise. (VQRDMLASHQ_N): Likewise. (VQRSHLQ_M_N): Likewise. (VQSHLQ_M_R): Likewise. (VREV64Q_M): Likewise. (VRSHLQ_M_N): Likewise. (VSHLQ_M_R): Likewise. (VSLIQ_N): Likewise. (VSRIQ_N): Likewise. (VMLALDAVQ_P): Likewise. (VQMOVNBQ_M): Likewise. (VMOVLTQ_M): Likewise. (VMOVNBQ_M): Likewise. (VRSHRNTQ_N): Likewise. (VORRQ_M_N): Likewise. (VREV32Q_M): Likewise. (VREV16Q_M): Likewise. (VQRSHRNTQ_N): Likewise. (VMOVNTQ_M): Likewise. (VMOVLBQ_M): Likewise. (VMLALDAVAQ): Likewise. (VQSHRNBQ_N): Likewise. (VSHRNBQ_N): Likewise. (VRSHRNBQ_N): Likewise. (VMLALDAVXQ_P): Likewise. (VQMOVNTQ_M): Likewise. (VMVNQ_M_N): Likewise. (VQSHRNTQ_N): Likewise. (VMLALDAVAXQ): Likewise. (VSHRNTQ_N): Likewise. (VCVTMQ_M): Likewise. (VCVTNQ_M): Likewise. (VCVTPQ_M): Likewise. (VCVTQ_M_N_FROM_F): Likewise. (VCVTQ_M_FROM_F): Likewise. (VRMLALDAVHQ_P): Likewise. (VADDLVAQ_P): Likewise. (VABAVQ_P): Likewise. (VSHLQ_M): Likewise. (VSRIQ_M_N): Likewise. (VSUBQ_M): Likewise. (VCVTQ_M_N_TO_F): Likewise. (VHSUBQ_M): Likewise. (VSLIQ_M_N): Likewise. (VRSHLQ_M): Likewise. 
(VMINQ_M): Likewise. (VMULLBQ_INT_M): Likewise. (VMULHQ_M): Likewise. (VMULQ_M): Likewise. (VHSUBQ_M_N): Likewise. (VHADDQ_M_N): Likewise. (VORRQ_M): Likewise. (VRMULHQ_M): Likewise. (VQADDQ_M): Likewise. (VRSHRQ_M_N): Likewise. (VQSUBQ_M_N): Likewise. (VADDQ_M): Likewise. (VORNQ_M): Likewise. (VRHADDQ_M): Likewise. (VQSHLQ_M): Likewise. (VANDQ_M): Likewise. (VBICQ_M): Likewise. (VSHLQ_M_N): Likewise. (VCADDQ_ROT270_M): Likewise. (VQRSHLQ_M): Likewise. (VQADDQ_M_N): Likewise. (VADDQ_M_N): Likewise. (VMAXQ_M): Likewise. (VQSUBQ_M): Likewise. (VMLASQ_M_N): Likewise. (VMLADAVAQ_P): Likewise. (VBRSRQ_M_N): Likewise. (VMULQ_M_N): Likewise. (VCADDQ_ROT90_M): Likewise. (VMULLTQ_INT_M): Likewise. (VEORQ_M): Likewise. (VSHRQ_M_N): Likewise. (VSUBQ_M_N): Likewise. (VHADDQ_M): Likewise. (VABDQ_M): Likewise. (VMLAQ_M_N): Likewise. (VQSHLQ_M_N): Likewise. (VMLALDAVAQ_P): Likewise. (VMLALDAVAXQ_P): Likewise. (VQRSHRNBQ_M_N): Likewise. (VQRSHRNTQ_M_N): Likewise. (VQSHRNBQ_M_N): Likewise. (VQSHRNTQ_M_N): Likewise. (VRSHRNBQ_M_N): Likewise. (VRSHRNTQ_M_N): Likewise. (VSHLLBQ_M_N): Likewise. (VSHLLTQ_M_N): Likewise. (VSHRNBQ_M_N): Likewise. (VSHRNTQ_M_N): Likewise. (VSTRWSBQ): Likewise. (VSTRBSOQ): Likewise. (VSTRBQ): Likewise. (VLDRBGOQ): Likewise. (VLDRBQ): Likewise. (VLDRWGBQ): Likewise. (VLD1Q): Likewise. (VLDRHGOQ): Likewise. (VLDRHGSOQ): Likewise. (VLDRHQ): Likewise. (VLDRWQ): Likewise. (VLDRDGBQ): Likewise. (VLDRDGOQ): Likewise. (VLDRDGSOQ): Likewise. (VLDRWGOQ): Likewise. (VLDRWGSOQ): Likewise. (VST1Q): Likewise. (VSTRHSOQ): Likewise. (VSTRHSSOQ): Likewise. (VSTRHQ): Likewise. (VSTRWQ): Likewise. (VSTRDSBQ): Likewise. (VSTRDSOQ): Likewise. (VSTRDSSOQ): Likewise. (VSTRWSOQ): Likewise. (VSTRWSSOQ): Likewise. (VSTRWSBWBQ): Likewise. (VLDRWGBWBQ): Likewise. (VSTRDSBWBQ): Likewise. (VLDRDGBWBQ): Likewise. (VADCIQ): Likewise. (VADCIQ_M): Likewise. (VSBCQ): Likewise. (VSBCQ_M): Likewise. (VSBCIQ): Likewise. (VSBCIQ_M): Likewise. (VADCQ): Likewise. (VADCQ_M): Likewise. 
(UQRSHLLQ): Likewise. (SQRSHRLQ): Likewise. (VSHLCQ_M): Likewise. (define_c_enum "unspec"): Move MVE enumerator to unspecs.md from mve.md. * config/arm/unspecs.md (define_c_enum "unspec"): Move MVE enumerator from mve.md to unspecs.md. --- gcc/config/arm/iterators.md | 464 +++++++++++++++++++++++++ gcc/config/arm/mve.md | 648 ----------------------------------- gcc/config/arm/unspecs.md | 802 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1266 insertions(+), 648 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index c70e3bc..7f8c235 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -259,6 +259,16 @@ (define_mode_iterator VBFCVT [V4BF V8BF]) (define_mode_iterator VBFCVTM [V2SI SF]) +;; MVE mode iterator. +(define_mode_iterator MVE_types [V16QI V8HI V4SI V2DI TI V8HF V4SF V2DF]) +(define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF]) +(define_mode_iterator MVE_0 [V8HF V4SF]) +(define_mode_iterator MVE_1 [V16QI V8HI V4SI V2DI]) +(define_mode_iterator MVE_3 [V16QI V8HI]) +(define_mode_iterator MVE_2 [V16QI V8HI V4SI]) +(define_mode_iterator MVE_5 [V8HI V4SI]) +(define_mode_iterator MVE_6 [V8HI V4SI]) + ;;---------------------------------------------------------------------------- ;; Code iterators ;;---------------------------------------------------------------------------- @@ -893,6 +903,35 @@ (define_mode_attr cde_suffix [(SI "") (DI "d")]) (define_mode_attr cde_dest [(SI "%0") (DI "%0, %H0")]) +;;MVE mode attribute. 
+(define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI") + (V4SF "V4SI")]) +(define_mode_attr MVE_LANES [(V16QI "16") (V8HI "8") (V4SI "4")]) + +(define_mode_attr MVE_constraint [ (V16QI "Ra") (V8HI "Rc") (V4SI "Re")]) +(define_mode_attr MVE_constraint1 [ (V8HI "Ra") (V4SI "Rc")]) +(define_mode_attr MVE_constraint2 [(V16QI "Rb") (V8HI "Rd") (V4SI "Rf") + (V8HF "Rd") (V4SF "Rf")]) +(define_mode_attr MVE_constraint3 [ (V8HI "Rb") (V4SI "Rd")]) + +(define_mode_attr MVE_pred [ (V16QI "mve_imm_7") (V8HI "mve_imm_15") + (V4SI "mve_imm_31")]) +(define_mode_attr MVE_pred1 [ (V8HI "mve_imm_7") (V4SI "mve_imm_15")]) +(define_mode_attr MVE_pred2 [(V16QI "mve_imm_8") (V8HI "mve_imm_16") + (V4SI "mve_imm_32") + (V8HF "mve_imm_16") (V4SF "mve_imm_32")]) +(define_mode_attr MVE_pred3 [ (V8HI "mve_imm_8") (V4SI "mve_imm_16")]) + +(define_mode_attr MVE_B_ELEM [ (V16QI "V16QI") (V8HI "V8QI") (V4SI "V4QI")]) +(define_mode_attr MVE_H_ELEM [ (V8HI "V8HI") (V4SI "V4HI")]) + +(define_mode_attr V_sz_elem1 [(V16QI "b") (V8HI "h") (V4SI "w") (V8HF "h") + (V4SF "w")]) +(define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32") + (V8HF "u16") (V4SF "32")]) +(define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w") + (V8HF "=w") (V4SF "=&w")]) + ;;---------------------------------------------------------------------------- ;; Code attributes ;;---------------------------------------------------------------------------- @@ -1173,6 +1212,186 @@ (define_int_attr mmla_sfx [(UNSPEC_MATMUL_S "s8") (UNSPEC_MATMUL_U "u8") (UNSPEC_MATMUL_US "s8")]) +;;MVE int attribute. 
+(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") + (VREV16Q_U "u") (VMVNQ_N_S "s") (VMVNQ_N_U "u") + (VCVTAQ_U "u") (VCVTAQ_S "s") (VREV64Q_S "s") + (VREV64Q_U "u") (VMVNQ_S "s") (VMVNQ_U "u") + (VDUPQ_N_U "u") (VDUPQ_N_S"s") (VADDVQ_S "s") + (VADDVQ_U "u") (VADDVQ_S "s") (VADDVQ_U "u") + (VMOVLTQ_U "u") (VMOVLTQ_S "s") (VMOVLBQ_S "s") + (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u") + (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s") + (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u") + (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u") + (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") + (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") + (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") + (VSHRQ_N_U "u") (VCVTQ_N_FROM_F_S "s") (VSHLQ_U "u") + (VCVTQ_N_FROM_F_U "u") (VADDLVQ_P_S "s") (VSHLQ_S "s") + (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s") + (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s") + (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u") + (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s") + (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u") + (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s") + (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s") + (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u") + (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s") + (VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u") + (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s") + (VHADDQ_U "u") (VHSUBQ_N_S "s") (VHSUBQ_N_U "u") + (VHSUBQ_S "s") (VMAXQ_S "s") (VMAXQ_U "u") (VHSUBQ_U "u") + (VMAXVQ_S "s") (VMAXVQ_U "u") (VMINQ_S "s") (VMINQ_U "u") + (VMINVQ_S "s") (VMINVQ_U "u") (VMLADAVQ_S "s") + (VMLADAVQ_U "u") (VMULHQ_S "s") (VMULHQ_U "u") + (VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u") (VQADDQ_S "s") + (VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u") (VQADDQ_U "u") + (VMULQ_N_S "s") (VMULQ_N_U "u") (VMULQ_S "s") + (VMULQ_U "u") (VORNQ_S "s") (VORNQ_U "u") (VORRQ_S "s") + (VORRQ_U "u") (VQADDQ_N_S "s") (VQADDQ_N_U "u") + (VQRSHLQ_N_S "s") (VQRSHLQ_N_U "u") (VQRSHLQ_S "s") + (VQRSHLQ_U "u") 
(VQSHLQ_N_S "s") (VQSHLQ_N_U "u") + (VQSHLQ_R_S "s") (VQSHLQ_R_U "u") (VQSHLQ_S "s") + (VQSHLQ_U "u") (VQSUBQ_N_S "s") (VQSUBQ_N_U "u") + (VQSUBQ_S "s") (VQSUBQ_U "u") (VRHADDQ_S "s") + (VRHADDQ_U "u") (VRMULHQ_S "s") (VRMULHQ_U "u") + (VRSHLQ_N_S "s") (VRSHLQ_N_U "u") (VRSHLQ_S "s") + (VRSHLQ_U "u") (VRSHRQ_N_S "s") (VRSHRQ_N_U "u") + (VSHLQ_N_S "s") (VSHLQ_N_U "u") (VSHLQ_R_S "s") + (VSHLQ_R_U "u") (VSUBQ_N_S "s") (VSUBQ_N_U "u") + (VSUBQ_S "s") (VSUBQ_U "u") (VADDVAQ_S "s") + (VADDVAQ_U "u") (VADDLVAQ_S "s") (VADDLVAQ_U "u") + (VBICQ_N_S "s") (VBICQ_N_U "u") (VMLALDAVQ_U "u") + (VMLALDAVQ_S "s") (VMLALDAVXQ_U "u") (VMLALDAVXQ_S "s") + (VMOVNBQ_U "u") (VMOVNBQ_S "s") (VMOVNTQ_U "u") + (VMOVNTQ_S "s") (VORRQ_N_S "s") (VORRQ_N_U "u") + (VQMOVNBQ_U "u") (VQMOVNBQ_S "s") (VQMOVNTQ_S "s") + (VQMOVNTQ_U "u") (VSHLLBQ_N_U "u") (VSHLLBQ_N_S "s") + (VSHLLTQ_N_U "u") (VSHLLTQ_N_S "s") (VRMLALDAVHQ_U "u") + (VRMLALDAVHQ_S "s") (VBICQ_M_N_S "s") (VBICQ_M_N_U "u") + (VCVTAQ_M_S "s") (VCVTAQ_M_U "u") (VCVTQ_M_TO_F_S "s") + (VCVTQ_M_TO_F_U "u") (VQRSHRNBQ_N_S "s") + (VQRSHRNBQ_N_U "u") (VABAVQ_S "s") (VABAVQ_U "u") + (VRMLALDAVHAQ_U "u") (VRMLALDAVHAQ_S "s") (VSHLCQ_S "s") + (VSHLCQ_U "u") (VADDVAQ_P_S "s") (VADDVAQ_P_U "u") + (VCLZQ_M_S "s") (VCLZQ_M_U "u") (VCMPEQQ_M_N_S "s") + (VCMPEQQ_M_N_U "u") (VCMPEQQ_M_S "s") (VCMPEQQ_M_U "u") + (VCMPNEQ_M_N_S "s") (VCMPNEQ_M_N_U "u") (VCMPNEQ_M_S "s") + (VCMPNEQ_M_U "u") (VDUPQ_M_N_S "s") (VDUPQ_M_N_U "u") + (VMAXVQ_P_S "s") (VMAXVQ_P_U "u") (VMINVQ_P_S "s") + (VMINVQ_P_U "u") (VMLADAVAQ_S "s") (VMLADAVAQ_U "u") + (VMLADAVQ_P_S "s") (VMLADAVQ_P_U "u") (VMLAQ_N_S "s") + (VMLAQ_N_U "u") (VMLASQ_N_S "s") (VMLASQ_N_U "u") + (VMVNQ_M_S "s") (VMVNQ_M_U "u") (VPSELQ_S "s") + (VPSELQ_U "u") (VQDMLAHQ_N_S "s") (VQDMLAHQ_N_U "u") + (VQRDMLAHQ_N_S "s") (VQRDMLAHQ_N_U "u") + (VQRDMLASHQ_N_S "s") (VQRDMLASHQ_N_U "u") + (VQRSHLQ_M_N_S "s") (VQRSHLQ_M_N_U "u") + (VQSHLQ_M_R_S "s") (VQSHLQ_M_R_U "u") (VSRIQ_N_S "s") + (VREV64Q_M_S "s") (VREV64Q_M_U 
"u") (VSRIQ_N_U "u") + (VRSHLQ_M_N_S "s") (VRSHLQ_M_N_U "u") (VSHLQ_M_R_S "s") + (VSHLQ_M_R_U "u") (VSLIQ_N_S "s") (VSLIQ_N_U "u") + (VMLALDAVQ_P_S "s") (VQMOVNBQ_M_S "s") (VMOVLTQ_M_S "s") + (VMOVNBQ_M_S "s") (VRSHRNTQ_N_S "s") (VORRQ_M_N_S "s") + (VREV32Q_M_S "s") (VQRSHRNTQ_N_S "s") (VMOVNTQ_M_S "s") + (VMOVLBQ_M_S "s") (VMLALDAVAQ_S "s") (VQSHRNBQ_N_S "s") + (VSHRNBQ_N_S "s") (VRSHRNBQ_N_S "s") (VMLALDAVXQ_P_S "s") + (VQMOVNTQ_M_S "s") (VMVNQ_M_N_S "s") (VQSHRNTQ_N_S "s") + (VMLALDAVAXQ_S "s") (VSHRNTQ_N_S "s") (VMLALDAVQ_P_U "u") + (VQMOVNBQ_M_U "u") (VMOVLTQ_M_U "u") (VMOVNBQ_M_U "u") + (VRSHRNTQ_N_U "u") (VORRQ_M_N_U "u") (VREV32Q_M_U "u") + (VREV16Q_M_S "s") (VREV16Q_M_U "u") + (VQRSHRNTQ_N_U "u") (VMOVNTQ_M_U "u") (VMOVLBQ_M_U "u") + (VMLALDAVAQ_U "u") (VQSHRNBQ_N_U "u") (VSHRNBQ_N_U "u") + (VRSHRNBQ_N_U "u") (VMLALDAVXQ_P_U "u") + (VMVNQ_M_N_U "u") (VQSHRNTQ_N_U "u") (VMLALDAVAXQ_U "u") + (VQMOVNTQ_M_U "u") (VSHRNTQ_N_U "u") (VCVTMQ_M_S "s") + (VCVTMQ_M_U "u") (VCVTNQ_M_S "s") (VCVTNQ_M_U "u") + (VCVTPQ_M_S "s") (VCVTPQ_M_U "u") (VADDLVAQ_P_S "s") + (VCVTQ_M_N_FROM_F_U "u") (VCVTQ_M_FROM_F_S "s") + (VCVTQ_M_FROM_F_U "u") (VRMLALDAVHQ_P_U "u") + (VRMLALDAVHQ_P_S "s") (VADDLVAQ_P_U "u") + (VCVTQ_M_N_FROM_F_S "s") (VABAVQ_P_U "u") + (VABAVQ_P_S "s") (VSHLQ_M_S "s") (VSHLQ_M_U "u") + (VSRIQ_M_N_S "s") (VSRIQ_M_N_U "u") (VSUBQ_M_S "s") + (VSUBQ_M_U "u") (VCVTQ_M_N_TO_F_S "s") + (VCVTQ_M_N_TO_F_U "u") (VADDQ_M_N_U "u") + (VSHLQ_M_N_S "s") (VMAXQ_M_U "u") (VHSUBQ_M_N_U "u") + (VMULQ_M_N_S "s") (VQSHLQ_M_U "u") (VRHADDQ_M_S "s") + (VEORQ_M_U "u") (VSHRQ_M_N_U "u") (VCADDQ_ROT90_M_U "u") + (VMLADAVAQ_P_U "u") (VEORQ_M_S "s") (VBRSRQ_M_N_S "s") + (VMULQ_M_U "u") (VQRDMLAHQ_M_N_S "s") (VHSUBQ_M_N_S "s") + (VQRSHLQ_M_S "s") (VMULQ_M_N_U "u") + (VMULQ_M_S "s") (VQSHLQ_M_N_U "u") (VSLIQ_M_N_U "u") + (VMLADAVAQ_P_S "s") (VQRSHLQ_M_U "u") + (VMULLBQ_INT_M_U "u") (VSHLQ_M_N_U "u") (VQSUBQ_M_U "u") + (VQRDMLASHQ_M_N_U "u") (VRSHRQ_M_N_S "s") + (VORNQ_M_S "s") 
(VCADDQ_ROT270_M_S "s") (VRHADDQ_M_U "u") + (VRSHRQ_M_N_U "u") (VMLASQ_M_N_U "u") (VHSUBQ_M_U "u") + (VQSUBQ_M_N_S "s") (VMULLTQ_INT_M_S "s") + (VORRQ_M_S "s") (VQDMLAHQ_M_N_U "u") (VRSHLQ_M_S "s") + (VHADDQ_M_U "u") (VHADDQ_M_N_S "s") (VMULLTQ_INT_M_U "u") + (VORRQ_M_U "u") (VHADDQ_M_S "s") (VHADDQ_M_N_U "u") + (VQDMLAHQ_M_N_S "s") (VMAXQ_M_S "s") (VORNQ_M_U "u") + (VCADDQ_ROT270_M_U "u") (VQADDQ_M_U "u") + (VQRDMLASHQ_M_N_S "s") (VBICQ_M_U "u") (VMINQ_M_U "u") + (VSUBQ_M_N_S "s") (VMULLBQ_INT_M_S "s") (VQSUBQ_M_S "s") + (VCADDQ_ROT90_M_S "s") (VRMULHQ_M_S "s") (VANDQ_M_U "u") + (VMULHQ_M_S "s") (VADDQ_M_S "s") (VQRDMLAHQ_M_N_U "u") + (VMLASQ_M_N_S "s") (VHSUBQ_M_S "s") (VRMULHQ_M_U "u") + (VQADDQ_M_N_S "s") (VSHRQ_M_N_S "s") (VANDQ_M_S "s") + (VABDQ_M_U "u") (VQSHLQ_M_S "s") (VABDQ_M_S "s") + (VSUBQ_M_N_U "u") (VMLAQ_M_N_S "s") (VBRSRQ_M_N_U "u") + (VADDQ_M_U "u") (VRSHLQ_M_U "u") (VSLIQ_M_N_S "s") + (VQADDQ_M_N_U "u") (VADDQ_M_N_S "s") (VQSUBQ_M_N_U "u") + (VMLAQ_M_N_U "u") (VMINQ_M_S "s") (VMULHQ_M_U "u") + (VQADDQ_M_S "s") (VBICQ_M_S "s") (VQSHLQ_M_N_S "s") + (VQSHRNTQ_M_N_S "s") (VQSHRNTQ_M_N_U "u") + (VSHRNTQ_M_N_U "u") (VSHRNTQ_M_N_S "s") + (VSHRNBQ_M_N_S "s") (VSHRNBQ_M_N_U "u") + (VSHLLTQ_M_N_S "s") (VSHLLTQ_M_N_U "u") + (VSHLLBQ_M_N_S "s") (VSHLLBQ_M_N_U "u") + (VRSHRNTQ_M_N_S "s") (VRSHRNTQ_M_N_U "u") + (VRSHRNBQ_M_N_U "u") (VRSHRNBQ_M_N_S "s") + (VQSHRNTQ_M_N_U "u") (VQSHRNTQ_M_N_S "s") + (VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u") + (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u") + (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u") + (VMLALDAVAXQ_P_S "s") (VMLALDAVAXQ_P_U "u") + (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u") + (VSTRWQSB_S "s") (VSTRWQSB_U "u") (VSTRBQSO_S "s") + (VSTRBQSO_U "u") (VSTRBQ_S "s") (VSTRBQ_U "u") + (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRBQ_S "s") + (VLDRBQ_U "u") (VLDRWQGB_S "s") (VLDRWQGB_U "u") + (VLD1Q_S "s") (VLD1Q_U "u") (VLDRHQGO_S "s") + (VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u") + (VLDRHQ_S "s") (VLDRHQ_U "u") (VLDRWQ_S 
"s") + (VLDRWQ_U "u") (VLDRDQGB_S "s") (VLDRDQGB_U "u") + (VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s") + (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u") + (VLDRWQGSO_S "s") (VLDRWQGSO_U "u") (VST1Q_S "s") + (VST1Q_U "u") (VSTRHQSO_S "s") (VSTRHQSO_U "u") + (VSTRHQSSO_S "s") (VSTRHQSSO_U "u") (VSTRHQ_S "s") + (VSTRHQ_U "u") (VSTRWQ_S "s") (VSTRWQ_U "u") + (VSTRDQSB_S "s") (VSTRDQSB_U "u") (VSTRDQSO_S "s") + (VSTRDQSO_U "u") (VSTRDQSSO_S "s") (VSTRDQSSO_U "u") + (VSTRWQSO_U "u") (VSTRWQSO_S "s") (VSTRWQSSO_U "u") + (VSTRWQSSO_S "s") (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u") + (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") + (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") (VADCQ_M_S "s") + (VSTRDQSBWB_U "u") (VSBCQ_U "u") (VSBCQ_M_U "u") + (VSBCQ_S "s") (VSBCQ_M_S "s") (VSBCIQ_U "u") + (VSBCIQ_M_U "u") (VSBCIQ_S "s") (VSBCIQ_M_S "s") + (VADCQ_U "u") (VADCQ_M_U "u") (VADCQ_S "s") + (VADCIQ_U "u") (VADCIQ_M_U "u") (VADCIQ_S "s") + (VADCIQ_M_S "s") (SQRSHRL_64 "64") (SQRSHRL_48 "48") + (UQRSHLL_64 "64") (UQRSHLL_48 "48") (VSHLCQ_M_S "s") + (VSHLCQ_M_U "u")]) + +(define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") + (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16") + (VCTP32Q_M "32") (VCTP64Q_M "64")]) ;; Both kinds of return insn. (define_code_iterator RETURNS [return simple_return]) @@ -1248,3 +1467,248 @@ ;; An iterator for CDE MVE accumulator/non-accumulator versions. (define_int_attr a [(UNSPEC_VCDE "") (UNSPEC_VCDEA "a")]) + +;; MVE int iterator. 
+(define_int_iterator VCVTQ_TO_F [VCVTQ_TO_F_S VCVTQ_TO_F_U]) +(define_int_iterator VMVNQ_N [VMVNQ_N_U VMVNQ_N_S]) +(define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U]) +(define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U]) +(define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S]) +(define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S]) +(define_int_iterator VMVNQ [VMVNQ_U VMVNQ_S]) +(define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S]) +(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S]) +(define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S]) +(define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S]) +(define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U]) +(define_int_iterator VMOVLTQ [VMOVLTQ_U VMOVLTQ_S]) +(define_int_iterator VCVTPQ [VCVTPQ_S VCVTPQ_U]) +(define_int_iterator VCVTNQ [VCVTNQ_S VCVTNQ_U]) +(define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U]) +(define_int_iterator VADDLVQ [VADDLVQ_U VADDLVQ_S]) +(define_int_iterator VCTPQ [VCTP8Q VCTP16Q VCTP32Q VCTP64Q]) +(define_int_iterator VCTPQ_M [VCTP8Q_M VCTP16Q_M VCTP32Q_M VCTP64Q_M]) +(define_int_iterator VCVTQ_N_TO_F [VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U]) +(define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S]) +(define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U]) +(define_int_iterator VCVTQ_N_FROM_F [VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U]) +(define_int_iterator VADDLVQ_P [VADDLVQ_P_S VADDLVQ_P_U]) +(define_int_iterator VCMPNEQ [VCMPNEQ_U VCMPNEQ_S]) +(define_int_iterator VSHLQ [VSHLQ_S VSHLQ_U]) +(define_int_iterator VABDQ [VABDQ_S VABDQ_U]) +(define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U]) +(define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U]) +(define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S]) +(define_int_iterator VANDQ [VANDQ_U VANDQ_S]) +(define_int_iterator VBICQ [VBICQ_S VBICQ_U]) +(define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S]) +(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U]) +(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S]) +(define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S]) 
+(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U]) +(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S]) +(define_int_iterator VEORQ [VEORQ_U VEORQ_S]) +(define_int_iterator VHADDQ [VHADDQ_S VHADDQ_U]) +(define_int_iterator VHADDQ_N [VHADDQ_N_U VHADDQ_N_S]) +(define_int_iterator VHSUBQ [VHSUBQ_S VHSUBQ_U]) +(define_int_iterator VHSUBQ_N [VHSUBQ_N_U VHSUBQ_N_S]) +(define_int_iterator VMAXQ [VMAXQ_U VMAXQ_S]) +(define_int_iterator VMAXVQ [VMAXVQ_U VMAXVQ_S]) +(define_int_iterator VMINQ [VMINQ_S VMINQ_U]) +(define_int_iterator VMINVQ [VMINVQ_U VMINVQ_S]) +(define_int_iterator VMLADAVQ [VMLADAVQ_U VMLADAVQ_S]) +(define_int_iterator VMULHQ [VMULHQ_S VMULHQ_U]) +(define_int_iterator VMULLBQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S]) +(define_int_iterator VMULLTQ_INT [VMULLTQ_INT_U VMULLTQ_INT_S]) +(define_int_iterator VMULQ [VMULQ_U VMULQ_S]) +(define_int_iterator VMULQ_N [VMULQ_N_U VMULQ_N_S]) +(define_int_iterator VORNQ [VORNQ_U VORNQ_S]) +(define_int_iterator VORRQ [VORRQ_S VORRQ_U]) +(define_int_iterator VQADDQ [VQADDQ_U VQADDQ_S]) +(define_int_iterator VQADDQ_N [VQADDQ_N_S VQADDQ_N_U]) +(define_int_iterator VQRSHLQ [VQRSHLQ_S VQRSHLQ_U]) +(define_int_iterator VQRSHLQ_N [VQRSHLQ_N_S VQRSHLQ_N_U]) +(define_int_iterator VQSHLQ [VQSHLQ_S VQSHLQ_U]) +(define_int_iterator VQSHLQ_N [VQSHLQ_N_S VQSHLQ_N_U]) +(define_int_iterator VQSHLQ_R [VQSHLQ_R_U VQSHLQ_R_S]) +(define_int_iterator VQSUBQ [VQSUBQ_U VQSUBQ_S]) +(define_int_iterator VQSUBQ_N [VQSUBQ_N_S VQSUBQ_N_U]) +(define_int_iterator VRHADDQ [VRHADDQ_S VRHADDQ_U]) +(define_int_iterator VRMULHQ [VRMULHQ_S VRMULHQ_U]) +(define_int_iterator VRSHLQ [VRSHLQ_S VRSHLQ_U]) +(define_int_iterator VRSHLQ_N [VRSHLQ_N_U VRSHLQ_N_S]) +(define_int_iterator VRSHRQ_N [VRSHRQ_N_S VRSHRQ_N_U]) +(define_int_iterator VSHLQ_N [VSHLQ_N_U VSHLQ_N_S]) +(define_int_iterator VSHLQ_R [VSHLQ_R_S VSHLQ_R_U]) +(define_int_iterator VSUBQ [VSUBQ_S VSUBQ_U]) +(define_int_iterator VSUBQ_N [VSUBQ_N_S VSUBQ_N_U]) +(define_int_iterator VADDLVAQ 
[VADDLVAQ_S VADDLVAQ_U]) +(define_int_iterator VBICQ_N [VBICQ_N_S VBICQ_N_U]) +(define_int_iterator VMLALDAVQ [VMLALDAVQ_U VMLALDAVQ_S]) +(define_int_iterator VMLALDAVXQ [VMLALDAVXQ_U VMLALDAVXQ_S]) +(define_int_iterator VMOVNBQ [VMOVNBQ_U VMOVNBQ_S]) +(define_int_iterator VMOVNTQ [VMOVNTQ_S VMOVNTQ_U]) +(define_int_iterator VORRQ_N [VORRQ_N_U VORRQ_N_S]) +(define_int_iterator VQMOVNBQ [VQMOVNBQ_U VQMOVNBQ_S]) +(define_int_iterator VQMOVNTQ [VQMOVNTQ_U VQMOVNTQ_S]) +(define_int_iterator VSHLLBQ_N [VSHLLBQ_N_S VSHLLBQ_N_U]) +(define_int_iterator VSHLLTQ_N [VSHLLTQ_N_U VSHLLTQ_N_S]) +(define_int_iterator VRMLALDAVHQ [VRMLALDAVHQ_U VRMLALDAVHQ_S]) +(define_int_iterator VBICQ_M_N [VBICQ_M_N_S VBICQ_M_N_U]) +(define_int_iterator VCVTAQ_M [VCVTAQ_M_S VCVTAQ_M_U]) +(define_int_iterator VCVTQ_M_TO_F [VCVTQ_M_TO_F_S VCVTQ_M_TO_F_U]) +(define_int_iterator VQRSHRNBQ_N [VQRSHRNBQ_N_U VQRSHRNBQ_N_S]) +(define_int_iterator VABAVQ [VABAVQ_S VABAVQ_U]) +(define_int_iterator VSHLCQ [VSHLCQ_S VSHLCQ_U]) +(define_int_iterator VRMLALDAVHAQ [VRMLALDAVHAQ_S VRMLALDAVHAQ_U]) +(define_int_iterator VADDVAQ_P [VADDVAQ_P_S VADDVAQ_P_U]) +(define_int_iterator VCLZQ_M [VCLZQ_M_S VCLZQ_M_U]) +(define_int_iterator VCMPEQQ_M_N [VCMPEQQ_M_N_S VCMPEQQ_M_N_U]) +(define_int_iterator VCMPEQQ_M [VCMPEQQ_M_S VCMPEQQ_M_U]) +(define_int_iterator VCMPNEQ_M_N [VCMPNEQ_M_N_S VCMPNEQ_M_N_U]) +(define_int_iterator VCMPNEQ_M [VCMPNEQ_M_S VCMPNEQ_M_U]) +(define_int_iterator VDUPQ_M_N [VDUPQ_M_N_S VDUPQ_M_N_U]) +(define_int_iterator VMAXVQ_P [VMAXVQ_P_S VMAXVQ_P_U]) +(define_int_iterator VMINVQ_P [VMINVQ_P_S VMINVQ_P_U]) +(define_int_iterator VMLADAVAQ [VMLADAVAQ_S VMLADAVAQ_U]) +(define_int_iterator VMLADAVQ_P [VMLADAVQ_P_S VMLADAVQ_P_U]) +(define_int_iterator VMLAQ_N [VMLAQ_N_S VMLAQ_N_U]) +(define_int_iterator VMLASQ_N [VMLASQ_N_S VMLASQ_N_U]) +(define_int_iterator VMVNQ_M [VMVNQ_M_S VMVNQ_M_U]) +(define_int_iterator VPSELQ [VPSELQ_S VPSELQ_U]) +(define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S VQDMLAHQ_N_U]) 
+(define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S VQRDMLAHQ_N_U]) +(define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S VQRDMLASHQ_N_U]) +(define_int_iterator VQRSHLQ_M_N [VQRSHLQ_M_N_S VQRSHLQ_M_N_U]) +(define_int_iterator VQSHLQ_M_R [VQSHLQ_M_R_S VQSHLQ_M_R_U]) +(define_int_iterator VREV64Q_M [VREV64Q_M_S VREV64Q_M_U]) +(define_int_iterator VRSHLQ_M_N [VRSHLQ_M_N_S VRSHLQ_M_N_U]) +(define_int_iterator VSHLQ_M_R [VSHLQ_M_R_S VSHLQ_M_R_U]) +(define_int_iterator VSLIQ_N [VSLIQ_N_S VSLIQ_N_U]) +(define_int_iterator VSRIQ_N [VSRIQ_N_S VSRIQ_N_U]) +(define_int_iterator VMLALDAVQ_P [VMLALDAVQ_P_U VMLALDAVQ_P_S]) +(define_int_iterator VQMOVNBQ_M [VQMOVNBQ_M_S VQMOVNBQ_M_U]) +(define_int_iterator VMOVLTQ_M [VMOVLTQ_M_U VMOVLTQ_M_S]) +(define_int_iterator VMOVNBQ_M [VMOVNBQ_M_U VMOVNBQ_M_S]) +(define_int_iterator VRSHRNTQ_N [VRSHRNTQ_N_U VRSHRNTQ_N_S]) +(define_int_iterator VORRQ_M_N [VORRQ_M_N_S VORRQ_M_N_U]) +(define_int_iterator VREV32Q_M [VREV32Q_M_S VREV32Q_M_U]) +(define_int_iterator VREV16Q_M [VREV16Q_M_S VREV16Q_M_U]) +(define_int_iterator VQRSHRNTQ_N [VQRSHRNTQ_N_U VQRSHRNTQ_N_S]) +(define_int_iterator VMOVNTQ_M [VMOVNTQ_M_U VMOVNTQ_M_S]) +(define_int_iterator VMOVLBQ_M [VMOVLBQ_M_U VMOVLBQ_M_S]) +(define_int_iterator VMLALDAVAQ [VMLALDAVAQ_S VMLALDAVAQ_U]) +(define_int_iterator VQSHRNBQ_N [VQSHRNBQ_N_U VQSHRNBQ_N_S]) +(define_int_iterator VSHRNBQ_N [VSHRNBQ_N_U VSHRNBQ_N_S]) +(define_int_iterator VRSHRNBQ_N [VRSHRNBQ_N_S VRSHRNBQ_N_U]) +(define_int_iterator VMLALDAVXQ_P [VMLALDAVXQ_P_U VMLALDAVXQ_P_S]) +(define_int_iterator VQMOVNTQ_M [VQMOVNTQ_M_U VQMOVNTQ_M_S]) +(define_int_iterator VMVNQ_M_N [VMVNQ_M_N_U VMVNQ_M_N_S]) +(define_int_iterator VQSHRNTQ_N [VQSHRNTQ_N_U VQSHRNTQ_N_S]) +(define_int_iterator VMLALDAVAXQ [VMLALDAVAXQ_S VMLALDAVAXQ_U]) +(define_int_iterator VSHRNTQ_N [VSHRNTQ_N_S VSHRNTQ_N_U]) +(define_int_iterator VCVTMQ_M [VCVTMQ_M_S VCVTMQ_M_U]) +(define_int_iterator VCVTNQ_M [VCVTNQ_M_S VCVTNQ_M_U]) +(define_int_iterator VCVTPQ_M [VCVTPQ_M_S 
VCVTPQ_M_U]) +(define_int_iterator VCVTQ_M_N_FROM_F [VCVTQ_M_N_FROM_F_S VCVTQ_M_N_FROM_F_U]) +(define_int_iterator VCVTQ_M_FROM_F [VCVTQ_M_FROM_F_U VCVTQ_M_FROM_F_S]) +(define_int_iterator VRMLALDAVHQ_P [VRMLALDAVHQ_P_S VRMLALDAVHQ_P_U]) +(define_int_iterator VADDLVAQ_P [VADDLVAQ_P_U VADDLVAQ_P_S]) +(define_int_iterator VABAVQ_P [VABAVQ_P_S VABAVQ_P_U]) +(define_int_iterator VSHLQ_M [VSHLQ_M_S VSHLQ_M_U]) +(define_int_iterator VSRIQ_M_N [VSRIQ_M_N_S VSRIQ_M_N_U]) +(define_int_iterator VSUBQ_M [VSUBQ_M_U VSUBQ_M_S]) +(define_int_iterator VCVTQ_M_N_TO_F [VCVTQ_M_N_TO_F_U VCVTQ_M_N_TO_F_S]) +(define_int_iterator VHSUBQ_M [VHSUBQ_M_S VHSUBQ_M_U]) +(define_int_iterator VSLIQ_M_N [VSLIQ_M_N_U VSLIQ_M_N_S]) +(define_int_iterator VRSHLQ_M [VRSHLQ_M_S VRSHLQ_M_U]) +(define_int_iterator VMINQ_M [VMINQ_M_S VMINQ_M_U]) +(define_int_iterator VMULLBQ_INT_M [VMULLBQ_INT_M_U VMULLBQ_INT_M_S]) +(define_int_iterator VMULHQ_M [VMULHQ_M_S VMULHQ_M_U]) +(define_int_iterator VMULQ_M [VMULQ_M_S VMULQ_M_U]) +(define_int_iterator VHSUBQ_M_N [VHSUBQ_M_N_S VHSUBQ_M_N_U]) +(define_int_iterator VHADDQ_M_N [VHADDQ_M_N_S VHADDQ_M_N_U]) +(define_int_iterator VORRQ_M [VORRQ_M_S VORRQ_M_U]) +(define_int_iterator VRMULHQ_M [VRMULHQ_M_U VRMULHQ_M_S]) +(define_int_iterator VQADDQ_M [VQADDQ_M_U VQADDQ_M_S]) +(define_int_iterator VRSHRQ_M_N [VRSHRQ_M_N_S VRSHRQ_M_N_U]) +(define_int_iterator VQSUBQ_M_N [VQSUBQ_M_N_U VQSUBQ_M_N_S]) +(define_int_iterator VADDQ_M [VADDQ_M_U VADDQ_M_S]) +(define_int_iterator VORNQ_M [VORNQ_M_U VORNQ_M_S]) +(define_int_iterator VRHADDQ_M [VRHADDQ_M_U VRHADDQ_M_S]) +(define_int_iterator VQSHLQ_M [VQSHLQ_M_U VQSHLQ_M_S]) +(define_int_iterator VANDQ_M [VANDQ_M_U VANDQ_M_S]) +(define_int_iterator VBICQ_M [VBICQ_M_U VBICQ_M_S]) +(define_int_iterator VSHLQ_M_N [VSHLQ_M_N_S VSHLQ_M_N_U]) +(define_int_iterator VCADDQ_ROT270_M [VCADDQ_ROT270_M_U VCADDQ_ROT270_M_S]) +(define_int_iterator VQRSHLQ_M [VQRSHLQ_M_U VQRSHLQ_M_S]) +(define_int_iterator VQADDQ_M_N [VQADDQ_M_N_U VQADDQ_M_N_S]) 
+(define_int_iterator VADDQ_M_N [VADDQ_M_N_S VADDQ_M_N_U]) +(define_int_iterator VMAXQ_M [VMAXQ_M_S VMAXQ_M_U]) +(define_int_iterator VQSUBQ_M [VQSUBQ_M_U VQSUBQ_M_S]) +(define_int_iterator VMLASQ_M_N [VMLASQ_M_N_U VMLASQ_M_N_S]) +(define_int_iterator VMLADAVAQ_P [VMLADAVAQ_P_U VMLADAVAQ_P_S]) +(define_int_iterator VBRSRQ_M_N [VBRSRQ_M_N_U VBRSRQ_M_N_S]) +(define_int_iterator VMULQ_M_N [VMULQ_M_N_U VMULQ_M_N_S]) +(define_int_iterator VCADDQ_ROT90_M [VCADDQ_ROT90_M_U VCADDQ_ROT90_M_S]) +(define_int_iterator VMULLTQ_INT_M [VMULLTQ_INT_M_S VMULLTQ_INT_M_U]) +(define_int_iterator VEORQ_M [VEORQ_M_S VEORQ_M_U]) +(define_int_iterator VSHRQ_M_N [VSHRQ_M_N_S VSHRQ_M_N_U]) +(define_int_iterator VSUBQ_M_N [VSUBQ_M_N_S VSUBQ_M_N_U]) +(define_int_iterator VHADDQ_M [VHADDQ_M_S VHADDQ_M_U]) +(define_int_iterator VABDQ_M [VABDQ_M_S VABDQ_M_U]) +(define_int_iterator VMLAQ_M_N [VMLAQ_M_N_S VMLAQ_M_N_U]) +(define_int_iterator VQSHLQ_M_N [VQSHLQ_M_N_S VQSHLQ_M_N_U]) +(define_int_iterator VMLALDAVAQ_P [VMLALDAVAQ_P_U VMLALDAVAQ_P_S]) +(define_int_iterator VMLALDAVAXQ_P [VMLALDAVAXQ_P_U VMLALDAVAXQ_P_S]) +(define_int_iterator VQRSHRNBQ_M_N [VQRSHRNBQ_M_N_U VQRSHRNBQ_M_N_S]) +(define_int_iterator VQRSHRNTQ_M_N [VQRSHRNTQ_M_N_S VQRSHRNTQ_M_N_U]) +(define_int_iterator VQSHRNBQ_M_N [VQSHRNBQ_M_N_U VQSHRNBQ_M_N_S]) +(define_int_iterator VQSHRNTQ_M_N [VQSHRNTQ_M_N_S VQSHRNTQ_M_N_U]) +(define_int_iterator VRSHRNBQ_M_N [VRSHRNBQ_M_N_U VRSHRNBQ_M_N_S]) +(define_int_iterator VRSHRNTQ_M_N [VRSHRNTQ_M_N_U VRSHRNTQ_M_N_S]) +(define_int_iterator VSHLLBQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S]) +(define_int_iterator VSHLLTQ_M_N [VSHLLTQ_M_N_U VSHLLTQ_M_N_S]) +(define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S VSHRNBQ_M_N_U]) +(define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U]) +(define_int_iterator VSTRWSBQ [VSTRWQSB_S VSTRWQSB_U]) +(define_int_iterator VSTRBSOQ [VSTRBQSO_S VSTRBQSO_U]) +(define_int_iterator VSTRBQ [VSTRBQ_S VSTRBQ_U]) +(define_int_iterator VLDRBGOQ [VLDRBQGO_S VLDRBQGO_U]) 
+(define_int_iterator VLDRBQ [VLDRBQ_S VLDRBQ_U]) +(define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U]) +(define_int_iterator VLD1Q [VLD1Q_S VLD1Q_U]) +(define_int_iterator VLDRHGOQ [VLDRHQGO_S VLDRHQGO_U]) +(define_int_iterator VLDRHGSOQ [VLDRHQGSO_S VLDRHQGSO_U]) +(define_int_iterator VLDRHQ [VLDRHQ_S VLDRHQ_U]) +(define_int_iterator VLDRWQ [VLDRWQ_S VLDRWQ_U]) +(define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U]) +(define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U]) +(define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U]) +(define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U]) +(define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U]) +(define_int_iterator VST1Q [VST1Q_S VST1Q_U]) +(define_int_iterator VSTRHSOQ [VSTRHQSO_S VSTRHQSO_U]) +(define_int_iterator VSTRHSSOQ [VSTRHQSSO_S VSTRHQSSO_U]) +(define_int_iterator VSTRHQ [VSTRHQ_S VSTRHQ_U]) +(define_int_iterator VSTRWQ [VSTRWQ_S VSTRWQ_U]) +(define_int_iterator VSTRDSBQ [VSTRDQSB_S VSTRDQSB_U]) +(define_int_iterator VSTRDSOQ [VSTRDQSO_S VSTRDQSO_U]) +(define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U]) +(define_int_iterator VSTRWSOQ [VSTRWQSO_S VSTRWQSO_U]) +(define_int_iterator VSTRWSSOQ [VSTRWQSSO_S VSTRWQSSO_U]) +(define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U]) +(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) +(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U]) +(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U]) +(define_int_iterator VADCIQ [VADCIQ_U VADCIQ_S]) +(define_int_iterator VADCIQ_M [VADCIQ_M_U VADCIQ_M_S]) +(define_int_iterator VSBCQ [VSBCQ_U VSBCQ_S]) +(define_int_iterator VSBCQ_M [VSBCQ_M_U VSBCQ_M_S]) +(define_int_iterator VSBCIQ [VSBCIQ_U VSBCIQ_S]) +(define_int_iterator VSBCIQ_M [VSBCIQ_M_U VSBCIQ_M_S]) +(define_int_iterator VADCQ [VADCQ_U VADCQ_S]) +(define_int_iterator VADCQ_M [VADCQ_M_U VADCQ_M_S]) +(define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48]) +(define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48]) +(define_int_iterator 
VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 465b39a..4322adf 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -17,654 +17,6 @@ ;; along with GCC; see the file COPYING3. If not see ;; . -(define_mode_iterator MVE_types [V16QI V8HI V4SI V2DI TI V8HF V4SF V2DF]) -(define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF]) -(define_mode_iterator MVE_0 [V8HF V4SF]) -(define_mode_iterator MVE_1 [V16QI V8HI V4SI V2DI]) -(define_mode_iterator MVE_3 [V16QI V8HI]) -(define_mode_iterator MVE_2 [V16QI V8HI V4SI]) -(define_mode_iterator MVE_5 [V8HI V4SI]) -(define_mode_iterator MVE_6 [V8HI V4SI]) - -(define_c_enum "unspec" [VST4Q VRNDXQ_F VRNDQ_F VRNDPQ_F VRNDNQ_F VRNDMQ_F - VRNDAQ_F VREV64Q_F VNEGQ_F VDUPQ_N_F VABSQ_F VREV32Q_F - VCVTTQ_F32_F16 VCVTBQ_F32_F16 VCVTQ_TO_F_S VQNEGQ_S - VCVTQ_TO_F_U VREV16Q_S VREV16Q_U VADDLVQ_S VMVNQ_N_S - VMVNQ_N_U VCVTAQ_S VCVTAQ_U VREV64Q_S VREV64Q_U - VQABSQ_S VNEGQ_S VMVNQ_S VMVNQ_U VDUPQ_N_U VDUPQ_N_S - VCLZQ_U VCLZQ_S VCLSQ_S VADDVQ_S VADDVQ_U VABSQ_S - VREV32Q_U VREV32Q_S VMOVLTQ_U VMOVLTQ_S VMOVLBQ_S - VMOVLBQ_U VCVTQ_FROM_F_S VCVTQ_FROM_F_U VCVTPQ_S - VCVTPQ_U VCVTNQ_S VCVTNQ_U VCVTMQ_S VCVTMQ_U - VADDLVQ_U VCTP8Q VCTP16Q VCTP32Q VCTP64Q VPNOT - VCREATEQ_F VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U VBRSRQ_N_F - VSUBQ_N_F VCREATEQ_U VCREATEQ_S VSHRQ_N_S VSHRQ_N_U - VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U VADDLVQ_P_S - VADDLVQ_P_U VCMPNEQ_U VCMPNEQ_S VSHLQ_S VSHLQ_U VABDQ_S - VADDQ_N_S VADDVAQ_S VADDVQ_P_S VANDQ_S VBICQ_S - VBRSRQ_N_S VCADDQ_ROT270_S VCADDQ_ROT90_S VCMPEQQ_S - VCMPEQQ_N_S VCMPNEQ_N_S VEORQ_S VHADDQ_S VHADDQ_N_S - VHSUBQ_S VHSUBQ_N_S VMAXQ_S VMAXVQ_S VMINQ_S VMINVQ_S - VMLADAVQ_S VMULHQ_S VMULLBQ_INT_S VMULLTQ_INT_S VMULQ_S - VMULQ_N_S VORNQ_S VORRQ_S VQADDQ_S VQADDQ_N_S VQRSHLQ_S - VQRSHLQ_N_S VQSHLQ_S VQSHLQ_N_S VQSHLQ_R_S VQSUBQ_S - VQSUBQ_N_S VRHADDQ_S VRMULHQ_S VRSHLQ_S VRSHLQ_N_S - VRSHRQ_N_S VSHLQ_N_S VSHLQ_R_S VSUBQ_S VSUBQ_N_S - VABDQ_U VADDQ_N_U VADDVAQ_U 
VADDVQ_P_U VANDQ_U VBICQ_U - VBRSRQ_N_U VCADDQ_ROT270_U VCADDQ_ROT90_U VCMPEQQ_U - VCMPEQQ_N_U VCMPNEQ_N_U VEORQ_U VHADDQ_U VHADDQ_N_U - VHSUBQ_U VHSUBQ_N_U VMAXQ_U VMAXVQ_U VMINQ_U VMINVQ_U - VMLADAVQ_U VMULHQ_U VMULLBQ_INT_U VMULLTQ_INT_U VMULQ_U - VMULQ_N_U VORNQ_U VORRQ_U VQADDQ_U VQADDQ_N_U VQRSHLQ_U - VQRSHLQ_N_U VQSHLQ_U VQSHLQ_N_U VQSHLQ_R_U VQSUBQ_U - VQSUBQ_N_U VRHADDQ_U VRMULHQ_U VRSHLQ_U VRSHLQ_N_U - VRSHRQ_N_U VSHLQ_N_U VSHLQ_R_U VSUBQ_U VSUBQ_N_U - VCMPGEQ_N_S VCMPGEQ_S VCMPGTQ_N_S VCMPGTQ_S VCMPLEQ_N_S - VCMPLEQ_S VCMPLTQ_N_S VCMPLTQ_S VHCADDQ_ROT270_S - VHCADDQ_ROT90_S VMAXAQ_S VMAXAVQ_S VMINAQ_S VMINAVQ_S - VMLADAVXQ_S VMLSDAVQ_S VMLSDAVXQ_S VQDMULHQ_N_S - VQDMULHQ_S VQRDMULHQ_N_S VQRDMULHQ_S VQSHLUQ_N_S - VCMPCSQ_N_U VCMPCSQ_U VCMPHIQ_N_U VCMPHIQ_U VABDQ_M_S - VABDQ_M_U VABDQ_F VADDQ_N_F VANDQ_F VBICQ_F - VCADDQ_ROT270_F VCADDQ_ROT90_F VCMPEQQ_F VCMPEQQ_N_F - VCMPGEQ_F VCMPGEQ_N_F VCMPGTQ_F VCMPGTQ_N_F VCMPLEQ_F - VCMPLEQ_N_F VCMPLTQ_F VCMPLTQ_N_F VCMPNEQ_F VCMPNEQ_N_F - VCMULQ_F VCMULQ_ROT180_F VCMULQ_ROT270_F VCMULQ_ROT90_F - VEORQ_F VMAXNMAQ_F VMAXNMAVQ_F VMAXNMQ_F VMAXNMVQ_F - VMINNMAQ_F VMINNMAVQ_F VMINNMQ_F VMINNMVQ_F VMULQ_F - VMULQ_N_F VORNQ_F VORRQ_F VSUBQ_F VADDLVAQ_U - VADDLVAQ_S VBICQ_N_U VBICQ_N_S VCTP8Q_M VCTP16Q_M - VCTP32Q_M VCTP64Q_M VCVTBQ_F16_F32 VCVTTQ_F16_F32 - VMLALDAVQ_U VMLALDAVXQ_U VMLALDAVXQ_S VMLALDAVQ_S - VMLSLDAVQ_S VMLSLDAVXQ_S VMOVNBQ_U VMOVNBQ_S - VMOVNTQ_U VMOVNTQ_S VORRQ_N_S VORRQ_N_U VQDMULLBQ_N_S - VQDMULLBQ_S VQDMULLTQ_N_S VQDMULLTQ_S VQMOVNBQ_U - VQMOVNBQ_S VQMOVUNBQ_S VQMOVUNTQ_S VRMLALDAVHXQ_S - VRMLSLDAVHQ_S VRMLSLDAVHXQ_S VSHLLBQ_S - VSHLLBQ_U VSHLLTQ_U VSHLLTQ_S VQMOVNTQ_U VQMOVNTQ_S - VSHLLBQ_N_S VSHLLBQ_N_U VSHLLTQ_N_U VSHLLTQ_N_S - VRMLALDAVHQ_U VRMLALDAVHQ_S VMULLTQ_POLY_P - VMULLBQ_POLY_P VBICQ_M_N_S VBICQ_M_N_U VCMPEQQ_M_F - VCVTAQ_M_S VCVTAQ_M_U VCVTQ_M_TO_F_S VCVTQ_M_TO_F_U - VQRSHRNBQ_N_U VQRSHRNBQ_N_S VQRSHRUNBQ_N_S - VRMLALDAVHAQ_S VABAVQ_S VABAVQ_U VSHLCQ_S VSHLCQ_U - VRMLALDAVHAQ_U VABSQ_M_S 
VADDVAQ_P_S VADDVAQ_P_U - VCLSQ_M_S VCLZQ_M_S VCLZQ_M_U VCMPCSQ_M_N_U - VCMPCSQ_M_U VCMPEQQ_M_N_S VCMPEQQ_M_N_U VCMPEQQ_M_S - VCMPEQQ_M_U VCMPGEQ_M_N_S VCMPGEQ_M_S VCMPGTQ_M_N_S - VCMPGTQ_M_S VCMPHIQ_M_N_U VCMPHIQ_M_U VCMPLEQ_M_N_S - VCMPLEQ_M_S VCMPLTQ_M_N_S VCMPLTQ_M_S VCMPNEQ_M_N_S - VCMPNEQ_M_N_U VCMPNEQ_M_S VCMPNEQ_M_U VDUPQ_M_N_S - VDUPQ_M_N_U VDWDUPQ_N_U VDWDUPQ_WB_U VIWDUPQ_N_U - VIWDUPQ_WB_U VMAXAQ_M_S VMAXAVQ_P_S VMAXVQ_P_S - VMAXVQ_P_U VMINAQ_M_S VMINAVQ_P_S VMINVQ_P_S VMINVQ_P_U - VMLADAVAQ_S VMLADAVAQ_U VMLADAVQ_P_S VMLADAVQ_P_U - VMLADAVXQ_P_S VMLAQ_N_S VMLAQ_N_U VMLASQ_N_S VMLASQ_N_U - VMLSDAVQ_P_S VMLSDAVXQ_P_S VMVNQ_M_S VMVNQ_M_U - VNEGQ_M_S VPSELQ_S VPSELQ_U VQABSQ_M_S VQDMLAHQ_N_S - VQDMLAHQ_N_U VQNEGQ_M_S VQRDMLADHQ_S VQRDMLADHXQ_S - VQRDMLAHQ_N_S VQRDMLAHQ_N_U VQRDMLASHQ_N_S - VQRDMLASHQ_N_U VQRDMLSDHQ_S VQRDMLSDHXQ_S VQRSHLQ_M_N_S - VQRSHLQ_M_N_U VQSHLQ_M_R_S VQSHLQ_M_R_U VREV64Q_M_S - VREV64Q_M_U VRSHLQ_M_N_S VRSHLQ_M_N_U VSHLQ_M_R_S - VSHLQ_M_R_U VSLIQ_N_S VSLIQ_N_U VSRIQ_N_S VSRIQ_N_U - VQDMLSDHXQ_S VQDMLSDHQ_S VQDMLADHXQ_S VQDMLADHQ_S - VMLSDAVAXQ_S VMLSDAVAQ_S VMLADAVAXQ_S - VCMPGEQ_M_F VCMPGTQ_M_N_F VMLSLDAVQ_P_S VRMLALDAVHAXQ_S - VMLSLDAVXQ_P_S VFMAQ_F VMLSLDAVAQ_S VQSHRUNBQ_N_S - VQRSHRUNTQ_N_S VCMLAQ_F VMINNMAQ_M_F VFMASQ_N_F - VDUPQ_M_N_F VCMPGTQ_M_F VCMPLTQ_M_F VRMLSLDAVHQ_P_S - VQSHRUNTQ_N_S VABSQ_M_F VMAXNMAVQ_P_F VFMAQ_N_F - VRMLSLDAVHXQ_P_S VREV32Q_M_F VRMLSLDAVHAQ_S - VRMLSLDAVHAXQ_S VCMPLTQ_M_N_F VCMPNEQ_M_F VRNDAQ_M_F - VRNDPQ_M_F VADDLVAQ_P_S VQMOVUNBQ_M_S VCMPLEQ_M_F - VCMLAQ_ROT180_F VMLSLDAVAXQ_S VRNDXQ_M_F VFMSQ_F - VMINNMVQ_P_F VMAXNMVQ_P_F VPSELQ_F VCMLAQ_ROT90_F - VQMOVUNTQ_M_S VREV64Q_M_F VNEGQ_M_F VRNDMQ_M_F - VCMPLEQ_M_N_F VCMPGEQ_M_N_F VRNDNQ_M_F VMINNMAVQ_P_F - VCMPNEQ_M_N_F VRMLALDAVHQ_P_S VRMLALDAVHXQ_P_S - VCMPEQQ_M_N_F VCMLAQ_ROT270_F VMAXNMAQ_M_F VRNDQ_M_F - VMLALDAVQ_P_U VMLALDAVQ_P_S VQMOVNBQ_M_S VQMOVNBQ_M_U - VMOVLTQ_M_U VMOVLTQ_M_S VMOVNBQ_M_U VMOVNBQ_M_S - VRSHRNTQ_N_U VRSHRNTQ_N_S VORRQ_M_N_S 
VORRQ_M_N_U - VREV32Q_M_S VREV32Q_M_U VQRSHRNTQ_N_U VQRSHRNTQ_N_S - VMOVNTQ_M_U VMOVNTQ_M_S VMOVLBQ_M_U VMOVLBQ_M_S - VMLALDAVAQ_S VMLALDAVAQ_U VQSHRNBQ_N_U VQSHRNBQ_N_S - VSHRNBQ_N_U VSHRNBQ_N_S VRSHRNBQ_N_S VRSHRNBQ_N_U - VMLALDAVXQ_P_U VMLALDAVXQ_P_S VQMOVNTQ_M_U VQMOVNTQ_M_S - VMVNQ_M_N_U VMVNQ_M_N_S VQSHRNTQ_N_U VQSHRNTQ_N_S - VMLALDAVAXQ_S VMLALDAVAXQ_U VSHRNTQ_N_S VSHRNTQ_N_U - VCVTBQ_M_F16_F32 VCVTBQ_M_F32_F16 VCVTTQ_M_F16_F32 - VCVTTQ_M_F32_F16 VCVTMQ_M_S VCVTMQ_M_U VCVTNQ_M_S - VCVTPQ_M_S VCVTPQ_M_U VCVTQ_M_N_FROM_F_S VCVTNQ_M_U - VREV16Q_M_S VREV16Q_M_U VREV32Q_M VCVTQ_M_FROM_F_U - VCVTQ_M_FROM_F_S VRMLALDAVHQ_P_U VADDLVAQ_P_U - VCVTQ_M_N_FROM_F_U VQSHLUQ_M_N_S VABAVQ_P_S - VABAVQ_P_U VSHLQ_M_S VSHLQ_M_U VSRIQ_M_N_S - VSRIQ_M_N_U VSUBQ_M_U VSUBQ_M_S VCVTQ_M_N_TO_F_U - VCVTQ_M_N_TO_F_S VQADDQ_M_U VQADDQ_M_S - VRSHRQ_M_N_S VSUBQ_M_N_S VSUBQ_M_N_U VBRSRQ_M_N_S - VSUBQ_M_N_F VBICQ_M_F VHADDQ_M_U VBICQ_M_U VBICQ_M_S - VMULQ_M_N_U VHADDQ_M_S VORNQ_M_F VMLAQ_M_N_S VQSUBQ_M_U - VQSUBQ_M_S VMLAQ_M_N_U VQSUBQ_M_N_U VQSUBQ_M_N_S - VMULLTQ_INT_M_S VMULLTQ_INT_M_U VMULQ_M_N_S VMULQ_M_N_F - VMLASQ_M_N_U VMLASQ_M_N_S VMAXQ_M_U VQRDMLAHQ_M_N_U - VCADDQ_ROT270_M_F VCADDQ_ROT270_M_U VCADDQ_ROT270_M_S - VQRSHLQ_M_S VMULQ_M_F VRHADDQ_M_U VSHRQ_M_N_U - VRHADDQ_M_S VMULQ_M_S VMULQ_M_U VQRDMLASHQ_M_N_S - VRSHLQ_M_S VRSHLQ_M_U VRSHRQ_M_N_U VADDQ_M_N_F - VADDQ_M_N_S VADDQ_M_N_U VQRDMLASHQ_M_N_U VMAXQ_M_S - VQRDMLAHQ_M_N_S VORRQ_M_S VORRQ_M_U VORRQ_M_F - VQRSHLQ_M_U VRMULHQ_M_U VRMULHQ_M_S VMINQ_M_S VMINQ_M_U - VANDQ_M_F VANDQ_M_U VANDQ_M_S VHSUBQ_M_N_S VHSUBQ_M_N_U - VMULHQ_M_S VMULHQ_M_U VMULLBQ_INT_M_U - VMULLBQ_INT_M_S VCADDQ_ROT90_M_F - VSHRQ_M_N_S VADDQ_M_U VSLIQ_M_N_U - VQADDQ_M_N_S VBRSRQ_M_N_F VABDQ_M_F VBRSRQ_M_N_U - VEORQ_M_F VSHLQ_M_N_S VQDMLAHQ_M_N_U VQDMLAHQ_M_N_S - VSHLQ_M_N_U VMLADAVAQ_P_U VMLADAVAQ_P_S VSLIQ_M_N_S - VQSHLQ_M_U VQSHLQ_M_S VCADDQ_ROT90_M_U VCADDQ_ROT90_M_S - VORNQ_M_U VORNQ_M_S VQSHLQ_M_N_S VQSHLQ_M_N_U VADDQ_M_S - VHADDQ_M_N_S VADDQ_M_F 
VQADDQ_M_N_U VEORQ_M_S VEORQ_M_U - VHSUBQ_M_S VHSUBQ_M_U VHADDQ_M_N_U VHCADDQ_ROT90_M_S - VQRDMLSDHQ_M_S VQRDMLSDHXQ_M_S VQRDMLADHXQ_M_S - VQDMULHQ_M_S VMLADAVAXQ_P_S VQDMLADHXQ_M_S - VQRDMULHQ_M_S VMLSDAVAXQ_P_S VQDMULHQ_M_N_S - VHCADDQ_ROT270_M_S VQDMLSDHQ_M_S VQDMLSDHXQ_M_S - VMLSDAVAQ_P_S VQRDMLADHQ_M_S VQDMLADHQ_M_S - VMLALDAVAQ_P_U VMLALDAVAQ_P_S VMLALDAVAXQ_P_U - VQRSHRNBQ_M_N_U VQRSHRNBQ_M_N_S VQRSHRNTQ_M_N_S - VQSHRNBQ_M_N_U VQSHRNBQ_M_N_S VQSHRNTQ_M_N_S - VRSHRNBQ_M_N_U VRSHRNBQ_M_N_S VRSHRNTQ_M_N_U - VSHLLBQ_M_N_U VSHLLBQ_M_N_S VSHLLTQ_M_N_U VSHLLTQ_M_N_S - VSHRNBQ_M_N_S VSHRNBQ_M_N_U VSHRNTQ_M_N_S VSHRNTQ_M_N_U - VMLALDAVAXQ_P_S VQRSHRNTQ_M_N_U VQSHRNTQ_M_N_U - VRSHRNTQ_M_N_S VQRDMULHQ_M_N_S VRMLALDAVHAQ_P_S - VMLSLDAVAQ_P_S VMLSLDAVAXQ_P_S VMULLBQ_POLY_M_P - VMULLTQ_POLY_M_P VQDMULLBQ_M_N_S VQDMULLBQ_M_S - VQDMULLTQ_M_N_S VQDMULLTQ_M_S VQRSHRUNBQ_M_N_S - VQRSHRUNTQ_M_N_SVQSHRUNBQ_M_N_S VQSHRUNTQ_M_N_S - VRMLALDAVHAQ_P_U VRMLALDAVHAXQ_P_S VRMLSLDAVHAQ_P_S - VRMLSLDAVHAXQ_P_S VQRSHRUNTQ_M_N_S VQSHRUNBQ_M_N_S - VCMLAQ_M_F VCMLAQ_ROT180_M_F VCMLAQ_ROT270_M_F - VCMLAQ_ROT90_M_F VCMULQ_M_F VCMULQ_ROT180_M_F - VCMULQ_ROT270_M_F VCMULQ_ROT90_M_F VFMAQ_M_F - VFMAQ_M_N_F VFMASQ_M_N_F VFMSQ_M_F VMAXNMQ_M_F - VMINNMQ_M_F VSUBQ_M_F VSTRWQSB_S VSTRWQSB_U - VSTRBQSO_S VSTRBQSO_U VSTRBQ_S VSTRBQ_U VLDRBQGO_S - VLDRBQGO_U VLDRBQ_S VLDRBQ_U VLDRWQGB_S VLDRWQGB_U - VLD1Q_F VLD1Q_S VLD1Q_U VLDRHQ_F VLDRHQGO_S - VLDRHQGO_U VLDRHQGSO_S VLDRHQGSO_U VLDRHQ_S VLDRHQ_U - VLDRWQ_F VLDRWQ_S VLDRWQ_U VLDRDQGB_S VLDRDQGB_U - VLDRDQGO_S VLDRDQGO_U VLDRDQGSO_S VLDRDQGSO_U - VLDRHQGO_F VLDRHQGSO_F VLDRWQGB_F VLDRWQGO_F - VLDRWQGO_S VLDRWQGO_U VLDRWQGSO_F VLDRWQGSO_S - VLDRWQGSO_U VSTRHQ_F VST1Q_S VST1Q_U VSTRHQSO_S - VSTRHQSO_U VSTRHQSSO_S VSTRHQSSO_U VSTRHQ_S - VSTRHQ_U VSTRWQ_S VSTRWQ_U VSTRWQ_F VST1Q_F VSTRDQSB_S - VSTRDQSB_U VSTRDQSO_S VSTRDQSO_U VSTRDQSSO_S - VSTRDQSSO_U VSTRWQSO_S VSTRWQSO_U VSTRWQSSO_S - VSTRWQSSO_U VSTRHQSO_F VSTRHQSSO_F VSTRWQSB_F - VSTRWQSO_F VSTRWQSSO_F 
VDDUPQ VDDUPQ_M VDWDUPQ - VDWDUPQ_M VIDUPQ VIDUPQ_M VIWDUPQ VIWDUPQ_M - VSTRWQSBWB_S VSTRWQSBWB_U VLDRWQGBWB_S VLDRWQGBWB_U - VSTRWQSBWB_F VLDRWQGBWB_F VSTRDQSBWB_S VSTRDQSBWB_U - VLDRDQGBWB_S VLDRDQGBWB_U VADCQ_U VADCQ_M_U VADCQ_S - VADCQ_M_S VSBCIQ_U VSBCIQ_S VSBCIQ_M_U VSBCIQ_M_S - VSBCQ_U VSBCQ_S VSBCQ_M_U VSBCQ_M_S VADCIQ_U VADCIQ_M_U - VADCIQ_S VADCIQ_M_S VLD2Q VLD4Q VST2Q SRSHRL SRSHR - URSHR URSHRL SQRSHR UQRSHL UQRSHLL_64 VSHLCQ_M_U - UQRSHLL_48 SQRSHRL_64 SQRSHRL_48 VSHLCQ_M_S]) - -(define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI") - (V4SF "V4SI")]) - -(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") - (VREV16Q_U "u") (VMVNQ_N_S "s") (VMVNQ_N_U "u") - (VCVTAQ_U "u") (VCVTAQ_S "s") (VREV64Q_S "s") - (VREV64Q_U "u") (VMVNQ_S "s") (VMVNQ_U "u") - (VDUPQ_N_U "u") (VDUPQ_N_S"s") (VADDVQ_S "s") - (VADDVQ_U "u") (VADDVQ_S "s") (VADDVQ_U "u") - (VMOVLTQ_U "u") (VMOVLTQ_S "s") (VMOVLBQ_S "s") - (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u") - (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s") - (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u") - (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u") - (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") - (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") - (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") - (VSHRQ_N_U "u") (VCVTQ_N_FROM_F_S "s") (VSHLQ_U "u") - (VCVTQ_N_FROM_F_U "u") (VADDLVQ_P_S "s") (VSHLQ_S "s") - (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s") - (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s") - (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u") - (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s") - (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u") - (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s") - (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s") - (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u") - (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s") - (VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u") - (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s") - (VHADDQ_U "u") (VHSUBQ_N_S 
"s") (VHSUBQ_N_U "u") - (VHSUBQ_S "s") (VMAXQ_S "s") (VMAXQ_U "u") (VHSUBQ_U "u") - (VMAXVQ_S "s") (VMAXVQ_U "u") (VMINQ_S "s") (VMINQ_U "u") - (VMINVQ_S "s") (VMINVQ_U "u") (VMLADAVQ_S "s") - (VMLADAVQ_U "u") (VMULHQ_S "s") (VMULHQ_U "u") - (VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u") (VQADDQ_S "s") - (VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u") (VQADDQ_U "u") - (VMULQ_N_S "s") (VMULQ_N_U "u") (VMULQ_S "s") - (VMULQ_U "u") (VORNQ_S "s") (VORNQ_U "u") (VORRQ_S "s") - (VORRQ_U "u") (VQADDQ_N_S "s") (VQADDQ_N_U "u") - (VQRSHLQ_N_S "s") (VQRSHLQ_N_U "u") (VQRSHLQ_S "s") - (VQRSHLQ_U "u") (VQSHLQ_N_S "s") (VQSHLQ_N_U "u") - (VQSHLQ_R_S "s") (VQSHLQ_R_U "u") (VQSHLQ_S "s") - (VQSHLQ_U "u") (VQSUBQ_N_S "s") (VQSUBQ_N_U "u") - (VQSUBQ_S "s") (VQSUBQ_U "u") (VRHADDQ_S "s") - (VRHADDQ_U "u") (VRMULHQ_S "s") (VRMULHQ_U "u") - (VRSHLQ_N_S "s") (VRSHLQ_N_U "u") (VRSHLQ_S "s") - (VRSHLQ_U "u") (VRSHRQ_N_S "s") (VRSHRQ_N_U "u") - (VSHLQ_N_S "s") (VSHLQ_N_U "u") (VSHLQ_R_S "s") - (VSHLQ_R_U "u") (VSUBQ_N_S "s") (VSUBQ_N_U "u") - (VSUBQ_S "s") (VSUBQ_U "u") (VADDVAQ_S "s") - (VADDVAQ_U "u") (VADDLVAQ_S "s") (VADDLVAQ_U "u") - (VBICQ_N_S "s") (VBICQ_N_U "u") (VMLALDAVQ_U "u") - (VMLALDAVQ_S "s") (VMLALDAVXQ_U "u") (VMLALDAVXQ_S "s") - (VMOVNBQ_U "u") (VMOVNBQ_S "s") (VMOVNTQ_U "u") - (VMOVNTQ_S "s") (VORRQ_N_S "s") (VORRQ_N_U "u") - (VQMOVNBQ_U "u") (VQMOVNBQ_S "s") (VQMOVNTQ_S "s") - (VQMOVNTQ_U "u") (VSHLLBQ_N_U "u") (VSHLLBQ_N_S "s") - (VSHLLTQ_N_U "u") (VSHLLTQ_N_S "s") (VRMLALDAVHQ_U "u") - (VRMLALDAVHQ_S "s") (VBICQ_M_N_S "s") (VBICQ_M_N_U "u") - (VCVTAQ_M_S "s") (VCVTAQ_M_U "u") (VCVTQ_M_TO_F_S "s") - (VCVTQ_M_TO_F_U "u") (VQRSHRNBQ_N_S "s") - (VQRSHRNBQ_N_U "u") (VABAVQ_S "s") (VABAVQ_U "u") - (VRMLALDAVHAQ_U "u") (VRMLALDAVHAQ_S "s") (VSHLCQ_S "s") - (VSHLCQ_U "u") (VADDVAQ_P_S "s") (VADDVAQ_P_U "u") - (VCLZQ_M_S "s") (VCLZQ_M_U "u") (VCMPEQQ_M_N_S "s") - (VCMPEQQ_M_N_U "u") (VCMPEQQ_M_S "s") (VCMPEQQ_M_U "u") - (VCMPNEQ_M_N_S "s") (VCMPNEQ_M_N_U "u") (VCMPNEQ_M_S "s") - 
(VCMPNEQ_M_U "u") (VDUPQ_M_N_S "s") (VDUPQ_M_N_U "u") - (VMAXVQ_P_S "s") (VMAXVQ_P_U "u") (VMINVQ_P_S "s") - (VMINVQ_P_U "u") (VMLADAVAQ_S "s") (VMLADAVAQ_U "u") - (VMLADAVQ_P_S "s") (VMLADAVQ_P_U "u") (VMLAQ_N_S "s") - (VMLAQ_N_U "u") (VMLASQ_N_S "s") (VMLASQ_N_U "u") - (VMVNQ_M_S "s") (VMVNQ_M_U "u") (VPSELQ_S "s") - (VPSELQ_U "u") (VQDMLAHQ_N_S "s") (VQDMLAHQ_N_U "u") - (VQRDMLAHQ_N_S "s") (VQRDMLAHQ_N_U "u") - (VQRDMLASHQ_N_S "s") (VQRDMLASHQ_N_U "u") - (VQRSHLQ_M_N_S "s") (VQRSHLQ_M_N_U "u") - (VQSHLQ_M_R_S "s") (VQSHLQ_M_R_U "u") (VSRIQ_N_S "s") - (VREV64Q_M_S "s") (VREV64Q_M_U "u") (VSRIQ_N_U "u") - (VRSHLQ_M_N_S "s") (VRSHLQ_M_N_U "u") (VSHLQ_M_R_S "s") - (VSHLQ_M_R_U "u") (VSLIQ_N_S "s") (VSLIQ_N_U "u") - (VMLALDAVQ_P_S "s") (VQMOVNBQ_M_S "s") (VMOVLTQ_M_S "s") - (VMOVNBQ_M_S "s") (VRSHRNTQ_N_S "s") (VORRQ_M_N_S "s") - (VREV32Q_M_S "s") (VQRSHRNTQ_N_S "s") (VMOVNTQ_M_S "s") - (VMOVLBQ_M_S "s") (VMLALDAVAQ_S "s") (VQSHRNBQ_N_S "s") - (VSHRNBQ_N_S "s") (VRSHRNBQ_N_S "s") (VMLALDAVXQ_P_S "s") - (VQMOVNTQ_M_S "s") (VMVNQ_M_N_S "s") (VQSHRNTQ_N_S "s") - (VMLALDAVAXQ_S "s") (VSHRNTQ_N_S "s") (VMLALDAVQ_P_U "u") - (VQMOVNBQ_M_U "u") (VMOVLTQ_M_U "u") (VMOVNBQ_M_U "u") - (VRSHRNTQ_N_U "u") (VORRQ_M_N_U "u") (VREV32Q_M_U "u") - (VREV16Q_M_S "s") (VREV16Q_M_U "u") - (VQRSHRNTQ_N_U "u") (VMOVNTQ_M_U "u") (VMOVLBQ_M_U "u") - (VMLALDAVAQ_U "u") (VQSHRNBQ_N_U "u") (VSHRNBQ_N_U "u") - (VRSHRNBQ_N_U "u") (VMLALDAVXQ_P_U "u") - (VMVNQ_M_N_U "u") (VQSHRNTQ_N_U "u") (VMLALDAVAXQ_U "u") - (VQMOVNTQ_M_U "u") (VSHRNTQ_N_U "u") (VCVTMQ_M_S "s") - (VCVTMQ_M_U "u") (VCVTNQ_M_S "s") (VCVTNQ_M_U "u") - (VCVTPQ_M_S "s") (VCVTPQ_M_U "u") (VADDLVAQ_P_S "s") - (VCVTQ_M_N_FROM_F_U "u") (VCVTQ_M_FROM_F_S "s") - (VCVTQ_M_FROM_F_U "u") (VRMLALDAVHQ_P_U "u") - (VRMLALDAVHQ_P_S "s") (VADDLVAQ_P_U "u") - (VCVTQ_M_N_FROM_F_S "s") (VABAVQ_P_U "u") - (VABAVQ_P_S "s") (VSHLQ_M_S "s") (VSHLQ_M_U "u") - (VSRIQ_M_N_S "s") (VSRIQ_M_N_U "u") (VSUBQ_M_S "s") - (VSUBQ_M_U "u") (VCVTQ_M_N_TO_F_S "s") - 
(VCVTQ_M_N_TO_F_U "u") (VADDQ_M_N_U "u") - (VSHLQ_M_N_S "s") (VMAXQ_M_U "u") (VHSUBQ_M_N_U "u") - (VMULQ_M_N_S "s") (VQSHLQ_M_U "u") (VRHADDQ_M_S "s") - (VEORQ_M_U "u") (VSHRQ_M_N_U "u") (VCADDQ_ROT90_M_U "u") - (VMLADAVAQ_P_U "u") (VEORQ_M_S "s") (VBRSRQ_M_N_S "s") - (VMULQ_M_U "u") (VQRDMLAHQ_M_N_S "s") (VHSUBQ_M_N_S "s") - (VQRSHLQ_M_S "s") (VMULQ_M_N_U "u") - (VMULQ_M_S "s") (VQSHLQ_M_N_U "u") (VSLIQ_M_N_U "u") - (VMLADAVAQ_P_S "s") (VQRSHLQ_M_U "u") - (VMULLBQ_INT_M_U "u") (VSHLQ_M_N_U "u") (VQSUBQ_M_U "u") - (VQRDMLASHQ_M_N_U "u") (VRSHRQ_M_N_S "s") - (VORNQ_M_S "s") (VCADDQ_ROT270_M_S "s") (VRHADDQ_M_U "u") - (VRSHRQ_M_N_U "u") (VMLASQ_M_N_U "u") (VHSUBQ_M_U "u") - (VQSUBQ_M_N_S "s") (VMULLTQ_INT_M_S "s") - (VORRQ_M_S "s") (VQDMLAHQ_M_N_U "u") (VRSHLQ_M_S "s") - (VHADDQ_M_U "u") (VHADDQ_M_N_S "s") (VMULLTQ_INT_M_U "u") - (VORRQ_M_U "u") (VHADDQ_M_S "s") (VHADDQ_M_N_U "u") - (VQDMLAHQ_M_N_S "s") (VMAXQ_M_S "s") (VORNQ_M_U "u") - (VCADDQ_ROT270_M_U "u") (VQADDQ_M_U "u") - (VQRDMLASHQ_M_N_S "s") (VBICQ_M_U "u") (VMINQ_M_U "u") - (VSUBQ_M_N_S "s") (VMULLBQ_INT_M_S "s") (VQSUBQ_M_S "s") - (VCADDQ_ROT90_M_S "s") (VRMULHQ_M_S "s") (VANDQ_M_U "u") - (VMULHQ_M_S "s") (VADDQ_M_S "s") (VQRDMLAHQ_M_N_U "u") - (VMLASQ_M_N_S "s") (VHSUBQ_M_S "s") (VRMULHQ_M_U "u") - (VQADDQ_M_N_S "s") (VSHRQ_M_N_S "s") (VANDQ_M_S "s") - (VABDQ_M_U "u") (VQSHLQ_M_S "s") (VABDQ_M_S "s") - (VSUBQ_M_N_U "u") (VMLAQ_M_N_S "s") (VBRSRQ_M_N_U "u") - (VADDQ_M_U "u") (VRSHLQ_M_U "u") (VSLIQ_M_N_S "s") - (VQADDQ_M_N_U "u") (VADDQ_M_N_S "s") (VQSUBQ_M_N_U "u") - (VMLAQ_M_N_U "u") (VMINQ_M_S "s") (VMULHQ_M_U "u") - (VQADDQ_M_S "s") (VBICQ_M_S "s") (VQSHLQ_M_N_S "s") - (VQSHRNTQ_M_N_S "s") (VQSHRNTQ_M_N_U "u") - (VSHRNTQ_M_N_U "u") (VSHRNTQ_M_N_S "s") - (VSHRNBQ_M_N_S "s") (VSHRNBQ_M_N_U "u") - (VSHLLTQ_M_N_S "s") (VSHLLTQ_M_N_U "u") - (VSHLLBQ_M_N_S "s") (VSHLLBQ_M_N_U "u") - (VRSHRNTQ_M_N_S "s") (VRSHRNTQ_M_N_U "u") - (VRSHRNBQ_M_N_U "u") (VRSHRNBQ_M_N_S "s") - (VQSHRNTQ_M_N_U "u") (VQSHRNTQ_M_N_S 
"s") - (VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u") - (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u") - (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u") - (VMLALDAVAXQ_P_S "s") (VMLALDAVAXQ_P_U "u") - (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u") - (VSTRWQSB_S "s") (VSTRWQSB_U "u") (VSTRBQSO_S "s") - (VSTRBQSO_U "u") (VSTRBQ_S "s") (VSTRBQ_U "u") - (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRBQ_S "s") - (VLDRBQ_U "u") (VLDRWQGB_S "s") (VLDRWQGB_U "u") - (VLD1Q_S "s") (VLD1Q_U "u") (VLDRHQGO_S "s") - (VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u") - (VLDRHQ_S "s") (VLDRHQ_U "u") (VLDRWQ_S "s") - (VLDRWQ_U "u") (VLDRDQGB_S "s") (VLDRDQGB_U "u") - (VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s") - (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u") - (VLDRWQGSO_S "s") (VLDRWQGSO_U "u") (VST1Q_S "s") - (VST1Q_U "u") (VSTRHQSO_S "s") (VSTRHQSO_U "u") - (VSTRHQSSO_S "s") (VSTRHQSSO_U "u") (VSTRHQ_S "s") - (VSTRHQ_U "u") (VSTRWQ_S "s") (VSTRWQ_U "u") - (VSTRDQSB_S "s") (VSTRDQSB_U "u") (VSTRDQSO_S "s") - (VSTRDQSO_U "u") (VSTRDQSSO_S "s") (VSTRDQSSO_U "u") - (VSTRWQSO_U "u") (VSTRWQSO_S "s") (VSTRWQSSO_U "u") - (VSTRWQSSO_S "s") (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u") - (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") - (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") (VADCQ_M_S "s") - (VSTRDQSBWB_U "u") (VSBCQ_U "u") (VSBCQ_M_U "u") - (VSBCQ_S "s") (VSBCQ_M_S "s") (VSBCIQ_U "u") - (VSBCIQ_M_U "u") (VSBCIQ_S "s") (VSBCIQ_M_S "s") - (VADCQ_U "u") (VADCQ_M_U "u") (VADCQ_S "s") - (VADCIQ_U "u") (VADCIQ_M_U "u") (VADCIQ_S "s") - (VADCIQ_M_S "s") (SQRSHRL_64 "64") (SQRSHRL_48 "48") - (UQRSHLL_64 "64") (UQRSHLL_48 "48") (VSHLCQ_M_S "s") - (VSHLCQ_M_U "u")]) - -(define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") - (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16") - (VCTP32Q_M "32") (VCTP64Q_M "64")]) -(define_mode_attr MVE_pred2 [(V16QI "mve_imm_8") (V8HI "mve_imm_16") - (V4SI "mve_imm_32") - (V8HF "mve_imm_16") (V4SF "mve_imm_32")]) -(define_mode_attr MVE_constraint2 [(V16QI "Rb") 
(V8HI "Rd") (V4SI "Rf") - (V8HF "Rd") (V4SF "Rf")]) -(define_mode_attr MVE_LANES [(V16QI "16") (V8HI "8") (V4SI "4")]) -(define_mode_attr MVE_constraint [ (V16QI "Ra") (V8HI "Rc") (V4SI "Re")]) -(define_mode_attr MVE_pred [ (V16QI "mve_imm_7") (V8HI "mve_imm_15") - (V4SI "mve_imm_31")]) -(define_mode_attr MVE_constraint3 [ (V8HI "Rb") (V4SI "Rd")]) -(define_mode_attr MVE_pred3 [ (V8HI "mve_imm_8") (V4SI "mve_imm_16")]) -(define_mode_attr MVE_constraint1 [ (V8HI "Ra") (V4SI "Rc")]) -(define_mode_attr MVE_pred1 [ (V8HI "mve_imm_7") (V4SI "mve_imm_15")]) -(define_mode_attr MVE_B_ELEM [ (V16QI "V16QI") (V8HI "V8QI") (V4SI "V4QI")]) -(define_mode_attr MVE_H_ELEM [ (V8HI "V8HI") (V4SI "V4HI")]) -(define_mode_attr V_sz_elem1 [(V16QI "b") (V8HI "h") (V4SI "w") (V8HF "h") - (V4SF "w")]) -(define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32") - (V8HF "u16") (V4SF "32")]) - -(define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w") - (V8HF "=w") (V4SF "=&w")]) - -(define_int_iterator VCVTQ_TO_F [VCVTQ_TO_F_S VCVTQ_TO_F_U]) -(define_int_iterator VMVNQ_N [VMVNQ_N_U VMVNQ_N_S]) -(define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U]) -(define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U]) -(define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S]) -(define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S]) -(define_int_iterator VMVNQ [VMVNQ_U VMVNQ_S]) -(define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S]) -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S]) -(define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S]) -(define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S]) -(define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U]) -(define_int_iterator VMOVLTQ [VMOVLTQ_U VMOVLTQ_S]) -(define_int_iterator VCVTPQ [VCVTPQ_S VCVTPQ_U]) -(define_int_iterator VCVTNQ [VCVTNQ_S VCVTNQ_U]) -(define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U]) -(define_int_iterator VADDLVQ [VADDLVQ_U VADDLVQ_S]) -(define_int_iterator VCTPQ [VCTP8Q VCTP16Q VCTP32Q VCTP64Q]) -(define_int_iterator VCTPQ_M [VCTP8Q_M 
VCTP16Q_M VCTP32Q_M VCTP64Q_M]) -(define_int_iterator VCVTQ_N_TO_F [VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U]) -(define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S]) -(define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U]) -(define_int_iterator VCVTQ_N_FROM_F [VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U]) -(define_int_iterator VADDLVQ_P [VADDLVQ_P_S VADDLVQ_P_U]) -(define_int_iterator VCMPNEQ [VCMPNEQ_U VCMPNEQ_S]) -(define_int_iterator VSHLQ [VSHLQ_S VSHLQ_U]) -(define_int_iterator VABDQ [VABDQ_S VABDQ_U]) -(define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U]) -(define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U]) -(define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S]) -(define_int_iterator VANDQ [VANDQ_U VANDQ_S]) -(define_int_iterator VBICQ [VBICQ_S VBICQ_U]) -(define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S]) -(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U]) -(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S]) -(define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S]) -(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U]) -(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S]) -(define_int_iterator VEORQ [VEORQ_U VEORQ_S]) -(define_int_iterator VHADDQ [VHADDQ_S VHADDQ_U]) -(define_int_iterator VHADDQ_N [VHADDQ_N_U VHADDQ_N_S]) -(define_int_iterator VHSUBQ [VHSUBQ_S VHSUBQ_U]) -(define_int_iterator VHSUBQ_N [VHSUBQ_N_U VHSUBQ_N_S]) -(define_int_iterator VMAXQ [VMAXQ_U VMAXQ_S]) -(define_int_iterator VMAXVQ [VMAXVQ_U VMAXVQ_S]) -(define_int_iterator VMINQ [VMINQ_S VMINQ_U]) -(define_int_iterator VMINVQ [VMINVQ_U VMINVQ_S]) -(define_int_iterator VMLADAVQ [VMLADAVQ_U VMLADAVQ_S]) -(define_int_iterator VMULHQ [VMULHQ_S VMULHQ_U]) -(define_int_iterator VMULLBQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S]) -(define_int_iterator VMULLTQ_INT [VMULLTQ_INT_U VMULLTQ_INT_S]) -(define_int_iterator VMULQ [VMULQ_U VMULQ_S]) -(define_int_iterator VMULQ_N [VMULQ_N_U VMULQ_N_S]) -(define_int_iterator VORNQ [VORNQ_U VORNQ_S]) -(define_int_iterator VORRQ [VORRQ_S VORRQ_U]) 
-(define_int_iterator VQADDQ [VQADDQ_U VQADDQ_S]) -(define_int_iterator VQADDQ_N [VQADDQ_N_S VQADDQ_N_U]) -(define_int_iterator VQRSHLQ [VQRSHLQ_S VQRSHLQ_U]) -(define_int_iterator VQRSHLQ_N [VQRSHLQ_N_S VQRSHLQ_N_U]) -(define_int_iterator VQSHLQ [VQSHLQ_S VQSHLQ_U]) -(define_int_iterator VQSHLQ_N [VQSHLQ_N_S VQSHLQ_N_U]) -(define_int_iterator VQSHLQ_R [VQSHLQ_R_U VQSHLQ_R_S]) -(define_int_iterator VQSUBQ [VQSUBQ_U VQSUBQ_S]) -(define_int_iterator VQSUBQ_N [VQSUBQ_N_S VQSUBQ_N_U]) -(define_int_iterator VRHADDQ [VRHADDQ_S VRHADDQ_U]) -(define_int_iterator VRMULHQ [VRMULHQ_S VRMULHQ_U]) -(define_int_iterator VRSHLQ [VRSHLQ_S VRSHLQ_U]) -(define_int_iterator VRSHLQ_N [VRSHLQ_N_U VRSHLQ_N_S]) -(define_int_iterator VRSHRQ_N [VRSHRQ_N_S VRSHRQ_N_U]) -(define_int_iterator VSHLQ_N [VSHLQ_N_U VSHLQ_N_S]) -(define_int_iterator VSHLQ_R [VSHLQ_R_S VSHLQ_R_U]) -(define_int_iterator VSUBQ [VSUBQ_S VSUBQ_U]) -(define_int_iterator VSUBQ_N [VSUBQ_N_S VSUBQ_N_U]) -(define_int_iterator VADDLVAQ [VADDLVAQ_S VADDLVAQ_U]) -(define_int_iterator VBICQ_N [VBICQ_N_S VBICQ_N_U]) -(define_int_iterator VMLALDAVQ [VMLALDAVQ_U VMLALDAVQ_S]) -(define_int_iterator VMLALDAVXQ [VMLALDAVXQ_U VMLALDAVXQ_S]) -(define_int_iterator VMOVNBQ [VMOVNBQ_U VMOVNBQ_S]) -(define_int_iterator VMOVNTQ [VMOVNTQ_S VMOVNTQ_U]) -(define_int_iterator VORRQ_N [VORRQ_N_U VORRQ_N_S]) -(define_int_iterator VQMOVNBQ [VQMOVNBQ_U VQMOVNBQ_S]) -(define_int_iterator VQMOVNTQ [VQMOVNTQ_U VQMOVNTQ_S]) -(define_int_iterator VSHLLBQ_N [VSHLLBQ_N_S VSHLLBQ_N_U]) -(define_int_iterator VSHLLTQ_N [VSHLLTQ_N_U VSHLLTQ_N_S]) -(define_int_iterator VRMLALDAVHQ [VRMLALDAVHQ_U VRMLALDAVHQ_S]) -(define_int_iterator VBICQ_M_N [VBICQ_M_N_S VBICQ_M_N_U]) -(define_int_iterator VCVTAQ_M [VCVTAQ_M_S VCVTAQ_M_U]) -(define_int_iterator VCVTQ_M_TO_F [VCVTQ_M_TO_F_S VCVTQ_M_TO_F_U]) -(define_int_iterator VQRSHRNBQ_N [VQRSHRNBQ_N_U VQRSHRNBQ_N_S]) -(define_int_iterator VABAVQ [VABAVQ_S VABAVQ_U]) -(define_int_iterator VSHLCQ [VSHLCQ_S VSHLCQ_U]) 
-(define_int_iterator VRMLALDAVHAQ [VRMLALDAVHAQ_S VRMLALDAVHAQ_U]) -(define_int_iterator VADDVAQ_P [VADDVAQ_P_S VADDVAQ_P_U]) -(define_int_iterator VCLZQ_M [VCLZQ_M_S VCLZQ_M_U]) -(define_int_iterator VCMPEQQ_M_N [VCMPEQQ_M_N_S VCMPEQQ_M_N_U]) -(define_int_iterator VCMPEQQ_M [VCMPEQQ_M_S VCMPEQQ_M_U]) -(define_int_iterator VCMPNEQ_M_N [VCMPNEQ_M_N_S VCMPNEQ_M_N_U]) -(define_int_iterator VCMPNEQ_M [VCMPNEQ_M_S VCMPNEQ_M_U]) -(define_int_iterator VDUPQ_M_N [VDUPQ_M_N_S VDUPQ_M_N_U]) -(define_int_iterator VMAXVQ_P [VMAXVQ_P_S VMAXVQ_P_U]) -(define_int_iterator VMINVQ_P [VMINVQ_P_S VMINVQ_P_U]) -(define_int_iterator VMLADAVAQ [VMLADAVAQ_S VMLADAVAQ_U]) -(define_int_iterator VMLADAVQ_P [VMLADAVQ_P_S VMLADAVQ_P_U]) -(define_int_iterator VMLAQ_N [VMLAQ_N_S VMLAQ_N_U]) -(define_int_iterator VMLASQ_N [VMLASQ_N_S VMLASQ_N_U]) -(define_int_iterator VMVNQ_M [VMVNQ_M_S VMVNQ_M_U]) -(define_int_iterator VPSELQ [VPSELQ_S VPSELQ_U]) -(define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S VQDMLAHQ_N_U]) -(define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S VQRDMLAHQ_N_U]) -(define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S VQRDMLASHQ_N_U]) -(define_int_iterator VQRSHLQ_M_N [VQRSHLQ_M_N_S VQRSHLQ_M_N_U]) -(define_int_iterator VQSHLQ_M_R [VQSHLQ_M_R_S VQSHLQ_M_R_U]) -(define_int_iterator VREV64Q_M [VREV64Q_M_S VREV64Q_M_U]) -(define_int_iterator VRSHLQ_M_N [VRSHLQ_M_N_S VRSHLQ_M_N_U]) -(define_int_iterator VSHLQ_M_R [VSHLQ_M_R_S VSHLQ_M_R_U]) -(define_int_iterator VSLIQ_N [VSLIQ_N_S VSLIQ_N_U]) -(define_int_iterator VSRIQ_N [VSRIQ_N_S VSRIQ_N_U]) -(define_int_iterator VMLALDAVQ_P [VMLALDAVQ_P_U VMLALDAVQ_P_S]) -(define_int_iterator VQMOVNBQ_M [VQMOVNBQ_M_S VQMOVNBQ_M_U]) -(define_int_iterator VMOVLTQ_M [VMOVLTQ_M_U VMOVLTQ_M_S]) -(define_int_iterator VMOVNBQ_M [VMOVNBQ_M_U VMOVNBQ_M_S]) -(define_int_iterator VRSHRNTQ_N [VRSHRNTQ_N_U VRSHRNTQ_N_S]) -(define_int_iterator VORRQ_M_N [VORRQ_M_N_S VORRQ_M_N_U]) -(define_int_iterator VREV32Q_M [VREV32Q_M_S VREV32Q_M_U]) -(define_int_iterator VREV16Q_M 
[VREV16Q_M_S VREV16Q_M_U]) -(define_int_iterator VQRSHRNTQ_N [VQRSHRNTQ_N_U VQRSHRNTQ_N_S]) -(define_int_iterator VMOVNTQ_M [VMOVNTQ_M_U VMOVNTQ_M_S]) -(define_int_iterator VMOVLBQ_M [VMOVLBQ_M_U VMOVLBQ_M_S]) -(define_int_iterator VMLALDAVAQ [VMLALDAVAQ_S VMLALDAVAQ_U]) -(define_int_iterator VQSHRNBQ_N [VQSHRNBQ_N_U VQSHRNBQ_N_S]) -(define_int_iterator VSHRNBQ_N [VSHRNBQ_N_U VSHRNBQ_N_S]) -(define_int_iterator VRSHRNBQ_N [VRSHRNBQ_N_S VRSHRNBQ_N_U]) -(define_int_iterator VMLALDAVXQ_P [VMLALDAVXQ_P_U VMLALDAVXQ_P_S]) -(define_int_iterator VQMOVNTQ_M [VQMOVNTQ_M_U VQMOVNTQ_M_S]) -(define_int_iterator VMVNQ_M_N [VMVNQ_M_N_U VMVNQ_M_N_S]) -(define_int_iterator VQSHRNTQ_N [VQSHRNTQ_N_U VQSHRNTQ_N_S]) -(define_int_iterator VMLALDAVAXQ [VMLALDAVAXQ_S VMLALDAVAXQ_U]) -(define_int_iterator VSHRNTQ_N [VSHRNTQ_N_S VSHRNTQ_N_U]) -(define_int_iterator VCVTMQ_M [VCVTMQ_M_S VCVTMQ_M_U]) -(define_int_iterator VCVTNQ_M [VCVTNQ_M_S VCVTNQ_M_U]) -(define_int_iterator VCVTPQ_M [VCVTPQ_M_S VCVTPQ_M_U]) -(define_int_iterator VCVTQ_M_N_FROM_F [VCVTQ_M_N_FROM_F_S VCVTQ_M_N_FROM_F_U]) -(define_int_iterator VCVTQ_M_FROM_F [VCVTQ_M_FROM_F_U VCVTQ_M_FROM_F_S]) -(define_int_iterator VRMLALDAVHQ_P [VRMLALDAVHQ_P_S VRMLALDAVHQ_P_U]) -(define_int_iterator VADDLVAQ_P [VADDLVAQ_P_U VADDLVAQ_P_S]) -(define_int_iterator VABAVQ_P [VABAVQ_P_S VABAVQ_P_U]) -(define_int_iterator VSHLQ_M [VSHLQ_M_S VSHLQ_M_U]) -(define_int_iterator VSRIQ_M_N [VSRIQ_M_N_S VSRIQ_M_N_U]) -(define_int_iterator VSUBQ_M [VSUBQ_M_U VSUBQ_M_S]) -(define_int_iterator VCVTQ_M_N_TO_F [VCVTQ_M_N_TO_F_U VCVTQ_M_N_TO_F_S]) -(define_int_iterator VHSUBQ_M [VHSUBQ_M_S VHSUBQ_M_U]) -(define_int_iterator VSLIQ_M_N [VSLIQ_M_N_U VSLIQ_M_N_S]) -(define_int_iterator VRSHLQ_M [VRSHLQ_M_S VRSHLQ_M_U]) -(define_int_iterator VMINQ_M [VMINQ_M_S VMINQ_M_U]) -(define_int_iterator VMULLBQ_INT_M [VMULLBQ_INT_M_U VMULLBQ_INT_M_S]) -(define_int_iterator VMULHQ_M [VMULHQ_M_S VMULHQ_M_U]) -(define_int_iterator VMULQ_M [VMULQ_M_S VMULQ_M_U]) 
-(define_int_iterator VHSUBQ_M_N [VHSUBQ_M_N_S VHSUBQ_M_N_U]) -(define_int_iterator VHADDQ_M_N [VHADDQ_M_N_S VHADDQ_M_N_U]) -(define_int_iterator VORRQ_M [VORRQ_M_S VORRQ_M_U]) -(define_int_iterator VRMULHQ_M [VRMULHQ_M_U VRMULHQ_M_S]) -(define_int_iterator VQADDQ_M [VQADDQ_M_U VQADDQ_M_S]) -(define_int_iterator VRSHRQ_M_N [VRSHRQ_M_N_S VRSHRQ_M_N_U]) -(define_int_iterator VQSUBQ_M_N [VQSUBQ_M_N_U VQSUBQ_M_N_S]) -(define_int_iterator VADDQ_M [VADDQ_M_U VADDQ_M_S]) -(define_int_iterator VORNQ_M [VORNQ_M_U VORNQ_M_S]) -(define_int_iterator VRHADDQ_M [VRHADDQ_M_U VRHADDQ_M_S]) -(define_int_iterator VQSHLQ_M [VQSHLQ_M_U VQSHLQ_M_S]) -(define_int_iterator VANDQ_M [VANDQ_M_U VANDQ_M_S]) -(define_int_iterator VBICQ_M [VBICQ_M_U VBICQ_M_S]) -(define_int_iterator VSHLQ_M_N [VSHLQ_M_N_S VSHLQ_M_N_U]) -(define_int_iterator VCADDQ_ROT270_M [VCADDQ_ROT270_M_U VCADDQ_ROT270_M_S]) -(define_int_iterator VQRSHLQ_M [VQRSHLQ_M_U VQRSHLQ_M_S]) -(define_int_iterator VQADDQ_M_N [VQADDQ_M_N_U VQADDQ_M_N_S]) -(define_int_iterator VADDQ_M_N [VADDQ_M_N_S VADDQ_M_N_U]) -(define_int_iterator VMAXQ_M [VMAXQ_M_S VMAXQ_M_U]) -(define_int_iterator VQSUBQ_M [VQSUBQ_M_U VQSUBQ_M_S]) -(define_int_iterator VMLASQ_M_N [VMLASQ_M_N_U VMLASQ_M_N_S]) -(define_int_iterator VMLADAVAQ_P [VMLADAVAQ_P_U VMLADAVAQ_P_S]) -(define_int_iterator VBRSRQ_M_N [VBRSRQ_M_N_U VBRSRQ_M_N_S]) -(define_int_iterator VMULQ_M_N [VMULQ_M_N_U VMULQ_M_N_S]) -(define_int_iterator VCADDQ_ROT90_M [VCADDQ_ROT90_M_U VCADDQ_ROT90_M_S]) -(define_int_iterator VMULLTQ_INT_M [VMULLTQ_INT_M_S VMULLTQ_INT_M_U]) -(define_int_iterator VEORQ_M [VEORQ_M_S VEORQ_M_U]) -(define_int_iterator VSHRQ_M_N [VSHRQ_M_N_S VSHRQ_M_N_U]) -(define_int_iterator VSUBQ_M_N [VSUBQ_M_N_S VSUBQ_M_N_U]) -(define_int_iterator VHADDQ_M [VHADDQ_M_S VHADDQ_M_U]) -(define_int_iterator VABDQ_M [VABDQ_M_S VABDQ_M_U]) -(define_int_iterator VMLAQ_M_N [VMLAQ_M_N_S VMLAQ_M_N_U]) -(define_int_iterator VQSHLQ_M_N [VQSHLQ_M_N_S VQSHLQ_M_N_U]) -(define_int_iterator VMLALDAVAQ_P 
[VMLALDAVAQ_P_U VMLALDAVAQ_P_S]) -(define_int_iterator VMLALDAVAXQ_P [VMLALDAVAXQ_P_U VMLALDAVAXQ_P_S]) -(define_int_iterator VQRSHRNBQ_M_N [VQRSHRNBQ_M_N_U VQRSHRNBQ_M_N_S]) -(define_int_iterator VQRSHRNTQ_M_N [VQRSHRNTQ_M_N_S VQRSHRNTQ_M_N_U]) -(define_int_iterator VQSHRNBQ_M_N [VQSHRNBQ_M_N_U VQSHRNBQ_M_N_S]) -(define_int_iterator VQSHRNTQ_M_N [VQSHRNTQ_M_N_S VQSHRNTQ_M_N_U]) -(define_int_iterator VRSHRNBQ_M_N [VRSHRNBQ_M_N_U VRSHRNBQ_M_N_S]) -(define_int_iterator VRSHRNTQ_M_N [VRSHRNTQ_M_N_U VRSHRNTQ_M_N_S]) -(define_int_iterator VSHLLBQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S]) -(define_int_iterator VSHLLTQ_M_N [VSHLLTQ_M_N_U VSHLLTQ_M_N_S]) -(define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S VSHRNBQ_M_N_U]) -(define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U]) -(define_int_iterator VSTRWSBQ [VSTRWQSB_S VSTRWQSB_U]) -(define_int_iterator VSTRBSOQ [VSTRBQSO_S VSTRBQSO_U]) -(define_int_iterator VSTRBQ [VSTRBQ_S VSTRBQ_U]) -(define_int_iterator VLDRBGOQ [VLDRBQGO_S VLDRBQGO_U]) -(define_int_iterator VLDRBQ [VLDRBQ_S VLDRBQ_U]) -(define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U]) -(define_int_iterator VLD1Q [VLD1Q_S VLD1Q_U]) -(define_int_iterator VLDRHGOQ [VLDRHQGO_S VLDRHQGO_U]) -(define_int_iterator VLDRHGSOQ [VLDRHQGSO_S VLDRHQGSO_U]) -(define_int_iterator VLDRHQ [VLDRHQ_S VLDRHQ_U]) -(define_int_iterator VLDRWQ [VLDRWQ_S VLDRWQ_U]) -(define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U]) -(define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U]) -(define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U]) -(define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U]) -(define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U]) -(define_int_iterator VST1Q [VST1Q_S VST1Q_U]) -(define_int_iterator VSTRHSOQ [VSTRHQSO_S VSTRHQSO_U]) -(define_int_iterator VSTRHSSOQ [VSTRHQSSO_S VSTRHQSSO_U]) -(define_int_iterator VSTRHQ [VSTRHQ_S VSTRHQ_U]) -(define_int_iterator VSTRWQ [VSTRWQ_S VSTRWQ_U]) -(define_int_iterator VSTRDSBQ [VSTRDQSB_S VSTRDQSB_U]) -(define_int_iterator VSTRDSOQ 
[VSTRDQSO_S VSTRDQSO_U]) -(define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U]) -(define_int_iterator VSTRWSOQ [VSTRWQSO_S VSTRWQSO_U]) -(define_int_iterator VSTRWSSOQ [VSTRWQSSO_S VSTRWQSSO_U]) -(define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U]) -(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) -(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U]) -(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U]) -(define_int_iterator VADCIQ [VADCIQ_U VADCIQ_S]) -(define_int_iterator VADCIQ_M [VADCIQ_M_U VADCIQ_M_S]) -(define_int_iterator VSBCQ [VSBCQ_U VSBCQ_S]) -(define_int_iterator VSBCQ_M [VSBCQ_M_U VSBCQ_M_S]) -(define_int_iterator VSBCIQ [VSBCIQ_U VSBCIQ_S]) -(define_int_iterator VSBCIQ_M [VSBCIQ_M_U VSBCIQ_M_S]) -(define_int_iterator VADCQ [VADCQ_U VADCQ_S]) -(define_int_iterator VADCQ_M [VADCQ_M_U VADCQ_M_S]) -(define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48]) -(define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48]) -(define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U]) - (define_insn "*mve_mov" [(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Ux,w") (match_operand:MVE_types 1 "general_operand" "w,r,w,Dn,Uxi,r,Dm,w,Ul"))] diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 0a2399d..caee18a 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -519,3 +519,805 @@ UNSPEC_BFMAB UNSPEC_BFMAT ]) + +;; Enumerators for MVE unspecs. 
+(define_c_enum "unspec" [ + VST4Q + VRNDXQ_F + VRNDQ_F + VRNDPQ_F + VRNDNQ_F + VRNDMQ_F + VRNDAQ_F + VREV64Q_F + VNEGQ_F + VDUPQ_N_F + VABSQ_F + VREV32Q_F + VCVTTQ_F32_F16 + VCVTBQ_F32_F16 + VCVTQ_TO_F_S + VQNEGQ_S + VCVTQ_TO_F_U + VREV16Q_S + VREV16Q_U + VADDLVQ_S + VMVNQ_N_S + VMVNQ_N_U + VCVTAQ_S + VCVTAQ_U + VREV64Q_S + VREV64Q_U + VQABSQ_S + VNEGQ_S + VMVNQ_S + VMVNQ_U + VDUPQ_N_U + VDUPQ_N_S + VCLZQ_U + VCLZQ_S + VCLSQ_S + VADDVQ_S + VADDVQ_U + VABSQ_S + VREV32Q_U + VREV32Q_S + VMOVLTQ_U + VMOVLTQ_S + VMOVLBQ_S + VMOVLBQ_U + VCVTQ_FROM_F_S + VCVTQ_FROM_F_U + VCVTPQ_S + VCVTPQ_U + VCVTNQ_S + VCVTNQ_U + VCVTMQ_S + VCVTMQ_U + VADDLVQ_U + VCTP8Q + VCTP16Q + VCTP32Q + VCTP64Q + VPNOT + VCREATEQ_F + VCVTQ_N_TO_F_S + VCVTQ_N_TO_F_U + VBRSRQ_N_F + VSUBQ_N_F + VCREATEQ_U + VCREATEQ_S + VSHRQ_N_S + VSHRQ_N_U + VCVTQ_N_FROM_F_S + VCVTQ_N_FROM_F_U + VADDLVQ_P_S + VADDLVQ_P_U + VCMPNEQ_U + VCMPNEQ_S + VSHLQ_S + VSHLQ_U + VABDQ_S + VADDQ_N_S + VADDVAQ_S + VADDVQ_P_S + VANDQ_S + VBICQ_S + VBRSRQ_N_S + VCADDQ_ROT270_S + VCADDQ_ROT90_S + VCMPEQQ_S + VCMPEQQ_N_S + VCMPNEQ_N_S + VEORQ_S + VHADDQ_S + VHADDQ_N_S + VHSUBQ_S + VHSUBQ_N_S + VMAXQ_S + VMAXVQ_S + VMINQ_S + VMINVQ_S + VMLADAVQ_S + VMULHQ_S + VMULLBQ_INT_S + VMULLTQ_INT_S + VMULQ_S + VMULQ_N_S + VORNQ_S + VORRQ_S + VQADDQ_S + VQADDQ_N_S + VQRSHLQ_S + VQRSHLQ_N_S + VQSHLQ_S + VQSHLQ_N_S + VQSHLQ_R_S + VQSUBQ_S + VQSUBQ_N_S + VRHADDQ_S + VRMULHQ_S + VRSHLQ_S + VRSHLQ_N_S + VRSHRQ_N_S + VSHLQ_N_S + VSHLQ_R_S + VSUBQ_S + VSUBQ_N_S + VABDQ_U + VADDQ_N_U + VADDVAQ_U + VADDVQ_P_U + VANDQ_U + VBICQ_U + VBRSRQ_N_U + VCADDQ_ROT270_U + VCADDQ_ROT90_U + VCMPEQQ_U + VCMPEQQ_N_U + VCMPNEQ_N_U + VEORQ_U + VHADDQ_U + VHADDQ_N_U + VHSUBQ_U + VHSUBQ_N_U + VMAXQ_U + VMAXVQ_U + VMINQ_U + VMINVQ_U + VMLADAVQ_U + VMULHQ_U + VMULLBQ_INT_U + VMULLTQ_INT_U + VMULQ_U + VMULQ_N_U + VORNQ_U + VORRQ_U + VQADDQ_U + VQADDQ_N_U + VQRSHLQ_U + VQRSHLQ_N_U + VQSHLQ_U + VQSHLQ_N_U + VQSHLQ_R_U + VQSUBQ_U + VQSUBQ_N_U + VRHADDQ_U + VRMULHQ_U + VRSHLQ_U + 
VRSHLQ_N_U + VRSHRQ_N_U + VSHLQ_N_U + VSHLQ_R_U + VSUBQ_U + VSUBQ_N_U + VCMPGEQ_N_S + VCMPGEQ_S + VCMPGTQ_N_S + VCMPGTQ_S + VCMPLEQ_N_S + VCMPLEQ_S + VCMPLTQ_N_S + VCMPLTQ_S + VHCADDQ_ROT270_S + VHCADDQ_ROT90_S + VMAXAQ_S + VMAXAVQ_S + VMINAQ_S + VMINAVQ_S + VMLADAVXQ_S + VMLSDAVQ_S + VMLSDAVXQ_S + VQDMULHQ_N_S + VQDMULHQ_S + VQRDMULHQ_N_S + VQRDMULHQ_S + VQSHLUQ_N_S + VCMPCSQ_N_U + VCMPCSQ_U + VCMPHIQ_N_U + VCMPHIQ_U + VABDQ_M_S + VABDQ_M_U + VABDQ_F + VADDQ_N_F + VANDQ_F + VBICQ_F + VCADDQ_ROT270_F + VCADDQ_ROT90_F + VCMPEQQ_F + VCMPEQQ_N_F + VCMPGEQ_F + VCMPGEQ_N_F + VCMPGTQ_F + VCMPGTQ_N_F + VCMPLEQ_F + VCMPLEQ_N_F + VCMPLTQ_F + VCMPLTQ_N_F + VCMPNEQ_F + VCMPNEQ_N_F + VCMULQ_F + VCMULQ_ROT180_F + VCMULQ_ROT270_F + VCMULQ_ROT90_F + VEORQ_F + VMAXNMAQ_F + VMAXNMAVQ_F + VMAXNMQ_F + VMAXNMVQ_F + VMINNMAQ_F + VMINNMAVQ_F + VMINNMQ_F + VMINNMVQ_F + VMULQ_F + VMULQ_N_F + VORNQ_F + VORRQ_F + VSUBQ_F + VADDLVAQ_U + VADDLVAQ_S + VBICQ_N_U + VBICQ_N_S + VCTP8Q_M + VCTP16Q_M + VCTP32Q_M + VCTP64Q_M + VCVTBQ_F16_F32 + VCVTTQ_F16_F32 + VMLALDAVQ_U + VMLALDAVXQ_U + VMLALDAVXQ_S + VMLALDAVQ_S + VMLSLDAVQ_S + VMLSLDAVXQ_S + VMOVNBQ_U + VMOVNBQ_S + VMOVNTQ_U + VMOVNTQ_S + VORRQ_N_S + VORRQ_N_U + VQDMULLBQ_N_S + VQDMULLBQ_S + VQDMULLTQ_N_S + VQDMULLTQ_S + VQMOVNBQ_U + VQMOVNBQ_S + VQMOVUNBQ_S + VQMOVUNTQ_S + VRMLALDAVHXQ_S + VRMLSLDAVHQ_S + VRMLSLDAVHXQ_S + VSHLLBQ_S + VSHLLBQ_U + VSHLLTQ_U + VSHLLTQ_S + VQMOVNTQ_U + VQMOVNTQ_S + VSHLLBQ_N_S + VSHLLBQ_N_U + VSHLLTQ_N_U + VSHLLTQ_N_S + VRMLALDAVHQ_U + VRMLALDAVHQ_S + VMULLTQ_POLY_P + VMULLBQ_POLY_P + VBICQ_M_N_S + VBICQ_M_N_U + VCMPEQQ_M_F + VCVTAQ_M_S + VCVTAQ_M_U + VCVTQ_M_TO_F_S + VCVTQ_M_TO_F_U + VQRSHRNBQ_N_U + VQRSHRNBQ_N_S + VQRSHRUNBQ_N_S + VRMLALDAVHAQ_S + VABAVQ_S + VABAVQ_U + VSHLCQ_S + VSHLCQ_U + VRMLALDAVHAQ_U + VABSQ_M_S + VADDVAQ_P_S + VADDVAQ_P_U + VCLSQ_M_S + VCLZQ_M_S + VCLZQ_M_U + VCMPCSQ_M_N_U + VCMPCSQ_M_U + VCMPEQQ_M_N_S + VCMPEQQ_M_N_U + VCMPEQQ_M_S + VCMPEQQ_M_U + VCMPGEQ_M_N_S + VCMPGEQ_M_S + VCMPGTQ_M_N_S 
+ VCMPGTQ_M_S + VCMPHIQ_M_N_U + VCMPHIQ_M_U + VCMPLEQ_M_N_S + VCMPLEQ_M_S + VCMPLTQ_M_N_S + VCMPLTQ_M_S + VCMPNEQ_M_N_S + VCMPNEQ_M_N_U + VCMPNEQ_M_S + VCMPNEQ_M_U + VDUPQ_M_N_S + VDUPQ_M_N_U + VDWDUPQ_N_U + VDWDUPQ_WB_U + VIWDUPQ_N_U + VIWDUPQ_WB_U + VMAXAQ_M_S + VMAXAVQ_P_S + VMAXVQ_P_S + VMAXVQ_P_U + VMINAQ_M_S + VMINAVQ_P_S + VMINVQ_P_S + VMINVQ_P_U + VMLADAVAQ_S + VMLADAVAQ_U + VMLADAVQ_P_S + VMLADAVQ_P_U + VMLADAVXQ_P_S + VMLAQ_N_S + VMLAQ_N_U + VMLASQ_N_S + VMLASQ_N_U + VMLSDAVQ_P_S + VMLSDAVXQ_P_S + VMVNQ_M_S + VMVNQ_M_U + VNEGQ_M_S + VPSELQ_S + VPSELQ_U + VQABSQ_M_S + VQDMLAHQ_N_S + VQDMLAHQ_N_U + VQNEGQ_M_S + VQRDMLADHQ_S + VQRDMLADHXQ_S + VQRDMLAHQ_N_S + VQRDMLAHQ_N_U + VQRDMLASHQ_N_S + VQRDMLASHQ_N_U + VQRDMLSDHQ_S + VQRDMLSDHXQ_S + VQRSHLQ_M_N_S + VQRSHLQ_M_N_U + VQSHLQ_M_R_S + VQSHLQ_M_R_U + VREV64Q_M_S + VREV64Q_M_U + VRSHLQ_M_N_S + VRSHLQ_M_N_U + VSHLQ_M_R_S + VSHLQ_M_R_U + VSLIQ_N_S + VSLIQ_N_U + VSRIQ_N_S + VSRIQ_N_U + VQDMLSDHXQ_S + VQDMLSDHQ_S + VQDMLADHXQ_S + VQDMLADHQ_S + VMLSDAVAXQ_S + VMLSDAVAQ_S + VMLADAVAXQ_S + VCMPGEQ_M_F + VCMPGTQ_M_N_F + VMLSLDAVQ_P_S + VRMLALDAVHAXQ_S + VMLSLDAVXQ_P_S + VFMAQ_F + VMLSLDAVAQ_S + VQSHRUNBQ_N_S + VQRSHRUNTQ_N_S + VCMLAQ_F + VMINNMAQ_M_F + VFMASQ_N_F + VDUPQ_M_N_F + VCMPGTQ_M_F + VCMPLTQ_M_F + VRMLSLDAVHQ_P_S + VQSHRUNTQ_N_S + VABSQ_M_F + VMAXNMAVQ_P_F + VFMAQ_N_F + VRMLSLDAVHXQ_P_S + VREV32Q_M_F + VRMLSLDAVHAQ_S + VRMLSLDAVHAXQ_S + VCMPLTQ_M_N_F + VCMPNEQ_M_F + VRNDAQ_M_F + VRNDPQ_M_F + VADDLVAQ_P_S + VQMOVUNBQ_M_S + VCMPLEQ_M_F + VCMLAQ_ROT180_F + VMLSLDAVAXQ_S + VRNDXQ_M_F + VFMSQ_F + VMINNMVQ_P_F + VMAXNMVQ_P_F + VPSELQ_F + VCMLAQ_ROT90_F + VQMOVUNTQ_M_S + VREV64Q_M_F + VNEGQ_M_F + VRNDMQ_M_F + VCMPLEQ_M_N_F + VCMPGEQ_M_N_F + VRNDNQ_M_F + VMINNMAVQ_P_F + VCMPNEQ_M_N_F + VRMLALDAVHQ_P_S + VRMLALDAVHXQ_P_S + VCMPEQQ_M_N_F + VCMLAQ_ROT270_F + VMAXNMAQ_M_F + VRNDQ_M_F + VMLALDAVQ_P_U + VMLALDAVQ_P_S + VQMOVNBQ_M_S + VQMOVNBQ_M_U + VMOVLTQ_M_U + VMOVLTQ_M_S + VMOVNBQ_M_U + VMOVNBQ_M_S + VRSHRNTQ_N_U + 
VRSHRNTQ_N_S + VORRQ_M_N_S + VORRQ_M_N_U + VREV32Q_M_S + VREV32Q_M_U + VQRSHRNTQ_N_U + VQRSHRNTQ_N_S + VMOVNTQ_M_U + VMOVNTQ_M_S + VMOVLBQ_M_U + VMOVLBQ_M_S + VMLALDAVAQ_S + VMLALDAVAQ_U + VQSHRNBQ_N_U + VQSHRNBQ_N_S + VSHRNBQ_N_U + VSHRNBQ_N_S + VRSHRNBQ_N_S + VRSHRNBQ_N_U + VMLALDAVXQ_P_U + VMLALDAVXQ_P_S + VQMOVNTQ_M_U + VQMOVNTQ_M_S + VMVNQ_M_N_U + VMVNQ_M_N_S + VQSHRNTQ_N_U + VQSHRNTQ_N_S + VMLALDAVAXQ_S + VMLALDAVAXQ_U + VSHRNTQ_N_S + VSHRNTQ_N_U + VCVTBQ_M_F16_F32 + VCVTBQ_M_F32_F16 + VCVTTQ_M_F16_F32 + VCVTTQ_M_F32_F16 + VCVTMQ_M_S + VCVTMQ_M_U + VCVTNQ_M_S + VCVTPQ_M_S + VCVTPQ_M_U + VCVTQ_M_N_FROM_F_S + VCVTNQ_M_U + VREV16Q_M_S + VREV16Q_M_U + VREV32Q_M + VCVTQ_M_FROM_F_U + VCVTQ_M_FROM_F_S + VRMLALDAVHQ_P_U + VADDLVAQ_P_U + VCVTQ_M_N_FROM_F_U + VQSHLUQ_M_N_S + VABAVQ_P_S + VABAVQ_P_U + VSHLQ_M_S + VSHLQ_M_U + VSRIQ_M_N_S + VSRIQ_M_N_U + VSUBQ_M_U + VSUBQ_M_S + VCVTQ_M_N_TO_F_U + VCVTQ_M_N_TO_F_S + VQADDQ_M_U + VQADDQ_M_S + VRSHRQ_M_N_S + VSUBQ_M_N_S + VSUBQ_M_N_U + VBRSRQ_M_N_S + VSUBQ_M_N_F + VBICQ_M_F + VHADDQ_M_U + VBICQ_M_U + VBICQ_M_S + VMULQ_M_N_U + VHADDQ_M_S + VORNQ_M_F + VMLAQ_M_N_S + VQSUBQ_M_U + VQSUBQ_M_S + VMLAQ_M_N_U + VQSUBQ_M_N_U + VQSUBQ_M_N_S + VMULLTQ_INT_M_S + VMULLTQ_INT_M_U + VMULQ_M_N_S + VMULQ_M_N_F + VMLASQ_M_N_U + VMLASQ_M_N_S + VMAXQ_M_U + VQRDMLAHQ_M_N_U + VCADDQ_ROT270_M_F + VCADDQ_ROT270_M_U + VCADDQ_ROT270_M_S + VQRSHLQ_M_S + VMULQ_M_F + VRHADDQ_M_U + VSHRQ_M_N_U + VRHADDQ_M_S + VMULQ_M_S + VMULQ_M_U + VQRDMLASHQ_M_N_S + VRSHLQ_M_S + VRSHLQ_M_U + VRSHRQ_M_N_U + VADDQ_M_N_F + VADDQ_M_N_S + VADDQ_M_N_U + VQRDMLASHQ_M_N_U + VMAXQ_M_S + VQRDMLAHQ_M_N_S + VORRQ_M_S + VORRQ_M_U + VORRQ_M_F + VQRSHLQ_M_U + VRMULHQ_M_U + VRMULHQ_M_S + VMINQ_M_S + VMINQ_M_U + VANDQ_M_F + VANDQ_M_U + VANDQ_M_S + VHSUBQ_M_N_S + VHSUBQ_M_N_U + VMULHQ_M_S + VMULHQ_M_U + VMULLBQ_INT_M_U + VMULLBQ_INT_M_S + VCADDQ_ROT90_M_F + VSHRQ_M_N_S + VADDQ_M_U + VSLIQ_M_N_U + VQADDQ_M_N_S + VBRSRQ_M_N_F + VABDQ_M_F + VBRSRQ_M_N_U + VEORQ_M_F + VSHLQ_M_N_S + 
VQDMLAHQ_M_N_U + VQDMLAHQ_M_N_S + VSHLQ_M_N_U + VMLADAVAQ_P_U + VMLADAVAQ_P_S + VSLIQ_M_N_S + VQSHLQ_M_U + VQSHLQ_M_S + VCADDQ_ROT90_M_U + VCADDQ_ROT90_M_S + VORNQ_M_U + VORNQ_M_S + VQSHLQ_M_N_S + VQSHLQ_M_N_U + VADDQ_M_S + VHADDQ_M_N_S + VADDQ_M_F + VQADDQ_M_N_U + VEORQ_M_S + VEORQ_M_U + VHSUBQ_M_S + VHSUBQ_M_U + VHADDQ_M_N_U + VHCADDQ_ROT90_M_S + VQRDMLSDHQ_M_S + VQRDMLSDHXQ_M_S + VQRDMLADHXQ_M_S + VQDMULHQ_M_S + VMLADAVAXQ_P_S + VQDMLADHXQ_M_S + VQRDMULHQ_M_S + VMLSDAVAXQ_P_S + VQDMULHQ_M_N_S + VHCADDQ_ROT270_M_S + VQDMLSDHQ_M_S + VQDMLSDHXQ_M_S + VMLSDAVAQ_P_S + VQRDMLADHQ_M_S + VQDMLADHQ_M_S + VMLALDAVAQ_P_U + VMLALDAVAQ_P_S + VMLALDAVAXQ_P_U + VQRSHRNBQ_M_N_U + VQRSHRNBQ_M_N_S + VQRSHRNTQ_M_N_S + VQSHRNBQ_M_N_U + VQSHRNBQ_M_N_S + VQSHRNTQ_M_N_S + VRSHRNBQ_M_N_U + VRSHRNBQ_M_N_S + VRSHRNTQ_M_N_U + VSHLLBQ_M_N_U + VSHLLBQ_M_N_S + VSHLLTQ_M_N_U + VSHLLTQ_M_N_S + VSHRNBQ_M_N_S + VSHRNBQ_M_N_U + VSHRNTQ_M_N_S + VSHRNTQ_M_N_U + VMLALDAVAXQ_P_S + VQRSHRNTQ_M_N_U + VQSHRNTQ_M_N_U + VRSHRNTQ_M_N_S + VQRDMULHQ_M_N_S + VRMLALDAVHAQ_P_S + VMLSLDAVAQ_P_S + VMLSLDAVAXQ_P_S + VMULLBQ_POLY_M_P + VMULLTQ_POLY_M_P + VQDMULLBQ_M_N_S + VQDMULLBQ_M_S + VQDMULLTQ_M_N_S + VQDMULLTQ_M_S + VQRSHRUNBQ_M_N_S + VQSHRUNBQ_M_N_S + VQSHRUNTQ_M_N_S + VRMLALDAVHAQ_P_U + VRMLALDAVHAXQ_P_S + VRMLSLDAVHAQ_P_S + VRMLSLDAVHAXQ_P_S + VQRSHRUNTQ_M_N_S + VCMLAQ_M_F + VCMLAQ_ROT180_M_F + VCMLAQ_ROT270_M_F + VCMLAQ_ROT90_M_F + VCMULQ_M_F + VCMULQ_ROT180_M_F + VCMULQ_ROT270_M_F + VCMULQ_ROT90_M_F + VFMAQ_M_F + VFMAQ_M_N_F + VFMASQ_M_N_F + VFMSQ_M_F + VMAXNMQ_M_F + VMINNMQ_M_F + VSUBQ_M_F + VSTRWQSB_S + VSTRWQSB_U + VSTRBQSO_S + VSTRBQSO_U + VSTRBQ_S + VSTRBQ_U + VLDRBQGO_S + VLDRBQGO_U + VLDRBQ_S + VLDRBQ_U + VLDRWQGB_S + VLDRWQGB_U + VLD1Q_F + VLD1Q_S + VLD1Q_U + VLDRHQ_F + VLDRHQGO_S + VLDRHQGO_U + VLDRHQGSO_S + VLDRHQGSO_U + VLDRHQ_S + VLDRHQ_U + VLDRWQ_F + VLDRWQ_S + VLDRWQ_U + VLDRDQGB_S + VLDRDQGB_U + VLDRDQGO_S + VLDRDQGO_U + VLDRDQGSO_S + VLDRDQGSO_U + VLDRHQGO_F + VLDRHQGSO_F + VLDRWQGB_F + 
VLDRWQGO_F + VLDRWQGO_S + VLDRWQGO_U + VLDRWQGSO_F + VLDRWQGSO_S + VLDRWQGSO_U + VSTRHQ_F + VST1Q_S + VST1Q_U + VSTRHQSO_S + VSTRHQ_U + VSTRWQ_S + VSTRWQ_U + VSTRWQ_F + VST1Q_F + VSTRDQSB_S + VSTRDQSB_U + VSTRDQSO_S + VSTRDQSO_U + VSTRDQSSO_S + VSTRDQSSO_U + VSTRWQSO_S + VSTRWQSO_U + VSTRWQSSO_S + VSTRWQSSO_U + VSTRHQSO_F + VSTRHQSSO_F + VSTRWQSB_F + VSTRWQSO_F + VSTRWQSSO_F + VDDUPQ + VDDUPQ_M + VDWDUPQ + VDWDUPQ_M + VIDUPQ + VIDUPQ_M + VIWDUPQ + VIWDUPQ_M + VSTRWQSBWB_S + VSTRWQSBWB_U + VLDRWQGBWB_S + VLDRWQGBWB_U + VSTRWQSBWB_F + VLDRWQGBWB_F + VSTRDQSBWB_S + VSTRDQSBWB_U + VLDRDQGBWB_S + VLDRDQGBWB_U + VADCQ_U + VADCQ_M_U + VADCQ_S + VADCQ_M_S + VSBCIQ_U + VSBCIQ_S + VSBCIQ_M_U + VSBCIQ_M_S + VSBCQ_U + VSBCQ_S + VSBCQ_M_U + VSBCQ_M_S + VADCIQ_U + VADCIQ_M_U + VADCIQ_S + VADCIQ_M_S + VLD2Q + VLD4Q + VST2Q + VSHLCQ_M_U + VSHLCQ_M_S + VSTRHQSO_U + VSTRHQSSO_S + VSTRHQSSO_U + VSTRHQ_S + SRSHRL + SRSHR + URSHR + URSHRL + SQRSHR + UQRSHL + UQRSHLL_64 + UQRSHLL_48 + SQRSHRL_64 + SQRSHRL_48 + VSHLCQ_M_ +]) -- cgit v1.1 From 3f2e15c2e66af9cca1dfe24ad7e9692f511ebd06 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Tue, 6 Oct 2020 13:07:25 +0200 Subject: [openacc] Fix acc declare for VLAs Consider test-case test.c, with VLA A: ... int main (void) { int N = 1000; int A[N]; #pragma acc declare copy(A) return 0; } ... compiled using: ... $ gcc test.c -fopenacc -S -fdump-tree-all ... At original, we have: ... #pragma acc declare map(tofrom:A); ... but at gimple, we have a map (to:A.1), but not a map (from:A.1): ... int[0:D.2074] * A.1; { int A[0:D.2074] [value-expr: *A.1]; saved_stack.2 = __builtin_stack_save (); try { A.1 = __builtin_alloca_with_align (D.2078, 32); #pragma omp target oacc_declare map(to:(*A.1) [len: D.2076]) } finally { __builtin_stack_restore (saved_stack.2); } } ... This is caused by the following incompatibility. When storing the desired from clause in oacc_declare_returns, we use 'A.1' as the key: ... 
10898 oacc_declare_returns->put (decl, c); (gdb) call debug_generic_expr (decl) A.1 (gdb) call debug_generic_expr (c) map(from:(*A.1)) ... but when looking it up, we use 'A' as the key: ... (gdb) 1471 tree *c = oacc_declare_returns->get (t); (gdb) call debug_generic_expr (t) A ... Fix this by extracting the 'A.1' lookup key from 'A' using the decl-expr. In addition, unshare the looked up value, to avoid running into an "incorrect sharing of tree nodes" error. Using these two fixes, we get our desired: ... finally { + #pragma omp target oacc_declare map(from:(*A.1)) __builtin_stack_restore (saved_stack.2); } ... Build on x86_64-linux with nvptx accelerator, tested libgomp. gcc/ChangeLog: 2020-10-06 Tom de Vries PR middle-end/90861 * gimplify.c (gimplify_bind_expr): Handle lookup in oacc_declare_returns using key with decl-expr. libgomp/ChangeLog: 2020-10-06 Tom de Vries PR middle-end/90861 * testsuite/libgomp.oacc-c-c++-common/declare-vla.c: Remove xfail. --- gcc/gimplify.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 2dea03cc..fa89e79 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -1468,15 +1468,22 @@ gimplify_bind_expr (tree *expr_p, gimple_seq *pre_p) if (flag_openacc && oacc_declare_returns != NULL) { - tree *c = oacc_declare_returns->get (t); + tree key = t; + if (DECL_HAS_VALUE_EXPR_P (key)) + { + key = DECL_VALUE_EXPR (key); + if (TREE_CODE (key) == INDIRECT_REF) + key = TREE_OPERAND (key, 0); + } + tree *c = oacc_declare_returns->get (key); if (c != NULL) { if (ret_clauses) OMP_CLAUSE_CHAIN (*c) = ret_clauses; - ret_clauses = *c; + ret_clauses = unshare_expr (*c); - oacc_declare_returns->remove (t); + oacc_declare_returns->remove (key); if (oacc_declare_returns->is_empty ()) { -- cgit v1.1 From 29c650cd899496c4f9bc069d03d0d7ecfb632176 Mon Sep 17 00:00:00 2001 From: Dennis Zhang Date: Tue, 6 Oct 2020 16:53:46 +0100 Subject: arm: Enable MVE SIMD modes for
vectorization This patch enables SIMD modes for MVE auto-vectorization. In this patch, the integer and float MVE SIMD modes are returned by arm_preferred_simd_mode (TARGET_VECTORIZE_PREFERRED_SIMD_MODE hook) when MVE or MVE_FLOAT is enabled. Then the expanders for auto-vectorization can be used for generating MVE SIMD code. This patch also fixes bugs in MVE vreinterpretq_*.c tests which are revealed by the enabled MVE SIMD modes. The tests are for checking the MVE reinterpret intrinsics. There are two functions in each of the tests. The two functions contain the pattern of identical code so that they are folded in icf pass. Because of icf, the instruction count only checks one function which is 8. However when the SIMD modes are enabled, the estimation of the code size becomes smaller so that inlining is applied after icf, then the instruction count becomes 16 which causes failure of the tests. Because the icf is not the expected pattern to be tested but causes above issues, -fno-ipa-icf is applied to the tests to avoid unstable instruction count. gcc/ChangeLog: 2020-10-05 Dennis Zhang * config/arm/arm.c (arm_preferred_simd_mode): Enable MVE SIMD modes. gcc/testsuite/ChangeLog: 2020-10-05 Dennis Zhang * gcc.target/arm/mve/intrinsics/vreinterpretq_f16.c: Use additional option -fno-ipa-icf and change the instruction count from 8 to 16. * gcc.target/arm/mve/intrinsics/vreinterpretq_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vreinterpretq_s16.c: Likewise. * gcc.target/arm/mve/intrinsics/vreinterpretq_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vreinterpretq_s64.c: Likewise. * gcc.target/arm/mve/intrinsics/vreinterpretq_s8.c: Likewise. * gcc.target/arm/mve/intrinsics/vreinterpretq_u16.c: Likewise. * gcc.target/arm/mve/intrinsics/vreinterpretq_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vreinterpretq_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vreinterpretq_u8.c: Likewise. 
--- gcc/ChangeLog | 4 ++++ gcc/config/arm/arm.c | 24 ++++++++++++++++++++++ gcc/testsuite/ChangeLog | 14 +++++++++++++ .../arm/mve/intrinsics/vreinterpretq_f16.c | 4 ++-- .../arm/mve/intrinsics/vreinterpretq_f32.c | 4 ++-- .../arm/mve/intrinsics/vreinterpretq_s16.c | 4 ++-- .../arm/mve/intrinsics/vreinterpretq_s32.c | 4 ++-- .../arm/mve/intrinsics/vreinterpretq_s64.c | 4 ++-- .../arm/mve/intrinsics/vreinterpretq_s8.c | 4 ++-- .../arm/mve/intrinsics/vreinterpretq_u16.c | 4 ++-- .../arm/mve/intrinsics/vreinterpretq_u32.c | 4 ++-- .../arm/mve/intrinsics/vreinterpretq_u64.c | 4 ++-- .../arm/mve/intrinsics/vreinterpretq_u8.c | 4 ++-- 13 files changed, 62 insertions(+), 20 deletions(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index cd3901b..743e497 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,7 @@ +2020-10-05 Dennis Zhang + + * config/arm/arm.c (arm_preferred_simd_mode): Enable MVE SIMD modes. + 2020-10-05 Aldy Hernandez * value-range.cc (irange::legacy_intersect): Only handle diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index bd7be8f..5fdc143 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -28964,6 +28964,30 @@ arm_preferred_simd_mode (scalar_mode mode) default:; } + if (TARGET_HAVE_MVE) + switch (mode) + { + case QImode: + return V16QImode; + case HImode: + return V8HImode; + case SImode: + return V4SImode; + + default:; + } + + if (TARGET_HAVE_MVE_FLOAT) + switch (mode) + { + case HFmode: + return V8HFmode; + case SFmode: + return V4SFmode; + + default:; + } + return word_mode; } diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5b92a02..703cc68 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,17 @@ +2020-10-05 Dennis Zhang + + * gcc.target/arm/mve/intrinsics/vreinterpretq_f16.c: Use additional + option -fno-ipa-icf and change the instruction count from 8 to 16. + * gcc.target/arm/mve/intrinsics/vreinterpretq_f32.c: Likewise. 
+ * gcc.target/arm/mve/intrinsics/vreinterpretq_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_s64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_u64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_u8.c: Likewise. + 2020-10-05 Nathan Sidwell * c-c++-common/spellcheck-reserved.c: Restore diagnostic. diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_f16.c index f59f697..2398d89 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_f16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_f16.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ /* { dg-add-options arm_v8_1m_mve_fp } */ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -fno-ipa-icf" } */ #include "arm_mve.h" int8x16_t value1; @@ -41,4 +41,4 @@ foo1 () return vaddq_f16 (r7, vreinterpretq_f16 (value9)); } -/* { dg-final { scan-assembler-times "vadd.f16" 8 } } */ +/* { dg-final { scan-assembler-times "vadd.f16" 16 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_f32.c index dac47c7..5a58dc6 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_f32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_f32.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ /* { dg-add-options arm_v8_1m_mve_fp } */ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -fno-ipa-icf" } */ #include "arm_mve.h" int16x8_t value1; @@ -41,4 +41,4 @@ foo1 () return vaddq_f32 (r7, vreinterpretq_f32 (value9)); } 
-/* { dg-final { scan-assembler-times "vadd.f32" 8 } } */ +/* { dg-final { scan-assembler-times "vadd.f32" 16 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s16.c index edc2f2f..9ab05e9 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s16.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ /* { dg-add-options arm_v8_1m_mve_fp } */ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -fno-ipa-icf" } */ #include "arm_mve.h" int8x16_t value1; @@ -41,4 +41,4 @@ foo1 () return vaddq_s16 (r7, vreinterpretq_s16 (value9)); } -/* { dg-final { scan-assembler-times "vadd.i16" 8 } } */ +/* { dg-final { scan-assembler-times "vadd.i16" 16 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s32.c index 880de06..fbfff1f 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s32.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ /* { dg-add-options arm_v8_1m_mve_fp } */ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -fno-ipa-icf" } */ #include "arm_mve.h" int16x8_t value1; @@ -41,4 +41,4 @@ foo1 () return vaddq_s32 (r7, vreinterpretq_s32 (value9)); } -/* { dg-final { scan-assembler-times "vadd.i32" 8 } } */ +/* { dg-final { scan-assembler-times "vadd.i32" 16 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s64.c index b0e8154..beb6b92 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s64.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target 
arm_v8_1m_mve_fp_ok } */ /* { dg-add-options arm_v8_1m_mve_fp } */ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -fno-ipa-icf" } */ #include "arm_mve.h" int16x8_t value1; @@ -42,4 +42,4 @@ foo1 (mve_pred16_t __p) return vpselq_s64 (r7, vreinterpretq_s64 (value9), __p); } -/* { dg-final { scan-assembler-times "vpsel" 8 } } */ +/* { dg-final { scan-assembler-times "vpsel" 16 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s8.c index a5ceebb..727d89b 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_s8.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ /* { dg-add-options arm_v8_1m_mve_fp } */ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -fno-ipa-icf" } */ #include "arm_mve.h" int16x8_t value1; @@ -41,4 +41,4 @@ foo1 () return vaddq_s8 (r7, vreinterpretq_s8 (value9)); } -/* { dg-final { scan-assembler-times "vadd.i8" 8 } } */ +/* { dg-final { scan-assembler-times "vadd.i8" 16 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u16.c index cd31c23..600f6d7 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u16.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u16.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ /* { dg-add-options arm_v8_1m_mve_fp } */ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -fno-ipa-icf" } */ #include "arm_mve.h" int8x16_t value1; @@ -41,4 +41,4 @@ foo1 () return vaddq_u16 (r7, vreinterpretq_u16 (value9)); } -/* { dg-final { scan-assembler-times "vadd.i16" 8 } } */ +/* { dg-final { scan-assembler-times "vadd.i16" 16 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u32.c 
b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u32.c index faa66c9..d536ae8 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u32.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u32.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ /* { dg-add-options arm_v8_1m_mve_fp } */ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -fno-ipa-icf" } */ #include "arm_mve.h" int16x8_t value1; @@ -41,4 +41,4 @@ foo1 () return vaddq_u32 (r7, vreinterpretq_u32 (value9)); } -/* { dg-final { scan-assembler-times "vadd.i32" 8 } } */ +/* { dg-final { scan-assembler-times "vadd.i32" 16 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u64.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u64.c index 853b28a..abc4361 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u64.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u64.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ /* { dg-add-options arm_v8_1m_mve_fp } */ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -fno-ipa-icf" } */ #include "arm_mve.h" int16x8_t value1; @@ -42,4 +42,4 @@ foo1 (mve_pred16_t __p) return vpselq_u64 (r7, vreinterpretq_u64 (value9), __p); } -/* { dg-final { scan-assembler-times "vpsel" 8 } } */ +/* { dg-final { scan-assembler-times "vpsel" 16 } } */ diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u8.c index bdf8cd5..c138e5b 100644 --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u8.c +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vreinterpretq_u8.c @@ -1,6 +1,6 @@ /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ /* { dg-add-options arm_v8_1m_mve_fp } */ -/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-O2 -fno-ipa-icf" } */ #include "arm_mve.h" int16x8_t 
value1; @@ -41,4 +41,4 @@ foo1 () return vaddq_u8 (r7, vreinterpretq_u8 (value9)); } -/* { dg-final { scan-assembler-times "vadd.i8" 8 } } */ +/* { dg-final { scan-assembler-times "vadd.i8" 16 } } */ -- cgit v1.1 From 90e88fd376bb9ad6223a1f5ccd803d1bd9539b05 Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Tue, 6 Oct 2020 12:12:53 -0400 Subject: Ranger classes. Add the 8 ranger files and the Makefile changes to build it. 2020-10-06 Andrew MacLeod * Makefile.in (OBJS): Add gimple-range*.o. * gimple-range.h: New file. * gimple-range.cc: New file. * gimple-range-cache.h: New file. * gimple-range-cache.cc: New file. * gimple-range-edge.h: New file. * gimple-range-edge.cc: New file. * gimple-range-gori.h: New file. * gimple-range-gori.cc: New file. --- gcc/Makefile.in | 4 + gcc/gimple-range-cache.cc | 877 ++++++++++++++++++++++++++++++ gcc/gimple-range-cache.h | 120 ++++ gcc/gimple-range-edge.cc | 197 +++++++ gcc/gimple-range-edge.h | 55 ++ gcc/gimple-range-gori.cc | 1321 +++++++++++++++++++++++++++++++++++++++++++++ gcc/gimple-range-gori.h | 138 +++++ gcc/gimple-range.cc | 1284 +++++++++++++++++++++++++++++++++++++++++++ gcc/gimple-range.h | 170 ++++++ 9 files changed, 4166 insertions(+) create mode 100644 gcc/gimple-range-cache.cc create mode 100644 gcc/gimple-range-cache.h create mode 100644 gcc/gimple-range-edge.cc create mode 100644 gcc/gimple-range-edge.h create mode 100644 gcc/gimple-range-gori.cc create mode 100644 gcc/gimple-range-gori.h create mode 100644 gcc/gimple-range.cc create mode 100644 gcc/gimple-range.h (limited to 'gcc') diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 50d6c83..5a8fb0d 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1369,6 +1369,10 @@ OBJS = \ gimple-loop-versioning.o \ gimple-low.o \ gimple-pretty-print.o \ + gimple-range.o \ + gimple-range-cache.o \ + gimple-range-edge.o \ + gimple-range-gori.o \ gimple-ssa-backprop.o \ gimple-ssa-evrp.o \ gimple-ssa-evrp-analyze.o \ diff --git a/gcc/gimple-range-cache.cc 
b/gcc/gimple-range-cache.cc new file mode 100644 index 0000000..13b9933 --- /dev/null +++ b/gcc/gimple-range-cache.cc @@ -0,0 +1,877 @@ +/* Gimple ranger SSA cache implementation. + Copyright (C) 2017-2020 Free Software Foundation, Inc. + Contributed by Andrew MacLeod . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "insn-codes.h" +#include "tree.h" +#include "gimple.h" +#include "ssa.h" +#include "gimple-pretty-print.h" +#include "gimple-range.h" + +// During constructor, allocate the vector of ssa_names. + +non_null_ref::non_null_ref () +{ + m_nn.create (0); + m_nn.safe_grow_cleared (num_ssa_names); + bitmap_obstack_initialize (&m_bitmaps); +} + +// Free any bitmaps which were allocated, as well as the vector itself. + +non_null_ref::~non_null_ref () +{ + bitmap_obstack_release (&m_bitmaps); + m_nn.release (); +} + +// Return true if NAME has a non-null dereference in block bb. If this is the +// first query for NAME, calculate the summary first. + +bool +non_null_ref::non_null_deref_p (tree name, basic_block bb) +{ + if (!POINTER_TYPE_P (TREE_TYPE (name))) + return false; + + unsigned v = SSA_NAME_VERSION (name); + if (!m_nn[v]) + process_name (name); + + return bitmap_bit_p (m_nn[v], bb->index); +} + +// Allocate and populate the bitmap for NAME. 
An ON bit for a block +// index indicates there is a non-null reference in that block. In +// order to populate the bitmap, a quick run of all the immediate uses +// are made and the statement checked to see if a non-null dereference +// is made on that statement. + +void +non_null_ref::process_name (tree name) +{ + unsigned v = SSA_NAME_VERSION (name); + use_operand_p use_p; + imm_use_iterator iter; + bitmap b; + + // Only tracked for pointers. + if (!POINTER_TYPE_P (TREE_TYPE (name))) + return; + + // Already processed if a bitmap has been allocated. + if (m_nn[v]) + return; + + b = BITMAP_ALLOC (&m_bitmaps); + + // Loop over each immediate use and see if it implies a non-null value. + FOR_EACH_IMM_USE_FAST (use_p, iter, name) + { + gimple *s = USE_STMT (use_p); + unsigned index = gimple_bb (s)->index; + tree value; + enum tree_code comp_code; + + // If bit is already set for this block, dont bother looking again. + if (bitmap_bit_p (b, index)) + continue; + + // If we can infer a != 0 range, then set the bit for this BB + if (infer_value_range (s, name, &comp_code, &value)) + { + if (comp_code == NE_EXPR && integer_zerop (value)) + bitmap_set_bit (b, index); + } + } + + m_nn[v] = b; +} + +// ------------------------------------------------------------------------- + +// This class implements a cache of ranges indexed by basic block. It +// represents all that is known about an SSA_NAME on entry to each +// block. It caches a range-for-type varying range so it doesn't need +// to be reformed all the time. If a range is ever always associated +// with a type, we can use that instead. Whenever varying is being +// set for a block, the cache simply points to this cached one rather +// than create a new one each time. 
+ +class ssa_block_ranges +{ +public: + ssa_block_ranges (tree t, irange_allocator *allocator); + ~ssa_block_ranges (); + + void set_bb_range (const basic_block bb, const irange &r); + void set_bb_varying (const basic_block bb); + bool get_bb_range (irange &r, const basic_block bb); + bool bb_range_p (const basic_block bb); + + void dump(FILE *f); +private: + vec m_tab; + irange *m_type_range; + tree m_type; + irange_allocator *m_irange_allocator; +}; + + +// Initialize a block cache for an ssa_name of type T. + +ssa_block_ranges::ssa_block_ranges (tree t, irange_allocator *allocator) +{ + gcc_checking_assert (TYPE_P (t)); + m_type = t; + m_irange_allocator = allocator; + + m_tab.create (0); + m_tab.safe_grow_cleared (last_basic_block_for_fn (cfun)); + + // Create the cached type range. + m_type_range = m_irange_allocator->allocate (2); + m_type_range->set_varying (t); + + m_tab[ENTRY_BLOCK_PTR_FOR_FN (cfun)->index] = m_type_range; +} + +// Destruct block range. + +ssa_block_ranges::~ssa_block_ranges () +{ + m_tab.release (); +} + +// Set the range for block BB to be R. + +void +ssa_block_ranges::set_bb_range (const basic_block bb, const irange &r) +{ + irange *m = m_irange_allocator->allocate (r); + m_tab[bb->index] = m; +} + +// Set the range for block BB to the range for the type. + +void +ssa_block_ranges::set_bb_varying (const basic_block bb) +{ + m_tab[bb->index] = m_type_range; +} + +// Return the range associated with block BB in R. Return false if +// there is no range. + +bool +ssa_block_ranges::get_bb_range (irange &r, const basic_block bb) +{ + irange *m = m_tab[bb->index]; + if (m) + { + r = *m; + return true; + } + return false; +} + +// Return true if a range is present. + +bool +ssa_block_ranges::bb_range_p (const basic_block bb) +{ + return m_tab[bb->index] != NULL; +} + + +// Print the list of known ranges for file F in a nice format. 
+ +void +ssa_block_ranges::dump (FILE *f) +{ + basic_block bb; + int_range_max r; + + FOR_EACH_BB_FN (bb, cfun) + if (get_bb_range (r, bb)) + { + fprintf (f, "BB%d -> ", bb->index); + r.dump (f); + fprintf (f, "\n"); + } +} + +// ------------------------------------------------------------------------- + +// Initialize the block cache. + +block_range_cache::block_range_cache () +{ + m_ssa_ranges.create (0); + m_ssa_ranges.safe_grow_cleared (num_ssa_names); + m_irange_allocator = new irange_allocator; +} + +// Remove any m_block_caches which have been created. + +block_range_cache::~block_range_cache () +{ + unsigned x; + for (x = 0; x < m_ssa_ranges.length (); ++x) + { + if (m_ssa_ranges[x]) + delete m_ssa_ranges[x]; + } + delete m_irange_allocator; + // Release the vector itself. + m_ssa_ranges.release (); +} + +// Return a reference to the m_block_cache for NAME. If it has not been +// accessed yet, allocate it. + +ssa_block_ranges & +block_range_cache::get_block_ranges (tree name) +{ + unsigned v = SSA_NAME_VERSION (name); + if (v >= m_ssa_ranges.length ()) + m_ssa_ranges.safe_grow_cleared (num_ssa_names + 1); + + if (!m_ssa_ranges[v]) + m_ssa_ranges[v] = new ssa_block_ranges (TREE_TYPE (name), m_irange_allocator); + + return *(m_ssa_ranges[v]); +} + +// Set the range for NAME on entry to block BB to R. + +void +block_range_cache::set_bb_range (tree name, const basic_block bb, + const irange &r) +{ + return get_block_ranges (name).set_bb_range (bb, r); +} + +// Set the range for NAME on entry to block BB to varying. + +void +block_range_cache::set_bb_varying (tree name, const basic_block bb) +{ + return get_block_ranges (name).set_bb_varying (bb); +} + +// Return the range for NAME on entry to BB in R. Return true if there +// is one. + +bool +block_range_cache::get_bb_range (irange &r, tree name, const basic_block bb) +{ + return get_block_ranges (name).get_bb_range (r, bb); +} + +// Return true if NAME has a range set in block BB. 
+ +bool +block_range_cache::bb_range_p (tree name, const basic_block bb) +{ + return get_block_ranges (name).bb_range_p (bb); +} + +// Print all known block caches to file F. + +void +block_range_cache::dump (FILE *f) +{ + unsigned x; + for (x = 0; x < m_ssa_ranges.length (); ++x) + { + if (m_ssa_ranges[x]) + { + fprintf (f, " Ranges for "); + print_generic_expr (f, ssa_name (x), TDF_NONE); + fprintf (f, ":\n"); + m_ssa_ranges[x]->dump (f); + fprintf (f, "\n"); + } + } +} + +// Print all known ranges on entry to blobk BB to file F. + +void +block_range_cache::dump (FILE *f, basic_block bb, bool print_varying) +{ + unsigned x; + int_range_max r; + bool summarize_varying = false; + for (x = 1; x < m_ssa_ranges.length (); ++x) + { + if (!gimple_range_ssa_p (ssa_name (x))) + continue; + if (m_ssa_ranges[x] && m_ssa_ranges[x]->get_bb_range (r, bb)) + { + if (!print_varying && r.varying_p ()) + { + summarize_varying = true; + continue; + } + print_generic_expr (f, ssa_name (x), TDF_NONE); + fprintf (f, "\t"); + r.dump(f); + fprintf (f, "\n"); + } + } + // If there were any varying entries, lump them all together. + if (summarize_varying) + { + fprintf (f, "VARYING_P on entry : "); + for (x = 1; x < num_ssa_names; ++x) + { + if (!gimple_range_ssa_p (ssa_name (x))) + continue; + if (m_ssa_ranges[x] && m_ssa_ranges[x]->get_bb_range (r, bb)) + { + if (r.varying_p ()) + { + print_generic_expr (f, ssa_name (x), TDF_NONE); + fprintf (f, " "); + } + } + } + fprintf (f, "\n"); + } +} + +// ------------------------------------------------------------------------- + +// Initialize a global cache. + +ssa_global_cache::ssa_global_cache () +{ + m_tab.create (0); + m_tab.safe_grow_cleared (num_ssa_names); + m_irange_allocator = new irange_allocator; +} + +// Deconstruct a global cache. + +ssa_global_cache::~ssa_global_cache () +{ + m_tab.release (); + delete m_irange_allocator; +} + +// Retrieve the global range of NAME from cache memory if it exists. +// Return the value in R. 
+ +bool +ssa_global_cache::get_global_range (irange &r, tree name) const +{ + unsigned v = SSA_NAME_VERSION (name); + if (v >= m_tab.length ()) + return false; + + irange *stow = m_tab[v]; + if (!stow) + return false; + r = *stow; + return true; +} + +// Set the range for NAME to R in the global cache. + +void +ssa_global_cache::set_global_range (tree name, const irange &r) +{ + unsigned v = SSA_NAME_VERSION (name); + if (v >= m_tab.length ()) + m_tab.safe_grow_cleared (num_ssa_names + 1); + + irange *m = m_tab[v]; + if (m && m->fits_p (r)) + *m = r; + else + m_tab[v] = m_irange_allocator->allocate (r); +} + +// Clear the range for NAME in the global cache. + +void +ssa_global_cache::clear_global_range (tree name) +{ + unsigned v = SSA_NAME_VERSION (name); + if (v >= m_tab.length ()) + m_tab.safe_grow_cleared (num_ssa_names + 1); + m_tab[v] = NULL; +} + +// Clear the global cache. + +void +ssa_global_cache::clear () +{ + memset (m_tab.address(), 0, m_tab.length () * sizeof (irange *)); +} + +// Dump the contents of the global cache to F. 
+ +void +ssa_global_cache::dump (FILE *f) +{ + unsigned x; + int_range_max r; + fprintf (f, "Non-varying global ranges:\n"); + fprintf (f, "=========================:\n"); + for ( x = 1; x < num_ssa_names; x++) + if (gimple_range_ssa_p (ssa_name (x)) && + get_global_range (r, ssa_name (x)) && !r.varying_p ()) + { + print_generic_expr (f, ssa_name (x), TDF_NONE); + fprintf (f, " : "); + r.dump (f); + fprintf (f, "\n"); + } + fputc ('\n', f); +} + +// -------------------------------------------------------------------------- + +ranger_cache::ranger_cache (range_query &q) : query (q) +{ + m_workback.create (0); + m_workback.safe_grow_cleared (last_basic_block_for_fn (cfun)); + m_update_list.create (0); + m_update_list.safe_grow_cleared (last_basic_block_for_fn (cfun)); + m_update_list.truncate (0); + m_poor_value_list.create (0); + m_poor_value_list.safe_grow_cleared (20); + m_poor_value_list.truncate (0); +} + +ranger_cache::~ranger_cache () +{ + m_poor_value_list.release (); + m_workback.release (); + m_update_list.release (); +} + +// Push a request for a new lookup in block BB of name. Return true if +// the request is actually made (ie, isn't a duplicate). + +bool +ranger_cache::push_poor_value (basic_block bb, tree name) +{ + if (m_poor_value_list.length ()) + { + // Don't push anything else to the same block. If there are multiple + // things required, another request will come during a later evaluation + // and this prevents oscillation building unnecessary depth. + if ((m_poor_value_list.last ()).bb == bb) + return false; + } + + struct update_record rec; + rec.bb = bb; + rec.calc = name; + m_poor_value_list.safe_push (rec); + return true; +} + +// Provide lookup for the gori-computes class to access the best known range +// of an ssa_name in any given basic block. Note, this does no additional +// lookups, just accesses the data that is already known. 
+ +void +ranger_cache::ssa_range_in_bb (irange &r, tree name, basic_block bb) +{ + gimple *s = SSA_NAME_DEF_STMT (name); + basic_block def_bb = ((s && gimple_bb (s)) ? gimple_bb (s) : + ENTRY_BLOCK_PTR_FOR_FN (cfun)); + if (bb == def_bb) + { + // NAME is defined in this block, so request its current value + if (!m_globals.get_global_range (r, name)) + { + // If it doesn't have a value calculated, it means it's a + // "poor" value being used in some calculation. Queue it up + // as a poor value to be improved later. + r = gimple_range_global (name); + if (push_poor_value (bb, name)) + { + if (DEBUG_RANGE_CACHE) + { + fprintf (dump_file, + "*CACHE* no global def in bb %d for ", bb->index); + print_generic_expr (dump_file, name, TDF_SLIM); + fprintf (dump_file, " depth : %d\n", + m_poor_value_list.length ()); + } + } + } + } + // Look for the on-entry value of name in BB from the cache. + else if (!m_on_entry.get_bb_range (r, name, bb)) + { + // If it has no entry then mark this as a poor value. + if (push_poor_value (bb, name)) + { + if (DEBUG_RANGE_CACHE) + { + fprintf (dump_file, + "*CACHE* no on entry range in bb %d for ", bb->index); + print_generic_expr (dump_file, name, TDF_SLIM); + fprintf (dump_file, " depth : %d\n", m_poor_value_list.length ()); + } + } + // Try to pick up any known global value as a best guess for now. + if (!m_globals.get_global_range (r, name)) + r = gimple_range_global (name); + } + + // Check if pointers have any non-null dereferences. Non-call + // exceptions mean we could throw in the middle of the block, so just + // punt for now on those. + if (r.varying_p () && m_non_null.non_null_deref_p (name, bb) && + !cfun->can_throw_non_call_exceptions) + r = range_nonzero (TREE_TYPE (name)); +} + +// Return a static range for NAME on entry to basic block BB in R. If +// calc is true, fill any cache entries required between BB and the +// def block for NAME. Otherwise, return false if the cache is empty. 
+ +bool +ranger_cache::block_range (irange &r, basic_block bb, tree name, bool calc) +{ + gcc_checking_assert (gimple_range_ssa_p (name)); + + if (calc) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (name); + basic_block def_bb = NULL; + if (def_stmt) + def_bb = gimple_bb (def_stmt);; + if (!def_bb) + { + // If we get to the entry block, this better be a default def + // or range_on_entry was called for a block not dominated by + // the def. + gcc_checking_assert (SSA_NAME_IS_DEFAULT_DEF (name)); + def_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun); + } + + // There is no range on entry for the definition block. + if (def_bb == bb) + return false; + + // Otherwise, go figure out what is known in predecessor blocks. + fill_block_cache (name, bb, def_bb); + gcc_checking_assert (m_on_entry.bb_range_p (name, bb)); + } + return m_on_entry.get_bb_range (r, name, bb); +} + +// Add BB to the list of blocks to update, unless it's already in the list. + +void +ranger_cache::add_to_update (basic_block bb) +{ + if (!m_update_list.contains (bb)) + m_update_list.quick_push (bb); +} + +// If there is anything in the iterative update_list, continue +// processing NAME until the list of blocks is empty. + +void +ranger_cache::iterative_cache_update (tree name) +{ + basic_block bb; + edge_iterator ei; + edge e; + int_range_max new_range; + int_range_max current_range; + int_range_max e_range; + + // Process each block by seeing if its calculated range on entry is + // the same as its cached value. If there is a difference, update + // the cache to reflect the new value, and check to see if any + // successors have cache entries which may need to be checked for + // updates. + + while (m_update_list.length () > 0) + { + bb = m_update_list.pop (); + gcc_checking_assert (m_on_entry.bb_range_p (name, bb)); + m_on_entry.get_bb_range (current_range, name, bb); + + // Calculate the "new" range on entry by unioning the pred edges. 
+ new_range.set_undefined (); + FOR_EACH_EDGE (e, ei, bb->preds) + { + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, " edge %d->%d :", e->src->index, bb->index); + // Get whatever range we can for this edge. + if (!outgoing_edge_range_p (e_range, e, name)) + { + ssa_range_in_bb (e_range, name, e->src); + if (DEBUG_RANGE_CACHE) + { + fprintf (dump_file, "No outgoing edge range, picked up "); + e_range.dump(dump_file); + fprintf (dump_file, "\n"); + } + } + else + { + if (DEBUG_RANGE_CACHE) + { + fprintf (dump_file, "outgoing range :"); + e_range.dump(dump_file); + fprintf (dump_file, "\n"); + } + } + new_range.union_ (e_range); + if (new_range.varying_p ()) + break; + } + + if (DEBUG_RANGE_CACHE) + { + fprintf (dump_file, "FWD visiting block %d for ", bb->index); + print_generic_expr (dump_file, name, TDF_SLIM); + fprintf (dump_file, " starting range : "); + current_range.dump (dump_file); + fprintf (dump_file, "\n"); + } + + // If the range on entry has changed, update it. + if (new_range != current_range) + { + if (DEBUG_RANGE_CACHE) + { + fprintf (dump_file, " Updating range to "); + new_range.dump (dump_file); + fprintf (dump_file, "\n Updating blocks :"); + } + m_on_entry.set_bb_range (name, bb, new_range); + // Mark each successor that has a range to re-check its range + FOR_EACH_EDGE (e, ei, bb->succs) + if (m_on_entry.bb_range_p (name, e->dest)) + { + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, " bb%d",e->dest->index); + add_to_update (e->dest); + } + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, "\n"); + } + } + if (DEBUG_RANGE_CACHE) + { + fprintf (dump_file, "DONE visiting blocks for "); + print_generic_expr (dump_file, name, TDF_SLIM); + fprintf (dump_file, "\n"); + } +} + +// Make sure that the range-on-entry cache for NAME is set for block BB. +// Work back through the CFG to DEF_BB ensuring the range is calculated +// on the block/edges leading back to that point. 
+ +void +ranger_cache::fill_block_cache (tree name, basic_block bb, basic_block def_bb) +{ + edge_iterator ei; + edge e; + int_range_max block_result; + int_range_max undefined; + unsigned poor_list_start = m_poor_value_list.length (); + + // At this point we shouldn't be looking at the def, entry or exit block. + gcc_checking_assert (bb != def_bb && bb != ENTRY_BLOCK_PTR_FOR_FN (cfun) && + bb != EXIT_BLOCK_PTR_FOR_FN (cfun)); + + // If the block cache is set, then we've already visited this block. + if (m_on_entry.bb_range_p (name, bb)) + return; + + // Visit each block back to the DEF. Initialize each one to UNDEFINED. + // m_visited at the end will contain all the blocks that we needed to set + // the range_on_entry cache for. + m_workback.truncate (0); + m_workback.quick_push (bb); + undefined.set_undefined (); + m_on_entry.set_bb_range (name, bb, undefined); + gcc_checking_assert (m_update_list.length () == 0); + + if (DEBUG_RANGE_CACHE) + { + fprintf (dump_file, "\n"); + print_generic_expr (dump_file, name, TDF_SLIM); + fprintf (dump_file, " : "); + } + + while (m_workback.length () > 0) + { + basic_block node = m_workback.pop (); + if (DEBUG_RANGE_CACHE) + { + fprintf (dump_file, "BACK visiting block %d for ", node->index); + print_generic_expr (dump_file, name, TDF_SLIM); + fprintf (dump_file, "\n"); + } + + FOR_EACH_EDGE (e, ei, node->preds) + { + basic_block pred = e->src; + int_range_max r; + + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, " %d->%d ",e->src->index, e->dest->index); + + // If the pred block is the def block add this BB to update list. + if (pred == def_bb) + { + add_to_update (node); + continue; + } + + // If the pred is entry but NOT def, then it is used before + // defined, it'll get set to [] and no need to update it. 
+ if (pred == ENTRY_BLOCK_PTR_FOR_FN (cfun)) + { + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, "entry: bail."); + continue; + } + + // Regardless of whether we have visited pred or not, if the + // pred has a non-null reference, revisit this block. + if (m_non_null.non_null_deref_p (name, pred)) + { + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, "nonnull: update "); + add_to_update (node); + } + + // If the pred block already has a range, or if it can contribute + // something new. Ie, the edge generates a range of some sort. + if (m_on_entry.get_bb_range (r, name, pred)) + { + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, "has cache, "); + if (!r.undefined_p () || has_edge_range_p (e, name)) + { + add_to_update (node); + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, "update. "); + } + continue; + } + + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, "pushing undefined pred block. "); + // If the pred hasn't been visited (has no range), add it to + // the list. + gcc_checking_assert (!m_on_entry.bb_range_p (name, pred)); + m_on_entry.set_bb_range (name, pred, undefined); + m_workback.quick_push (pred); + } + } + + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, "\n"); + + // Now fill in the marked blocks with values. + iterative_cache_update (name); + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, " iterative update done.\n"); + + // Now that the cache has been updated, check to see if there were any + // SSA_NAMES used in filling the cache which were "poor values". + // We can evaluate them, and inject any new values into the iteration + // list, and see if it improves any on-entry values. + if (poor_list_start != m_poor_value_list.length ()) + { + gcc_checking_assert (poor_list_start < m_poor_value_list.length ()); + while (poor_list_start < m_poor_value_list.length ()) + { + // Find a range for this unresolved value. 
+ // Note, this may spawn new cache filling cycles, but by the time it + // is finished, the work vectors will all be back to the same state + // as before the call. The update record vector will always be + // returned to the current state upon return. + struct update_record rec = m_poor_value_list.pop (); + basic_block calc_bb = rec.bb; + int_range_max tmp; + + // The update work list should be empty at this point. + gcc_checking_assert (m_update_list.length () == 0); + + if (DEBUG_RANGE_CACHE) + { + fprintf (dump_file, "(%d:%d)Calculating ", + m_poor_value_list.length () + 1, poor_list_start); + print_generic_expr (dump_file, name, TDF_SLIM); + fprintf (dump_file, " used poor value for "); + print_generic_expr (dump_file, rec.calc, TDF_SLIM); + fprintf (dump_file, " in bb%d, trying to improve:\n", + calc_bb->index); + } + + // It must have at least one edge, pick edge 0. We just want to + // calculate a range at the exit from the block so the caches feeding + // this block will be filled up. + gcc_checking_assert (EDGE_SUCC (calc_bb, 0)); + query.range_on_edge (tmp, EDGE_SUCC (calc_bb, 0), rec.calc); + + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, " Checking successors of bb%d :", + calc_bb->index); + + // Try recalculating any successor blocks with the new value. + // Note that even if this value is refined from the initial value, + // it may not affect the calculation, but the iterative update + // will resolve that efficiently. + FOR_EACH_EDGE (e, ei, calc_bb->succs) + { + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, "bb%d: ", e->dest->index); + // Only update active cache entries. + if (m_on_entry.bb_range_p (name, e->dest)) + { + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, "update "); + add_to_update (e->dest); + } + } + if (DEBUG_RANGE_CACHE) + fprintf (dump_file, "\n"); + // Now see if there is a new value.
+ iterative_cache_update (name); + } + } + +} diff --git a/gcc/gimple-range-cache.h b/gcc/gimple-range-cache.h new file mode 100644 index 0000000..29ab01e --- /dev/null +++ b/gcc/gimple-range-cache.h @@ -0,0 +1,120 @@ +/* Header file for gimple ranger SSA cache. + Copyright (C) 2017-2020 Free Software Foundation, Inc. + Contributed by Andrew MacLeod <amacleod@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_SSA_RANGE_CACHE_H +#define GCC_SSA_RANGE_CACHE_H + +#include "gimple-range-gori.h" + +// Class used to track non-null references of an SSA name. A vector +// of bitmaps indexed by SSA name is maintained. When indexed by +// basic block, an on-bit indicates there is a non-null dereference +// for that SSA in that block. + +class non_null_ref +{ +public: + non_null_ref (); + ~non_null_ref (); + bool non_null_deref_p (tree name, basic_block bb); +private: + vec<bitmap> m_nn; + void process_name (tree name); + bitmap_obstack m_bitmaps; +}; + +// This class manages a vector of pointers to ssa_block ranges. It +// provides the basis for the "range on entry" cache for all +// SSA names.
+ +class block_range_cache +{ +public: + block_range_cache (); + ~block_range_cache (); + + void set_bb_range (tree name, const basic_block bb, const irange &r); + void set_bb_varying (tree name, const basic_block bb); + bool get_bb_range (irange &r, tree name, const basic_block bb); + bool bb_range_p (tree name, const basic_block bb); + + void dump (FILE *f); + void dump (FILE *f, basic_block bb, bool print_varying = true); +private: + vec<class ssa_block_ranges *> m_ssa_ranges; + ssa_block_ranges &get_block_ranges (tree name); + irange_allocator *m_irange_allocator; +}; + +// This global cache is used with the range engine as markers for what +// has been visited during this incarnation. Once the ranger evaluates +// a name, it is typically not re-evaluated again. + +class ssa_global_cache +{ +public: + ssa_global_cache (); + ~ssa_global_cache (); + bool get_global_range (irange &r, tree name) const; + void set_global_range (tree name, const irange &r); + void clear_global_range (tree name); + void clear (); + void dump (FILE *f = stderr); +private: + vec<irange *> m_tab; + class irange_allocator *m_irange_allocator; +}; + +// This class provides all the caches a global ranger may need, and makes +// them available for gori-computes to query so outgoing edges can be +// properly calculated. + +class ranger_cache : public gori_compute_cache +{ +public: + ranger_cache (class range_query &q); + ~ranger_cache (); + + virtual void ssa_range_in_bb (irange &r, tree name, basic_block bb); + bool block_range (irange &r, basic_block bb, tree name, bool calc = true); + + ssa_global_cache m_globals; + block_range_cache m_on_entry; + non_null_ref m_non_null; +private: + void add_to_update (basic_block bb); + void fill_block_cache (tree name, basic_block bb, basic_block def_bb); + void iterative_cache_update (tree name); + + vec<basic_block> m_workback; + vec<basic_block> m_update_list; + + // Iterative "poor value" calculations. + struct update_record + { + basic_block bb; // Block which value needs to be calculated in.
+ tree calc; // SSA_NAME which needs its value calculated. + }; + bool push_poor_value (basic_block bb, tree name); + vec<update_record> m_poor_value_list; + class range_query &query; +}; + +#endif // GCC_SSA_RANGE_CACHE_H diff --git a/gcc/gimple-range-edge.cc b/gcc/gimple-range-edge.cc new file mode 100644 index 0000000..c5ee54f --- /dev/null +++ b/gcc/gimple-range-edge.cc @@ -0,0 +1,197 @@ +/* Gimple range edge functionality. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Andrew MacLeod <amacleod@redhat.com> + and Aldy Hernandez <aldyh@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "gimple.h" +#include "ssa.h" +#include "gimple-pretty-print.h" +#include "gimple-iterator.h" +#include "tree-cfg.h" +#include "gimple-range.h" + +// If there is a range control statement at the end of block BB, return it. +// Otherwise return NULL.
+ +gimple * +gimple_outgoing_range_stmt_p (basic_block bb) +{ + gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb); + if (!gsi_end_p (gsi)) + { + gimple *s = gsi_stmt (gsi); + if (is_a<gcond *> (s) && gimple_range_handler (s)) + return gsi_stmt (gsi); + gswitch *sw = dyn_cast<gswitch *> (s); + if (sw && irange::supports_type_p (TREE_TYPE (gimple_switch_index (sw)))) + return gsi_stmt (gsi); + } + return NULL; +} + + +outgoing_range::outgoing_range () +{ + m_edge_table = NULL; +} + +outgoing_range::~outgoing_range () +{ + if (m_edge_table) + delete m_edge_table; +} + + +// Get a range for a switch edge E from statement S and return it in R. +// Use a cached value if it exists, or calculate it if not. + +bool +outgoing_range::get_edge_range (irange &r, gimple *s, edge e) +{ + gcc_checking_assert (is_a<gswitch *> (s)); + gswitch *sw = as_a<gswitch *> (s); + + // ADA currently has cases where the index is 64 bits and the case + // arguments are 32 bit, causing a trap when we create a case_range. + // Until this is resolved (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87798) + // punt on switches where the labels don't match the argument. + if (gimple_switch_num_labels (sw) > 1 && + TYPE_PRECISION (TREE_TYPE (CASE_LOW (gimple_switch_label (sw, 1)))) != + TYPE_PRECISION (TREE_TYPE (gimple_switch_index (sw)))) + return false; + + if (!m_edge_table) + m_edge_table = new hash_map<edge, irange *> (n_edges_for_fn (cfun)); + + irange **val = m_edge_table->get (e); + if (!val) + { + calc_switch_ranges (sw); + val = m_edge_table->get (e); + gcc_checking_assert (val); + } + r = **val; + return true; +} + + +// Calculate all switch edges from SW and cache them in the hash table.
+ +void +outgoing_range::calc_switch_ranges (gswitch *sw) +{ + bool existed; + unsigned x, lim; + lim = gimple_switch_num_labels (sw); + tree type = TREE_TYPE (gimple_switch_index (sw)); + + edge default_edge = gimple_switch_default_edge (cfun, sw); + irange *&default_slot = m_edge_table->get_or_insert (default_edge, &existed); + + // This should be the first call into this switch. For the default + // range case, start with varying and intersect each other case from + // it. + + gcc_checking_assert (!existed); + + // Allocate an int_range_max for default case. + default_slot = m_range_allocator.allocate (255); + default_slot->set_varying (type); + + for (x = 1; x < lim; x++) + { + edge e = gimple_switch_edge (cfun, sw, x); + + // If this edge is the same as the default edge, do nothing else. + if (e == default_edge) + continue; + + tree low = CASE_LOW (gimple_switch_label (sw, x)); + tree high = CASE_HIGH (gimple_switch_label (sw, x)); + if (!high) + high = low; + + // Remove the case range from the default case. + int_range_max def_range (low, high); + range_cast (def_range, type); + def_range.invert (); + default_slot->intersect (def_range); + + // Create/union this case with anything else on the edge. + int_range_max case_range (low, high); + range_cast (case_range, type); + irange *&slot = m_edge_table->get_or_insert (e, &existed); + if (existed) + { + case_range.union_ (*slot); + if (slot->fits_p (case_range)) + { + *slot = case_range; + continue; + } + } + // If there was an existing range and it doesn't fit, we lose the memory. + // It'll get reclaimed when the obstack is freed. This seems less + // intrusive than allocating max ranges for each case. + slot = m_range_allocator.allocate (case_range); + } +} + + +// Calculate the range forced on edge E by control flow, return it +// in R.
Return the statement which defines the range, otherwise +// return NULL + +gimple * +outgoing_range::edge_range_p (irange &r, edge e) +{ + // Determine if there is an outgoing edge. + gimple *s = gimple_outgoing_range_stmt_p (e->src); + if (!s) + return NULL; + + if (is_a<gcond *> (s)) + { + if (e->flags & EDGE_TRUE_VALUE) + r = int_range<2> (boolean_true_node, boolean_true_node); + else if (e->flags & EDGE_FALSE_VALUE) + r = int_range<2> (boolean_false_node, boolean_false_node); + else + gcc_unreachable (); + return s; + } + + gcc_checking_assert (is_a<gswitch *> (s)); + gswitch *sw = as_a<gswitch *> (s); + tree type = TREE_TYPE (gimple_switch_index (sw)); + + if (!irange::supports_type_p (type)) + return NULL; + + if (get_edge_range (r, sw, e)) + return s; + + return NULL; +} diff --git a/gcc/gimple-range-edge.h b/gcc/gimple-range-edge.h new file mode 100644 index 0000000..400c814 --- /dev/null +++ b/gcc/gimple-range-edge.h @@ -0,0 +1,55 @@ +/* Gimple range edge header file. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Andrew MacLeod <amacleod@redhat.com> + and Aldy Hernandez <aldyh@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GIMPLE_RANGE_EDGE_H +#define GIMPLE_RANGE_EDGE_H + +// This class is used to query ranges on constant edges in GIMPLE. +// +// For a COND_EXPR, the TRUE edge will return [1,1] and the false edge a [0,0]. +// +// For SWITCH_EXPR, it is awkward to calculate ranges.
When a request +// is made, the entire switch is evaluated and the results cached. +// Any future requests to that switch will use the cached value, providing +// dramatic decrease in computation time. +// +// The API is simple, just ask for the range on the edge. +// The return value is NULL for no range, or the branch statement which the +// edge gets the range from, along with the range. + +class outgoing_range +{ +public: + outgoing_range (); + ~outgoing_range (); + gimple *edge_range_p (irange &r, edge e); +private: + void calc_switch_ranges (gswitch *sw); + bool get_edge_range (irange &r, gimple *s, edge e); + + hash_map<edge, irange *> *m_edge_table; + irange_allocator m_range_allocator; +}; + +// If there is a range control statement at the end of block BB, return it. +gimple *gimple_outgoing_range_stmt_p (basic_block bb); + +#endif // GIMPLE_RANGE_EDGE_H diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc new file mode 100644 index 0000000..eaf1a44 --- /dev/null +++ b/gcc/gimple-range-gori.cc @@ -0,0 +1,1321 @@ +/* Gimple range GORI functions. + Copyright (C) 2017-2020 Free Software Foundation, Inc. + Contributed by Andrew MacLeod <amacleod@redhat.com> + and Aldy Hernandez <aldyh@redhat.com>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>.
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "gimple.h" +#include "ssa.h" +#include "gimple-pretty-print.h" +#include "gimple-range.h" + + +/* RANGE_DEF_CHAIN is used to determine what SSA names in a block can + have range information calculated for them, and what the + dependencies on each other are. + + Information for a basic block is calculated once and stored. It is + only calculated the first time a query is made, so if no queries + are made, there is little overhead. + + The def_chain bitmap is indexed by SSA_NAME_VERSION. Bits are set + within this bitmap to indicate SSA names that are defined in the + SAME block and used to calculate this SSA name. + + + <bb 2> : + _1 = x_4(D) + -2; + _2 = _1 * 4; + j_7 = foo (); + q_5 = _2 + 3; + if (q_5 <= 13) + + _1 : x_4(D) + _2 : _1 x_4(D) + q_5 : _1 _2 x_4(D) + + This dump indicates the bits set in the def_chain vector. + as well as demonstrates the def_chain bits for the related ssa_names. + + Checking the chain for _2 indicates that _1 and x_4 are used in + its evaluation. + + Def chains also only include statements which are valid gimple + so a def chain will only span statements for which the range + engine implements operations for. */ + + +class range_def_chain +{ +public: + range_def_chain (); + ~range_def_chain (); + bool has_def_chain (tree name); + bitmap get_def_chain (tree name); + bool in_chain_p (tree name, tree def); +private: + vec<bitmap> m_def_chain; // SSA_NAME : def chain components. + void build_def_chain (tree name, bitmap result, basic_block bb); +}; + + +// Construct a range_def_chain. + +range_def_chain::range_def_chain () +{ + m_def_chain.create (0); + m_def_chain.safe_grow_cleared (num_ssa_names); +} + +// Destruct a range_def_chain.
+ +range_def_chain::~range_def_chain () +{ + unsigned x; + for (x = 0; x < m_def_chain.length (); ++x) + if (m_def_chain[x]) + BITMAP_FREE (m_def_chain[x]); + m_def_chain.release (); +} + +// Return true if NAME is in the def chain of DEF. Return false if DEF +// has no def chain. + +bool +range_def_chain::in_chain_p (tree name, tree def) +{ + gcc_checking_assert (gimple_range_ssa_p (def)); + gcc_checking_assert (gimple_range_ssa_p (name)); + + // Get the definition chain for DEF. + bitmap chain = get_def_chain (def); + + if (chain == NULL) + return false; + return bitmap_bit_p (chain, SSA_NAME_VERSION (name)); +} + +// Build def_chains for NAME if it is in BB. Copy the def chain into RESULT. + +void +range_def_chain::build_def_chain (tree name, bitmap result, basic_block bb) +{ + bitmap b; + gimple *def_stmt = SSA_NAME_DEF_STMT (name); + // Add this operand into the result. + bitmap_set_bit (result, SSA_NAME_VERSION (name)); + + if (gimple_bb (def_stmt) == bb && !is_a<gphi *>(def_stmt)) + { + // Get the def chain for the operand. + b = get_def_chain (name); + // If there was one, copy it into result. + if (b) + bitmap_ior_into (result, b); + } +} + +// Return TRUE if NAME has been processed for a def_chain. + +inline bool +range_def_chain::has_def_chain (tree name) +{ + // Ensure there is an entry in the internal vector. + unsigned v = SSA_NAME_VERSION (name); + if (v >= m_def_chain.length ()) + m_def_chain.safe_grow_cleared (num_ssa_names + 1); + return (m_def_chain[v] != NULL); +} + +// Calculate the def chain for NAME and all of its dependent +// operands. Only using names in the same BB. Return the bitmap of +// all names in the m_def_chain. This only works for supported range +// statements. + +bitmap +range_def_chain::get_def_chain (tree name) +{ + tree ssa1, ssa2, ssa3; + unsigned v = SSA_NAME_VERSION (name); + + // If it has already been processed, just return the cached value.
+ if (has_def_chain (name)) + return m_def_chain[v]; + + // No definition chain for default defs. + if (SSA_NAME_IS_DEFAULT_DEF (name)) + return NULL; + + gimple *stmt = SSA_NAME_DEF_STMT (name); + if (gimple_range_handler (stmt)) + { + ssa1 = gimple_range_ssa_p (gimple_range_operand1 (stmt)); + ssa2 = gimple_range_ssa_p (gimple_range_operand2 (stmt)); + ssa3 = NULL_TREE; + } + else if (is_a<gassign *> (stmt) + && gimple_assign_rhs_code (stmt) == COND_EXPR) + { + gassign *st = as_a<gassign *> (stmt); + ssa1 = gimple_range_ssa_p (gimple_assign_rhs1 (st)); + ssa2 = gimple_range_ssa_p (gimple_assign_rhs2 (st)); + ssa3 = gimple_range_ssa_p (gimple_assign_rhs3 (st)); + } + else + return NULL; + + basic_block bb = gimple_bb (stmt); + + m_def_chain[v] = BITMAP_ALLOC (NULL); + + if (ssa1) + build_def_chain (ssa1, m_def_chain[v], bb); + if (ssa2) + build_def_chain (ssa2, m_def_chain[v], bb); + if (ssa3) + build_def_chain (ssa3, m_def_chain[v], bb); + + // If we run into pathological cases where the definition chains are + // huge (ie huge basic block fully unrolled) we might be able to limit + // this by deciding here that if some criteria is satisfied, we change the + // def_chain back to be just the ssa-names. That will help prevent chains + // of a_2 = b_6 + a_8 from creating a pathological case. + return m_def_chain[v]; +} + +// ------------------------------------------------------------------- + +/* GORI_MAP is used to accumulate what SSA names in a block can + generate range information, and provides tools for the block ranger + to enable it to efficiently calculate these ranges. + + GORI stands for "Generates Outgoing Range Information." + + It utilizes the range_def_chain class to construct def_chains. + Information for a basic block is calculated once and stored. It is + only calculated the first time a query is made. If no queries are + made, there is little overhead.
+ + one bitmap is maintained for each basic block: + m_outgoing : a set bit indicates a range can be generated for a name. + + Generally speaking, the m_outgoing vector is the union of the + entire def_chain of all SSA names used in the last statement of the + block which generate ranges. */ + +class gori_map : public range_def_chain +{ +public: + gori_map (); + ~gori_map (); + + bool is_export_p (tree name, basic_block bb); + bool def_chain_in_export_p (tree name, basic_block bb); + + void dump (FILE *f); + void dump (FILE *f, basic_block bb); +private: + bitmap_obstack m_bitmaps; + vec<bitmap> m_outgoing; // BB: Outgoing ranges calculable on edges + void maybe_add_gori (tree name, basic_block bb); + void calculate_gori (basic_block bb); + bitmap exports (basic_block bb); +}; + + +// Initialize a gori-map structure. + +gori_map::gori_map () +{ + m_outgoing.create (0); + m_outgoing.safe_grow_cleared (last_basic_block_for_fn (cfun)); + bitmap_obstack_initialize (&m_bitmaps); +} + +// Free any memory the GORI map allocated. + +gori_map::~gori_map () +{ + bitmap_obstack_release (&m_bitmaps); + m_outgoing.release (); +} + +// Return the bitmap vector of all export from BB. Calculate if necessary. + +bitmap +gori_map::exports (basic_block bb) +{ + if (bb->index >= (signed int)m_outgoing.length () || !m_outgoing[bb->index]) + calculate_gori (bb); + return m_outgoing[bb->index]; +} + +// Return true if NAME can have ranges generated for it from basic +// block BB. + +bool +gori_map::is_export_p (tree name, basic_block bb) +{ + return bitmap_bit_p (exports (bb), SSA_NAME_VERSION (name)); +} + +// Return true if any element in the def chain of NAME is in the +// export list for BB. + +bool +gori_map::def_chain_in_export_p (tree name, basic_block bb) +{ + bitmap a = exports (bb); + bitmap b = get_def_chain (name); + if (a && b) + return bitmap_intersect_p (a, b); + return false; +} + +// If NAME is non-NULL and defined in block BB, calculate the def +// chain and add it to m_outgoing.
+ +void +gori_map::maybe_add_gori (tree name, basic_block bb) +{ + if (name) + { + gimple *s = SSA_NAME_DEF_STMT (name); + bitmap r = get_def_chain (name); + // Check if there is a def chain, and it is in this block. + if (r && gimple_bb (s) == bb) + bitmap_ior_into (m_outgoing[bb->index], r); + // Def chain doesn't include itself, and even if there isn't a + // def chain, this name should be added to exports. + bitmap_set_bit (m_outgoing[bb->index], SSA_NAME_VERSION (name)); + } +} + +// Calculate all the required information for BB. + +void +gori_map::calculate_gori (basic_block bb) +{ + tree name; + if (bb->index >= (signed int)m_outgoing.length ()) + m_outgoing.safe_grow_cleared (last_basic_block_for_fn (cfun)); + gcc_checking_assert (m_outgoing[bb->index] == NULL); + m_outgoing[bb->index] = BITMAP_ALLOC (&m_bitmaps); + + // If this block's last statement may generate range information, go + // calculate it. + gimple *stmt = gimple_outgoing_range_stmt_p (bb); + if (!stmt) + return; + if (is_a<gcond *> (stmt)) + { + gcond *gc = as_a<gcond *>(stmt); + name = gimple_range_ssa_p (gimple_cond_lhs (gc)); + maybe_add_gori (name, gimple_bb (stmt)); + + name = gimple_range_ssa_p (gimple_cond_rhs (gc)); + maybe_add_gori (name, gimple_bb (stmt)); + } + else + { + gswitch *gs = as_a<gswitch *>(stmt); + name = gimple_range_ssa_p (gimple_switch_index (gs)); + maybe_add_gori (name, gimple_bb (stmt)); + } +} + +// Dump the table information for BB to file F. + +void +gori_map::dump (FILE *f, basic_block bb) +{ + bool header = false; + const char *header_string = "bb%-4d "; + const char *header2 = " "; + bool printed_something = false; + unsigned x, y; + bitmap_iterator bi; + + // BB was not processed. + if (!m_outgoing[bb->index]) + return; + + // Dump the def chain for each SSA_NAME defined in BB. + for (x = 1; x < num_ssa_names; x++) + { + tree name = ssa_name (x); + if (!name) + continue; + gimple *stmt = SSA_NAME_DEF_STMT (name); + bitmap chain = (has_def_chain (name) ?
get_def_chain (name) : NULL); + if (stmt && gimple_bb (stmt) == bb && chain && !bitmap_empty_p (chain)) + { + fprintf (f, header_string, bb->index); + header_string = header2; + header = true; + print_generic_expr (f, name, TDF_SLIM); + fprintf (f, " : "); + EXECUTE_IF_SET_IN_BITMAP (chain, 0, y, bi) + { + print_generic_expr (f, ssa_name (y), TDF_SLIM); + fprintf (f, " "); + } + fprintf (f, "\n"); + } + } + + printed_something |= header; + + // Now dump the export vector. + header = false; + EXECUTE_IF_SET_IN_BITMAP (m_outgoing[bb->index], 0, y, bi) + { + if (!header) + { + fprintf (f, header_string, bb->index); + fprintf (f, "exports: "); + header_string = header2; + header = true; + } + print_generic_expr (f, ssa_name (y), TDF_SLIM); + fprintf (f, " "); + } + if (header) + fputc ('\n', f); + + printed_something |= header; + if (printed_something) + fprintf (f, "\n"); +} + +// Dump the entire GORI map structure to file F. + +void +gori_map::dump (FILE *f) +{ + basic_block bb; + FOR_EACH_BB_FN (bb, cfun) + { + dump (f, bb); + if (m_outgoing[bb->index]) + fprintf (f, "\n"); + } +} + +DEBUG_FUNCTION void +debug (gori_map &g) +{ + g.dump (stderr); +} + +// ------------------------------------------------------------------- + +// Construct a gori_compute object. + +gori_compute::gori_compute () +{ + // Create a boolean_type true and false range. + m_bool_zero = int_range<2> (boolean_false_node, boolean_false_node); + m_bool_one = int_range<2> (boolean_true_node, boolean_true_node); + m_gori_map = new gori_map; +} + +// Destruct a gori_compute object. + +gori_compute::~gori_compute () +{ + delete m_gori_map; +} + +// Provide a default of VARYING for all incoming SSA names.
+ +void +gori_compute::ssa_range_in_bb (irange &r, tree name, basic_block) +{ + r.set_varying (TREE_TYPE (name)); +} + +void +gori_compute::expr_range_in_bb (irange &r, tree expr, basic_block bb) +{ + if (gimple_range_ssa_p (expr)) + ssa_range_in_bb (r, expr, bb); + else + get_tree_range (r, expr); +} + +// Calculate the range for NAME if the lhs of statement STMT has the +// range LHS. Return the result in R. Return false if no range can be +// calculated. + +bool +gori_compute::compute_name_range_op (irange &r, gimple *stmt, + const irange &lhs, tree name) +{ + int_range_max op1_range, op2_range; + + tree op1 = gimple_range_operand1 (stmt); + tree op2 = gimple_range_operand2 (stmt); + + // Operand 1 is the name being looked for, evaluate it. + if (op1 == name) + { + expr_range_in_bb (op1_range, op1, gimple_bb (stmt)); + if (!op2) + { + // The second parameter to a unary operation is the range + // for the type of operand1, but if it can be reduced + // further, the results will be better. Start with what we + // know of the range of OP1 instead of the full type. + return gimple_range_calc_op1 (r, stmt, lhs, op1_range); + } + // If we need the second operand, get a value and evaluate. + expr_range_in_bb (op2_range, op2, gimple_bb (stmt)); + if (gimple_range_calc_op1 (r, stmt, lhs, op2_range)) + r.intersect (op1_range); + else + r = op1_range; + return true; + } + + if (op2 == name) + { + expr_range_in_bb (op1_range, op1, gimple_bb (stmt)); + expr_range_in_bb (r, op2, gimple_bb (stmt)); + if (gimple_range_calc_op2 (op2_range, stmt, lhs, op1_range)) + r.intersect (op2_range); + return true; + } + return false; +} + +// Given the switch S, return an evaluation in R for NAME when the lhs +// evaluates to LHS. Returning false means the name being looked for +// was not resolvable.
+ +bool +gori_compute::compute_operand_range_switch (irange &r, gswitch *s, + const irange &lhs, + tree name) +{ + tree op1 = gimple_switch_index (s); + + // If name matches, the range is simply the range from the edge. + // Empty ranges are viral as they are on a path which isn't + // executable. + if (op1 == name || lhs.undefined_p ()) + { + r = lhs; + return true; + } + + // If op1 is in the definition chain, pass lhs back. + if (gimple_range_ssa_p (op1) && m_gori_map->in_chain_p (name, op1)) + return compute_operand_range (r, SSA_NAME_DEF_STMT (op1), lhs, name); + + return false; +} + +// Return TRUE if GS is a logical && or || expression. + +static inline bool +is_gimple_logical_p (const gimple *gs) +{ + // Look for boolean and/or condition. + if (gimple_code (gs) == GIMPLE_ASSIGN) + switch (gimple_expr_code (gs)) + { + case TRUTH_AND_EXPR: + case TRUTH_OR_EXPR: + return true; + + case BIT_AND_EXPR: + case BIT_IOR_EXPR: + // Bitwise operations on single bits are logical too. + if (types_compatible_p (TREE_TYPE (gimple_assign_rhs1 (gs)), + boolean_type_node)) + return true; + break; + + default: + break; + } + return false; +} + +// Return an evaluation for NAME as it would appear in STMT when the +// statement's lhs evaluates to LHS. If successful, return TRUE and +// store the evaluation in R, otherwise return FALSE. + +bool +gori_compute::compute_operand_range (irange &r, gimple *stmt, + const irange &lhs, tree name) +{ + // Empty ranges are viral as they are on an unexecutable path. + if (lhs.undefined_p ()) + { + r.set_undefined (); + return true; + } + if (is_a<gswitch *> (stmt)) + return compute_operand_range_switch (r, as_a<gswitch *> (stmt), lhs, name); + if (!gimple_range_handler (stmt)) + return false; + + tree op1 = gimple_range_ssa_p (gimple_range_operand1 (stmt)); + tree op2 = gimple_range_ssa_p (gimple_range_operand2 (stmt)); + + // The base ranger handles NAME on this statement.
+ if (op1 == name || op2 == name) + return compute_name_range_op (r, stmt, lhs, name); + + if (is_gimple_logical_p (stmt)) + return compute_logical_operands (r, stmt, lhs, name); + + // NAME is not in this stmt, but one of the names in it ought to be + // derived from it. + bool op1_in_chain = op1 && m_gori_map->in_chain_p (name, op1); + bool op2_in_chain = op2 && m_gori_map->in_chain_p (name, op2); + if (op1_in_chain && op2_in_chain) + return compute_operand1_and_operand2_range (r, stmt, lhs, name); + if (op1_in_chain) + return compute_operand1_range (r, stmt, lhs, name); + if (op2_in_chain) + return compute_operand2_range (r, stmt, lhs, name); + + // If neither operand is derived, this statement tells us nothing. + return false; +} + +// Return TRUE if range R is known to be exclusively true or exclusively false. + +static bool +range_is_either_true_or_false (const irange &r) +{ + if (r.undefined_p ()) + return false; + + // This is complicated by the fact that Ada has multi-bit booleans, + // so true can be ~[0, 0] (i.e. [1,MAX]). + tree type = r.type (); + gcc_checking_assert (types_compatible_p (type, boolean_type_node)); + return (r.singleton_p () || !r.contains_p (build_zero_cst (type))); +} + +// A pair of ranges for true/false paths. + +struct tf_range +{ + tf_range () { } + tf_range (const irange &t_range, const irange &f_range) + { + true_range = t_range; + false_range = f_range; + } + int_range_max true_range, false_range; +}; + +// Evaluate a binary logical expression by combining the true and +// false ranges for each of the operands based on the result value in +// the LHS.
+ +bool +gori_compute::logical_combine (irange &r, enum tree_code code, + const irange &lhs, + const tf_range &op1, const tf_range &op2) +{ + if (op1.true_range.varying_p () + && op1.false_range.varying_p () + && op2.true_range.varying_p () + && op2.false_range.varying_p ()) + return false; + + // This is not a simple fold of a logical expression, rather it + // determines ranges which flow through the logical expression. + // + // Assuming x_8 is an unsigned char, and relational statements: + // b_1 = x_8 < 20 + // b_2 = x_8 > 5 + // consider the logical expression and branch: + // c_2 = b_1 && b_2 + // if (c_2) + // + // To determine the range of x_8 on either edge of the branch, one + // must first determine what the range of x_8 is when the boolean + // values of b_1 and b_2 are both true and false. + // b_1 TRUE x_8 = [0, 19] + // b_1 FALSE x_8 = [20, 255] + // b_2 TRUE x_8 = [6, 255] + // b_2 FALSE x_8 = [0, 5]. + // + // These ranges are then combined based on the expected outcome of + // the branch. The range on the TRUE side of the branch must satisfy + // b_1 == true && b_2 == true + // + // In terms of x_8, that means both x_8 == [0, 19] and x_8 == [6, 255] + // must be true. The range of x_8 on the true side must be the + // intersection of both ranges since both must be true. Thus the + // range of x_8 on the true side is [6, 19]. + // + // To determine the ranges on the FALSE side, all 3 combinations of + // failing ranges must be considered, and combined as any of them + // can cause the false result. + // + // If the LHS can be TRUE or FALSE, then evaluate both a TRUE and + // FALSE results and combine them. If we fell back to VARYING any + // range restrictions that have been discovered up to this point + // would be lost.
+ if (!range_is_either_true_or_false (lhs)) + { + int_range_max r1; + if (logical_combine (r1, code, m_bool_zero, op1, op2) + && logical_combine (r, code, m_bool_one, op1, op2)) + { + r.union_ (r1); + return true; + } + return false; + } + + switch (code) + { + // A logical AND combines ranges from 2 boolean conditions. + // c_2 = b_1 && b_2 + case TRUTH_AND_EXPR: + case BIT_AND_EXPR: + if (!lhs.zero_p ()) + { + // The TRUE side is the intersection of the the 2 true ranges. + r = op1.true_range; + r.intersect (op2.true_range); + } + else + { + // The FALSE side is the union of the other 3 cases. + int_range_max ff (op1.false_range); + ff.intersect (op2.false_range); + int_range_max tf (op1.true_range); + tf.intersect (op2.false_range); + int_range_max ft (op1.false_range); + ft.intersect (op2.true_range); + r = ff; + r.union_ (tf); + r.union_ (ft); + } + break; + // A logical OR combines ranges from 2 boolean conditons. + // c_2 = b_1 || b_2 + case TRUTH_OR_EXPR: + case BIT_IOR_EXPR: + if (lhs.zero_p ()) + { + // An OR operation will only take the FALSE path if both + // operands are false, so [20, 255] intersect [0, 5] is the + // union: [0,5][20,255]. + r = op1.false_range; + r.intersect (op2.false_range); + } + else + { + // The TRUE side of an OR operation will be the union of + // the other three combinations. + int_range_max tt (op1.true_range); + tt.intersect (op2.true_range); + int_range_max tf (op1.true_range); + tf.intersect (op2.false_range); + int_range_max ft (op1.false_range); + ft.intersect (op2.true_range); + r = tt; + r.union_ (tf); + r.union_ (ft); + } + break; + default: + gcc_unreachable (); + } + + return true; +} + +// Helper function for compute_logical_operands_in_chain that computes +// the range of logical statements that can be computed without +// chasing down operands. These are things like [0 = x | y] where we +// know neither operand can be non-zero, or [1 = x & y] where we know +// neither operand can be zero. 
+
+bool
+gori_compute::optimize_logical_operands (tf_range &range,
+                                         gimple *stmt,
+                                         const irange &lhs,
+                                         tree name,
+                                         tree op)
+{
+  enum tree_code code = gimple_expr_code (stmt);
+
+  // Optimize [0 = x | y], since neither operand can ever be non-zero.
+  if ((code == BIT_IOR_EXPR || code == TRUTH_OR_EXPR) && lhs.zero_p ())
+    {
+      if (!compute_operand_range (range.false_range, SSA_NAME_DEF_STMT (op),
+                                  m_bool_zero, name))
+        expr_range_in_bb (range.false_range, name, gimple_bb (stmt));
+      range.true_range = range.false_range;
+      return true;
+    }
+  // Optimize [1 = x & y], since neither operand can ever be zero.
+  if ((code == BIT_AND_EXPR || code == TRUTH_AND_EXPR) && lhs == m_bool_one)
+    {
+      if (!compute_operand_range (range.true_range, SSA_NAME_DEF_STMT (op),
+                                  m_bool_one, name))
+        expr_range_in_bb (range.true_range, name, gimple_bb (stmt));
+      range.false_range = range.true_range;
+      return true;
+    }
+  return false;
+}
+
+// Given a logical STMT, calculate true and false ranges for each
+// potential path of NAME, assuming NAME came through the OP chain if
+// OP_IN_CHAIN is true.
+
+void
+gori_compute::compute_logical_operands_in_chain (tf_range &range,
+                                                 gimple *stmt,
+                                                 const irange &lhs,
+                                                 tree name,
+                                                 tree op, bool op_in_chain)
+{
+  if (!op_in_chain)
+    {
+      // NAME does not flow through OP, so OP tells us nothing: use
+      // the known range of NAME in this block for both paths.
+      expr_range_in_bb (range.true_range, name, gimple_bb (stmt));
+      range.false_range = range.true_range;
+      return;
+    }
+  if (optimize_logical_operands (range, stmt, lhs, name, op))
+    return;
+
+  // Calculate ranges for true and false on both sides, since the false
+  // path is not always a simple inversion of the true side.
+  if (!compute_operand_range (range.true_range, SSA_NAME_DEF_STMT (op),
+                              m_bool_one, name))
+    expr_range_in_bb (range.true_range, name, gimple_bb (stmt));
+  if (!compute_operand_range (range.false_range, SSA_NAME_DEF_STMT (op),
+                              m_bool_zero, name))
+    expr_range_in_bb (range.false_range, name, gimple_bb (stmt));
+}
+
+// Given a logical STMT, calculate true and false for each potential
+// path using NAME, and resolve the outcome based on the logical
+// operator.
+
+bool
+gori_compute::compute_logical_operands (irange &r, gimple *stmt,
+                                        const irange &lhs,
+                                        tree name)
+{
+  // Reaching this point means NAME is not in this stmt, but one of
+  // the names in it ought to be derived from it.
+  tree op1 = gimple_range_operand1 (stmt);
+  tree op2 = gimple_range_operand2 (stmt);
+  gcc_checking_assert (op1 != name && op2 != name);
+
+  bool op1_in_chain = (gimple_range_ssa_p (op1)
+                       && m_gori_map->in_chain_p (name, op1));
+  bool op2_in_chain = (gimple_range_ssa_p (op2)
+                       && m_gori_map->in_chain_p (name, op2));
+
+  // If neither operand is derived, then this stmt tells us nothing.
+  if (!op1_in_chain && !op2_in_chain)
+    return false;
+
+  tf_range op1_range, op2_range;
+  compute_logical_operands_in_chain (op1_range, stmt, lhs,
+                                     name, op1, op1_in_chain);
+  compute_logical_operands_in_chain (op2_range, stmt, lhs,
+                                     name, op2, op2_in_chain);
+  return logical_combine (r, gimple_expr_code (stmt), lhs,
+                          op1_range, op2_range);
+}
+
+// Calculate a range for NAME from the operand 1 position of STMT
+// assuming the result of the statement is LHS.  Return the range in
+// R, or false if no range could be calculated.
+
+bool
+gori_compute::compute_operand1_range (irange &r, gimple *stmt,
+                                      const irange &lhs, tree name)
+{
+  int_range_max op1_range, op2_range;
+  tree op1 = gimple_range_operand1 (stmt);
+  tree op2 = gimple_range_operand2 (stmt);
+
+  expr_range_in_bb (op1_range, op1, gimple_bb (stmt));
+
+  // Now calculate the operand and put that result in r.
+  if (op2)
+    {
+      expr_range_in_bb (op2_range, op2, gimple_bb (stmt));
+      if (!gimple_range_calc_op1 (r, stmt, lhs, op2_range))
+        return false;
+    }
+  else
+    {
+      // We pass op1_range to the unary operation.  Normally it's a
+      // hidden range_for_type parameter, but sometimes having the
+      // actual range can result in better information.
+      if (!gimple_range_calc_op1 (r, stmt, lhs, op1_range))
+        return false;
+    }
+
+  // Intersect the calculated result with the known result.
+  op1_range.intersect (r);
+
+  gimple *src_stmt = SSA_NAME_DEF_STMT (op1);
+  // If def stmt is outside of this BB, then name must be an import.
+  if (!src_stmt || (gimple_bb (src_stmt) != gimple_bb (stmt)))
+    {
+      // If this isn't the right import statement, then abort calculation.
+      if (!src_stmt || gimple_get_lhs (src_stmt) != name)
+        return false;
+      return compute_name_range_op (r, src_stmt, op1_range, name);
+    }
+  // Then feed this range back as the LHS of the defining statement.
+  return compute_operand_range (r, src_stmt, op1_range, name);
+}
+
+
+// Calculate a range for NAME from the operand 2 position of STMT
+// assuming the result of the statement is LHS.  Return the range in
+// R, or false if no range could be calculated.
+
+bool
+gori_compute::compute_operand2_range (irange &r, gimple *stmt,
+                                      const irange &lhs, tree name)
+{
+  int_range_max op1_range, op2_range;
+  tree op1 = gimple_range_operand1 (stmt);
+  tree op2 = gimple_range_operand2 (stmt);
+
+  expr_range_in_bb (op1_range, op1, gimple_bb (stmt));
+  expr_range_in_bb (op2_range, op2, gimple_bb (stmt));
+
+  // Intersect with range for op2 based on lhs and op1.  If nothing
+  // can be calculated, carry on with the known range of op2.
+  if (gimple_range_calc_op2 (r, stmt, lhs, op1_range))
+    op2_range.intersect (r);
+
+  gimple *src_stmt = SSA_NAME_DEF_STMT (op2);
+  // If def stmt is outside of this BB, then name must be an import.
+  if (!src_stmt || (gimple_bb (src_stmt) != gimple_bb (stmt)))
+    {
+      // If this isn't the right src statement, then abort calculation.
+      if (!src_stmt || gimple_get_lhs (src_stmt) != name)
+        return false;
+      return compute_name_range_op (r, src_stmt, op2_range, name);
+    }
+  // Then feed this range back as the LHS of the defining statement.
+  return compute_operand_range (r, src_stmt, op2_range, name);
+}
+
+// Calculate a range for NAME from both operand positions of STMT
+// assuming the result of the statement is LHS.  Return the range in
+// R, or false if no range could be calculated.
+
+bool
+gori_compute::compute_operand1_and_operand2_range
+                                        (irange &r,
+                                         gimple *stmt,
+                                         const irange &lhs,
+                                         tree name)
+{
+  int_range_max op_range;
+
+  // Calculate a good range for op2.  Since op1 == op2, this will
+  // have already included whatever the actual range of name is.
+  if (!compute_operand2_range (op_range, stmt, lhs, name))
+    return false;
+
+  // Now get the range thru op1.
+  if (!compute_operand1_range (r, stmt, lhs, name))
+    return false;
+
+  // Whichever range is the most permissive is the one we need to
+  // use.  (?)  OR is that true?  Maybe this should be intersection?
+  r.union_ (op_range);
+  return true;
+}
+
+// Return TRUE if a range can be calculated for NAME on edge E.
+
+bool
+gori_compute::has_edge_range_p (edge e, tree name)
+{
+  return (m_gori_map->is_export_p (name, e->src)
+          || m_gori_map->def_chain_in_export_p (name, e->src));
+}
+
+// Dump what is known to GORI computes to listing file F.
+
+void
+gori_compute::dump (FILE *f)
+{
+  m_gori_map->dump (f);
+}
+
+// Calculate a range on edge E and return it in R.  Try to evaluate a
+// range for NAME on this edge.  Return FALSE if this is either not a
+// control edge or NAME is not defined by this edge.
+
+bool
+gori_compute::outgoing_edge_range_p (irange &r, edge e, tree name)
+{
+  int_range_max lhs;
+
+  gcc_checking_assert (gimple_range_ssa_p (name));
+  // Determine if there is an outgoing edge.
+  gimple *stmt = outgoing.edge_range_p (lhs, e);
+  if (!stmt)
+    return false;
+
+  // If NAME can be calculated on the edge, use that.
+  if (m_gori_map->is_export_p (name, e->src))
+    return compute_operand_range (r, stmt, lhs, name);
+
+  // NAME is not an export of this block.  Names that are only derived
+  // from something that can be calculated are not evaluated here;
+  // this performs no dynamic lookups whatsoever, so it is low cost.
+  return false;
+}
+
+// --------------------------------------------------------------------------
+
+// Cache for SSAs that appear on the RHS of a boolean assignment.
+//
+// Boolean assignments of logical expressions (i.e. LHS = j_5 > 999)
+// have SSA operands whose range depend on the LHS of the assignment.
+// That is, the range of j_5 when LHS is true is different than when
+// LHS is false.
+//
+// This class caches the TRUE/FALSE ranges of such SSAs to avoid
+// recomputing.
+
+class logical_stmt_cache
+{
+public:
+  logical_stmt_cache ();
+  ~logical_stmt_cache ();
+  void set_range (tree lhs, tree name, const tf_range &);
+  bool get_range (tf_range &r, tree lhs, tree name) const;
+  bool cacheable_p (gimple *, const irange *lhs_range = NULL) const;
+  void dump (FILE *, gimple *stmt) const;
+  tree same_cached_name (tree lhs1, tree lhs2) const;
+private:
+  tree cached_name (tree lhs) const;
+  void slot_diagnostics (tree lhs, const tf_range &range) const;
+  // One cached result: the SSA operand NAME and its true/false ranges.
+  struct cache_entry
+  {
+    cache_entry (tree name, const irange &t_range, const irange &f_range);
+    void dump (FILE *out) const;
+    tree name;
+    tf_range range;
+  };
+  // Indexed by the SSA version of the LHS.  Entries are owned by the
+  // cache and deleted in the destructor; empty slots are NULL.
+  vec<cache_entry *> m_ssa_cache;
+};
+
+logical_stmt_cache::cache_entry::cache_entry (tree name,
+                                              const irange &t_range,
+                                              const irange &f_range)
+  : name (name), range (t_range, f_range)
+{
+}
+
+logical_stmt_cache::logical_stmt_cache ()
+{
+  // Reserve some headroom above the current number of SSA names.
+  m_ssa_cache.create (num_ssa_names + num_ssa_names / 10);
+  m_ssa_cache.safe_grow_cleared (num_ssa_names);
+}
+
+logical_stmt_cache::~logical_stmt_cache ()
+{
+  for (unsigned i = 0; i < m_ssa_cache.length (); ++i)
+    if (m_ssa_cache[i])
+      delete m_ssa_cache[i];
+  m_ssa_cache.release ();
+}
+
+// Dump cache_entry to OUT.
+
+void
+logical_stmt_cache::cache_entry::dump (FILE *out) const
+{
+  // Layout: name=<ssa> <true range>, <false range>
+  fputs ("name=", out);
+  print_generic_expr (out, name, TDF_SLIM);
+  fputs (" ", out);
+  range.true_range.dump (out);
+  fputs (", ", out);
+  range.false_range.dump (out);
+  fputs ("\n", out);
+}
+
+// Record RANGE for NAME as it appears in the defining statement of
+// LHS.  If LHS already has an entry, only the name it carries is
+// refreshed; the ranges already stored are kept.
+
+void
+logical_stmt_cache::set_range (tree lhs, tree name, const tf_range &range)
+{
+  const unsigned ver = SSA_NAME_VERSION (lhs);
+
+  // Make room if LHS has a version beyond the current table.
+  if (ver >= m_ssa_cache.length ())
+    m_ssa_cache.safe_grow_cleared (num_ssa_names + num_ssa_names / 10);
+
+  cache_entry *existing = m_ssa_cache[ver];
+  slot_diagnostics (lhs, range);
+
+  if (!existing)
+    {
+      m_ssa_cache[ver]
+        = new cache_entry (name, range.true_range, range.false_range);
+      return;
+    }
+
+  // An entry with a different name means the IL must have changed.
+  // Update the carried SSA name for consistency.  Testcase is
+  // libgomp.fortran/doacross1.f90.
+  existing->name = name;
+}
+
+// If there is a cached entry of NAME, set it in R and return TRUE,
+// otherwise return FALSE.  LHS is the defining statement where NAME
+// appeared.
+
+bool
+logical_stmt_cache::get_range (tf_range &r, tree lhs, tree name) const
+{
+  gcc_checking_assert (cacheable_p (SSA_NAME_DEF_STMT (lhs)));
+
+  // The slot for LHS must be tracking NAME.
+  if (cached_name (lhs) != name)
+    return false;
+
+  cache_entry *entry = m_ssa_cache[SSA_NAME_VERSION (lhs)];
+  if (!entry)
+    return false;
+
+  r = entry->range;
+  return true;
+}
+
+// If the defining statement of LHS is in the cache, return the SSA
+// operand being cached.  That is, return SSA for LHS = SSA .RELOP. OP2.
+// Return NULL if LHS has no cache entry.
+
+tree
+logical_stmt_cache::cached_name (tree lhs) const
+{
+  const unsigned ver = SSA_NAME_VERSION (lhs);
+
+  if (ver < m_ssa_cache.length () && m_ssa_cache[ver])
+    return m_ssa_cache[ver]->name;
+  return NULL;
+}
+
+// If the cached name for LHS1 is the same as the cached name for
+// LHS2, return that name, otherwise return NULL.
+
+tree
+logical_stmt_cache::same_cached_name (tree lhs1, tree lhs2) const
+{
+  tree name = cached_name (lhs1);
+  if (name && name == cached_name (lhs2))
+    return name;
+  return NULL;
+}
+
+// Return TRUE if STMT is a statement we are interested in caching.
+// LHS_RANGE is any known range for the LHS of STMT.
+
+bool
+logical_stmt_cache::cacheable_p (gimple *stmt, const irange *lhs_range) const
+{
+  // Only boolean assignments whose first operand is an SSA name.
+  if (gimple_code (stmt) == GIMPLE_ASSIGN
+      && types_compatible_p (TREE_TYPE (gimple_assign_lhs (stmt)),
+                             boolean_type_node)
+      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
+    {
+      switch (gimple_expr_code (stmt))
+        {
+        case LT_EXPR:
+        case LE_EXPR:
+        case GT_EXPR:
+        case GE_EXPR:
+        case EQ_EXPR:
+        case NE_EXPR:
+        case TRUTH_AND_EXPR:
+        case BIT_AND_EXPR:
+        case TRUTH_OR_EXPR:
+        case BIT_IOR_EXPR:
+          // If a range for the LHS is known, it must be a definite
+          // true or false.
+          return !lhs_range || range_is_either_true_or_false (*lhs_range);
+        default:
+          return false;
+        }
+    }
+  return false;
+}
+
+// Output debugging diagnostics for the cache entry for LHS.  RANGE is
+// the new range that is being cached.  The slot for LHS must already
+// exist in the table (it may be empty).
+
+void
+logical_stmt_cache::slot_diagnostics (tree lhs, const tf_range &range) const
+{
+  gimple *stmt = SSA_NAME_DEF_STMT (lhs);
+  unsigned version = SSA_NAME_VERSION (lhs);
+  cache_entry *slot = m_ssa_cache[version];
+
+  if (!slot)
+    {
+      if (DEBUG_RANGE_CACHE)
+        {
+          fprintf (dump_file ? dump_file : stderr, "registering range for: ");
+          dump (dump_file ? dump_file : stderr, stmt);
+        }
+      return;
+    }
+  // VERSION is unsigned, so print it with %u.
+  if (DEBUG_RANGE_CACHE)
+    fprintf (dump_file ? dump_file : stderr,
+             "reusing range for SSA #%u\n", version);
+  // With checking enabled, a cached range must never change.
+  if (CHECKING_P && (slot->range.true_range != range.true_range
+                     || slot->range.false_range != range.false_range))
+    {
+      fprintf (stderr, "FATAL: range altered for cached: ");
+      dump (stderr, stmt);
+      fprintf (stderr, "Attempt to change to:\n");
+      fprintf (stderr, "TRUE=");
+      range.true_range.dump (stderr);
+      fprintf (stderr, ", FALSE=");
+      range.false_range.dump (stderr);
+      fprintf (stderr, "\n");
+      gcc_unreachable ();
+    }
+}
+
+// Dump the cache information for STMT.
+
+void
+logical_stmt_cache::dump (FILE *out, gimple *stmt) const
+{
+  tree lhs = gimple_assign_lhs (stmt);
+  unsigned version = SSA_NAME_VERSION (lhs);
+  // A statement that was never cached may have a version beyond the
+  // table; treat that as an empty slot, as cached_name does.
+  cache_entry *entry
+    = version < m_ssa_cache.length () ? m_ssa_cache[version] : NULL;
+
+  print_gimple_stmt (out, stmt, 0, TDF_SLIM);
+  if (entry)
+    {
+      fprintf (out, "\tname = ");
+      print_generic_expr (out, entry->name);
+      fprintf (out, " lhs(%u)= ", version);
+      print_generic_expr (out, lhs);
+      fprintf (out, "\n\tTRUE=");
+      entry->range.true_range.dump (out);
+      fprintf (out, ", FALSE=");
+      entry->range.false_range.dump (out);
+      fprintf (out, "\n");
+    }
+  else
+    fprintf (out, "[EMPTY]\n");
+}
+
+gori_compute_cache::gori_compute_cache ()
+{
+  m_cache = new logical_stmt_cache;
+}
+
+gori_compute_cache::~gori_compute_cache ()
+{
+  delete m_cache;
+}
+
+// Caching version of compute_operand_range.  If NAME, as it appears
+// in STMT, has already been cached return it from the cache,
+// otherwise compute the operand range as normal and cache it.
+ +bool +gori_compute_cache::compute_operand_range (irange &r, gimple *stmt, + const irange &lhs_range, tree name) +{ + bool cacheable = m_cache->cacheable_p (stmt, &lhs_range); + if (cacheable) + { + tree lhs = gimple_assign_lhs (stmt); + tf_range range; + if (m_cache->get_range (range, lhs, name)) + { + if (lhs_range.zero_p ()) + r = range.false_range; + else + r = range.true_range; + return true; + } + } + if (super::compute_operand_range (r, stmt, lhs_range, name)) + { + if (cacheable) + cache_stmt (stmt); + return true; + } + return false; +} + +// Cache STMT if possible. + +void +gori_compute_cache::cache_stmt (gimple *stmt) +{ + gcc_checking_assert (m_cache->cacheable_p (stmt)); + enum tree_code code = gimple_expr_code (stmt); + tree lhs = gimple_assign_lhs (stmt); + tree op1 = gimple_range_operand1 (stmt); + tree op2 = gimple_range_operand2 (stmt); + int_range_max r_true_side, r_false_side; + + // LHS = s_5 > 999. + if (TREE_CODE (op2) == INTEGER_CST) + { + range_operator *handler = range_op_handler (code, TREE_TYPE (lhs)); + int_range_max op2_range; + expr_range_in_bb (op2_range, op2, gimple_bb (stmt)); + tree type = TREE_TYPE (op1); + handler->op1_range (r_true_side, type, m_bool_one, op2_range); + handler->op1_range (r_false_side, type, m_bool_zero, op2_range); + m_cache->set_range (lhs, op1, tf_range (r_true_side, r_false_side)); + } + // LHS = s_5 > b_8. 
+ else if (tree cached_name = m_cache->same_cached_name (op1, op2)) + { + tf_range op1_range, op2_range; + gcc_assert (m_cache->get_range (op1_range, op1, cached_name)); + gcc_assert (m_cache->get_range (op2_range, op2, cached_name)); + gcc_assert (logical_combine (r_true_side, code, m_bool_one, + op1_range, op2_range)); + gcc_assert (logical_combine (r_false_side, code, m_bool_zero, + op1_range, op2_range)); + m_cache->set_range (lhs, cached_name, + tf_range (r_true_side, r_false_side)); + } +} diff --git a/gcc/gimple-range-gori.h b/gcc/gimple-range-gori.h new file mode 100644 index 0000000..8ef452b --- /dev/null +++ b/gcc/gimple-range-gori.h @@ -0,0 +1,138 @@ +/* Header file for gimple range GORI structures. + Copyright (C) 2017-2020 Free Software Foundation, Inc. + Contributed by Andrew MacLeod + and Aldy Hernandez . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_GIMPLE_RANGE_GORI_H +#define GCC_GIMPLE_RANGE_GORI_H + + +// This class is used to determine which SSA_NAMES can have ranges +// calculated for them on outgoing edges from basic blocks. This represents +// ONLY the effect of the basic block edge->src on a range. +// +// There are 2 primary entry points: +// +// has_edge_range_p (edge e, tree name) +// returns true if the outgoing edge *may* be able to produce range +// information for ssa_name NAME on edge E. +// FALSE is returned if this edge does not affect the range of NAME. 
+//
+// outgoing_edge_range_p (irange &range, edge e, tree name)
+//   Actually does the calculation of RANGE for name on E.
+//   This represents application of whatever static range effect edge E
+//   may have on NAME, not any cumulative effect.
+
+// There are also some internal APIs
+//
+// ssa_range_in_bb ()  is an internal routine which is used to start any
+// calculation chain using SSA_NAMES which come from outside the block.  ie
+//      a_2 = b_4 - 8
+//      if (a_2 < 30)
+// on the true edge, a_2 is known to be [0, 29]
+// b_4 can be calculated as [8, 37]
+// during this calculation, b_4 is considered an "import" and ssa_range_in_bb
+// is queried for a starting range which is used in the calculation.
+// A default value of VARYING provides the raw static info for the edge.
+//
+// If there is any known range for b_4 coming into this block, it can refine
+// the results.  This allows for cascading results to be propagated.
+// If b_4 is [100, 200] on entry to the block, that feeds into the calculation
+// of a_2 = [92, 192], and finally on the true edge the range would be
+// an empty range [] because it is not possible for the true edge to be taken.
+//
+// expr_range_in_bb is simply a wrapper which calls ssa_range_in_bb for
+// SSA_NAMES and otherwise simply calculates the range of the expression.
+//
+// The remaining routines are internal use only.
+
+class gori_compute
+{
+public:
+  gori_compute ();
+  // Virtual because this class has virtual functions and is derived
+  // from: a derived object destroyed through a gori_compute pointer
+  // must run the derived destructor.
+  virtual ~gori_compute ();
+  bool outgoing_edge_range_p (irange &r, edge e, tree name);
+  bool has_edge_range_p (edge e, tree name);
+  void dump (FILE *f);
+protected:
+  virtual void ssa_range_in_bb (irange &r, tree name, basic_block bb);
+  virtual bool compute_operand_range (irange &r, gimple *stmt,
+                                      const irange &lhs, tree name);
+
+  void expr_range_in_bb (irange &r, tree expr, basic_block bb);
+  bool compute_logical_operands (irange &r, gimple *stmt,
+                                 const irange &lhs,
+                                 tree name);
+  // tf_range is defined as a struct; use the same class-key here.
+  void compute_logical_operands_in_chain (struct tf_range &range,
+                                          gimple *stmt, const irange &lhs,
+                                          tree name, tree op,
+                                          bool op_in_chain);
+  bool optimize_logical_operands (tf_range &range, gimple *stmt,
+                                  const irange &lhs, tree name, tree op);
+  bool logical_combine (irange &r, enum tree_code code, const irange &lhs,
+                        const struct tf_range &op1_range,
+                        const struct tf_range &op2_range);
+  int_range<2> m_bool_zero;           // Boolean false cached.
+  int_range<2> m_bool_one;            // Boolean true cached.
+
+private:
+  bool compute_operand_range_switch (irange &r, gswitch *stmt,
+                                     const irange &lhs, tree name);
+  bool compute_name_range_op (irange &r, gimple *stmt, const irange &lhs,
+                              tree name);
+  bool compute_operand1_range (irange &r, gimple *stmt, const irange &lhs,
+                               tree name);
+  bool compute_operand2_range (irange &r, gimple *stmt, const irange &lhs,
+                               tree name);
+  bool compute_operand1_and_operand2_range (irange &r, gimple *stmt,
+                                            const irange &lhs, tree name);
+
+  class gori_map *m_gori_map;
+  outgoing_range outgoing;     // Edge values for COND_EXPR & SWITCH_EXPR.
+};
+
+
+// This class adds a cache to gori_computes for logical expressions.
+//       bool result = x && y
+// requires calculation of both X and Y for both true and false results.
+// There are 4 combinations [0,0][0,0] [0,0][1,1] [1,1][0,0] and [1,1][1,1].
+// Note that each pair of possible results for X and Y are used twice, and
+// the calculation of those results are the same each time.
+//
+// The cache simply checks if a stmt is cacheable, and if so, saves both the
+// true and false results for the next time the query is made.
+//
+// This is used to speed up long chains of logical operations which
+// quickly become exponential.
+
+class gori_compute_cache : public gori_compute
+{
+public:
+  gori_compute_cache ();
+  ~gori_compute_cache ();
+protected:
+  // Caching override: answer from the cache when possible, otherwise
+  // defer to gori_compute and cache the statement.
+  virtual bool compute_operand_range (irange &r, gimple *stmt,
+                                      const irange &lhs, tree name);
+private:
+  // Record the true/false ranges for a cacheable statement.
+  void cache_stmt (gimple *);
+  typedef gori_compute super;
+  // Allocated in the constructor and deleted in the destructor.
+  class logical_stmt_cache *m_cache;
+};
+
+#endif // GCC_GIMPLE_RANGE_GORI_H
diff --git a/gcc/gimple-range.cc b/gcc/gimple-range.cc
new file mode 100644
index 0000000..75c03d6
--- /dev/null
+++ b/gcc/gimple-range.cc
@@ -0,0 +1,1284 @@
+/* Code for GIMPLE range related routines.
+   Copyright (C) 2019-2020 Free Software Foundation, Inc.
+   Contributed by Andrew MacLeod
+   and Aldy Hernandez.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "insn-codes.h" +#include "rtl.h" +#include "tree.h" +#include "gimple.h" +#include "ssa.h" +#include "gimple-pretty-print.h" +#include "gimple-iterator.h" +#include "optabs-tree.h" +#include "gimple-fold.h" +#include "tree-cfg.h" +#include "fold-const.h" +#include "tree-cfg.h" +#include "wide-int.h" +#include "fold-const.h" +#include "case-cfn-macros.h" +#include "omp-general.h" +#include "cfgloop.h" +#include "tree-ssa-loop.h" +#include "tree-scalar-evolution.h" +#include "dbgcnt.h" +#include "alloc-pool.h" +#include "vr-values.h" +#include "gimple-range.h" + + +// Adjust the range for a pointer difference where the operands came +// from a memchr. +// +// This notices the following sequence: +// +// def = __builtin_memchr (arg, 0, sz) +// n = def - arg +// +// The range for N can be narrowed to [0, PTRDIFF_MAX - 1]. + +static void +adjust_pointer_diff_expr (irange &res, const gimple *diff_stmt) +{ + tree op0 = gimple_assign_rhs1 (diff_stmt); + tree op1 = gimple_assign_rhs2 (diff_stmt); + tree op0_ptype = TREE_TYPE (TREE_TYPE (op0)); + tree op1_ptype = TREE_TYPE (TREE_TYPE (op1)); + gimple *call; + + if (TREE_CODE (op0) == SSA_NAME + && TREE_CODE (op1) == SSA_NAME + && (call = SSA_NAME_DEF_STMT (op0)) + && is_gimple_call (call) + && gimple_call_builtin_p (call, BUILT_IN_MEMCHR) + && TYPE_MODE (op0_ptype) == TYPE_MODE (char_type_node) + && TYPE_PRECISION (op0_ptype) == TYPE_PRECISION (char_type_node) + && TYPE_MODE (op1_ptype) == TYPE_MODE (char_type_node) + && TYPE_PRECISION (op1_ptype) == TYPE_PRECISION (char_type_node) + && gimple_call_builtin_p (call, BUILT_IN_MEMCHR) + && vrp_operand_equal_p (op1, gimple_call_arg (call, 0)) + && integer_zerop (gimple_call_arg (call, 1))) + { + tree max = vrp_val_max (ptrdiff_type_node); + wide_int wmax = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max))); + tree expr_type = gimple_expr_type (diff_stmt); + tree range_min = 
build_zero_cst (expr_type); + tree range_max = wide_int_to_tree (expr_type, wmax - 1); + int_range<2> r (range_min, range_max); + res.intersect (r); + } +} + +// This function looks for situations when walking the use/def chains +// may provide additonal contextual range information not exposed on +// this statement. Like knowing the IMAGPART return value from a +// builtin function is a boolean result. + +// We should rework how we're called, as we have an op_unknown entry +// for IMAGPART_EXPR and POINTER_DIFF_EXPR in range-ops just so this +// function gets called. + +static void +gimple_range_adjustment (irange &res, const gimple *stmt) +{ + switch (gimple_expr_code (stmt)) + { + case POINTER_DIFF_EXPR: + adjust_pointer_diff_expr (res, stmt); + return; + + case IMAGPART_EXPR: + { + tree name = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); + if (TREE_CODE (name) == SSA_NAME) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (name); + if (def_stmt && is_gimple_call (def_stmt) + && gimple_call_internal_p (def_stmt)) + { + switch (gimple_call_internal_fn (def_stmt)) + { + case IFN_ADD_OVERFLOW: + case IFN_SUB_OVERFLOW: + case IFN_MUL_OVERFLOW: + case IFN_ATOMIC_COMPARE_EXCHANGE: + { + int_range<2> r; + r.set_varying (boolean_type_node); + tree type = TREE_TYPE (gimple_assign_lhs (stmt)); + range_cast (r, type); + res.intersect (r); + } + default: + break; + } + } + } + break; + } + + default: + break; + } +} + +// Return a range in R for the tree EXPR. Return true if a range is +// representable. + +bool +get_tree_range (irange &r, tree expr) +{ + tree type; + if (TYPE_P (expr)) + type = expr; + else + type = TREE_TYPE (expr); + + // Return false if the type isn't suported. + if (!irange::supports_type_p (type)) + return false; + + switch (TREE_CODE (expr)) + { + case INTEGER_CST: + r.set (expr, expr); + return true; + + case SSA_NAME: + r = gimple_range_global (expr); + return true; + + case ADDR_EXPR: + { + // Handle &var which can show up in phi arguments. 
+ bool ov; + if (tree_single_nonzero_warnv_p (expr, &ov)) + { + r = range_nonzero (type); + return true; + } + break; + } + + default: + break; + } + r.set_varying (type); + return true; +} + +// Fold this unary statement using R1 as operand1's range, returning +// the result in RES. Return false if the operation fails. + +bool +gimple_range_fold (irange &res, const gimple *stmt, const irange &r1) +{ + gcc_checking_assert (gimple_range_handler (stmt)); + + tree type = gimple_expr_type (stmt); + // Unary SSA operations require the LHS type as the second range. + int_range<2> r2 (type); + + return gimple_range_fold (res, stmt, r1, r2); +} + +// Fold this binary statement using R1 and R2 as the operands ranges, +// returning the result in RES. Return false if the operation fails. + +bool +gimple_range_fold (irange &res, const gimple *stmt, + const irange &r1, const irange &r2) +{ + gcc_checking_assert (gimple_range_handler (stmt)); + + gimple_range_handler (stmt)->fold_range (res, gimple_expr_type (stmt), + r1, r2); + + // If there are any gimple lookups, do those now. + gimple_range_adjustment (res, stmt); + return true; +} + +// Return the base of the RHS of an assignment. + +tree +gimple_range_base_of_assignment (const gimple *stmt) +{ + gcc_checking_assert (gimple_code (stmt) == GIMPLE_ASSIGN); + tree op1 = gimple_assign_rhs1 (stmt); + if (gimple_assign_rhs_code (stmt) == ADDR_EXPR) + return get_base_address (TREE_OPERAND (op1, 0)); + return op1; +} + +// Return the first operand of this statement if it is a valid operand +// supported by ranges, otherwise return NULL_TREE. Special case is +// &(SSA_NAME expr), return the SSA_NAME instead of the ADDR expr. 
+
+tree
+gimple_range_operand1 (const gimple *stmt)
+{
+  gcc_checking_assert (gimple_range_handler (stmt));
+
+  switch (gimple_code (stmt))
+    {
+      case GIMPLE_COND:
+        return gimple_cond_lhs (stmt);
+      case GIMPLE_ASSIGN:
+        {
+          tree base = gimple_range_base_of_assignment (stmt);
+          if (base && TREE_CODE (base) == MEM_REF)
+            {
+              // If the base address is an SSA_NAME, we return it
+              // here.  This allows processing of the range of that
+              // name, while the rest of the expression is simply
+              // ignored.  The code in range_ops will see the
+              // ADDR_EXPR and do the right thing.
+              tree ssa = TREE_OPERAND (base, 0);
+              if (TREE_CODE (ssa) == SSA_NAME)
+                return ssa;
+            }
+          return base;
+        }
+      default:
+        break;
+    }
+  return NULL_TREE;
+}
+
+// Return the second operand of statement STMT, otherwise return NULL_TREE.
+
+tree
+gimple_range_operand2 (const gimple *stmt)
+{
+  gcc_checking_assert (gimple_range_handler (stmt));
+
+  switch (gimple_code (stmt))
+    {
+    case GIMPLE_COND:
+      return gimple_cond_rhs (stmt);
+    case GIMPLE_ASSIGN:
+      // Only assignments with at least 3 operands have a second
+      // RHS operand.
+      if (gimple_num_ops (stmt) >= 3)
+        return gimple_assign_rhs2 (stmt);
+      break;
+    default:
+      break;
+    }
+  return NULL_TREE;
+}
+
+// Calculate what we can determine of the range of this unary
+// statement's operand if the lhs of the expression has the range
+// LHS_RANGE.  Return false if nothing can be determined.
+
+bool
+gimple_range_calc_op1 (irange &r, const gimple *stmt, const irange &lhs_range)
+{
+  gcc_checking_assert (gimple_num_ops (stmt) < 3);
+
+  // An empty range is viral.
+  tree type = TREE_TYPE (gimple_range_operand1 (stmt));
+  if (lhs_range.undefined_p ())
+    {
+      r.set_undefined ();
+      return true;
+    }
+  // Unary operations require the type of the first operand in the
+  // second range position.
+  int_range<2> type_range (type);
+  return gimple_range_handler (stmt)->op1_range (r, type, lhs_range,
+                                                 type_range);
+}
+
+// Calculate what we can determine of the range of this statement's
+// first operand if the lhs of the expression has the range LHS_RANGE
+// and the second operand has the range OP2_RANGE.  Return false if
+// nothing can be determined.
+
+bool
+gimple_range_calc_op1 (irange &r, const gimple *stmt,
+                       const irange &lhs_range, const irange &op2_range)
+{
+  // Unary operations are allowed to pass a range in for second operand
+  // as there are often additional restrictions beyond the type which
+  // can be imposed.  See operator_cast::op1_range().
+  tree type = TREE_TYPE (gimple_range_operand1 (stmt));
+  // An empty range is viral.
+  if (op2_range.undefined_p () || lhs_range.undefined_p ())
+    {
+      r.set_undefined ();
+      return true;
+    }
+  return gimple_range_handler (stmt)->op1_range (r, type, lhs_range,
+                                                 op2_range);
+}
+
+// Calculate what we can determine of the range of this statement's
+// second operand if the lhs of the expression has the range LHS_RANGE
+// and the first operand has the range OP1_RANGE.  Return false if
+// nothing can be determined.
+
+bool
+gimple_range_calc_op2 (irange &r, const gimple *stmt,
+                       const irange &lhs_range, const irange &op1_range)
+{
+  tree type = TREE_TYPE (gimple_range_operand2 (stmt));
+  // An empty range is viral.
+  if (op1_range.undefined_p () || lhs_range.undefined_p ())
+    {
+      r.set_undefined ();
+      return true;
+    }
+  return gimple_range_handler (stmt)->op2_range (r, type, lhs_range,
+                                                 op1_range);
+}
+
+// Calculate a range for statement S and return it in R.  If NAME is provided it
+// represents the SSA_NAME on the LHS of the statement.  It is only required
+// if there is more than one lhs/output.  If a range cannot
+// be calculated, return false.
+
+bool
+gimple_ranger::calc_stmt (irange &r, gimple *s, tree name)
+{
+  bool res = false;
+  // If name is specified, make sure it is an LHS of S.
+  gcc_checking_assert (name ? SSA_NAME_DEF_STMT (name) == s : true);
+
+  // Dispatch on the kind of statement.
+  if (gimple_range_handler (s))
+    res = range_of_range_op (r, s);
+  else if (is_a<gphi *> (s))
+    res = range_of_phi (r, as_a<gphi *> (s));
+  else if (is_a<gcall *> (s))
+    res = range_of_call (r, as_a<gcall *> (s));
+  else if (is_a<gassign *> (s) && gimple_assign_rhs_code (s) == COND_EXPR)
+    res = range_of_cond_expr (r, as_a<gassign *> (s));
+  else
+    {
+      // If no name is specified, try the expression kind.
+      if (!name)
+        {
+          tree t = gimple_expr_type (s);
+          if (!irange::supports_type_p (t))
+            return false;
+          r.set_varying (t);
+          return true;
+        }
+      // We don't understand the stmt, so return the global range.
+      r = gimple_range_global (name);
+      return true;
+    }
+  if (res)
+    {
+      // An undefined range has no type to adjust.
+      if (r.undefined_p ())
+        return true;
+      // Make sure the result has the type of NAME.
+      if (name && TREE_TYPE (name) != r.type ())
+        range_cast (r, TREE_TYPE (name));
+      return true;
+    }
+  return false;
+}
+
+// Calculate a range for range_op statement S and return it in R.
+// If the operand ranges cannot be determined, R is set to VARYING.
+// This currently always returns true.
+
+bool
+gimple_ranger::range_of_range_op (irange &r, gimple *s)
+{
+  int_range_max range1, range2;
+  tree type = gimple_expr_type (s);
+  gcc_checking_assert (irange::supports_type_p (type));
+
+  tree op1 = gimple_range_operand1 (s);
+  tree op2 = gimple_range_operand2 (s);
+
+  if (range_of_non_trivial_assignment (r, s))
+    return true;
+
+  if (range_of_expr (range1, op1, s))
+    {
+      // Unary statement.
+      if (!op2)
+        return gimple_range_fold (r, s, range1);
+
+      if (range_of_expr (range2, op2, s))
+        return gimple_range_fold (r, s, range1, range2);
+    }
+  r.set_varying (type);
+  return true;
+}
+
+// Calculate the range of a non-trivial assignment.  That is, one
+// involving arithmetic on an SSA name (for example, an ADDR_EXPR).
+// Return the range in R.
+//
+// If a range cannot be calculated, return false.
+ +bool +gimple_ranger::range_of_non_trivial_assignment (irange &r, gimple *stmt) +{ + if (gimple_code (stmt) != GIMPLE_ASSIGN) + return false; + + tree base = gimple_range_base_of_assignment (stmt); + if (base && TREE_CODE (base) == MEM_REF + && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME) + { + int_range_max range1; + tree ssa = TREE_OPERAND (base, 0); + if (range_of_expr (range1, ssa, stmt)) + { + tree type = TREE_TYPE (ssa); + range_operator *op = range_op_handler (POINTER_PLUS_EXPR, type); + int_range<2> offset (TREE_OPERAND (base, 1), TREE_OPERAND (base, 1)); + op->fold_range (r, type, range1, offset); + return true; + } + } + return false; +} + +// Calculate a range for phi statement S and return it in R. +// If a range cannot be calculated, return false. + +bool +gimple_ranger::range_of_phi (irange &r, gphi *phi) +{ + tree phi_def = gimple_phi_result (phi); + tree type = TREE_TYPE (phi_def); + int_range_max arg_range; + unsigned x; + + if (!irange::supports_type_p (type)) + return false; + + // Start with an empty range, unioning in each argument's range. + r.set_undefined (); + for (x = 0; x < gimple_phi_num_args (phi); x++) + { + tree arg = gimple_phi_arg_def (phi, x); + edge e = gimple_phi_arg_edge (phi, x); + + range_on_edge (arg_range, e, arg); + r.union_ (arg_range); + // Once the value reaches varying, stop looking. + if (r.varying_p ()) + break; + } + + // If SCEV is available, query if this PHI has any knonwn values. 
+ if (scev_initialized_p () && !POINTER_TYPE_P (TREE_TYPE (phi_def))) + { + value_range loop_range; + class loop *l = loop_containing_stmt (phi); + if (l) + { + range_of_ssa_name_with_loop_info (loop_range, phi_def, l, phi); + if (!loop_range.varying_p ()) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " Loops range found for "); + print_generic_expr (dump_file, phi_def, TDF_SLIM); + fprintf (dump_file, ": "); + loop_range.dump (dump_file); + fprintf (dump_file, " and calculated range :"); + r.dump (dump_file); + fprintf (dump_file, "\n"); + } + r.intersect (loop_range); + } + } + } + + return true; +} + +// Calculate a range for call statement S and return it in R. +// If a range cannot be calculated, return false. + +bool +gimple_ranger::range_of_call (irange &r, gcall *call) +{ + tree type = gimple_call_return_type (call); + tree lhs = gimple_call_lhs (call); + bool strict_overflow_p; + + if (!irange::supports_type_p (type)) + return false; + + if (range_of_builtin_call (r, call)) + ; + else if (gimple_stmt_nonnegative_warnv_p (call, &strict_overflow_p)) + r.set (build_int_cst (type, 0), TYPE_MAX_VALUE (type)); + else if (gimple_call_nonnull_result_p (call) + || gimple_call_nonnull_arg (call)) + r = range_nonzero (type); + else + r.set_varying (type); + + // If there is an LHS, intersect that with what is known. 
+ if (lhs) + { + value_range def; + def = gimple_range_global (lhs); + r.intersect (def); + } + return true; +} + + +void +gimple_ranger::range_of_builtin_ubsan_call (irange &r, gcall *call, + tree_code code) +{ + gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR + || code == MULT_EXPR); + tree type = gimple_call_return_type (call); + range_operator *op = range_op_handler (code, type); + gcc_checking_assert (op); + int_range_max ir0, ir1; + tree arg0 = gimple_call_arg (call, 0); + tree arg1 = gimple_call_arg (call, 1); + gcc_assert (range_of_expr (ir0, arg0, call)); + gcc_assert (range_of_expr (ir1, arg1, call)); + + bool saved_flag_wrapv = flag_wrapv; + // Pretend the arithmetic is wrapping. If there is any overflow, + // we'll complain, but will actually do wrapping operation. + flag_wrapv = 1; + op->fold_range (r, type, ir0, ir1); + flag_wrapv = saved_flag_wrapv; + + // If for both arguments vrp_valueize returned non-NULL, this should + // have been already folded and if not, it wasn't folded because of + // overflow. Avoid removing the UBSAN_CHECK_* calls in that case. + if (r.singleton_p ()) + r.set_varying (type); +} + + +bool +gimple_ranger::range_of_builtin_call (irange &r, gcall *call) +{ + combined_fn func = gimple_call_combined_fn (call); + if (func == CFN_LAST) + return false; + + tree type = gimple_call_return_type (call); + tree arg; + int mini, maxi, zerov, prec; + scalar_int_mode mode; + + switch (func) + { + case CFN_BUILT_IN_CONSTANT_P: + if (cfun->after_inlining) + { + r.set_zero (type); + // r.equiv_clear (); + return true; + } + arg = gimple_call_arg (call, 0); + if (range_of_expr (r, arg, call) && r.singleton_p ()) + { + r.set (build_one_cst (type), build_one_cst (type)); + return true; + } + break; + + CASE_CFN_FFS: + CASE_CFN_POPCOUNT: + // __builtin_ffs* and __builtin_popcount* return [0, prec]. 
+ arg = gimple_call_arg (call, 0); + prec = TYPE_PRECISION (TREE_TYPE (arg)); + mini = 0; + maxi = prec; + gcc_assert (range_of_expr (r, arg, call)); + // If arg is non-zero, then ffs or popcount are non-zero. + if (!range_includes_zero_p (&r)) + mini = 1; + // If some high bits are known to be zero, decrease the maximum. + if (!r.undefined_p ()) + { + wide_int max = r.upper_bound (); + maxi = wi::floor_log2 (max) + 1; + } + r.set (build_int_cst (type, mini), build_int_cst (type, maxi)); + return true; + + CASE_CFN_PARITY: + r.set (build_zero_cst (type), build_one_cst (type)); + return true; + + CASE_CFN_CLZ: + // __builtin_c[lt]z* return [0, prec-1], except when the + // argument is 0, but that is undefined behavior. + // + // On many targets where the CLZ RTL or optab value is defined + // for 0, the value is prec, so include that in the range by + // default. + arg = gimple_call_arg (call, 0); + prec = TYPE_PRECISION (TREE_TYPE (arg)); + mini = 0; + maxi = prec; + mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); + if (optab_handler (clz_optab, mode) != CODE_FOR_nothing + && CLZ_DEFINED_VALUE_AT_ZERO (mode, zerov) + // Only handle the single common value. + && zerov != prec) + // Magic value to give up, unless we can prove arg is non-zero. + mini = -2; + + gcc_assert (range_of_expr (r, arg, call)); + // From clz of minimum we can compute result maximum. + if (r.constant_p ()) + { + maxi = prec - 1 - wi::floor_log2 (r.lower_bound ()); + if (maxi != prec) + mini = 0; + } + else if (!range_includes_zero_p (&r)) + { + maxi = prec - 1; + mini = 0; + } + if (mini == -2) + break; + // From clz of maximum we can compute result minimum. 
+ if (r.constant_p ()) + { + mini = prec - 1 - wi::floor_log2 (r.upper_bound ()); + if (mini == prec) + break; + } + if (mini == -2) + break; + r.set (build_int_cst (type, mini), build_int_cst (type, maxi)); + return true; + + CASE_CFN_CTZ: + // __builtin_ctz* return [0, prec-1], except for when the + // argument is 0, but that is undefined behavior. + // + // If there is a ctz optab for this mode and + // CTZ_DEFINED_VALUE_AT_ZERO, include that in the range, + // otherwise just assume 0 won't be seen. + arg = gimple_call_arg (call, 0); + prec = TYPE_PRECISION (TREE_TYPE (arg)); + mini = 0; + maxi = prec - 1; + mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg)); + if (optab_handler (ctz_optab, mode) != CODE_FOR_nothing + && CTZ_DEFINED_VALUE_AT_ZERO (mode, zerov)) + { + // Handle only the two common values. + if (zerov == -1) + mini = -1; + else if (zerov == prec) + maxi = prec; + else + // Magic value to give up, unless we can prove arg is non-zero. + mini = -2; + } + gcc_assert (range_of_expr (r, arg, call)); + if (!r.undefined_p ()) + { + if (r.lower_bound () != 0) + { + mini = 0; + maxi = prec - 1; + } + // If some high bits are known to be zero, we can decrease + // the maximum. 
+ wide_int max = r.upper_bound (); + if (max == 0) + break; + maxi = wi::floor_log2 (max); + } + if (mini == -2) + break; + r.set (build_int_cst (type, mini), build_int_cst (type, maxi)); + return true; + + CASE_CFN_CLRSB: + arg = gimple_call_arg (call, 0); + prec = TYPE_PRECISION (TREE_TYPE (arg)); + r.set (build_int_cst (type, 0), build_int_cst (type, prec - 1)); + return true; + case CFN_UBSAN_CHECK_ADD: + range_of_builtin_ubsan_call (r, call, PLUS_EXPR); + return true; + case CFN_UBSAN_CHECK_SUB: + range_of_builtin_ubsan_call (r, call, MINUS_EXPR); + return true; + case CFN_UBSAN_CHECK_MUL: + range_of_builtin_ubsan_call (r, call, MULT_EXPR); + return true; + + case CFN_GOACC_DIM_SIZE: + case CFN_GOACC_DIM_POS: + // Optimizing these two internal functions helps the loop + // optimizer eliminate outer comparisons. Size is [1,N] + // and pos is [0,N-1]. + { + bool is_pos = func == CFN_GOACC_DIM_POS; + int axis = oacc_get_ifn_dim_arg (call); + int size = oacc_get_fn_dim_size (current_function_decl, axis); + if (!size) + // If it's dynamic, the backend might know a hardware limitation. + size = targetm.goacc.dim_limit (axis); + + r.set (build_int_cst (type, is_pos ? 0 : 1), + size + ? build_int_cst (type, size - is_pos) : vrp_val_max (type)); + return true; + } + + case CFN_BUILT_IN_STRLEN: + if (tree lhs = gimple_call_lhs (call)) + if (ptrdiff_type_node + && (TYPE_PRECISION (ptrdiff_type_node) + == TYPE_PRECISION (TREE_TYPE (lhs)))) + { + tree type = TREE_TYPE (lhs); + tree max = vrp_val_max (ptrdiff_type_node); + wide_int wmax + = wi::to_wide (max, TYPE_PRECISION (TREE_TYPE (max))); + tree range_min = build_zero_cst (type); + // To account for the terminating NULL, the maximum length + // is one less than the maximum array size, which in turn + // is one less than PTRDIFF_MAX (or SIZE_MAX where it's + // smaller than the former type). + // FIXME: Use max_object_size() - 1 here. 
+ tree range_max = wide_int_to_tree (type, wmax - 2); + r.set (range_min, range_max); + return true; + } + break; + default: + break; + } + return false; +} + + + +// Calculate a range for COND_EXPR statement S and return it in R. +// If a range cannot be calculated, return false. + +bool +gimple_ranger::range_of_cond_expr (irange &r, gassign *s) +{ + int_range_max cond_range, range1, range2; + tree cond = gimple_assign_rhs1 (s); + tree op1 = gimple_assign_rhs2 (s); + tree op2 = gimple_assign_rhs3 (s); + + gcc_checking_assert (gimple_assign_rhs_code (s) == COND_EXPR); + gcc_checking_assert (useless_type_conversion_p (TREE_TYPE (op1), + TREE_TYPE (op2))); + if (!irange::supports_type_p (TREE_TYPE (op1))) + return false; + + gcc_assert (range_of_expr (cond_range, cond, s)); + gcc_assert (range_of_expr (range1, op1, s)); + gcc_assert (range_of_expr (range2, op2, s)); + + // If the condition is known, choose the appropriate expression. + if (cond_range.singleton_p ()) + { + // False, pick second operand. + if (cond_range.zero_p ()) + r = range2; + else + r = range1; + } + else + { + r = range1; + r.union_ (range2); + } + return true; +} + +bool +gimple_ranger::range_of_expr (irange &r, tree expr, gimple *stmt) +{ + if (!gimple_range_ssa_p (expr)) + return get_tree_range (r, expr); + + // If there is no statement, just get the global value. + if (!stmt) + { + if (!m_cache.m_globals.get_global_range (r, expr)) + r = gimple_range_global (expr); + return true; + } + + basic_block bb = gimple_bb (stmt); + gimple *def_stmt = SSA_NAME_DEF_STMT (expr); + + // If name is defined in this block, try to get an range from S. + if (def_stmt && gimple_bb (def_stmt) == bb) + gcc_assert (range_of_stmt (r, def_stmt, expr)); + else + // Otherwise OP comes from outside this block, use range on entry. + range_on_entry (r, bb, expr); + + // No range yet, see if there is a dereference in the block. 
+ // We don't care if it's between the def and a use within a block + // because the entire block must be executed anyway. + // FIXME:?? For non-call exceptions we could have a statement throw + // which causes an early block exit. + // in which case we may need to walk from S back to the def/top of block + // to make sure the deref happens between S and there before claiming + // there is a deref. Punt for now. + if (!cfun->can_throw_non_call_exceptions && r.varying_p () && + m_cache.m_non_null.non_null_deref_p (expr, bb)) + r = range_nonzero (TREE_TYPE (expr)); + + return true; +} + +// Return the range of NAME on entry to block BB in R. + +void +gimple_ranger::range_on_entry (irange &r, basic_block bb, tree name) +{ + int_range_max entry_range; + gcc_checking_assert (gimple_range_ssa_p (name)); + + // Start with any known range + gcc_assert (range_of_stmt (r, SSA_NAME_DEF_STMT (name), name)); + + // Now see if there is any on_entry value which may refine it. + if (m_cache.block_range (entry_range, bb, name)) + r.intersect (entry_range); +} + +// Calculate the range for NAME at the end of block BB and return it in R. +// Return false if no range can be calculated. + +void +gimple_ranger::range_on_exit (irange &r, basic_block bb, tree name) +{ + // on-exit from the exit block? + gcc_checking_assert (bb != EXIT_BLOCK_PTR_FOR_FN (cfun)); + + gimple *s = last_stmt (bb); + // If there is no statement in the block and this isn't the entry + // block, go get the range_on_entry for this block. For the entry + // block, a NULL stmt will return the global value for NAME. + if (!s && bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)) + range_on_entry (r, bb, name); + else + gcc_assert (range_of_expr (r, name, s)); + gcc_checking_assert (r.undefined_p () + || types_compatible_p (r.type(), TREE_TYPE (name))); +} + +// Calculate a range for NAME on edge E and return it in R. 
+ +bool +gimple_ranger::range_on_edge (irange &r, edge e, tree name) +{ + int_range_max edge_range; + gcc_checking_assert (irange::supports_type_p (TREE_TYPE (name))); + + // PHI arguments can be constants, catch these here. + if (!gimple_range_ssa_p (name)) + { + gcc_assert (range_of_expr (r, name)); + return true; + } + + range_on_exit (r, e->src, name); + gcc_checking_assert (r.undefined_p () + || types_compatible_p (r.type(), TREE_TYPE (name))); + + // Check to see if NAME is defined on edge e. + if (m_cache.outgoing_edge_range_p (edge_range, e, name)) + r.intersect (edge_range); + + return true; +} + +// Calculate a range for statement S and return it in R. If NAME is +// provided it represents the SSA_NAME on the LHS of the statement. +// It is only required if there is more than one lhs/output. Check +// the global cache for NAME first to see if the evaluation can be +// avoided. If a range cannot be calculated, return false. + +bool +gimple_ranger::range_of_stmt (irange &r, gimple *s, tree name) +{ + // If no name, simply call the base routine. + if (!name) + name = gimple_get_lhs (s); + + if (!name) + return calc_stmt (r, s, NULL_TREE); + + gcc_checking_assert (TREE_CODE (name) == SSA_NAME && + irange::supports_type_p (TREE_TYPE (name))); + + // If this STMT has already been processed, return that value. + if (m_cache.m_globals.get_global_range (r, name)) + return true; + // Avoid infinite recursion by initializing global cache + int_range_max tmp = gimple_range_global (name); + m_cache.m_globals.set_global_range (name, tmp); + + gcc_assert (calc_stmt (r, s, name)); + + if (is_a (s)) + r.intersect (tmp); + m_cache.m_globals.set_global_range (name, r); + return true; +} + +// This routine will export whatever global ranges are known to GCC +// SSA_RANGE_NAME_INFO fields. 
+ +void +gimple_ranger::export_global_ranges () +{ + unsigned x; + int_range_max r; + if (dump_file) + { + fprintf (dump_file, "Exported global range table\n"); + fprintf (dump_file, "===========================\n"); + } + + for ( x = 1; x < num_ssa_names; x++) + { + tree name = ssa_name (x); + if (name && !SSA_NAME_IN_FREE_LIST (name) + && gimple_range_ssa_p (name) + && m_cache.m_globals.get_global_range (r, name) + && !r.varying_p()) + { + // Make sure the new range is a subset of the old range. + int_range_max old_range; + old_range = gimple_range_global (name); + old_range.intersect (r); + /* Disable this while we fix tree-ssa/pr61743-2.c. */ + //gcc_checking_assert (old_range == r); + + // WTF? Can't write non-null pointer ranges?? stupid set_range_info! + if (!POINTER_TYPE_P (TREE_TYPE (name)) && !r.undefined_p ()) + { + value_range vr = r; + set_range_info (name, vr); + if (dump_file) + { + print_generic_expr (dump_file, name , TDF_SLIM); + fprintf (dump_file, " --> "); + vr.dump (dump_file); + fprintf (dump_file, "\n"); + fprintf (dump_file, " irange : "); + r.dump (dump_file); + fprintf (dump_file, "\n"); + } + } + } + } +} + +// Print the known table values to file F. + +void +gimple_ranger::dump (FILE *f) +{ + basic_block bb; + + FOR_EACH_BB_FN (bb, cfun) + { + unsigned x; + edge_iterator ei; + edge e; + int_range_max range; + fprintf (f, "\n=========== BB %d ============\n", bb->index); + m_cache.m_on_entry.dump (f, bb); + + dump_bb (f, bb, 4, TDF_NONE); + + // Now find any globals defined in this block. + for (x = 1; x < num_ssa_names; x++) + { + tree name = ssa_name (x); + if (gimple_range_ssa_p (name) && SSA_NAME_DEF_STMT (name) && + gimple_bb (SSA_NAME_DEF_STMT (name)) == bb && + m_cache.m_globals.get_global_range (range, name)) + { + if (!range.varying_p ()) + { + print_generic_expr (f, name, TDF_SLIM); + fprintf (f, " : "); + range.dump (f); + fprintf (f, "\n"); + } + + } + } + + // And now outgoing edges, if they define anything. 
+ FOR_EACH_EDGE (e, ei, bb->succs) + { + for (x = 1; x < num_ssa_names; x++) + { + tree name = gimple_range_ssa_p (ssa_name (x)); + if (name && m_cache.outgoing_edge_range_p (range, e, name)) + { + gimple *s = SSA_NAME_DEF_STMT (name); + // Only print the range if this is the def block, or + // the on entry cache for either end of the edge is + // set. + if ((s && bb == gimple_bb (s)) || + m_cache.block_range (range, bb, name, false) || + m_cache.block_range (range, e->dest, name, false)) + { + range_on_edge (range, e, name); + if (!range.varying_p ()) + { + fprintf (f, "%d->%d ", e->src->index, + e->dest->index); + char c = ' '; + if (e->flags & EDGE_TRUE_VALUE) + fprintf (f, " (T)%c", c); + else if (e->flags & EDGE_FALSE_VALUE) + fprintf (f, " (F)%c", c); + else + fprintf (f, " "); + print_generic_expr (f, name, TDF_SLIM); + fprintf(f, " : \t"); + range.dump(f); + fprintf (f, "\n"); + } + } + } + } + } + } + + m_cache.m_globals.dump (dump_file); + fprintf (f, "\n"); + + if (dump_flags & TDF_DETAILS) + { + fprintf (f, "\nDUMPING GORI MAP\n"); + m_cache.dump (f); + fprintf (f, "\n"); + } +} + +// If SCEV has any information about phi node NAME, return it as a range in R. + +void +gimple_ranger::range_of_ssa_name_with_loop_info (irange &r, tree name, + class loop *l, gphi *phi) +{ + gcc_checking_assert (TREE_CODE (name) == SSA_NAME); + tree min, max, type = TREE_TYPE (name); + if (bounds_of_var_in_loop (&min, &max, this, l, phi, name)) + { + // ?? We could do better here. Since MIN/MAX can only be an + // SSA, SSA +- INTEGER_CST, or INTEGER_CST, we could easily call + // the ranger and solve anything not an integer. + if (TREE_CODE (min) != INTEGER_CST) + min = vrp_val_min (type); + if (TREE_CODE (max) != INTEGER_CST) + max = vrp_val_max (type); + r.set (min, max); + } + else + r.set_varying (type); +} + +// -------------------------------------------------------------------------- +// trace_ranger implementation. 
+ + +trace_ranger::trace_ranger () +{ + indent = 0; + trace_count = 0; +} + +// If dumping, return true and print the prefix for the next output line. + +bool +trace_ranger::dumping (unsigned counter, bool trailing) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + // Print counter index as well as INDENT spaces. + if (!trailing) + fprintf (dump_file, " %-7u ", counter); + else + fprintf (dump_file, " "); + unsigned x; + for (x = 0; x< indent; x++) + fputc (' ', dump_file); + return true; + } + return false; +} + +// After calling a routine, if dumping, print the CALLER, NAME, and RESULT, +// returning RESULT. + +bool +trace_ranger::trailer (unsigned counter, const char *caller, bool result, + tree name, const irange &r) +{ + if (dumping (counter, true)) + { + indent -= bump; + fputs(result ? "TRUE : " : "FALSE : ", dump_file); + fprintf (dump_file, "(%u) ", counter); + fputs (caller, dump_file); + fputs (" (",dump_file); + if (name) + print_generic_expr (dump_file, name, TDF_SLIM); + fputs (") ",dump_file); + if (result) + { + r.dump (dump_file); + fputc('\n', dump_file); + } + else + fputc('\n', dump_file); + // Marks the end of a request. + if (indent == 0) + fputc('\n', dump_file); + } + return result; +} + +// Tracing version of range_on_edge. Call it with printing wrappers. + +bool +trace_ranger::range_on_edge (irange &r, edge e, tree name) +{ + unsigned idx = ++trace_count; + if (dumping (idx)) + { + fprintf (dump_file, "range_on_edge ("); + print_generic_expr (dump_file, name, TDF_SLIM); + fprintf (dump_file, ") on edge %d->%d\n", e->src->index, e->dest->index); + indent += bump; + } + + bool res = gimple_ranger::range_on_edge (r, e, name); + trailer (idx, "range_on_edge", true, name, r); + return res; +} + +// Tracing version of range_on_entry. Call it with printing wrappers. 
+ +void +trace_ranger::range_on_entry (irange &r, basic_block bb, tree name) +{ + unsigned idx = ++trace_count; + if (dumping (idx)) + { + fprintf (dump_file, "range_on_entry ("); + print_generic_expr (dump_file, name, TDF_SLIM); + fprintf (dump_file, ") to BB %d\n", bb->index); + indent += bump; + } + + gimple_ranger::range_on_entry (r, bb, name); + + trailer (idx, "range_on_entry", true, name, r); +} + +// Tracing version of range_on_exit. Call it with printing wrappers. + +void +trace_ranger::range_on_exit (irange &r, basic_block bb, tree name) +{ + unsigned idx = ++trace_count; + if (dumping (idx)) + { + fprintf (dump_file, "range_on_exit ("); + print_generic_expr (dump_file, name, TDF_SLIM); + fprintf (dump_file, ") from BB %d\n", bb->index); + indent += bump; + } + + gimple_ranger::range_on_exit (r, bb, name); + + trailer (idx, "range_on_exit", true, name, r); +} + +// Tracing version of range_of_stmt. Call it with printing wrappers. + +bool +trace_ranger::range_of_stmt (irange &r, gimple *s, tree name) +{ + bool res; + unsigned idx = ++trace_count; + if (dumping (idx)) + { + fprintf (dump_file, "range_of_stmt ("); + if (name) + print_generic_expr (dump_file, name, TDF_SLIM); + fputs (") at stmt ", dump_file); + print_gimple_stmt (dump_file, s, 0, TDF_SLIM); + indent += bump; + } + + res = gimple_ranger::range_of_stmt (r, s, name); + + return trailer (idx, "range_of_stmt", res, name, r); +} + +// Tracing version of range_of_expr. Call it with printing wrappers. 
+ +bool +trace_ranger::range_of_expr (irange &r, tree name, gimple *s) +{ + bool res; + unsigned idx = ++trace_count; + if (dumping (idx)) + { + fprintf (dump_file, "range_of_expr("); + print_generic_expr (dump_file, name, TDF_SLIM); + fputs (")", dump_file); + if (s) + { + fputs (" at stmt ", dump_file); + print_gimple_stmt (dump_file, s, 0, TDF_SLIM); + } + else + fputs ("\n", dump_file); + indent += bump; + } + + res = gimple_ranger::range_of_expr (r, name, s); + + return trailer (idx, "range_of_expr", res, name, r); +} diff --git a/gcc/gimple-range.h b/gcc/gimple-range.h new file mode 100644 index 0000000..4d35e72 --- /dev/null +++ b/gcc/gimple-range.h @@ -0,0 +1,170 @@ +/* Header file for the GIMPLE range interface. + Copyright (C) 2019-2020 Free Software Foundation, Inc. + Contributed by Andrew MacLeod + and Aldy Hernandez . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#ifndef GCC_GIMPLE_RANGE_STMT_H +#define GCC_GIMPLE_RANGE_STMT_H + + +#include "range.h" +#include "range-op.h" +#include "gimple-range-edge.h" +#include "gimple-range-gori.h" +#include "gimple-range-cache.h" +#include "value-query.h" + +// This is the basic range generator interface. +// +// This base class provides all the API entry points, but only provides +// functionality at the statement level. Ie, it can calculate ranges on +// statements, but does no additonal lookup. 
+// +// All the range_of_* methods will return a range if the types is +// supported by the range engine. It may be the full range for the +// type, AKA varying_p or it may be a refined range. If the range +// type is not supported, then false is returned. Non-statement +// related methods return whatever the current global value is. + + +class gimple_ranger : public range_query +{ +public: + gimple_ranger () : m_cache (*this) { } + virtual bool range_of_stmt (irange &r, gimple *, tree name = NULL) OVERRIDE; + virtual bool range_of_expr (irange &r, tree name, gimple * = NULL) OVERRIDE; + virtual bool range_on_edge (irange &r, edge e, tree name) OVERRIDE; + virtual void range_on_entry (irange &r, basic_block bb, tree name); + virtual void range_on_exit (irange &r, basic_block bb, tree name); + void export_global_ranges (); + void dump (FILE *f); +protected: + bool calc_stmt (irange &r, gimple *s, tree name = NULL_TREE); + bool range_of_range_op (irange &r, gimple *s); + bool range_of_call (irange &r, gcall *call); + bool range_of_cond_expr (irange &r, gassign* cond); + ranger_cache m_cache; +private: + bool range_of_phi (irange &r, gphi *phi); + bool range_of_non_trivial_assignment (irange &r, gimple *s); + bool range_of_builtin_call (irange &r, gcall *call); + void range_of_builtin_ubsan_call (irange &r, gcall *call, tree_code code); + bool range_with_loop_info (irange &r, tree name); + void range_of_ssa_name_with_loop_info (irange &, tree, class loop *, + gphi *); +}; + +// Calculate a basic range for a tree expression. +extern bool get_tree_range (irange &r, tree expr); + +// These routines provide a GIMPLE interface to the range-ops code. 
+extern tree gimple_range_operand1 (const gimple *s); +extern tree gimple_range_operand2 (const gimple *s); +extern tree gimple_range_base_of_assignment (const gimple *s); +extern bool gimple_range_fold (irange &res, const gimple *s, + const irange &r1); +extern bool gimple_range_fold (irange &res, const gimple *s, + const irange &r1, + const irange &r2); +extern bool gimple_range_calc_op1 (irange &r, const gimple *s, + const irange &lhs_range); +extern bool gimple_range_calc_op1 (irange &r, const gimple *s, + const irange &lhs_range, + const irange &op2_range); +extern bool gimple_range_calc_op2 (irange &r, const gimple *s, + const irange &lhs_range, + const irange &op1_range); + + +// Return the range_operator pointer for this statement. This routine +// can also be used to gate whether a routine is range-ops enabled. + +static inline range_operator * +gimple_range_handler (const gimple *s) +{ + if ((gimple_code (s) == GIMPLE_ASSIGN) || (gimple_code (s) == GIMPLE_COND)) + return range_op_handler (gimple_expr_code (s), gimple_expr_type (s)); + return NULL; +} + +// Return EXP if it is an SSA_NAME with a type supported by gimple ranges. + +static inline tree +gimple_range_ssa_p (tree exp) +{ + if (exp && TREE_CODE (exp) == SSA_NAME && + !SSA_NAME_IS_VIRTUAL_OPERAND (exp) && + irange::supports_type_p (TREE_TYPE (exp))) + return exp; + return NULL_TREE; +} + +// Return the legacy GCC global range for NAME if it has one, otherwise +// return VARYING. + +static inline value_range +gimple_range_global (tree name) +{ + gcc_checking_assert (gimple_range_ssa_p (name)); + tree type = TREE_TYPE (name); +#if 0 + // Reenable picking up global ranges when we are OK failing tests that look + // for builtin_unreachable in the code, like + // RUNTESTFLAGS=dg.exp=pr61034.C check-g++ + // pre-optimizations (inlining) set a global range which causes the ranger + // to remove the condition which leads to builtin_unreachable. 
+ if (!POINTER_TYPE_P (type) && SSA_NAME_RANGE_INFO (name)) + { + // Return a range from an SSA_NAME's available range. + wide_int min, max; + enum value_range_kind kind = get_range_info (name, &min, &max); + return value_range (type, min, max, kind); + } +#endif + // Otherwise return range for the type. + return value_range (type); +} + + +// This class overloads the ranger routines to provide tracing facilties +// Entry and exit values to each of the APIs is placed in the dumpfile. + +class trace_ranger : public gimple_ranger +{ +public: + trace_ranger (); + virtual bool range_of_stmt (irange &r, gimple *s, tree name = NULL_TREE); + virtual bool range_of_expr (irange &r, tree name, gimple *s = NULL); + virtual bool range_on_edge (irange &r, edge e, tree name); + virtual void range_on_entry (irange &r, basic_block bb, tree name); + virtual void range_on_exit (irange &r, basic_block bb, tree name); +private: + static const unsigned bump = 2; + unsigned indent; + unsigned trace_count; // Current trace index count. + + bool dumping (unsigned counter, bool trailing = false); + bool trailer (unsigned counter, const char *caller, bool result, tree name, + const irange &r); +}; + +// Flag to enable debugging the various internal Caches. +#define DEBUG_RANGE_CACHE (dump_file && (flag_evrp_mode & EVRP_MODE_DEBUG)) + +#endif // GCC_GIMPLE_RANGE_STMT_H -- cgit v1.1 From fcae5121154d1c3382b056bcc2c563cedac28e74 Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Tue, 6 Oct 2020 12:53:09 -0400 Subject: Hybrid EVRP and testcases Provide a hybrid EVRP pass which uses legacy EVRP and adds additonal enhancements from the new ranger infrastructure. A New option is also provided, -fevrp-mode= And adjust testcases gcc/ChangeLog: 2020-10-06 Andrew MacLeod * flag-types.h (enum evrp_mode): New enumerated type EVRP_MODE_*. * common.opt (fevrp-mode): New undocumented flag. * gimple-ssa-evrp.c: Include gimple-range.h (class rvrp_folder): EVRP folding using ranger exclusively. 
(rvrp_folder::rvrp_folder): New. (rvrp_folder::~rvrp_folder): New. (rvrp_folder::value_of_expr): New. Use rangers value_of_expr. (rvrp_folder::value_on_edge): New. Use rangers value_on_edge. (rvrp_folder::value_of_stmt): New. Use rangers value_of_stmt. (rvrp_folder::fold_stmt): New. Call the simplifier. (class hybrid_folder): EVRP folding using both engines. (hybrid_folder::hybrid_folder): New. (hybrid_folder::~hybrid_folder): New. (hybrid_folder::fold_stmt): New. Simplify with one engine, then the other. (hybrid_folder::value_of_expr): New. Use both value routines. (hybrid_folder::value_on_edge): New. Use both value routines. (hybrid_folder::value_of_stmt): New. Use both value routines. (hybrid_folder::choose_value): New. Choose between range_analyzer and rangers values. (execute_early_vrp): Choose a folder based on flag_evrp_mode. * vr-values.c (simplify_using_ranges::fold_cond): Try range_of_stmt first to see if it returns a value. (simplify_using_ranges::simplify_switch_using_ranges): Return true if any changes were made to the switch. gcc/testsuite/ChangeLog: 2020-10-06 Andrew MacLeod * gcc.dg/pr81192.c: Disable EVRP pass. * gcc.dg/tree-ssa/pr77445-2.c: Ditto. * gcc.dg/tree-ssa/ssa-dom-thread-6.c: Adjust. * gcc.dg/tree-ssa/ssa-dom-thread-7.c: Ditto. --- gcc/common.opt | 31 ++++ gcc/flag-types.h | 13 ++ gcc/gimple-ssa-evrp.c | 221 ++++++++++++++++++++++- gcc/testsuite/gcc.dg/pr81192.c | 18 +- gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c | 38 +++- gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c | 27 ++- gcc/vr-values.c | 31 +++- 8 files changed, 365 insertions(+), 16 deletions(-) (limited to 'gcc') diff --git a/gcc/common.opt b/gcc/common.opt index 7e789d1..e2bd90c 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2870,6 +2870,37 @@ ftree-vrp Common Report Var(flag_tree_vrp) Init(0) Optimization Perform Value Range Propagation on trees.
+fevrp-mode= +Common Undocumented Joined RejectNegative Enum(evrp_mode) Var(flag_evrp_mode) Init(EVRP_MODE_EVRP_FIRST) Optimization +-fevrp-mode=[legacy|ranger|legacy-first|ranger-first|ranger-trace|ranger-debug|trace|debug] Specifies the mode Early VRP should operate in. + +Enum +Name(evrp_mode) Type(enum evrp_mode) UnknownError(unknown evrp mode %qs) + +EnumValue +Enum(evrp_mode) String(legacy) Value(EVRP_MODE_EVRP_ONLY) + +EnumValue +Enum(evrp_mode) String(ranger) Value(EVRP_MODE_RVRP_ONLY) + +EnumValue +Enum(evrp_mode) String(legacy-first) Value(EVRP_MODE_EVRP_FIRST) + +EnumValue +Enum(evrp_mode) String(ranger-first) Value(EVRP_MODE_RVRP_FIRST) + +EnumValue +Enum(evrp_mode) String(ranger-trace) Value(EVRP_MODE_RVRP_TRACE) + +EnumValue +Enum(evrp_mode) String(ranger-debug) Value(EVRP_MODE_RVRP_DEBUG) + +EnumValue +Enum(evrp_mode) String(trace) Value(EVRP_MODE_TRACE) + +EnumValue +Enum(evrp_mode) String(debug) Value(EVRP_MODE_DEBUG) + fsplit-paths Common Report Var(flag_split_paths) Init(0) Optimization Split paths leading to loop backedges. diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 852ea76..cd0f7f8 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -382,4 +382,17 @@ enum parloops_schedule_type PARLOOPS_SCHEDULE_RUNTIME }; +/* EVRP mode. */ +enum evrp_mode +{ + EVRP_MODE_EVRP_FIRST = 0, + EVRP_MODE_EVRP_ONLY = 1, + EVRP_MODE_RVRP_ONLY = 2, + EVRP_MODE_RVRP_FIRST = 3, + EVRP_MODE_TRACE = 4, + EVRP_MODE_DEBUG = 8 | EVRP_MODE_TRACE, + EVRP_MODE_RVRP_TRACE = EVRP_MODE_RVRP_ONLY | EVRP_MODE_TRACE, + EVRP_MODE_RVRP_DEBUG = EVRP_MODE_RVRP_ONLY | EVRP_MODE_DEBUG +}; + #endif /* ! GCC_FLAG_TYPES_H */ diff --git a/gcc/gimple-ssa-evrp.c b/gcc/gimple-ssa-evrp.c index 60bf82a..6be32d7a 100644 --- a/gcc/gimple-ssa-evrp.c +++ b/gcc/gimple-ssa-evrp.c @@ -41,6 +41,10 @@ along with GCC; see the file COPYING3. 
If not see #include "tree-cfgcleanup.h" #include "vr-values.h" #include "gimple-ssa-evrp-analyze.h" +#include "gimple-range.h" + +// This is the classic EVRP folder which uses a dominator walk and pushes +// ranges into the next block if it is a single predecessor block. class evrp_folder : public substitute_and_fold_engine { @@ -98,12 +102,195 @@ public: m_range_analyzer.set_defs_to_varying (stmt); } -private: +protected: DISABLE_COPY_AND_ASSIGN (evrp_folder); evrp_range_analyzer m_range_analyzer; simplify_using_ranges simplifier; }; +// This is a ranger based folder which continues to use the dominator +// walk to access the substitute and fold machinery. Ranges are calculated +// on demand. + +class rvrp_folder : public substitute_and_fold_engine +{ +public: + + rvrp_folder () : substitute_and_fold_engine (), m_simplifier () + { + if (flag_evrp_mode & EVRP_MODE_TRACE) + m_ranger = new trace_ranger (); + else + m_ranger = new gimple_ranger (); + m_simplifier.set_range_query (m_ranger); + } + + ~rvrp_folder () + { + if (dump_file && (dump_flags & TDF_DETAILS)) + m_ranger->dump (dump_file); + delete m_ranger; + } + + tree value_of_expr (tree name, gimple *s = NULL) OVERRIDE + { + return m_ranger->value_of_expr (name, s); + } + + tree value_on_edge (edge e, tree name) OVERRIDE + { + return m_ranger->value_on_edge (e, name); + } + + tree value_of_stmt (gimple *s, tree name = NULL) OVERRIDE + { + return m_ranger->value_of_stmt (s, name); + } + + bool fold_stmt (gimple_stmt_iterator *gsi) OVERRIDE + { + return m_simplifier.simplify (gsi); + } + +private: + DISABLE_COPY_AND_ASSIGN (rvrp_folder); + gimple_ranger *m_ranger; + simplify_using_ranges m_simplifier; +}; + +// In a hybrid folder, start with an EVRP folder, and add the required +// fold_stmt bits to either try the ranger first or second. +// +// The 3 value_* routines will always query both EVRP and the ranger for +// a result, and ensure they return the same value. 
If either returns a value +// when the other doesn't, it is flagged in the listing, and the discovered +// value is returned. +// +// The simplifier is unable to process 2 different sources, thus we try to +// use one engine, and if it fails to simplify, try using the other engine. +// It is reported when the first attempt fails and the second succeeds. + +class hybrid_folder : public evrp_folder +{ +public: + hybrid_folder (bool evrp_first) + { + if (flag_evrp_mode & EVRP_MODE_TRACE) + m_ranger = new trace_ranger (); + else + m_ranger = new gimple_ranger (); + + if (evrp_first) + { + first = &m_range_analyzer; + second = m_ranger; + } + else + { + first = m_ranger; + second = &m_range_analyzer; + } + } + + ~hybrid_folder () + { + if (dump_file && (dump_flags & TDF_DETAILS)) + m_ranger->dump (dump_file); + delete m_ranger; + } + + bool fold_stmt (gimple_stmt_iterator *gsi) OVERRIDE + { + simplifier.set_range_query (first); + if (simplifier.simplify (gsi)) + return true; + + simplifier.set_range_query (second); + if (simplifier.simplify (gsi)) + { + if (dump_file) + fprintf (dump_file, "EVRP:hybrid: Second query simplifed stmt\n"); + return true; + } + return false; + } + + tree value_of_expr (tree name, gimple *) OVERRIDE; + tree value_on_edge (edge, tree name) OVERRIDE; + tree value_of_stmt (gimple *, tree name) OVERRIDE; + +private: + DISABLE_COPY_AND_ASSIGN (hybrid_folder); + gimple_ranger *m_ranger; + range_query *first; + range_query *second; + tree choose_value (tree evrp_val, tree ranger_val); +}; + + +tree +hybrid_folder::value_of_expr (tree op, gimple *stmt) +{ + tree evrp_ret = evrp_folder::value_of_expr (op, stmt); + tree ranger_ret = m_ranger->value_of_expr (op, stmt); + return choose_value (evrp_ret, ranger_ret); +} + +tree +hybrid_folder::value_on_edge (edge e, tree op) +{ + tree evrp_ret = evrp_folder::value_on_edge (e, op); + tree ranger_ret = m_ranger->value_on_edge (e, op); + return choose_value (evrp_ret, ranger_ret); +} + +tree
+hybrid_folder::value_of_stmt (gimple *stmt, tree op) +{ + tree evrp_ret = evrp_folder::value_of_stmt (stmt, op); + tree ranger_ret = m_ranger->value_of_stmt (stmt, op); + return choose_value (evrp_ret, ranger_ret); +} + +// Given trees returned by EVRP and Ranger, choose/report the value to use +// by the folder. + +tree +hybrid_folder::choose_value (tree evrp_val, tree ranger_val) +{ + if (!ranger_val) + { + // If neither returned a value, return NULL_TREE. + if (!evrp_val) + return NULL_TREE; + + // Otherwise EVRP found something. + if (dump_file) + { + fprintf (dump_file, "EVRP:hybrid: EVRP found singleton "); + print_generic_expr (dump_file, evrp_val); + fprintf (dump_file, "\n"); + } + return evrp_val; + } + + // Otherwise ranger found a value, if they match we're good. + if (evrp_val && !compare_values (evrp_val, ranger_val)) + return evrp_val; + + // We should never get different singletons. + gcc_checking_assert (!evrp_val); + + // Now ranger has found a value, but EVRP did not. + if (dump_file) + { + fprintf (dump_file, "EVRP:hybrid: RVRP found singleton "); + print_generic_expr (dump_file, ranger_val); + fprintf (dump_file, "\n"); + } + return ranger_val; +} + /* Main entry point for the early vrp pass which is a simplified non-iterative version of vrp where basic blocks are visited in dominance order. Value ranges discovered in early vrp will also be used by ipa-vrp. */ @@ -120,8 +307,36 @@ execute_early_vrp () scev_initialize (); calculate_dominance_info (CDI_DOMINATORS); - evrp_folder folder; - folder.substitute_and_fold (); + // only the last 2 bits matter for choosing the folder. 
+ switch (flag_evrp_mode & EVRP_MODE_RVRP_FIRST) + { + case EVRP_MODE_EVRP_ONLY: + { + evrp_folder folder; + folder.substitute_and_fold (); + break; + } + case EVRP_MODE_RVRP_ONLY: + { + rvrp_folder folder; + folder.substitute_and_fold (); + break; + } + case EVRP_MODE_EVRP_FIRST: + { + hybrid_folder folder (true); + folder.substitute_and_fold (); + break; + } + case EVRP_MODE_RVRP_FIRST: + { + hybrid_folder folder (false); + folder.substitute_and_fold (); + break; + } + default: + gcc_unreachable (); + } scev_finalize (); loop_optimizer_finalize (); diff --git a/gcc/testsuite/gcc.dg/pr81192.c b/gcc/testsuite/gcc.dg/pr81192.c index 0049f37..71bbc13 100644 --- a/gcc/testsuite/gcc.dg/pr81192.c +++ b/gcc/testsuite/gcc.dg/pr81192.c @@ -1,4 +1,20 @@ -/* { dg-options "-Os -fdump-tree-pre-details" } */ +/* { dg-options "-Os -fdump-tree-pre-details -fdisable-tree-evrp" } */ + +/* Disable tree-evrp because the new version of evrp sees + <bb 3> : + if (j_8(D) != 2147483647) + goto <bb 4>; [50.00%] + else + goto <bb 5>; [50.00%] + <bb 4> : + iftmp.2_11 = j_8(D) + 1; + <bb 5> : + # iftmp.2_12 = PHI <j_8(D)(3), iftmp.2_11(4)> + +EVRP now recognizes a constant can be propagated into the 3->5 edge and +produces + # iftmp.2_12 = PHI <2147483647(3), iftmp.2_11(4)> +which causes the situation being tested to disappear before we get to PRE.
*/ #if __SIZEOF_INT__ == 2 #define unsigned __UINT32_TYPE__ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c index 9c22c53..cf74e15 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-thread-details-blocks-stats" } */ +/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-thread-details-blocks-stats" } */ typedef enum STATES { START=0, INVALID, diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c index 551fbac..16a9ef4 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c @@ -1,7 +1,41 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-thread1-details -fdump-tree-thread2-details" } */ -/* { dg-final { scan-tree-dump-times "FSM" 3 "thread1" } } */ -/* { dg-final { scan-tree-dump-times "FSM" 5 "thread2" } } */ + +/* All the threads in the thread1 dump start on a X->BB12 edge, as can + be seen in the dump: + + Registering FSM jump thread: (x, 12) incoming edge; ... + etc + etc + + Before the new evrp, we were threading paths that started at the + following edges: + + Registering FSM jump thread: (10, 12) incoming edge + Registering FSM jump thread: (6, 12) incoming edge + Registering FSM jump thread: (9, 12) incoming edge + + This was because the PHI at BB12 had constant values coming in from + BB10, BB6, and BB9: + + # state_10 = PHI + + Now with the new evrp, we get: + + # state_10 = PHI <0(7), 0(10), state_11(5), 1(6), 0(8), 2(9), 1(11)> + + Thus, we have 3 more paths that are known to be constant and can be + threaded. Which means that by the second threading pass, we can + only find one profitable path. + + For the record, all these extra constants are better paths coming + out of switches. 
For example: + + SWITCH_BB -> BBx -> BBy -> BBz -> PHI + + We now know the value of the switch index at PHI. */ +/* { dg-final { scan-tree-dump-times "FSM" 6 "thread1" } } */ +/* { dg-final { scan-tree-dump-times "FSM" 1 "thread2" } } */ int sum0, sum1, sum2, sum3; int foo (char *s, char **ret) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c index a339557..bad5bc1 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c @@ -1,20 +1,31 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-thread1-stats -fdump-tree-thread2-stats -fdump-tree-dom2-stats -fdump-tree-thread3-stats -fdump-tree-dom3-stats -fdump-tree-vrp2-stats -fno-guess-branch-probability" } */ -/* { dg-final { scan-tree-dump "Jumps threaded: 16" "thread1" } } */ -/* { dg-final { scan-tree-dump "Jumps threaded: 9" "thread2" } } */ + +/* Here we have the same issue as was commented in ssa-dom-thread-6.c. + The PHI coming into the threader has a lot more constants, so the + threader can thread more paths. + +$ diff clean/a.c.105t.mergephi2 a.c.105t.mergephi2 +252c252 +< # s_50 = PHI +--- +> # s_50 = PHI +272a273 + + I spot checked a few and they all have the same pattern. We are + basically tracking the switch index better through multiple + paths. */ + +/* { dg-final { scan-tree-dump "Jumps threaded: 19" "thread1" } } */ +/* { dg-final { scan-tree-dump "Jumps threaded: 8" "thread2" } } */ /* { dg-final { scan-tree-dump-not "Jumps threaded" "dom2" } } */ + /* aarch64 has the highest CASE_VALUES_THRESHOLD in GCC. It's high enough to change decisions in switch expansion which in turn can expose new jump threading opportunities. Skip the later tests on aarch64. */ /* { dg-final { scan-tree-dump-not "Jumps threaded" "dom3" { target { ! aarch64*-*-* } } } } */ /* { dg-final { scan-tree-dump-not "Jumps threaded" "vrp2" { target { ! 
aarch64*-*-* } } } } */ -/* Most architectures get 3 threadable paths here, whereas aarch64 and - possibly others get 5. We really should rewrite threading tests to - test a specific IL sequence, not gobs of code whose IL can vary - from architecture to architecture. */ -/* { dg-final { scan-tree-dump "Jumps threaded: \[35\]" "thread3" } } */ - enum STATE { S0=0, SI, diff --git a/gcc/vr-values.c b/gcc/vr-values.c index 4d7dfd0..88aa672 100644 --- a/gcc/vr-values.c +++ b/gcc/vr-values.c @@ -3606,6 +3606,35 @@ simplify_using_ranges::fold_cond (gcond *cond) some point we should merge all variants of this code. */ edge taken_edge; vrp_visit_cond_stmt (cond, &taken_edge); + + int_range_max r; + if (query->range_of_stmt (r, cond) && r.singleton_p ()) + { + // COND has already been folded if arguments are constant. + if (TREE_CODE (gimple_cond_lhs (cond)) != SSA_NAME + && TREE_CODE (gimple_cond_rhs (cond)) != SSA_NAME) + return false; + + if (r.zero_p ()) + { + gcc_checking_assert (!taken_edge + || taken_edge->flags & EDGE_FALSE_VALUE); + if (dump_file && (dump_flags & TDF_DETAILS) && !taken_edge) + fprintf (dump_file, "\nPredicate evaluates to: 0\n"); + gimple_cond_make_false (cond); + } + else + { + gcc_checking_assert (!taken_edge + || taken_edge->flags & EDGE_TRUE_VALUE); + if (dump_file && (dump_flags & TDF_DETAILS) && !taken_edge) + fprintf (dump_file, "\nPredicate evaluates to: 1\n"); + gimple_cond_make_true (cond); + } + update_stmt (cond); + return true; + } + if (taken_edge) { if (taken_edge->flags & EDGE_TRUE_VALUE) @@ -3947,7 +3976,7 @@ simplify_using_ranges::simplify_switch_using_ranges (gswitch *stmt) su.stmt = stmt; su.vec = vec2; to_update_switch_stmts.safe_push (su); - return false; + return true; } void -- cgit v1.1 From f5431aeaf5551995e1ec4455e33545bbc1170a1b Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Sun, 4 Oct 2020 23:40:40 -0400 Subject: compiler: avoid undefined behavior in Import::read For some implementations of Stream, advancing the 
stream will invalidate the previously-returned peek buffer. Copy the peek buffer before advancing in Import::read to avoid this undefined behavior. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/259438 --- gcc/go/gofrontend/MERGE | 2 +- gcc/go/gofrontend/gogo.cc | 2 +- gcc/go/gofrontend/import.cc | 9 +++++---- gcc/go/gofrontend/import.h | 8 ++++---- 4 files changed, 11 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 701b2d4..c5c02aa 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -d00febdab0535546ccbf1ef634be1f23b09c8b77 +613e530547549f4220c4571ea913acbe5fa56f72 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. diff --git a/gcc/go/gofrontend/gogo.cc b/gcc/go/gofrontend/gogo.cc index aef1c47..f40f131 100644 --- a/gcc/go/gofrontend/gogo.cc +++ b/gcc/go/gofrontend/gogo.cc @@ -6212,7 +6212,7 @@ Function::import_func(Import* imp, std::string* pname, return false; } - *body = imp->read(static_cast(llen)); + imp->read(static_cast(llen), body); } return true; diff --git a/gcc/go/gofrontend/import.cc b/gcc/go/gofrontend/import.cc index 081afef..c6c1178 100644 --- a/gcc/go/gofrontend/import.cc +++ b/gcc/go/gofrontend/import.cc @@ -1375,8 +1375,8 @@ Import::read_name() // Read LENGTH bytes from the stream. -std::string -Import::read(size_t length) +void +Import::read(size_t length, std::string* out) { const char* data; if (!this->stream_->peek(length, &data)) @@ -1385,10 +1385,11 @@ Import::read(size_t length) go_error_at(this->location_, "import error at %d: expected %d bytes", this->stream_->pos(), static_cast(length)); this->stream_->set_saw_error(); - return ""; + *out = std::string(""); + return; } + *out = std::string(data, length); this->advance(length); - return std::string(data, length); } // Turn a string into a integer with appropriate error handling. 
diff --git a/gcc/go/gofrontend/import.h b/gcc/go/gofrontend/import.h index b12b3b8..1d8aae4 100644 --- a/gcc/go/gofrontend/import.h +++ b/gcc/go/gofrontend/import.h @@ -240,10 +240,10 @@ class Import : public Import_expression get_char() { return this->stream_->get_char(); } - // Read LENGTH characters into a string and advance past them. On - // EOF reports an error and returns an empty string. - std::string - read(size_t length); + // Read LENGTH characters into *OUT and advance past them. On + // EOF reports an error and sets *OUT to an empty string. + void + read(size_t length, std::string* out); // Return true at the end of the stream. bool -- cgit v1.1 From a9802204603616df14ed47d05f1b86f1bd08d8fb Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Tue, 6 Oct 2020 23:34:21 +0200 Subject: c-c++-common/goacc/declare-pr90861.c: Remove xfail gcc/testsuite/ChangeLog PR middle-end/90861 * c-c++-common/goacc/declare-pr90861.c: Remove xfail. --- gcc/testsuite/c-c++-common/goacc/declare-pr90861.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/c-c++-common/goacc/declare-pr90861.c b/gcc/testsuite/c-c++-common/goacc/declare-pr90861.c index 7c90562..c5487bd 100644 --- a/gcc/testsuite/c-c++-common/goacc/declare-pr90861.c +++ b/gcc/testsuite/c-c++-common/goacc/declare-pr90861.c @@ -17,5 +17,5 @@ void f2 (void) int A_f2[N_f2]; #pragma acc declare copy(A_f2) /* { dg-final { scan-tree-dump-times {#pragma omp target oacc_declare map\(to:\(\*A_f2} 1 gimple } } - { dg-final { scan-tree-dump-times {#pragma omp target oacc_declare map\(from:\(\*A_f2} 1 gimple { xfail *-*-* } } } TODO PR90861 */ + { dg-final { scan-tree-dump-times {#pragma omp target oacc_declare map\(from:\(\*A_f2} 1 gimple } } */ } -- cgit v1.1 From 85307b4e938d42201d6c232f5d9259f91133a303 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Mon, 5 Oct 2020 17:48:19 -0400 Subject: c++: typename in out-of-class member function definitions [PR97297] I was notified 
that our P0634R3 (Down with typename) implementation has a flaw: when we have an out-of-class member function definition, we still required 'typename' for its parameters. For example here: template struct S { int simple(T::type); }; template int S::simple(/* typename */T::type) { return 0; } the 'typename' isn't necessary per [temp.res]/5.2.4. We have a qualified name here ("S::simple") so we know it's already been declared so we can look it up to see if it's a function template or a variable template. In this case, the P0634R3 code in cp_parser_direct_declarator wasn't looking into uninstantiated templates and didn't find the member function 'simple' -- cp_parser_lookup_name returned a SCOPE_REF which means that the qualifying scope was dependent. With this fix, we find the BASELINK for 'simple', don't clear CP_PARSER_FLAGS_TYPENAME_OPTIONAL from the flags, and the typename is implicitly assumed. gcc/cp/ChangeLog: PR c++/97297 * parser.c (cp_parser_direct_declarator): When checking if a name is a function template declaration for the P0634R3 case, look in uninstantiated templates too. gcc/testsuite/ChangeLog: PR c++/97297 * g++.dg/cpp2a/typename18.C: New test. --- gcc/cp/parser.c | 10 ++++++++-- gcc/testsuite/g++.dg/cpp2a/typename18.C | 21 +++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/typename18.C (limited to 'gcc') diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index cb44227..2002c05 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -21788,8 +21788,14 @@ cp_parser_direct_declarator (cp_parser* parser, templates, assume S::p to name a type. Otherwise, don't. 
*/ tree decl - = cp_parser_lookup_name_simple (parser, unqualified_name, - token->location); + = cp_parser_lookup_name (parser, unqualified_name, + none_type, + /*is_template=*/false, + /*is_namespace=*/false, + /*check_dependency=*/false, + /*ambiguous_decls=*/NULL, + token->location); + if (!is_overloaded_fn (decl) /* Allow template diff --git a/gcc/testsuite/g++.dg/cpp2a/typename18.C b/gcc/testsuite/g++.dg/cpp2a/typename18.C new file mode 100644 index 0000000..9946866 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/typename18.C @@ -0,0 +1,21 @@ +// PR c++/97297 +// { dg-do compile { target c++20 } } + +template +struct S { + int simple(T::type); + + template + int member(U::type); +}; + +template +int S::simple(T::type) { + return 1; +} + +template +template +int S::member(U::type) { + return 2; +} -- cgit v1.1 From 8e97b9052dbe0ead25019d4b37490f285d1f9c94 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 7 Oct 2020 00:16:35 +0000 Subject: Daily bump. --- gcc/ChangeLog | 664 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/cp/ChangeLog | 7 + gcc/testsuite/ChangeLog | 55 ++++ 4 files changed, 727 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 743e497..5f9e8ac 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,667 @@ +2020-10-06 Andrew MacLeod + + * flag-types.h (enum evrp_mode): New enumerated type EVRP_MODE_*. + * common.opt (fevrp-mode): New undocumented flag. + * gimple-ssa-evrp.c: Include gimple-range.h + (class rvrp_folder): EVRP folding using ranger exclusively. + (rvrp_folder::rvrp_folder): New. + (rvrp_folder::~rvrp_folder): New. + (rvrp_folder::value_of_expr): New. Use rangers value_of_expr. + (rvrp_folder::value_on_edge): New. Use rangers value_on_edge. + (rvrp_folder::value_of_Stmt): New. Use rangers value_of_stmt. + (rvrp_folder::fold_stmt): New. Call the simplifier. + (class hybrid_folder): EVRP folding using both engines. + (hybrid_folder::hybrid_folder): New. 
+ (hybrid_folder::~hybrid_folder): New. + (hybrid_folder::fold_stmt): New. Simplify with one engine, then the + other. + (hybrid_folder::value_of_expr): New. Use both value routines. + (hybrid_folder::value_on_edge): New. Use both value routines. + (hybrid_folder::value_of_stmt): New. Use both value routines. + (hybrid_folder::choose_value): New. Choose between range_analyzer and + rangers values. + (execute_early_vrp): Choose a folder based on flag_evrp_mode. + * vr-values.c (simplify_using_ranges::fold_cond): Try range_of_stmt + first to see if it returns a value. + (simplify_using_ranges::simplify_switch_using_ranges): Return true if + any changes were made to the switch. + +2020-10-06 Andrew MacLeod + + * Makefile.in (OBJS): Add gimple-range*.o. + * gimple-range.h: New file. + * gimple-range.cc: New file. + * gimple-range-cache.h: New file. + * gimple-range-cache.cc: New file. + * gimple-range-edge.h: New file. + * gimple-range-edge.cc: New file. + * gimple-range-gori.h: New file. + * gimple-range-gori.cc: New file. + +2020-10-06 Dennis Zhang + + * config/arm/arm.c (arm_preferred_simd_mode): Enable MVE SIMD modes. + +2020-10-06 Tom de Vries + + PR middle-end/90861 + * gimplify.c (gimplify_bind_expr): Handle lookup in + oacc_declare_returns using key with decl-expr. + +2020-10-06 Srinath Parvathaneni + + * config/arm/iterators.md (MVE_types): Move mode iterator from mve.md to + iterators.md. + (MVE_VLD_ST): Likewise. + (MVE_0): Likewise. + (MVE_1): Likewise. + (MVE_3): Likewise. + (MVE_2): Likewise. + (MVE_5): Likewise. + (MVE_6): Likewise. + (MVE_CNVT): Move mode attribute iterator from mve.md to iterators.md. + (MVE_LANES): Likewise. + (MVE_constraint): Likewise. + (MVE_constraint1): Likewise. + (MVE_constraint2): Likewise. + (MVE_constraint3): Likewise. + (MVE_pred): Likewise. + (MVE_pred1): Likewise. + (MVE_pred2): Likewise. + (MVE_pred3): Likewise. + (MVE_B_ELEM): Likewise. + (MVE_H_ELEM): Likewise. + (V_sz_elem1): Likewise. + (V_extr_elem): Likewise.
+ (earlyclobber_32): Likewise. + (supf): Move int attribute from mve.md to iterators.md. + (mode1): Likewise. + (VCVTQ_TO_F): Move int iterator from mve.md to iterators.md. + (VMVNQ_N): Likewise. + (VREV64Q): Likewise. + (VCVTQ_FROM_F): Likewise. + (VREV16Q): Likewise. + (VCVTAQ): Likewise. + (VMVNQ): Likewise. + (VDUPQ_N): Likewise. + (VCLZQ): Likewise. + (VADDVQ): Likewise. + (VREV32Q): Likewise. + (VMOVLBQ): Likewise. + (VMOVLTQ): Likewise. + (VCVTPQ): Likewise. + (VCVTNQ): Likewise. + (VCVTMQ): Likewise. + (VADDLVQ): Likewise. + (VCTPQ): Likewise. + (VCTPQ_M): Likewise. + (VCVTQ_N_TO_F): Likewise. + (VCREATEQ): Likewise. + (VSHRQ_N): Likewise. + (VCVTQ_N_FROM_F): Likewise. + (VADDLVQ_P): Likewise. + (VCMPNEQ): Likewise. + (VSHLQ): Likewise. + (VABDQ): Likewise. + (VADDQ_N): Likewise. + (VADDVAQ): Likewise. + (VADDVQ_P): Likewise. + (VANDQ): Likewise. + (VBICQ): Likewise. + (VBRSRQ_N): Likewise. + (VCADDQ_ROT270): Likewise. + (VCADDQ_ROT90): Likewise. + (VCMPEQQ): Likewise. + (VCMPEQQ_N): Likewise. + (VCMPNEQ_N): Likewise. + (VEORQ): Likewise. + (VHADDQ): Likewise. + (VHADDQ_N): Likewise. + (VHSUBQ): Likewise. + (VHSUBQ_N): Likewise. + (VMAXQ): Likewise. + (VMAXVQ): Likewise. + (VMINQ): Likewise. + (VMINVQ): Likewise. + (VMLADAVQ): Likewise. + (VMULHQ): Likewise. + (VMULLBQ_INT): Likewise. + (VMULLTQ_INT): Likewise. + (VMULQ): Likewise. + (VMULQ_N): Likewise. + (VORNQ): Likewise. + (VORRQ): Likewise. + (VQADDQ): Likewise. + (VQADDQ_N): Likewise. + (VQRSHLQ): Likewise. + (VQRSHLQ_N): Likewise. + (VQSHLQ): Likewise. + (VQSHLQ_N): Likewise. + (VQSHLQ_R): Likewise. + (VQSUBQ): Likewise. + (VQSUBQ_N): Likewise. + (VRHADDQ): Likewise. + (VRMULHQ): Likewise. + (VRSHLQ): Likewise. + (VRSHLQ_N): Likewise. + (VRSHRQ_N): Likewise. + (VSHLQ_N): Likewise. + (VSHLQ_R): Likewise. + (VSUBQ): Likewise. + (VSUBQ_N): Likewise. + (VADDLVAQ): Likewise. + (VBICQ_N): Likewise. + (VMLALDAVQ): Likewise. + (VMLALDAVXQ): Likewise. + (VMOVNBQ): Likewise. + (VMOVNTQ): Likewise. 
+ (VORRQ_N): Likewise. + (VQMOVNBQ): Likewise. + (VQMOVNTQ): Likewise. + (VSHLLBQ_N): Likewise. + (VSHLLTQ_N): Likewise. + (VRMLALDAVHQ): Likewise. + (VBICQ_M_N): Likewise. + (VCVTAQ_M): Likewise. + (VCVTQ_M_TO_F): Likewise. + (VQRSHRNBQ_N): Likewise. + (VABAVQ): Likewise. + (VSHLCQ): Likewise. + (VRMLALDAVHAQ): Likewise. + (VADDVAQ_P): Likewise. + (VCLZQ_M): Likewise. + (VCMPEQQ_M_N): Likewise. + (VCMPEQQ_M): Likewise. + (VCMPNEQ_M_N): Likewise. + (VCMPNEQ_M): Likewise. + (VDUPQ_M_N): Likewise. + (VMAXVQ_P): Likewise. + (VMINVQ_P): Likewise. + (VMLADAVAQ): Likewise. + (VMLADAVQ_P): Likewise. + (VMLAQ_N): Likewise. + (VMLASQ_N): Likewise. + (VMVNQ_M): Likewise. + (VPSELQ): Likewise. + (VQDMLAHQ_N): Likewise. + (VQRDMLAHQ_N): Likewise. + (VQRDMLASHQ_N): Likewise. + (VQRSHLQ_M_N): Likewise. + (VQSHLQ_M_R): Likewise. + (VREV64Q_M): Likewise. + (VRSHLQ_M_N): Likewise. + (VSHLQ_M_R): Likewise. + (VSLIQ_N): Likewise. + (VSRIQ_N): Likewise. + (VMLALDAVQ_P): Likewise. + (VQMOVNBQ_M): Likewise. + (VMOVLTQ_M): Likewise. + (VMOVNBQ_M): Likewise. + (VRSHRNTQ_N): Likewise. + (VORRQ_M_N): Likewise. + (VREV32Q_M): Likewise. + (VREV16Q_M): Likewise. + (VQRSHRNTQ_N): Likewise. + (VMOVNTQ_M): Likewise. + (VMOVLBQ_M): Likewise. + (VMLALDAVAQ): Likewise. + (VQSHRNBQ_N): Likewise. + (VSHRNBQ_N): Likewise. + (VRSHRNBQ_N): Likewise. + (VMLALDAVXQ_P): Likewise. + (VQMOVNTQ_M): Likewise. + (VMVNQ_M_N): Likewise. + (VQSHRNTQ_N): Likewise. + (VMLALDAVAXQ): Likewise. + (VSHRNTQ_N): Likewise. + (VCVTMQ_M): Likewise. + (VCVTNQ_M): Likewise. + (VCVTPQ_M): Likewise. + (VCVTQ_M_N_FROM_F): Likewise. + (VCVTQ_M_FROM_F): Likewise. + (VRMLALDAVHQ_P): Likewise. + (VADDLVAQ_P): Likewise. + (VABAVQ_P): Likewise. + (VSHLQ_M): Likewise. + (VSRIQ_M_N): Likewise. + (VSUBQ_M): Likewise. + (VCVTQ_M_N_TO_F): Likewise. + (VHSUBQ_M): Likewise. + (VSLIQ_M_N): Likewise. + (VRSHLQ_M): Likewise. + (VMINQ_M): Likewise. + (VMULLBQ_INT_M): Likewise. + (VMULHQ_M): Likewise. + (VMULQ_M): Likewise. 
+ (VHSUBQ_M_N): Likewise. + (VHADDQ_M_N): Likewise. + (VORRQ_M): Likewise. + (VRMULHQ_M): Likewise. + (VQADDQ_M): Likewise. + (VRSHRQ_M_N): Likewise. + (VQSUBQ_M_N): Likewise. + (VADDQ_M): Likewise. + (VORNQ_M): Likewise. + (VRHADDQ_M): Likewise. + (VQSHLQ_M): Likewise. + (VANDQ_M): Likewise. + (VBICQ_M): Likewise. + (VSHLQ_M_N): Likewise. + (VCADDQ_ROT270_M): Likewise. + (VQRSHLQ_M): Likewise. + (VQADDQ_M_N): Likewise. + (VADDQ_M_N): Likewise. + (VMAXQ_M): Likewise. + (VQSUBQ_M): Likewise. + (VMLASQ_M_N): Likewise. + (VMLADAVAQ_P): Likewise. + (VBRSRQ_M_N): Likewise. + (VMULQ_M_N): Likewise. + (VCADDQ_ROT90_M): Likewise. + (VMULLTQ_INT_M): Likewise. + (VEORQ_M): Likewise. + (VSHRQ_M_N): Likewise. + (VSUBQ_M_N): Likewise. + (VHADDQ_M): Likewise. + (VABDQ_M): Likewise. + (VMLAQ_M_N): Likewise. + (VQSHLQ_M_N): Likewise. + (VMLALDAVAQ_P): Likewise. + (VMLALDAVAXQ_P): Likewise. + (VQRSHRNBQ_M_N): Likewise. + (VQRSHRNTQ_M_N): Likewise. + (VQSHRNBQ_M_N): Likewise. + (VQSHRNTQ_M_N): Likewise. + (VRSHRNBQ_M_N): Likewise. + (VRSHRNTQ_M_N): Likewise. + (VSHLLBQ_M_N): Likewise. + (VSHLLTQ_M_N): Likewise. + (VSHRNBQ_M_N): Likewise. + (VSHRNTQ_M_N): Likewise. + (VSTRWSBQ): Likewise. + (VSTRBSOQ): Likewise. + (VSTRBQ): Likewise. + (VLDRBGOQ): Likewise. + (VLDRBQ): Likewise. + (VLDRWGBQ): Likewise. + (VLD1Q): Likewise. + (VLDRHGOQ): Likewise. + (VLDRHGSOQ): Likewise. + (VLDRHQ): Likewise. + (VLDRWQ): Likewise. + (VLDRDGBQ): Likewise. + (VLDRDGOQ): Likewise. + (VLDRDGSOQ): Likewise. + (VLDRWGOQ): Likewise. + (VLDRWGSOQ): Likewise. + (VST1Q): Likewise. + (VSTRHSOQ): Likewise. + (VSTRHSSOQ): Likewise. + (VSTRHQ): Likewise. + (VSTRWQ): Likewise. + (VSTRDSBQ): Likewise. + (VSTRDSOQ): Likewise. + (VSTRDSSOQ): Likewise. + (VSTRWSOQ): Likewise. + (VSTRWSSOQ): Likewise. + (VSTRWSBWBQ): Likewise. + (VLDRWGBWBQ): Likewise. + (VSTRDSBWBQ): Likewise. + (VLDRDGBWBQ): Likewise. + (VADCIQ): Likewise. + (VADCIQ_M): Likewise. + (VSBCQ): Likewise. + (VSBCQ_M): Likewise. + (VSBCIQ): Likewise. 
+ (VSBCIQ_M): Likewise. + (VADCQ): Likewise. + (VADCQ_M): Likewise. + (UQRSHLLQ): Likewise. + (SQRSHRLQ): Likewise. + (VSHLCQ_M): Likewise. + * config/arm/mve.md (MVE_types): Move mode iterator to iterators.md from mve.md. + (MVE_VLD_ST): Likewise. + (MVE_0): Likewise. + (MVE_1): Likewise. + (MVE_3): Likewise. + (MVE_2): Likewise. + (MVE_5): Likewise. + (MVE_6): Likewise. + (MVE_CNVT): Move mode attribute iterator to iterators.md from mve.md. + (MVE_LANES): Likewise. + (MVE_constraint): Likewise. + (MVE_constraint1): Likewise. + (MVE_constraint2): Likewise. + (MVE_constraint3): Likewise. + (MVE_pred): Likewise. + (MVE_pred1): Likewise. + (MVE_pred2): Likewise. + (MVE_pred3): Likewise. + (MVE_B_ELEM): Likewise. + (MVE_H_ELEM): Likewise. + (V_sz_elem1): Likewise. + (V_extr_elem): Likewise. + (earlyclobber_32): Likewise. + (supf): Move int attribute to iterators.md from mve.md. + (mode1): Likewise. + (VCVTQ_TO_F): Move int iterator to iterators.md from mve.md. + (VMVNQ_N): Likewise. + (VREV64Q): Likewise. + (VCVTQ_FROM_F): Likewise. + (VREV16Q): Likewise. + (VCVTAQ): Likewise. + (VMVNQ): Likewise. + (VDUPQ_N): Likewise. + (VCLZQ): Likewise. + (VADDVQ): Likewise. + (VREV32Q): Likewise. + (VMOVLBQ): Likewise. + (VMOVLTQ): Likewise. + (VCVTPQ): Likewise. + (VCVTNQ): Likewise. + (VCVTMQ): Likewise. + (VADDLVQ): Likewise. + (VCTPQ): Likewise. + (VCTPQ_M): Likewise. + (VCVTQ_N_TO_F): Likewise. + (VCREATEQ): Likewise. + (VSHRQ_N): Likewise. + (VCVTQ_N_FROM_F): Likewise. + (VADDLVQ_P): Likewise. + (VCMPNEQ): Likewise. + (VSHLQ): Likewise. + (VABDQ): Likewise. + (VADDQ_N): Likewise. + (VADDVAQ): Likewise. + (VADDVQ_P): Likewise. + (VANDQ): Likewise. + (VBICQ): Likewise. + (VBRSRQ_N): Likewise. + (VCADDQ_ROT270): Likewise. + (VCADDQ_ROT90): Likewise. + (VCMPEQQ): Likewise. + (VCMPEQQ_N): Likewise. + (VCMPNEQ_N): Likewise. + (VEORQ): Likewise. + (VHADDQ): Likewise. + (VHADDQ_N): Likewise. + (VHSUBQ): Likewise. + (VHSUBQ_N): Likewise. + (VMAXQ): Likewise. + (VMAXVQ): Likewise. 
+ (VMINQ): Likewise. + (VMINVQ): Likewise. + (VMLADAVQ): Likewise. + (VMULHQ): Likewise. + (VMULLBQ_INT): Likewise. + (VMULLTQ_INT): Likewise. + (VMULQ): Likewise. + (VMULQ_N): Likewise. + (VORNQ): Likewise. + (VORRQ): Likewise. + (VQADDQ): Likewise. + (VQADDQ_N): Likewise. + (VQRSHLQ): Likewise. + (VQRSHLQ_N): Likewise. + (VQSHLQ): Likewise. + (VQSHLQ_N): Likewise. + (VQSHLQ_R): Likewise. + (VQSUBQ): Likewise. + (VQSUBQ_N): Likewise. + (VRHADDQ): Likewise. + (VRMULHQ): Likewise. + (VRSHLQ): Likewise. + (VRSHLQ_N): Likewise. + (VRSHRQ_N): Likewise. + (VSHLQ_N): Likewise. + (VSHLQ_R): Likewise. + (VSUBQ): Likewise. + (VSUBQ_N): Likewise. + (VADDLVAQ): Likewise. + (VBICQ_N): Likewise. + (VMLALDAVQ): Likewise. + (VMLALDAVXQ): Likewise. + (VMOVNBQ): Likewise. + (VMOVNTQ): Likewise. + (VORRQ_N): Likewise. + (VQMOVNBQ): Likewise. + (VQMOVNTQ): Likewise. + (VSHLLBQ_N): Likewise. + (VSHLLTQ_N): Likewise. + (VRMLALDAVHQ): Likewise. + (VBICQ_M_N): Likewise. + (VCVTAQ_M): Likewise. + (VCVTQ_M_TO_F): Likewise. + (VQRSHRNBQ_N): Likewise. + (VABAVQ): Likewise. + (VSHLCQ): Likewise. + (VRMLALDAVHAQ): Likewise. + (VADDVAQ_P): Likewise. + (VCLZQ_M): Likewise. + (VCMPEQQ_M_N): Likewise. + (VCMPEQQ_M): Likewise. + (VCMPNEQ_M_N): Likewise. + (VCMPNEQ_M): Likewise. + (VDUPQ_M_N): Likewise. + (VMAXVQ_P): Likewise. + (VMINVQ_P): Likewise. + (VMLADAVAQ): Likewise. + (VMLADAVQ_P): Likewise. + (VMLAQ_N): Likewise. + (VMLASQ_N): Likewise. + (VMVNQ_M): Likewise. + (VPSELQ): Likewise. + (VQDMLAHQ_N): Likewise. + (VQRDMLAHQ_N): Likewise. + (VQRDMLASHQ_N): Likewise. + (VQRSHLQ_M_N): Likewise. + (VQSHLQ_M_R): Likewise. + (VREV64Q_M): Likewise. + (VRSHLQ_M_N): Likewise. + (VSHLQ_M_R): Likewise. + (VSLIQ_N): Likewise. + (VSRIQ_N): Likewise. + (VMLALDAVQ_P): Likewise. + (VQMOVNBQ_M): Likewise. + (VMOVLTQ_M): Likewise. + (VMOVNBQ_M): Likewise. + (VRSHRNTQ_N): Likewise. + (VORRQ_M_N): Likewise. + (VREV32Q_M): Likewise. + (VREV16Q_M): Likewise. + (VQRSHRNTQ_N): Likewise. + (VMOVNTQ_M): Likewise. 
+ (VMOVLBQ_M): Likewise. + (VMLALDAVAQ): Likewise. + (VQSHRNBQ_N): Likewise. + (VSHRNBQ_N): Likewise. + (VRSHRNBQ_N): Likewise. + (VMLALDAVXQ_P): Likewise. + (VQMOVNTQ_M): Likewise. + (VMVNQ_M_N): Likewise. + (VQSHRNTQ_N): Likewise. + (VMLALDAVAXQ): Likewise. + (VSHRNTQ_N): Likewise. + (VCVTMQ_M): Likewise. + (VCVTNQ_M): Likewise. + (VCVTPQ_M): Likewise. + (VCVTQ_M_N_FROM_F): Likewise. + (VCVTQ_M_FROM_F): Likewise. + (VRMLALDAVHQ_P): Likewise. + (VADDLVAQ_P): Likewise. + (VABAVQ_P): Likewise. + (VSHLQ_M): Likewise. + (VSRIQ_M_N): Likewise. + (VSUBQ_M): Likewise. + (VCVTQ_M_N_TO_F): Likewise. + (VHSUBQ_M): Likewise. + (VSLIQ_M_N): Likewise. + (VRSHLQ_M): Likewise. + (VMINQ_M): Likewise. + (VMULLBQ_INT_M): Likewise. + (VMULHQ_M): Likewise. + (VMULQ_M): Likewise. + (VHSUBQ_M_N): Likewise. + (VHADDQ_M_N): Likewise. + (VORRQ_M): Likewise. + (VRMULHQ_M): Likewise. + (VQADDQ_M): Likewise. + (VRSHRQ_M_N): Likewise. + (VQSUBQ_M_N): Likewise. + (VADDQ_M): Likewise. + (VORNQ_M): Likewise. + (VRHADDQ_M): Likewise. + (VQSHLQ_M): Likewise. + (VANDQ_M): Likewise. + (VBICQ_M): Likewise. + (VSHLQ_M_N): Likewise. + (VCADDQ_ROT270_M): Likewise. + (VQRSHLQ_M): Likewise. + (VQADDQ_M_N): Likewise. + (VADDQ_M_N): Likewise. + (VMAXQ_M): Likewise. + (VQSUBQ_M): Likewise. + (VMLASQ_M_N): Likewise. + (VMLADAVAQ_P): Likewise. + (VBRSRQ_M_N): Likewise. + (VMULQ_M_N): Likewise. + (VCADDQ_ROT90_M): Likewise. + (VMULLTQ_INT_M): Likewise. + (VEORQ_M): Likewise. + (VSHRQ_M_N): Likewise. + (VSUBQ_M_N): Likewise. + (VHADDQ_M): Likewise. + (VABDQ_M): Likewise. + (VMLAQ_M_N): Likewise. + (VQSHLQ_M_N): Likewise. + (VMLALDAVAQ_P): Likewise. + (VMLALDAVAXQ_P): Likewise. + (VQRSHRNBQ_M_N): Likewise. + (VQRSHRNTQ_M_N): Likewise. + (VQSHRNBQ_M_N): Likewise. + (VQSHRNTQ_M_N): Likewise. + (VRSHRNBQ_M_N): Likewise. + (VRSHRNTQ_M_N): Likewise. + (VSHLLBQ_M_N): Likewise. + (VSHLLTQ_M_N): Likewise. + (VSHRNBQ_M_N): Likewise. + (VSHRNTQ_M_N): Likewise. + (VSTRWSBQ): Likewise. + (VSTRBSOQ): Likewise. 
+ (VSTRBQ): Likewise. + (VLDRBGOQ): Likewise. + (VLDRBQ): Likewise. + (VLDRWGBQ): Likewise. + (VLD1Q): Likewise. + (VLDRHGOQ): Likewise. + (VLDRHGSOQ): Likewise. + (VLDRHQ): Likewise. + (VLDRWQ): Likewise. + (VLDRDGBQ): Likewise. + (VLDRDGOQ): Likewise. + (VLDRDGSOQ): Likewise. + (VLDRWGOQ): Likewise. + (VLDRWGSOQ): Likewise. + (VST1Q): Likewise. + (VSTRHSOQ): Likewise. + (VSTRHSSOQ): Likewise. + (VSTRHQ): Likewise. + (VSTRWQ): Likewise. + (VSTRDSBQ): Likewise. + (VSTRDSOQ): Likewise. + (VSTRDSSOQ): Likewise. + (VSTRWSOQ): Likewise. + (VSTRWSSOQ): Likewise. + (VSTRWSBWBQ): Likewise. + (VLDRWGBWBQ): Likewise. + (VSTRDSBWBQ): Likewise. + (VLDRDGBWBQ): Likewise. + (VADCIQ): Likewise. + (VADCIQ_M): Likewise. + (VSBCQ): Likewise. + (VSBCQ_M): Likewise. + (VSBCIQ): Likewise. + (VSBCIQ_M): Likewise. + (VADCQ): Likewise. + (VADCQ_M): Likewise. + (UQRSHLLQ): Likewise. + (SQRSHRLQ): Likewise. + (VSHLCQ_M): Likewise. + (define_c_enum "unspec"): Move MVE enumerator to unspecs.md from mve.md. + * config/arm/unspecs.md (define_c_enum "unspec"): Move MVE enumerator from + mve.md to unspecs.md. + +2020-10-06 Martin Liska + + * common.opt: Remove -fdbg-cnt-list from deferred options. + * dbgcnt.c (dbg_cnt_set_limit_by_index): Make a copy + to original_limits. + (dbg_cnt_list_all_counters): Print also current counter value + and print to stderr. + * opts-global.c (handle_common_deferred_options): Do not handle + -fdbg-cnt-list. + * opts.c (common_handle_option): Likewise. + * toplev.c (finalize): Handle it after compilation here. + +2020-10-06 Martin Liska + + * dbgcnt.c (dbg_cnt): Report also upper limit. + +2020-10-06 Tom de Vries + + * tracer.c (count_insns): Rename to ... + (analyze_bb): ... this. + (cache_can_duplicate_bb_p, cached_can_duplicate_bb_p): New function. + (ignore_bb_p): Use cached_can_duplicate_bb_p. + (tail_duplicate): Call cache_can_duplicate_bb_p. 
+ +2020-10-06 Tom de Vries + + * tracer.c (can_duplicate_insn_p, can_duplicate_bb_no_insn_iter_p) + (can_duplicate_bb_p): New function, factored out of ... + (ignore_bb_p): ... here. + +2020-10-06 Jakub Jelinek + + PR rtl-optimization/97282 + * tree-ssa-math-opts.c (divmod_candidate_p): Don't return false for + constant op2 if it is not a power of two and the type has precision + larger than HOST_BITS_PER_WIDE_INT or BITS_PER_WORD. + * internal-fn.c (contains_call_div_mod): New function. + (expand_DIVMOD): If last argument is a constant, try to expand it as + TRUNC_DIV_EXPR followed by TRUNC_MOD_EXPR, but if the sequence + contains any calls or {,U}{DIV,MOD} rtxes, throw it away and use + divmod optab or divmod libfunc. + +2020-10-06 Aldy Hernandez + + * value-range.h (irange_allocator::allocate): Increase + newir storage by one. + +2020-10-06 Jakub Jelinek + + PR middle-end/97289 + * omp-offload.c (omp_discover_declare_target_tgt_fn_r): Only follow + node->alias_target if it is a FUNCTION_DECL. + +2020-10-06 Joe Ramsay + + * config/arm/arm-cpus.in: + (ALL_FPU_INTERNAL): Remove vfp_base. + (VFPv2): Remove vfp_base. + (MVE): Remove vfp_base. + (vfp_base): Redefine as implied bit dependent on MVE or FP + (cortex-m55): Add flags to disable MVE, MVE FP, FP and DSP extensions. + * config/arm/arm.c (arm_configure_build_target): Add implied bits to ISA. + * config/arm/parsecpu.awk: + (gen_isa): Print implied bits and their dependencies to ISA header. + (gen_data): Add parsing for implied feature bits. + +2020-10-06 Andreas Krebbel + + * doc/invoke.texi: Add z15/arch13 to the list of documented + -march/-mtune options. + 2020-10-05 Dennis Zhang * config/arm/arm.c (arm_preferred_simd_mode): Enable MVE SIMD modes. 
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 684bf4b..8478d98 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20201006 +20201007 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index a741e06..dcd5fd3 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,10 @@ +2020-10-06 Marek Polacek + + PR c++/97297 + * parser.c (cp_parser_direct_declarator): When checking if a + name is a function template declaration for the P0634R3 case, + look in uninstantiated templates too. + 2020-10-05 Marek Polacek * cp-tree.h (NON_UNION_CLASS_TYPE_P): Fix typo in a comment. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 703cc68..6466356 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,58 @@ +2020-10-06 Marek Polacek + + PR c++/97297 + * g++.dg/cpp2a/typename18.C: New test. + +2020-10-06 Tobias Burnus + + PR middle-end/90861 + * c-c++-common/goacc/declare-pr90861.c: Remove xfail. + +2020-10-06 Andrew MacLeod + + * gcc.dg/pr81192.c: Disable EVRP pass. + * gcc.dg/tree-ssa/pr77445-2.c: Ditto. + * gcc.dg/tree-ssa/ssa-dom-thread-6.c: Adjust. + * gcc.dg/tree-ssa/ssa-dom-thread-7.c: Ditto. + +2020-10-06 Dennis Zhang + + * gcc.target/arm/mve/intrinsics/vreinterpretq_f16.c: Use additional + option -fno-ipa-icf and change the instruction count from 8 to 16. + * gcc.target/arm/mve/intrinsics/vreinterpretq_f32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_s16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_s32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_s64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_s8.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_u16.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_u32.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_u64.c: Likewise. + * gcc.target/arm/mve/intrinsics/vreinterpretq_u8.c: Likewise. 
+ +2020-10-06 Jakub Jelinek + + PR rtl-optimization/97282 + * gcc.target/i386/pr97282.c: New test. + +2020-10-06 Jakub Jelinek + + PR middle-end/97289 + * c-c++-common/gomp/pr97289.c: New test. + +2020-10-06 Joe Ramsay + + * gcc.target/arm/cortex-m55-nodsp-flag-hard.c: New test. + * gcc.target/arm/cortex-m55-nodsp-flag-softfp.c: New test. + * gcc.target/arm/cortex-m55-nodsp-nofp-flag-softfp.c: New test. + * gcc.target/arm/cortex-m55-nofp-flag-hard.c: New test. + * gcc.target/arm/cortex-m55-nofp-flag-softfp.c: New test. + * gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c: New test. + * gcc.target/arm/cortex-m55-nomve-flag-hard.c: New test. + * gcc.target/arm/cortex-m55-nomve-flag-softfp.c: New test. + * gcc.target/arm/cortex-m55-nomve.fp-flag-hard.c: New test. + * gcc.target/arm/cortex-m55-nomve.fp-flag-softfp.c: New test. + * gcc.target/arm/multilib.exp: Add tests for -mcpu=cortex-m55. + 2020-10-05 Dennis Zhang * gcc.target/arm/mve/intrinsics/vreinterpretq_f16.c: Use additional -- cgit v1.1 From ebc77ce3a4c70730b4e38d68f88693eadbdc8712 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Wed, 7 Oct 2020 07:22:43 +0200 Subject: [tree-ssa-loop-ch] Add missing NULL test for dump_file If we change gimple_can_duplicate_bb_p to return false instead of true, we run into a segfault in ch_base::copy_headers due to using dump_file while it's NULL: ... if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs, true)) { fprintf (dump_file, "Duplication failed.\n"); continue; } ... Fix this by adding the missing dump_file != NULL test. Tested by rebuilding lto1 and rerunning the failing test-case. gcc/ChangeLog: 2020-10-07 Tom de Vries * tree-ssa-loop-ch.c (ch_base::copy_headers): Add missing NULL test for dump_file. 
--- gcc/tree-ssa-loop-ch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c index b86acf7..1f3d932 100644 --- a/gcc/tree-ssa-loop-ch.c +++ b/gcc/tree-ssa-loop-ch.c @@ -425,7 +425,8 @@ ch_base::copy_headers (function *fun) if (!gimple_duplicate_sese_region (entry, exit, bbs, n_bbs, copied_bbs, true)) { - fprintf (dump_file, "Duplication failed.\n"); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Duplication failed.\n"); continue; } copied.safe_push (std::make_pair (entry, loop)); -- cgit v1.1 From 83f565ed4f37e550e1d40f7b6cf0b5845f29a9c7 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 7 Oct 2020 10:49:37 +0200 Subject: openmp: Improve composite simd vectorization > > I was really hoping bbs 4 and 5 would be one loop (the one I set safelen > > and force_vectorize etc. for) and that basic blocks 6 and 7 would be > > together with that inner loop another loop, but apparently loop discovery > > thinks it is just one loop. > > Any ideas what I'm doing wrong or is there any way how to make it two loops > > (that would also survive all the cfg cleanups until vectorization)? > > The early CFG looks like we have a common header with two latches > so it boils down to how we disambiguate those in the end (we seem > to unify the latches via a forwarder). IIRC OMP lowering builds > loops itself, could it not do the appropriate disambiguation itself? I realized I emit the same stmts on both paths (before goto doit; and before falling through it), at least the MIN_EXPR and PLUS_EXPR, so by forcing there an extra bb which does those two and having the "doit" label before that the innermost loop doesn't have multiple latches anymore and so is vectorized fine. 
2020-10-07 Jakub Jelinek * omp-expand.c (expand_omp_simd): Don't emit MIN_EXPR and PLUS_EXPR at the end of entry_bb and innermost init_bb, instead force arguments for MIN_EXPR into temporaries in both cases and jump to a new bb that performs MIN_EXPR and PLUS_EXPR. * gcc.dg/gomp/simd-2.c: New test. * gcc.dg/gomp/simd-3.c: New test. --- gcc/omp-expand.c | 19 +++++++++++--- gcc/testsuite/gcc.dg/gomp/simd-2.c | 51 ++++++++++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/gomp/simd-3.c | 51 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/gomp/simd-2.c create mode 100644 gcc/testsuite/gcc.dg/gomp/simd-3.c (limited to 'gcc') diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index 99cb4f9..0d30089 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -6347,6 +6347,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) tree n2var = NULL_TREE; tree n2v = NULL_TREE; tree *nonrect_bounds = NULL; + tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE; if (fd->collapse > 1) { if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt)) @@ -6406,9 +6407,10 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) fold_convert (itype, fd->loops[i].step)); t = fold_convert (type, t); tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1); - t = fold_build2 (MIN_EXPR, type, t2, t); - t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t); - expand_omp_build_assign (&gsi, n2var, t); + min_arg1 = create_tmp_var (type); + expand_omp_build_assign (&gsi, min_arg1, t2); + min_arg2 = create_tmp_var (type); + expand_omp_build_assign (&gsi, min_arg2, t); } else { @@ -6815,7 +6817,16 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) } else t = counts[i + 1]; - t = fold_build2 (MIN_EXPR, type, t2, t); + expand_omp_build_assign (&gsi, min_arg1, t2); + expand_omp_build_assign (&gsi, min_arg2, t); + e = split_block (init_bb, last_stmt (init_bb)); + gsi = 
gsi_after_labels (e->dest); + init_bb = e->dest; + remove_edge (FALLTHRU_EDGE (entry_bb)); + make_edge (entry_bb, init_bb, EDGE_FALLTHRU); + set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb); + t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2); t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t); expand_omp_build_assign (&gsi, n2var, t); } diff --git a/gcc/testsuite/gcc.dg/gomp/simd-2.c b/gcc/testsuite/gcc.dg/gomp/simd-2.c new file mode 100644 index 0000000..7ac3eb4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-2.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fopenmp -fdump-tree-vect-details" } */ +/* { dg-additional-options "-mavx" { target avx } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-9]\[0-9]* loops in function" 5 "vect" } } */ + +int a[10000][128]; + +void +foo (void) +{ + #pragma omp for simd schedule (simd: dynamic, 32) collapse(2) + for (int i = 0; i < 10000; i++) + for (int j = 0; j < 128; j++) + a[i][j] += 3; +} + +void +bar (void) +{ + #pragma omp parallel for simd schedule (simd: dynamic, 32) collapse(2) + for (int i = 0; i < 10000; i++) + for (int j = 0; j < 128; j++) + a[i][j] += 3; +} + +void +baz (void) +{ + #pragma omp distribute parallel for simd schedule (simd: dynamic, 32) collapse(2) + for (int i = 0; i < 10000; i++) + for (int j = 0; j < 128; j++) + a[i][j] += 3; +} + +void +qux (void) +{ + #pragma omp distribute simd dist_schedule (static, 128) collapse(2) + for (int i = 0; i < 10000; i++) + for (int j = 0; j < 128; j++) + a[i][j] += 3; +} + +void +corge (void) +{ + #pragma omp taskloop simd collapse(2) + for (int i = 0; i < 10000; i++) + for (int j = 0; j < 128; j++) + a[i][j] += 3; +} diff --git a/gcc/testsuite/gcc.dg/gomp/simd-3.c b/gcc/testsuite/gcc.dg/gomp/simd-3.c new file mode 100644 index 0000000..13e1346 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gomp/simd-3.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 
-fopenmp -fdump-tree-vect-details" } */ +/* { dg-additional-options "-mavx" { target avx } } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-9]\[0-9]* loops in function" 5 "vect" } } */ + +int a[1024][1024]; + +void +foo (void) +{ + #pragma omp for simd collapse(2) + for (int i = 0; i < 1024; i++) + for (int j = 0; j < i; j++) + a[i][j] += 3; +} + +void +bar (void) +{ + #pragma omp parallel for simd collapse(2) + for (int i = 0; i < 1024; i++) + for (int j = 0; j < i; j++) + a[i][j] += 3; +} + +void +baz (void) +{ + #pragma omp distribute parallel for simd collapse(2) + for (int i = 0; i < 1024; i++) + for (int j = 0; j < i; j++) + a[i][j] += 3; +} + +void +qux (void) +{ + #pragma omp distribute simd collapse(2) + for (int i = 0; i < 1024; i++) + for (int j = 0; j < i; j++) + a[i][j] += 3; +} + +void +corge (void) +{ + #pragma omp taskloop simd collapse(2) + for (int i = 0; i < 1024; i++) + for (int j = 0; j < i; j++) + a[i][j] += 3; +} -- cgit v1.1 From e91c34262d2dd06da4b9436744bff89007dee2c9 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 7 Oct 2020 10:52:47 +0200 Subject: options: Avoid unused variable mask warning [PR97305] > options-save.c: In function 'void cl_target_option_save(cl_target_option*, gcc_options*, gcc_options*)': > options-save.c:8526:26: error: unused variable 'mask' [-Werror=unused-variable] > 8526 | unsigned HOST_WIDE_INT mask = 0; > | ^~~~ > options-save.c: In function 'void cl_target_option_restore(gcc_options*, gcc_options*, cl_target_option*)': > options-save.c:8537:26: error: unused variable 'mask' [-Werror=unused-variable] > 8537 | unsigned HOST_WIDE_INT mask; > | ^~~~ Oops, missed that, sorry. The following patch should fix that, tested on x86_64-linux make options-save.c (same file as before) and -> ia64-linux cross make options-save.o (no warning anymore, just the unwanted declarations gone). 
2020-10-07 Jakub Jelinek PR bootstrap/97305 * optc-save-gen.awk: Don't declare mask variable if explicit_mask array is not present. --- gcc/optc-save-gen.awk | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk index 5169acd..a756835 100644 --- a/gcc/optc-save-gen.awk +++ b/gcc/optc-save-gen.awk @@ -597,11 +597,13 @@ for (i = 0; i < n_target_string; i++) { } print ""; -print " unsigned HOST_WIDE_INT mask = 0;"; j = 0; k = 0; for (i = 0; i < n_extra_target_vars; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } print " if (opts_set->x_" extra_target_vars[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; j++; if (j == 64) { @@ -617,6 +619,9 @@ for (i = 0; i < n_target_other; i++) { print " ptr->explicit_mask_" var_target_other[i] " = opts_set->x_" var_target_other[i] ";"; continue; } + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } print " if (opts_set->x_" var_target_other[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; j++; if (j == 64) { @@ -628,6 +633,9 @@ for (i = 0; i < n_target_other; i++) { } for (i = 0; i < n_target_enum; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } print " if (opts_set->x_" var_target_enum[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; j++; if (j == 64) { @@ -643,6 +651,9 @@ for (i = 0; i < n_target_int; i++) { print " ptr->explicit_mask_" var_target_int[i] " = opts_set->x_" var_target_int[i] ";"; continue; } + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } print " if (opts_set->x_" var_target_int[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; j++; if (j == 64) { @@ -654,6 +665,9 @@ for (i = 0; i < n_target_int; i++) { } for (i = 0; i < n_target_short; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } print " if (opts_set->x_" var_target_short[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; j++; if (j == 64) { 
@@ -665,6 +679,9 @@ for (i = 0; i < n_target_short; i++) { } for (i = 0; i < n_target_char; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } print " if (opts_set->x_" var_target_char[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; j++; if (j == 64) { @@ -676,6 +693,9 @@ for (i = 0; i < n_target_char; i++) { } for (i = 0; i < n_target_string; i++) { + if (j == 0 && k == 0) { + print " unsigned HOST_WIDE_INT mask = 0;"; + } print " if (opts_set->x_" var_target_string[i] ") mask |= HOST_WIDE_INT_1U << " j ";"; j++; if (j == 64) { @@ -732,7 +752,9 @@ for (i = 0; i < n_target_string; i++) { } print ""; -print " unsigned HOST_WIDE_INT mask;"; +if (has_target_explicit_mask) { + print " unsigned HOST_WIDE_INT mask;"; +} j = 64; k = 0; -- cgit v1.1 From 6923255e35a3d54f2083ad0f67edebb3f1b86506 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 7 Oct 2020 10:55:35 +0200 Subject: debug: Pass --gdwarf-N to assembler if fixed gas is detected during configure > > As for the test assembly, I'd say we should take > > #define F void foo (void) {} > > F > > compile it with > > gcc -S -O2 -g1 -dA -gno-as-loc-support -fno-merge-debug-strings > > remove .cfi_* directives, remove the ret instruction, change @function > > and @progbits to %function and %progbits, change .uleb128 to just .byte, > > I think all the values should be small enough, maybe change .value to > > .2byte and .long to .4byte (whatever is most portable across different > > arches and gas versions), simplify (shorten) strings and adjust > > sizes, and do something with the .quad directives, that is dependent on > > the address size, perhaps just take those attributes out and adjust > > .debug_abbrev? Finally, remove all comments (emit them in the first case > > just to better understand the debug info). > > I'm afraid it is hard to avoid the .quad or .8byte. > Here is a 64-bit address version that assembles fine by both x86_64 and > aarch64 as. 
> Unfortunately doesn't fail with broken gas versions with -gdwarf-2 without > the nop, so we'll need at least a nop in there. > Fortunately gcc/configure.ac already determines the right nop insn for the > target, in $insn. > So I guess what we want next is have the 32-bit version of this with .4byte > instead of .8byte and just let's try to assemble both versions, first > without -gdwarf-2 and the one that succeeds assemble again with -gdwarf-2 > and check for the duplicate .debug_line sections error. Ok, here it is in patch form. I've briefly tested it, with the older binutils I have around (no --gdwarf-N support), with latest gas (--gdwarf-N that can be passed to as even when compiling C/C++ etc. code and emitting .debug_line) and latest gas with Mark's fix reverted (--gdwarf-N support, but can only pass it to as when assembling user .s/.S files, not when compiling C/C++ etc.). 2020-10-07 Jakub Jelinek * configure.ac (HAVE_AS_GDWARF_5_DEBUG_FLAG, HAVE_AS_WORKING_DWARF_4_FLAG): New tests. * gcc.c (ASM_DEBUG_DWARF_OPTION): Define. (ASM_DEBUG_SPEC): Use ASM_DEBUG_DWARF_OPTION instead of "--gdwarf2". Use %{cond:opt1;:opt2} style. (ASM_DEBUG_OPTION_DWARF_OPT): Define. (ASM_DEBUG_OPTION_SPEC): Define. (asm_debug_option): New variable. (asm_options): Add "%(asm_debug_option)". (static_specs): Add asm_debug_option entry. (static_spec_functions): Add dwarf-version-gt. (debug_level_greater_than_spec_func): New function. * config/darwin.h (ASM_DEBUG_OPTION_SPEC): Define. * config/darwin9.h (ASM_DEBUG_OPTION_SPEC): Redefine. * config.in: Regenerated. * configure: Regenerated. 
--- gcc/config.in | 13 +++ gcc/config/darwin.h | 1 + gcc/config/darwin9.h | 3 + gcc/configure | 310 +++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/configure.ac | 188 +++++++++++++++++++++++++++++++ gcc/gcc.c | 77 ++++++++++++- 6 files changed, 588 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/config.in b/gcc/config.in index 5835cea..3657c46 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -394,6 +394,12 @@ #endif +/* Define if your assembler supports the --gdwarf-5 option. */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_GDWARF_5_DEBUG_FLAG +#endif + + /* Define if your assembler supports .gnu_attribute. */ #ifndef USED_FOR_TARGET #undef HAVE_AS_GNU_ATTRIBUTE @@ -713,6 +719,13 @@ #endif +/* Define if your assembler supports --gdwarf-4 even with compiler generated + .debug_line */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_WORKING_DWARF_4_FLAG +#endif + + /* Define if your assembler supports -xbrace_comment option. */ #ifndef USED_FOR_TARGET #undef HAVE_AS_XBRACE_COMMENT_OPTION diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 55a5361..7843add 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -476,6 +476,7 @@ extern GTY(()) int darwin_ms_struct; debugging data. */ #define ASM_DEBUG_SPEC "%{g*:%{%:debug-level-gt(0):%{!gdwarf*:--gstabs}}}" +#define ASM_DEBUG_OPTION_SPEC "" #define ASM_FINAL_SPEC \ "%{gsplit-dwarf:%ngsplit-dwarf is not supported on this platform} %>confdefs.h fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for --gdwarf-5 option" >&5 +$as_echo_n "checking assembler for --gdwarf-5 option... 
" >&6; } +if ${gcc_cv_as_gdwarf_5_flag+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_gdwarf_5_flag=no + if test $in_tree_gas = yes; then + if test $in_tree_gas_is_elf = yes \ + && test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 36 \) \* 1000 + 0` + then gcc_cv_as_gdwarf_5_flag=yes +fi + elif test x$gcc_cv_as != x; then + $as_echo "$insn" > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags --gdwarf-5 -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_gdwarf_5_flag=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_gdwarf_5_flag" >&5 +$as_echo "$gcc_cv_as_gdwarf_5_flag" >&6; } +if test $gcc_cv_as_gdwarf_5_flag = yes; then + +$as_echo "#define HAVE_AS_GDWARF_5_DEBUG_FLAG 1" >>confdefs.h + +fi + + + dwarf4_debug_info_size=0x46 + dwarf4_high_pc_form=7 + dwarf4_debug_aranges_size=0x2c + dwarf4_line_sz=9 + for dwarf4_addr_size in 8 4; do + conftest_s="\ + .file \"a.c\" + .text +.Ltext0: + .p2align 4 + .globl foo + .type foo, %function +foo: +.LFB0: +.LM1: + $insn +.LM2: +.LFE0: + .size foo, .-foo +.Letext0: + .section .debug_info,\"\",%progbits +.Ldebug_info0: + .4byte $dwarf4_debug_info_size + .2byte 0x4 + .4byte .Ldebug_abbrev0 + .byte 0x$dwarf4_addr_size + .byte 0x1 + .ascii \"GNU C17\\0\" + .byte 0xc + .ascii \"a.c\\0\" + .ascii \"/\\0\" + .${dwarf4_addr_size}byte .Ltext0 + .${dwarf4_addr_size}byte .Letext0-.Ltext0 + .4byte .Ldebug_line0 + .byte 0x2 + .ascii \"foo\\0\" + .byte 0x1 + .byte 0x2 + .byte 0x1 + .${dwarf4_addr_size}byte .LFB0 + .${dwarf4_addr_size}byte .LFE0-.LFB0 + .byte 0x1 + .byte 0x9c + .byte 0 + .section .debug_abbrev,\"\",%progbits +.Ldebug_abbrev0: + .byte 0x1 + .byte 0x11 + .byte 0x1 + .byte 0x25 + 
.byte 0x8 + .byte 0x13 + .byte 0xb + .byte 0x3 + .byte 0x8 + .byte 0x1b + .byte 0x8 + .byte 0x11 + .byte 0x1 + .byte 0x12 + .byte 0x$dwarf4_high_pc_form + .byte 0x10 + .byte 0x17 + .byte 0 + .byte 0 + .byte 0x2 + .byte 0x2e + .byte 0 + .byte 0x3f + .byte 0x19 + .byte 0x3 + .byte 0x8 + .byte 0x3a + .byte 0xb + .byte 0x3b + .byte 0xb + .byte 0x39 + .byte 0xb + .byte 0x11 + .byte 0x1 + .byte 0x12 + .byte 0x$dwarf4_high_pc_form + .byte 0x40 + .byte 0x18 + .byte 0 + .byte 0 + .byte 0 + .section .debug_aranges,\"\",%progbits + .4byte $dwarf4_debug_aranges_size + .2byte 0x2 + .4byte .Ldebug_info0 + .byte 0x8 + .byte 0 + .2byte 0 + .2byte 0 + .${dwarf4_addr_size}byte .Ltext0 + .${dwarf4_addr_size}byte .Letext0-.Ltext0 + .${dwarf4_addr_size}byte 0 + .${dwarf4_addr_size}byte 0 + .section .debug_line,\"\",%progbits +.Ldebug_line0: + .4byte .LELT0-.LSLT0 +.LSLT0: + .2byte 0x4 + .4byte .LELTP0-.LASLTP0 +.LASLTP0: + .byte 0x1 + .byte 0x1 + .byte 0x1 + .byte 0xf6 + .byte 0xf2 + .byte 0xd + .byte 0 + .byte 0x1 + .byte 0x1 + .byte 0x1 + .byte 0x1 + .byte 0 + .byte 0 + .byte 0 + .byte 0x1 + .byte 0 + .byte 0 + .byte 0x1 + .byte 0 + .ascii \"a.c\\0\" + .byte 0 + .byte 0 + .byte 0 + .byte 0 +.LELTP0: + .byte 0 + .byte 0x$dwarf4_line_sz + .byte 0x2 + .${dwarf4_addr_size}byte .LM1 + .byte 0x18 + .byte 0x5 + .byte 0x1 + .byte 0 + .byte 0x$dwarf4_line_sz + .byte 0x2 + .${dwarf4_addr_size}byte .LM2 + .byte 0x1 + .byte 0x5 + .byte 0x1 + .byte 0 + .byte 0x$dwarf4_line_sz + .byte 0x2 + .${dwarf4_addr_size}byte .Letext0 + .byte 0 + .byte 0x1 + .byte 0x1 +.LELT0: + .section .debug_str,\"\",%progbits + .ident \"GCC\" +" + dwarf4_success=no + if test $dwarf4_addr_size = 4; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for assembly of compiler generated 32-bit .debug_line" >&5 +$as_echo_n "checking assembler for assembly of compiler generated 32-bit .debug_line... 
" >&6; } +if ${gcc_cv_as_debug_line_32_flag+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_debug_line_32_flag=no + if test $in_tree_gas = yes; then + if test $in_tree_gas_is_elf = yes \ + && test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 36 \) \* 1000 + 0` + then gcc_cv_as_debug_line_32_flag=yes +fi + elif test x$gcc_cv_as != x; then + $as_echo "$conftest_s" > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_debug_line_32_flag=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_debug_line_32_flag" >&5 +$as_echo "$gcc_cv_as_debug_line_32_flag" >&6; } +if test $gcc_cv_as_debug_line_32_flag = yes; then + success=yes +fi + + else + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for assembly of compiler generated 64-bit .debug_line" >&5 +$as_echo_n "checking assembler for assembly of compiler generated 64-bit .debug_line... " >&6; } +if ${gcc_cv_as_debug_line_64_flag+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_debug_line_64_flag=no + if test $in_tree_gas = yes; then + if test $in_tree_gas_is_elf = yes \ + && test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 36 \) \* 1000 + 0` + then gcc_cv_as_debug_line_64_flag=yes +fi + elif test x$gcc_cv_as != x; then + $as_echo "$conftest_s" > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_debug_line_64_flag=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_debug_line_64_flag" >&5 +$as_echo "$gcc_cv_as_debug_line_64_flag" >&6; } +if test $gcc_cv_as_debug_line_64_flag = yes; then + dwarf4_success=yes +fi + + fi + if test $dwarf4_success = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for --gdwarf-4 not refusing compiler generated .debug_line" >&5 +$as_echo_n "checking assembler for --gdwarf-4 not refusing compiler generated .debug_line... " >&6; } +if ${gcc_cv_as_dwarf_4_debug_line_flag+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_dwarf_4_debug_line_flag=no + if test $in_tree_gas = yes; then + if test $in_tree_gas_is_elf = yes \ + && test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 36 \) \* 1000 + 0` + then gcc_cv_as_dwarf_4_debug_line_flag=yes +fi + elif test x$gcc_cv_as != x; then + $as_echo "$conftest_s" > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags --gdwarf-4 -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_dwarf_4_debug_line_flag=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_dwarf_4_debug_line_flag" >&5 +$as_echo "$gcc_cv_as_dwarf_4_debug_line_flag" >&6; } +if test $gcc_cv_as_dwarf_4_debug_line_flag = yes; then + +$as_echo "#define HAVE_AS_WORKING_DWARF_4_FLAG 1" >>confdefs.h + +fi + + break + fi + dwarf4_debug_info_size=0x36 + dwarf4_high_pc_form=6 + dwarf4_debug_aranges_size=0x1c + dwarf4_line_sz=5 + done + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for --gstabs option" >&5 $as_echo_n "checking assembler for --gstabs option... " >&6; } if ${gcc_cv_as_gstabs_flag+:} false; then : diff --git a/gcc/configure.ac b/gcc/configure.ac index 1ad5bbc6..26a5d8e 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -5197,6 +5197,194 @@ if test x"$insn" != x; then [AC_DEFINE(HAVE_AS_GDWARF2_DEBUG_FLAG, 1, [Define if your assembler supports the --gdwarf2 option.])]) + gcc_GAS_CHECK_FEATURE([--gdwarf-5 option], + gcc_cv_as_gdwarf_5_flag, + [elf,2,36,0], [--gdwarf-5], [$insn],, + [AC_DEFINE(HAVE_AS_GDWARF_5_DEBUG_FLAG, 1, +[Define if your assembler supports the --gdwarf-5 option.])]) + + dwarf4_debug_info_size=0x46 + dwarf4_high_pc_form=7 + dwarf4_debug_aranges_size=0x2c + dwarf4_line_sz=9 + for dwarf4_addr_size in 8 4; do + conftest_s="\ + .file \"a.c\" + .text +.Ltext0: + .p2align 4 + .globl foo + .type foo, %function +foo: +.LFB0: +.LM1: + $insn +.LM2: +.LFE0: + .size foo, .-foo +.Letext0: + .section .debug_info,\"\",%progbits +.Ldebug_info0: + .4byte $dwarf4_debug_info_size + .2byte 0x4 + .4byte .Ldebug_abbrev0 + .byte 0x$dwarf4_addr_size + .byte 0x1 + .ascii \"GNU C17\\0\" + .byte 0xc + .ascii \"a.c\\0\" + .ascii \"/\\0\" + .${dwarf4_addr_size}byte .Ltext0 + .${dwarf4_addr_size}byte .Letext0-.Ltext0 + .4byte .Ldebug_line0 + .byte 0x2 + .ascii \"foo\\0\" + .byte 
0x1 + .byte 0x2 + .byte 0x1 + .${dwarf4_addr_size}byte .LFB0 + .${dwarf4_addr_size}byte .LFE0-.LFB0 + .byte 0x1 + .byte 0x9c + .byte 0 + .section .debug_abbrev,\"\",%progbits +.Ldebug_abbrev0: + .byte 0x1 + .byte 0x11 + .byte 0x1 + .byte 0x25 + .byte 0x8 + .byte 0x13 + .byte 0xb + .byte 0x3 + .byte 0x8 + .byte 0x1b + .byte 0x8 + .byte 0x11 + .byte 0x1 + .byte 0x12 + .byte 0x$dwarf4_high_pc_form + .byte 0x10 + .byte 0x17 + .byte 0 + .byte 0 + .byte 0x2 + .byte 0x2e + .byte 0 + .byte 0x3f + .byte 0x19 + .byte 0x3 + .byte 0x8 + .byte 0x3a + .byte 0xb + .byte 0x3b + .byte 0xb + .byte 0x39 + .byte 0xb + .byte 0x11 + .byte 0x1 + .byte 0x12 + .byte 0x$dwarf4_high_pc_form + .byte 0x40 + .byte 0x18 + .byte 0 + .byte 0 + .byte 0 + .section .debug_aranges,\"\",%progbits + .4byte $dwarf4_debug_aranges_size + .2byte 0x2 + .4byte .Ldebug_info0 + .byte 0x8 + .byte 0 + .2byte 0 + .2byte 0 + .${dwarf4_addr_size}byte .Ltext0 + .${dwarf4_addr_size}byte .Letext0-.Ltext0 + .${dwarf4_addr_size}byte 0 + .${dwarf4_addr_size}byte 0 + .section .debug_line,\"\",%progbits +.Ldebug_line0: + .4byte .LELT0-.LSLT0 +.LSLT0: + .2byte 0x4 + .4byte .LELTP0-.LASLTP0 +.LASLTP0: + .byte 0x1 + .byte 0x1 + .byte 0x1 + .byte 0xf6 + .byte 0xf2 + .byte 0xd + .byte 0 + .byte 0x1 + .byte 0x1 + .byte 0x1 + .byte 0x1 + .byte 0 + .byte 0 + .byte 0 + .byte 0x1 + .byte 0 + .byte 0 + .byte 0x1 + .byte 0 + .ascii \"a.c\\0\" + .byte 0 + .byte 0 + .byte 0 + .byte 0 +.LELTP0: + .byte 0 + .byte 0x$dwarf4_line_sz + .byte 0x2 + .${dwarf4_addr_size}byte .LM1 + .byte 0x18 + .byte 0x5 + .byte 0x1 + .byte 0 + .byte 0x$dwarf4_line_sz + .byte 0x2 + .${dwarf4_addr_size}byte .LM2 + .byte 0x1 + .byte 0x5 + .byte 0x1 + .byte 0 + .byte 0x$dwarf4_line_sz + .byte 0x2 + .${dwarf4_addr_size}byte .Letext0 + .byte 0 + .byte 0x1 + .byte 0x1 +.LELT0: + .section .debug_str,\"\",%progbits + .ident \"GCC\" +" + dwarf4_success=no + if test $dwarf4_addr_size = 4; then + gcc_GAS_CHECK_FEATURE([assembly of compiler generated 32-bit .debug_line], + 
gcc_cv_as_debug_line_32_flag, + [elf,2,36,0], [], [$conftest_s],, + [dwarf4_success=yes]) + else + gcc_GAS_CHECK_FEATURE([assembly of compiler generated 64-bit .debug_line], + gcc_cv_as_debug_line_64_flag, + [elf,2,36,0], [], [$conftest_s],, + [dwarf4_success=yes]) + fi + if test $dwarf4_success = yes; then + gcc_GAS_CHECK_FEATURE([--gdwarf-4 not refusing compiler generated .debug_line], + gcc_cv_as_dwarf_4_debug_line_flag, + [elf,2,36,0], [--gdwarf-4], [$conftest_s],, + [AC_DEFINE(HAVE_AS_WORKING_DWARF_4_FLAG, 1, +[Define if your assembler supports --gdwarf-4 even with compiler generated .debug_line])]) + break + fi + dwarf4_debug_info_size=0x36 + dwarf4_high_pc_form=6 + dwarf4_debug_aranges_size=0x1c + dwarf4_line_sz=5 + done + gcc_GAS_CHECK_FEATURE([--gstabs option], gcc_cv_as_gstabs_flag, [elf,2,11,0], [--gstabs], [$insn],, diff --git a/gcc/gcc.c b/gcc/gcc.c index 531f4e0..ff7b6c4 100644 --- a/gcc/gcc.c +++ b/gcc/gcc.c @@ -430,6 +430,7 @@ static const char *pass_through_libs_spec_func (int, const char **); static const char *dumps_spec_func (int, const char **); static const char *greater_than_spec_func (int, const char **); static const char *debug_level_greater_than_spec_func (int, const char **); +static const char *dwarf_version_greater_than_spec_func (int, const char **); static const char *find_fortran_preinclude_file (int, const char **); static char *convert_white_space (char *); static char *quote_spec (char *); @@ -876,22 +877,39 @@ proper position among the other output files. */ #endif /* HAVE_LD_COMPRESS_DEBUG >= 2 */ /* Define ASM_DEBUG_SPEC to be a spec suitable for translating '-g' - to the assembler. */ + to the assembler, when compiling assembly sources only. 
*/ #ifndef ASM_DEBUG_SPEC +# if defined(HAVE_AS_GDWARF_5_DEBUG_FLAG) && defined(HAVE_AS_WORKING_DWARF_4_FLAG) +/* If --gdwarf-N is supported and as can handle even compiler generated + .debug_line with it, supply --gdwarf-N in ASM_DEBUG_OPTION_SPEC rather + than in ASM_DEBUG_SPEC, so that it applies to both .s and .c etc. + compilations. */ +# define ASM_DEBUG_DWARF_OPTION "" +# elif defined(HAVE_AS_GDWARF_5_DEBUG_FLAG) +# define ASM_DEBUG_DWARF_OPTION "%{%:dwarf-version-gt(4):--gdwarf-5;" \ + "%:dwarf-version-gt(3):--gdwarf-4;" \ + "%:dwarf-version-gt(2):--gdwarf-3;" \ + ":--gdwarf2}" +# else +# define ASM_DEBUG_DWARF_OPTION "--gdwarf2" +# endif # if defined(DBX_DEBUGGING_INFO) && defined(DWARF2_DEBUGGING_INFO) \ && defined(HAVE_AS_GDWARF2_DEBUG_FLAG) && defined(HAVE_AS_GSTABS_DEBUG_FLAG) # define ASM_DEBUG_SPEC \ (PREFERRED_DEBUGGING_TYPE == DBX_DEBUG \ ? "%{%:debug-level-gt(0):" \ - "%{gdwarf*:--gdwarf2}%{!gdwarf*:%{g*:--gstabs}}}" ASM_MAP \ + "%{gdwarf*:" ASM_DEBUG_DWARF_OPTION "};" \ + ":%{g*:--gstabs}}" ASM_MAP \ : "%{%:debug-level-gt(0):" \ - "%{gstabs*:--gstabs}%{!gstabs*:%{g*:--gdwarf2}}}" ASM_MAP) + "%{gstabs*:--gstabs;" \ + ":%{g*:" ASM_DEBUG_DWARF_OPTION "}}}" ASM_MAP) # else # if defined(DBX_DEBUGGING_INFO) && defined(HAVE_AS_GSTABS_DEBUG_FLAG) # define ASM_DEBUG_SPEC "%{g*:%{%:debug-level-gt(0):--gstabs}}" ASM_MAP # endif # if defined(DWARF2_DEBUGGING_INFO) && defined(HAVE_AS_GDWARF2_DEBUG_FLAG) -# define ASM_DEBUG_SPEC "%{g*:%{%:debug-level-gt(0):--gdwarf2}}" ASM_MAP +# define ASM_DEBUG_SPEC "%{g*:%{%:debug-level-gt(0):" \ + ASM_DEBUG_DWARF_OPTION "}}" ASM_MAP # endif # endif #endif @@ -899,6 +917,32 @@ proper position among the other output files. */ # define ASM_DEBUG_SPEC "" #endif +/* Define ASM_DEBUG_OPTION_SPEC to be a spec suitable for translating '-g' + to the assembler when compiling all sources. 
*/ +#ifndef ASM_DEBUG_OPTION_SPEC +# if defined(HAVE_AS_GDWARF_5_DEBUG_FLAG) && defined(HAVE_AS_WORKING_DWARF_4_FLAG) +# define ASM_DEBUG_OPTION_DWARF_OPT \ + "%{%:dwarf-version-gt(4):--gdwarf-5 ;" \ + "%:dwarf-version-gt(3):--gdwarf-4 ;" \ + "%:dwarf-version-gt(2):--gdwarf-3 ;" \ + ":--gdwarf2 }" +# if defined(DBX_DEBUGGING_INFO) && defined(DWARF2_DEBUGGING_INFO) +# define ASM_DEBUG_OPTION_SPEC \ + (PREFERRED_DEBUGGING_TYPE == DBX_DEBUG \ + ? "%{%:debug-level-gt(0):" \ + "%{gdwarf*:" ASM_DEBUG_OPTION_DWARF_OPT "}}" \ + : "%{%:debug-level-gt(0):" \ + "%{!gstabs*:%{g*:" ASM_DEBUG_OPTION_DWARF_OPT "}}}") +# elif defined(DWARF2_DEBUGGING_INFO) +# define ASM_DEBUG_OPTION_SPEC "%{g*:%{%:debug-level-gt(0):" \ + ASM_DEBUG_OPTION_DWARF_OPT "}}" +# endif +# endif +#endif +#ifndef ASM_DEBUG_OPTION_SPEC +# define ASM_DEBUG_OPTION_SPEC "" +#endif + /* Here is the spec for running the linker, after compiling all files. */ /* This is overridable by the target in case they need to specify the @@ -1113,6 +1157,7 @@ proper position among the other output files. */ #endif static const char *asm_debug = ASM_DEBUG_SPEC; +static const char *asm_debug_option = ASM_DEBUG_OPTION_SPEC; static const char *cpp_spec = CPP_SPEC; static const char *cc1_spec = CC1_SPEC; static const char *cc1plus_spec = CC1PLUS_SPEC; @@ -1212,6 +1257,7 @@ static const char *asm_options = to the assembler equivalents. 
*/ "%{v} %{w:-W} %{I*} " #endif +"%(asm_debug_option)" ASM_COMPRESS_DEBUG_SPEC "%a %Y %{c:%W{o*}%{!o*:-o %w%b%O}}%{!c:-o %d%w%u%O}"; @@ -1608,6 +1654,7 @@ static struct spec_list static_specs[] = { INIT_STATIC_SPEC ("asm", &asm_spec), INIT_STATIC_SPEC ("asm_debug", &asm_debug), + INIT_STATIC_SPEC ("asm_debug_option", &asm_debug_option), INIT_STATIC_SPEC ("asm_final", &asm_final_spec), INIT_STATIC_SPEC ("asm_options", &asm_options), INIT_STATIC_SPEC ("invoke_as", &invoke_as), @@ -1690,6 +1737,7 @@ static const struct spec_function static_spec_functions[] = { "dumps", dumps_spec_func }, { "gt", greater_than_spec_func }, { "debug-level-gt", debug_level_greater_than_spec_func }, + { "dwarf-version-gt", dwarf_version_greater_than_spec_func }, { "fortran-preinclude-file", find_fortran_preinclude_file}, #ifdef EXTRA_SPEC_FUNCTIONS EXTRA_SPEC_FUNCTIONS @@ -10614,6 +10662,27 @@ debug_level_greater_than_spec_func (int argc, const char **argv) return NULL; } +/* Returns "" if dwarf_version is greater than ARGV[ARGC-1]. + Otherwise, return NULL. */ + +static const char * +dwarf_version_greater_than_spec_func (int argc, const char **argv) +{ + char *converted; + + if (argc != 1) + fatal_error (input_location, + "wrong number of arguments to %%:dwarf-version-gt"); + + long arg = strtol (argv[0], &converted, 10); + gcc_assert (converted != argv[0]); + + if (dwarf_version > arg) + return ""; + + return NULL; +} + static void path_prefix_reset (path_prefix *prefix) { -- cgit v1.1 From d22eecf8754cc22c7a19bfab9834f6d918c7838d Mon Sep 17 00:00:00 2001 From: Mark Wielaard Date: Fri, 18 Sep 2020 17:07:03 +0200 Subject: Output filepath strings in .debug_line_str for DWARF5 DWARF5 has a new string table specially for file paths. .debug_line file and dir tables reference strings in .debug_line_str. If a .debug_line_str section is emitted then also place CU DIE file names and comp dirs there. gcc/ChangeLog: * dwarf2out.c (add_filepath_AT_string): New function. 
(asm_outputs_debug_line_str): Likewise. (add_filename_attribute): Likewise. (add_comp_dir_attribute): Call add_filepath_AT_string. (gen_compile_unit_die): Call add_filename_attribute for name. (init_sections_and_labels): Init debug_line_str_section when asm_outputs_debug_line_str return true. (dwarf2out_early_finish): Remove DW_AT_name and DW_AT_comp_dir hack and call add_filename_attribute for the remap_debug_filename. --- gcc/dwarf2out.c | 100 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 68 insertions(+), 32 deletions(-) (limited to 'gcc') diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c index 4096c0c0..3036744 100644 --- a/gcc/dwarf2out.c +++ b/gcc/dwarf2out.c @@ -3347,6 +3347,8 @@ output_asm_line_debug_info (void) || !debug_variable_location_views)); } +static bool asm_outputs_debug_line_str (void); + /* Minimum line offset in a special line info. opcode. This value was chosen to give a reasonable range of values. */ #define DWARF_LINE_BASE -10 @@ -4731,6 +4733,35 @@ reset_indirect_string (indirect_string_node **h, void *) return 1; } +/* Add a string representing a file or filepath attribute value to a DIE. */ + +static inline void +add_filepath_AT_string (dw_die_ref die, enum dwarf_attribute attr_kind, + const char *str) +{ + if (! asm_outputs_debug_line_str ()) + add_AT_string (die, attr_kind, str); + else + { + dw_attr_node attr; + struct indirect_string_node *node; + + if (!debug_line_str_hash) + debug_line_str_hash + = hash_table::create_ggc (10); + + node = find_AT_string_in_table (str, debug_line_str_hash); + set_indirect_string (node); + node->form = DW_FORM_line_strp; + + attr.dw_attr = attr_kind; + attr.dw_attr_val.val_class = dw_val_class_str; + attr.dw_attr_val.val_entry = NULL; + attr.dw_attr_val.v.val_str = node; + add_dwarf_attr (die, &attr); + } +} + /* Find out whether a string should be output inline in DIE or out-of-line in .debug_str section. 
*/ @@ -11839,6 +11870,29 @@ output_ranges (void) for -gsplit-dwarf we should use DW_FORM_strx instead. */ \ && !dwarf_split_debug_info) + +/* Returns TRUE if we are outputting DWARF5 and the assembler supports + DWARF5 .debug_line tables using .debug_line_str or we generate + it ourselves, except for split-dwarf which doesn't have a + .debug_line_str. */ +static bool +asm_outputs_debug_line_str (void) +{ + if (dwarf_version >= 5 + && ! output_asm_line_debug_info () + && DWARF5_USE_DEBUG_LINE_STR) + return true; + else + { +#if defined(HAVE_AS_GDWARF_5_DEBUG_FLAG) && defined(HAVE_AS_WORKING_DWARF_4_FLAG) + return !dwarf_split_debug_info && dwarf_version >= 5; +#else + return false; +#endif + } +} + + /* Assign .debug_rnglists indexes. */ static void @@ -20514,6 +20568,15 @@ add_name_attribute (dw_die_ref die, const char *name_string) } } +/* Generate a DW_AT_name attribute given some string value representing a + file or filepath to be included as value of the attribute. */ +static void +add_filename_attribute (dw_die_ref die, const char *name_string) +{ + if (name_string != NULL && *name_string != 0) + add_filepath_AT_string (die, DW_AT_name, name_string); +} + /* Generate a DW_AT_description attribute given some string value to be included as the value of the attribute. */ @@ -20640,7 +20703,7 @@ add_comp_dir_attribute (dw_die_ref die) { const char * wd = comp_dir_string (); if (wd != NULL) - add_AT_string (die, DW_AT_comp_dir, wd); + add_filepath_AT_string (die, DW_AT_comp_dir, wd); } /* Given a tree node VALUE describing a scalar attribute ATTR (i.e. a bound, a @@ -24482,7 +24545,7 @@ gen_compile_unit_die (const char *filename) if (filename) { - add_name_attribute (die, filename); + add_filename_attribute (die, filename); /* Don't add cwd for . 
*/ if (filename[0] != '<') add_comp_dir_attribute (die); @@ -28733,7 +28796,8 @@ init_sections_and_labels (bool early_lto_debug) SECTION_DEBUG, NULL); debug_str_section = get_section (DEBUG_STR_SECTION, DEBUG_STR_SECTION_FLAGS, NULL); - if (!dwarf_split_debug_info && !output_asm_line_debug_info ()) + if ((!dwarf_split_debug_info && !output_asm_line_debug_info ()) + || asm_outputs_debug_line_str ()) debug_line_str_section = get_section (DEBUG_LINE_STR_SECTION, DEBUG_STR_SECTION_FLAGS, NULL); @@ -32020,37 +32084,9 @@ dwarf2out_early_finish (const char *filename) /* Add the name for the main input file now. We delayed this from dwarf2out_init to avoid complications with PCH. */ - add_name_attribute (comp_unit_die (), remap_debug_filename (filename)); + add_filename_attribute (comp_unit_die (), remap_debug_filename (filename)); add_comp_dir_attribute (comp_unit_die ()); - /* When emitting DWARF5 .debug_line_str, move DW_AT_name and - DW_AT_comp_dir into .debug_line_str section. */ - if (!output_asm_line_debug_info () - && dwarf_version >= 5 - && DWARF5_USE_DEBUG_LINE_STR) - { - for (int i = 0; i < 2; i++) - { - dw_attr_node *a = get_AT (comp_unit_die (), - i ? DW_AT_comp_dir : DW_AT_name); - if (a == NULL - || AT_class (a) != dw_val_class_str - || strlen (AT_string (a)) + 1 <= DWARF_OFFSET_SIZE) - continue; - - if (! debug_line_str_hash) - debug_line_str_hash - = hash_table::create_ggc (10); - - struct indirect_string_node *node - = find_AT_string_in_table (AT_string (a), debug_line_str_hash); - set_indirect_string (node); - node->form = DW_FORM_line_strp; - a->dw_attr_val.v.val_str->refcount--; - a->dw_attr_val.v.val_str = node; - } - } - /* With LTO early dwarf was really finished at compile-time, so make sure to adjust the phase after annotating the LTRANS CU DIE. 
*/ if (in_lto_p) -- cgit v1.1 From 76136f7f0ce8f4e27ac194af6429d378360b7b41 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Wed, 13 May 2020 16:24:12 +0100 Subject: amdgcn: Use scalar instructions for addptrdi3 Allow addptr to use SGPRs as well as VGPRs for pointers. This ought to prevent some unnecessary copying back and forth. gcc/ChangeLog: * config/gcn/gcn.md (unspec): Add UNSPEC_ADDPTR. (addptrdi3): Add SGPR alternative. --- gcc/config/gcn/gcn.md | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) (limited to 'gcc') diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 0e73fea..763e770 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -67,6 +67,7 @@ UNSPECV_ICACHE_INV]) (define_c_enum "unspec" [ + UNSPEC_ADDPTR UNSPEC_VECTOR UNSPEC_BPERMUTE UNSPEC_SGPRBASE @@ -1219,29 +1220,47 @@ ; "addptr" is the same as "add" except that it must not write to VCC or SCC ; as a side-effect. Unfortunately GCN does not have a suitable instruction -; for this, so we use a custom VOP3 add with CC_SAVE_REG as a temp. -; Note that it is not safe to save/clobber/restore SCC because doing so will -; break data-flow analysis, so this must use vector registers. +; for this, so we use CC_SAVE_REG as a temp. +; Note that it is not safe to save/clobber/restore as separate insns because +; doing so will break data-flow analysis, so this must use multiple +; instructions in one insn. ; ; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever ; on testcase pr54713-2.c with -O0. It's only an optimization hint anyway. +; +; The SGPR alternative is preferred as it is typically used with mov_sgprbase. 
(define_insn "addptrdi3" - [(set (match_operand:DI 0 "register_operand" "= v") - (plus:DI (match_operand:DI 1 "register_operand" " v0") - (match_operand:DI 2 "nonmemory_operand" "vDA")))] + [(set (match_operand:DI 0 "register_operand" "= v, Sg") + (unspec:DI [ + (plus:DI (match_operand:DI 1 "register_operand" "^v0,Sg0") + (match_operand:DI 2 "nonmemory_operand" "vDA,SgDB"))] + UNSPEC_ADDPTR))] "" { - rtx new_operands[4] = { operands[0], operands[1], operands[2], - gen_rtx_REG (DImode, CC_SAVE_REG) }; + if (which_alternative == 0) + { + rtx new_operands[4] = { operands[0], operands[1], operands[2], + gen_rtx_REG (DImode, CC_SAVE_REG) }; - output_asm_insn ("v_add%^_u32 %L0, %3, %L2, %L1", new_operands); - output_asm_insn ("v_addc%^_u32 %H0, %3, %H2, %H1, %3", new_operands); + output_asm_insn ("v_add%^_u32\t%L0, %3, %L2, %L1", new_operands); + output_asm_insn ("v_addc%^_u32\t%H0, %3, %H2, %H1, %3", new_operands); + } + else + { + rtx new_operands[4] = { operands[0], operands[1], operands[2], + gen_rtx_REG (BImode, CC_SAVE_REG) }; + + output_asm_insn ("s_mov_b32\t%3, scc", new_operands); + output_asm_insn ("s_add_u32\t%L0, %L1, %L2", new_operands); + output_asm_insn ("s_addc_u32\t%H0, %H1, %H2", new_operands); + output_asm_insn ("s_cmpk_lg_u32\t%3, 0", new_operands); + } return ""; } - [(set_attr "type" "vmult") - (set_attr "length" "16")]) + [(set_attr "type" "vmult,mult") + (set_attr "length" "16,24")]) ;; }}} ;; {{{ ALU special cases: Minus -- cgit v1.1 From ba42c30445cef0aef15405d92361a536e12ae865 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Wed, 7 Oct 2020 04:56:41 -0700 Subject: c++: Adding exception specs can change dependentness Making an exception variant can cause a non-dependent function type to become dependent (since c++17 eh-specs are part of the type). The same is (possibly?) true for adding a late return type. Fixed thusly. My upcoming local extern-decl changes have a test case that covers this (which was how I found it). 
gcc/cp/ * tree.c (build_cp_fntype_variant): Clear TYPE_DEPENDENT_P_VALID if necessary. --- gcc/cp/tree.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'gcc') diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c index 8b7c679..074fa0c 100644 --- a/gcc/cp/tree.c +++ b/gcc/cp/tree.c @@ -2638,6 +2638,9 @@ build_cp_fntype_variant (tree type, cp_ref_qualifier rqual, /* Need to build a new variant. */ v = build_variant_type_copy (type); + if (!TYPE_DEPENDENT_P (v)) + /* We no longer know that it's not type-dependent. */ + TYPE_DEPENDENT_P_VALID (v) = false; TYPE_RAISES_EXCEPTIONS (v) = raises; TYPE_HAS_LATE_RETURN_TYPE (v) = late; switch (rqual) -- cgit v1.1 From 99e9b54313dbb8fdb3d2a354c9554478cc33e234 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Wed, 7 Oct 2020 05:02:34 -0700 Subject: c++: Rename DECL_BUILTIN_P to DECL_UNDECLARED_BUILTIN_P I realized I'd misnamed DECL_BUILTIN_P, it's only true of compiler builtins unless and until the user declares them -- at that point they're real decls, and will have a location in the user's source. (BUILT_IN_FN and friends still work though). This renames them so future-me is not confused as to why the predicate becomes false. gcc/cp/ * cp-tree.h (DECL_BUILTIN_P): Rename to ... (DECL_UNDECLARED_BUILTIN_P): ... here. * decl.c (duplicate_decls): Adjust. * name-lookup.c (anticipated_builtin_p): Adjust. (do_nonmember_using_decl): Likewise. libcc1/ * libcp1plugin.cc (supplement_binding): Rename DECL_BUILTIN_P. --- gcc/cp/cp-tree.h | 5 +++-- gcc/cp/decl.c | 2 +- gcc/cp/name-lookup.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index c7b5e791..e5a2ff2 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -4035,8 +4035,9 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter) #define FNDECL_USED_AUTO(NODE) \ TREE_LANG_FLAG_2 (FUNCTION_DECL_CHECK (NODE)) -/* True if NODE is a builtin decl. 
*/ -#define DECL_BUILTIN_P(NODE) \ +/* True if NODE is an undeclared builtin decl. As soon as the user + declares it, the location will be updated. */ +#define DECL_UNDECLARED_BUILTIN_P(NODE) \ (DECL_SOURCE_LOCATION(NODE) == BUILTINS_LOCATION) /* True for artificial decls added for OpenMP privatized non-static diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index f333a36..4ec1f4a 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -1467,7 +1467,7 @@ duplicate_decls (tree newdecl, tree olddecl, bool hiding, bool was_hidden) /* Check for redeclaration and other discrepancies. */ if (TREE_CODE (olddecl) == FUNCTION_DECL - && DECL_BUILTIN_P (olddecl)) + && DECL_UNDECLARED_BUILTIN_P (olddecl)) { if (TREE_CODE (newdecl) != FUNCTION_DECL) { diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index 774c447..ea0bfdc 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -2129,7 +2129,7 @@ anticipated_builtin_p (tree ovl) { return (TREE_CODE (ovl) == OVERLOAD && OVL_HIDDEN_P (ovl) - && DECL_BUILTIN_P (OVL_FUNCTION (ovl))); + && DECL_UNDECLARED_BUILTIN_P (OVL_FUNCTION (ovl))); } /* BINDING records an existing declaration for a name in the current scope. @@ -3982,7 +3982,7 @@ do_nonmember_using_decl (name_lookup &lookup, bool fn_scope_p, } else if (old.using_p ()) continue; /* This is a using decl. */ - else if (old.hidden_p () && DECL_BUILTIN_P (old_fn)) + else if (old.hidden_p () && DECL_UNDECLARED_BUILTIN_P (old_fn)) continue; /* This is an anticipated builtin. */ else if (!matching_fn_p (new_fn, old_fn)) continue; /* Parameters do not match. 
*/ -- cgit v1.1 From e089e43365f7f2a90979e2316aea25d44823f5a3 Mon Sep 17 00:00:00 2001 From: Martin Jambor Date: Wed, 7 Oct 2020 14:12:49 +0200 Subject: ipa-prop: Fix multiple-target speculation resolution As the FIXME which this patch removes states, the current code does not work when a call with multiple speculative targets gets resolved through parameter tracking during inlining - it feeds the inliner an edge it has already dealt with. The patch makes the code which should prevent it aware of the possibility that that speculation can have more than one target now. gcc/ChangeLog: 2020-09-30 Martin Jambor PR ipa/96394 * ipa-prop.c (update_indirect_edges_after_inlining): Do not add resolved speculation edges to vector of new direct edges even in presence of multiple speculative direct edges for a single call. gcc/testsuite/ChangeLog: 2020-09-30 Martin Jambor PR ipa/96394 * gcc.dg/tree-prof/pr96394.c: New test. --- gcc/ipa-prop.c | 10 +++-- gcc/testsuite/gcc.dg/tree-prof/pr96394.c | 64 ++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-prof/pr96394.c (limited to 'gcc') diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c index dec6c73..2d09d91 100644 --- a/gcc/ipa-prop.c +++ b/gcc/ipa-prop.c @@ -3787,11 +3787,13 @@ update_indirect_edges_after_inlining (struct cgraph_edge *cs, param_index = ici->param_index; jfunc = ipa_get_ith_jump_func (top, param_index); - cgraph_node *spec_target = NULL; - /* FIXME: This may need updating for multiple calls. 
*/ + auto_vec spec_targets; if (ie->speculative) - spec_target = ie->first_speculative_call_target ()->callee; + for (cgraph_edge *direct = ie->first_speculative_call_target (); + direct; + direct = direct->next_speculative_call_target ()) + spec_targets.safe_push (direct->callee); if (!opt_for_fn (node->decl, flag_indirect_inlining)) new_direct_edge = NULL; @@ -3814,7 +3816,7 @@ update_indirect_edges_after_inlining (struct cgraph_edge *cs, /* If speculation was removed, then we need to do nothing. */ if (new_direct_edge && new_direct_edge != ie - && new_direct_edge->callee == spec_target) + && spec_targets.contains (new_direct_edge->callee)) { new_direct_edge->indirect_inlining_edge = 1; top = IPA_EDGE_REF (cs); diff --git a/gcc/testsuite/gcc.dg/tree-prof/pr96394.c b/gcc/testsuite/gcc.dg/tree-prof/pr96394.c new file mode 100644 index 0000000..4280182 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-prof/pr96394.c @@ -0,0 +1,64 @@ +/* PR ipa/96394 */ +/* { dg-options "-O2" } */ + +typedef struct _entry { + int has_next; + int next_ix; + int count; +} entry; + +extern entry table[]; + +void * +__attribute__((noipa)) +PyErr_Format(entry * e){ return 0; } + +void ae(entry *); +int h(entry *); +int ap(entry *); +int ag(entry *); + +int ag(entry *j) { + if (j->has_next) + h(&table[j->next_ix]); + return 0; +} +static int ai(entry *j, int k(entry *), int l, int m) { + int am = 1; + int ab; + + /* k is either 'h' or 'ap': 50%/50% */ + ab = k(j); + + /* loop never gets executed on real data */ + for (; j->count >= 2; am += 2) + if (l) { + entry *i = &table[am + m]; + PyErr_Format(i); + } + return ab; +} +void +__attribute__((noipa)) +bug() { + h(table); + h(table); +} +int h(entry *j) { return ai(j, ap, 4, 5); } +int ap(entry *j) { return ai(j, ag, 14, 4); } + +int main(void) +{ + bug(); +} + +entry table[2] = { + { .has_next = 1 + , .next_ix = 1 + , .count = 0 + }, + { .has_next = 0 + , .next_ix = 0 + , .count = 0 + }, +}; -- cgit v1.1 From 
4e62aca0e0520e4ed2532f2d8153581190621c1a Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Wed, 7 Oct 2020 05:46:24 -0700 Subject: c++: block-scope externs get an alias [PR95677,PR31775,PR95677] This patch improves block-scope extern handling by always injecting a hidden copy into the enclosing namespace (or using a match already there). This hidden copy will be revealed if the user explicitly declares it later. We can get from the DECL_LOCAL_DECL_P local extern to the alias via DECL_LOCAL_DECL_ALIAS. This fixes several bugs and removes the kludgy per-function extern_decl_map. We only do this pushing for non-dependent local externs -- dependent ones will be pushed during instantiation. User code that expected to be able to handle incompatible local externs in different block-scopes will no longer work. That code is ill-formed. (always was, despite what 31775 claimed). I had to adjust a number of testcases that fell into this. I tried using DECL_VALUE_EXPR, but that didn't work out. Due to constexpr requirements we have to do the replacement very late (it happens in the gimplifier). Consider: extern int l[]; // #1 constexpr bool foo () { extern int l[3]; // this does not complete the type of decl #1 constexpr int *p = &l[2]; // ok return !p; } This requirement, coupled with our use of the common folding machinery makes pr97306 hard to fix, as we end up with an expression containing the two different decls for 'l', and only the c++ FE knows how to reconcile those. I punted on this. gcc/cp/ * cp-tree.h (struct language_function): Delete extern_decl_map. (DECL_LOCAL_DECL_ALIAS): New. * name-lookup.h (is_local_extern): Delete. * name-lookup.c (set_local_extern_decl_linkage): Replace with ... (push_local_extern_decl): ... this new function. (do_pushdecl): Call new function after pushing new decl. Unhide hidden non-functions. (is_local_extern): Delete. * decl.c (layout_var_decl): Do not allow VLA local externs. * decl2.c (mark_used): Also mark DECL_LOCAL_DECL_ALIAS. 
Drop old local-extern treatment. * parser.c (cp_parser_oacc_declare): Deal with local extern aliases. * pt.c (tsubst_expr): Adjust local extern instantiation. * cp-gimplify.c (cp_genericize_r): Remap DECL_LOCAL_DECLs. gcc/testsuite/ * g++.dg/cpp0x/lambda/lambda-sfinae1.C: Avoid ill-formed local extern * g++.dg/init/pr42844.C: Add expected error. * g++.dg/lookup/extern-redecl1.C: Likewise. * g++.dg/lookup/koenig15.C: Avoid ill-formed. * g++.dg/lto/pr95677.C: New. * g++.dg/other/nested-extern-1.C: Correct expected behaviour. * g++.dg/other/nested-extern-2.C: Likewise. * g++.dg/other/nested-extern.cc: Split ... * g++.dg/other/nested-extern-1.cc: ... here ... * g++.dg/other/nested-extern-2.cc: ... here. * g++.dg/template/scope5.C: Avoid ill-formed * g++.old-deja/g++.law/missed-error2.C: Allow extension. * g++.old-deja/g++.pt/crash3.C: Add expected error. --- gcc/cp/cp-gimplify.c | 26 ++- gcc/cp/cp-tree.h | 6 +- gcc/cp/decl.c | 3 +- gcc/cp/decl2.c | 25 ++- gcc/cp/name-lookup.c | 186 ++++++++------------- gcc/cp/name-lookup.h | 1 - gcc/cp/parser.c | 14 ++ gcc/cp/pt.c | 14 +- gcc/testsuite/g++.dg/cpp0x/lambda/lambda-sfinae1.C | 2 +- gcc/testsuite/g++.dg/init/pr42844.C | 2 +- gcc/testsuite/g++.dg/lookup/extern-redecl1.C | 10 +- gcc/testsuite/g++.dg/lookup/koenig15.C | 6 +- gcc/testsuite/g++.dg/lto/pr95677.C | 19 +++ gcc/testsuite/g++.dg/other/nested-extern-1.C | 16 +- gcc/testsuite/g++.dg/other/nested-extern-1.cc | 3 + gcc/testsuite/g++.dg/other/nested-extern-2.C | 27 ++- gcc/testsuite/g++.dg/other/nested-extern-2.cc | 3 + gcc/testsuite/g++.dg/other/nested-extern.cc | 1 - gcc/testsuite/g++.dg/template/scope5.C | 8 +- gcc/testsuite/g++.old-deja/g++.law/missed-error2.C | 7 +- gcc/testsuite/g++.old-deja/g++.pt/crash3.C | 5 +- 21 files changed, 206 insertions(+), 178 deletions(-) create mode 100644 gcc/testsuite/g++.dg/lto/pr95677.C create mode 100644 gcc/testsuite/g++.dg/other/nested-extern-1.cc create mode 100644 gcc/testsuite/g++.dg/other/nested-extern-2.cc delete mode 
100644 gcc/testsuite/g++.dg/other/nested-extern.cc (limited to 'gcc') diff --git a/gcc/cp/cp-gimplify.c b/gcc/cp/cp-gimplify.c index 0754982..44c9d24 100644 --- a/gcc/cp/cp-gimplify.c +++ b/gcc/cp/cp-gimplify.c @@ -980,21 +980,17 @@ cp_genericize_r (tree *stmt_p, int *walk_subtrees, void *data) /* Map block scope extern declarations to visible declarations with the same name and type in outer scopes if any. */ - if (cp_function_chain->extern_decl_map - && VAR_OR_FUNCTION_DECL_P (stmt) - && DECL_EXTERNAL (stmt)) - { - struct cxx_int_tree_map *h, in; - in.uid = DECL_UID (stmt); - h = cp_function_chain->extern_decl_map->find_with_hash (&in, in.uid); - if (h) - { - *stmt_p = h->to; - TREE_USED (h->to) |= TREE_USED (stmt); - *walk_subtrees = 0; - return NULL; - } - } + if (VAR_OR_FUNCTION_DECL_P (stmt) && DECL_LOCAL_DECL_P (stmt)) + if (tree alias = DECL_LOCAL_DECL_ALIAS (stmt)) + { + if (alias != error_mark_node) + { + *stmt_p = alias; + TREE_USED (alias) |= TREE_USED (stmt); + } + *walk_subtrees = 0; + return NULL; + } if (TREE_CODE (stmt) == INTEGER_CST && TYPE_REF_P (TREE_TYPE (stmt)) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index e5a2ff2..4672561 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -1926,7 +1926,6 @@ struct GTY(()) language_function { /* Tracking possibly infinite loops. This is a vec only because vec doesn't work with gtype. */ vec *infinite_loops; - hash_table *extern_decl_map; }; /* The current C++-specific per-function global variables. */ @@ -2697,6 +2696,7 @@ struct GTY(()) lang_decl_min { In a lambda-capture proxy VAR_DECL, this is DECL_CAPTURED_VARIABLE. In a function-scope TREE_STATIC VAR_DECL or IMPLICIT_TYPEDEF_P TYPE_DECL, this is DECL_DISCRIMINATOR. + In a DECL_LOCAL_DECL_P decl, this is the namespace decl it aliases. Otherwise, in a class-scope DECL, this is DECL_ACCESS. 
*/ tree access; }; @@ -4023,6 +4023,10 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter) #define DECL_LOCAL_DECL_P(NODE) \ DECL_LANG_FLAG_0 (VAR_OR_FUNCTION_DECL_CHECK (NODE)) +/* The namespace-scope decl a DECL_LOCAL_DECL_P aliases. */ +#define DECL_LOCAL_DECL_ALIAS(NODE) \ + DECL_ACCESS ((gcc_checking_assert (DECL_LOCAL_DECL_P (NODE)), NODE)) + /* Nonzero if NODE is the target for genericization of 'return' stmts in constructors/destructors of targetm.cxx.cdtor_returns_this targets. */ #define LABEL_DECL_CDTOR(NODE) \ diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c index 4ec1f4a..0fe74b2 100644 --- a/gcc/cp/decl.c +++ b/gcc/cp/decl.c @@ -5830,7 +5830,8 @@ layout_var_decl (tree decl) && DECL_SIZE (decl) != NULL_TREE && ! TREE_CONSTANT (DECL_SIZE (decl))) { - if (TREE_CODE (DECL_SIZE (decl)) == INTEGER_CST) + if (TREE_CODE (DECL_SIZE (decl)) == INTEGER_CST + && !DECL_LOCAL_DECL_P (decl)) constant_expression_warning (DECL_SIZE (decl)); else { diff --git a/gcc/cp/decl2.c b/gcc/cp/decl2.c index fd48a21..db3035d 100644 --- a/gcc/cp/decl2.c +++ b/gcc/cp/decl2.c @@ -5567,6 +5567,22 @@ mark_used (tree decl, tsubst_flags_t complain) return false; } + if (VAR_OR_FUNCTION_DECL_P (decl) && DECL_LOCAL_DECL_P (decl)) + { + if (!DECL_LANG_SPECIFIC (decl)) + /* An unresolved dependent local extern. */ + return true; + + DECL_ODR_USED (decl) = 1; + auto alias = DECL_LOCAL_DECL_ALIAS (decl); + if (!alias || alias == error_mark_node) + return true; + + /* Process the underlying decl. 
*/ + decl = alias; + TREE_USED (decl) = true; + } + cp_warn_deprecated_use (decl, complain); /* We can only check DECL_ODR_USED on variables or functions with @@ -5650,14 +5666,7 @@ mark_used (tree decl, tsubst_flags_t complain) && !DECL_ARTIFICIAL (decl) && !decl_defined_p (decl) && no_linkage_check (TREE_TYPE (decl), /*relaxed_p=*/false)) - { - if (is_local_extern (decl)) - /* There's no way to define a local extern, and adding it to - the vector interferes with GC, so give an error now. */ - no_linkage_error (decl); - else - vec_safe_push (no_linkage_decls, decl); - } + vec_safe_push (no_linkage_decls, decl); if (TREE_CODE (decl) == FUNCTION_DECL && DECL_DECLARED_INLINE_P (decl) diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c index ea0bfdc..e3f3712 100644 --- a/gcc/cp/name-lookup.c +++ b/gcc/cp/name-lookup.c @@ -38,6 +38,7 @@ along with GCC; see the file COPYING3. If not see static cxx_binding *cxx_binding_make (tree value, tree type); static cp_binding_level *innermost_nonclass_level (void); +static tree do_pushdecl_with_scope (tree x, cp_binding_level *, bool hiding); static void set_identifier_type_value_with_scope (tree id, tree decl, cp_binding_level *b); static name_hint maybe_suggest_missing_std_header (location_t location, @@ -2921,108 +2922,66 @@ set_decl_context_in_fn (tree ctx, tree decl) DECL_CONTEXT (decl) = ctx; } -/* DECL is a local-scope decl with linkage. SHADOWED is true if the - name is already bound at the current level. - - [basic.link] If there is a visible declaration of an entity with - linkage having the same name and type, ignoring entities declared - outside the innermost enclosing namespace scope, the block scope - declaration declares that same entity and receives the linkage of - the previous declaration. - - Also, make sure that this decl matches any existing external decl - in the enclosing namespace. */ +/* DECL is a local extern decl. Find or create the namespace-scope + decl that it aliases. 
Also, determines the linkage of DECL. */ static void -set_local_extern_decl_linkage (tree decl, bool shadowed) +push_local_extern_decl_alias (tree decl) { - tree ns_value = decl; /* Unique marker. */ - - if (!shadowed) - { - tree loc_value = innermost_non_namespace_value (DECL_NAME (decl)); - if (!loc_value) - { - ns_value - = find_namespace_value (current_namespace, DECL_NAME (decl)); - loc_value = ns_value; - } - if (loc_value == error_mark_node - /* An ambiguous lookup. */ - || (loc_value && TREE_CODE (loc_value) == TREE_LIST)) - loc_value = NULL_TREE; - - for (ovl_iterator iter (loc_value); iter; ++iter) - if (!iter.hidden_p () - && (TREE_STATIC (*iter) || DECL_EXTERNAL (*iter)) - && decls_match (*iter, decl)) - { - /* The standard only says that the local extern inherits - linkage from the previous decl; in particular, default - args are not shared. Add the decl into a hash table to - make sure only the previous decl in this case is seen - by the middle end. */ - struct cxx_int_tree_map *h; - - /* We inherit the outer decl's linkage. But we're a - different decl. */ - TREE_PUBLIC (decl) = TREE_PUBLIC (*iter); - - if (cp_function_chain->extern_decl_map == NULL) - cp_function_chain->extern_decl_map - = hash_table::create_ggc (20); - - h = ggc_alloc (); - h->uid = DECL_UID (decl); - h->to = *iter; - cxx_int_tree_map **loc = cp_function_chain->extern_decl_map - ->find_slot (h, INSERT); - *loc = h; - break; - } - } + if (dependent_type_p (TREE_TYPE (decl))) + return; + /* EH specs were not part of the function type prior to c++17, but + we still can't go pushing dependent eh specs into the namespace. */ + if (cxx_dialect < cxx17 + && TREE_CODE (decl) == FUNCTION_DECL + && (value_dependent_expression_p + (TYPE_RAISES_EXCEPTIONS (TREE_TYPE (decl))))) + return; - if (TREE_PUBLIC (decl)) - { - /* DECL is externally visible. Make sure it matches a matching - decl in the namespace scope. 
We only really need to check - this when inserting the decl, not when we find an existing - match in the current scope. However, in practice we're - going to be inserting a new decl in the majority of cases -- - who writes multiple extern decls for the same thing in the - same local scope? Doing it here often avoids a duplicate - namespace lookup. */ + gcc_checking_assert (!DECL_LANG_SPECIFIC (decl) + || !DECL_TEMPLATE_INFO (decl)); + if (DECL_LANG_SPECIFIC (decl) && DECL_LOCAL_DECL_ALIAS (decl)) + /* We're instantiating a non-dependent local decl, it already + knows the alias. */ + return; - /* Avoid repeating a lookup. */ - if (ns_value == decl) - ns_value = find_namespace_value (current_namespace, DECL_NAME (decl)); + tree alias = NULL_TREE; - if (ns_value == error_mark_node - || (ns_value && TREE_CODE (ns_value) == TREE_LIST)) - ns_value = NULL_TREE; + if (DECL_SIZE (decl) && !TREE_CONSTANT (DECL_SIZE (decl))) + /* Do not let a VLA creep into a namespace. Diagnostic will be + emitted in layout_var_decl later. */ + alias = error_mark_node; + else + { + /* First look for a decl that matches. */ + tree ns = CP_DECL_CONTEXT (decl); + tree binding = find_namespace_value (ns, DECL_NAME (decl)); - for (ovl_iterator iter (ns_value); iter; ++iter) - { - tree other = *iter; - - if (!(TREE_PUBLIC (other) || DECL_EXTERNAL (other))) - ; /* Not externally visible. */ - else if (DECL_EXTERN_C_P (decl) && DECL_EXTERN_C_P (other)) - ; /* Both are extern "C", we'll check via that mechanism. 
*/ - else if (TREE_CODE (other) != TREE_CODE (decl) - || ((VAR_P (decl) || matching_fn_p (other, decl)) - && !comptypes (TREE_TYPE (decl), TREE_TYPE (other), - COMPARE_REDECLARATION))) + if (binding && TREE_CODE (binding) != TREE_LIST) + for (ovl_iterator iter (binding); iter; ++iter) + if (decls_match (*iter, decl)) { - auto_diagnostic_group d; - if (permerror (DECL_SOURCE_LOCATION (decl), - "local external declaration %q#D", decl)) - inform (DECL_SOURCE_LOCATION (other), - "does not match previous declaration %q#D", other); + alias = *iter; break; } + + if (!alias) + { + /* No existing namespace-scope decl. Make one. */ + alias = copy_decl (decl); + + /* This is the real thing. */ + DECL_LOCAL_DECL_P (alias) = false; + + /* Expected default linkage is from the namespace. */ + TREE_PUBLIC (alias) = TREE_PUBLIC (ns); + alias = do_pushdecl_with_scope (alias, NAMESPACE_LEVEL (ns), + /* hiding= */true); } } + + retrofit_lang_decl (decl); + DECL_LOCAL_DECL_ALIAS (decl) = alias; } /* Record DECL as belonging to the current lexical scope. Check for @@ -3080,10 +3039,6 @@ do_pushdecl (tree decl, bool hiding) old = binding->value; } - if (current_function_decl && VAR_OR_FUNCTION_DECL_P (decl) - && DECL_EXTERNAL (decl)) - set_local_extern_decl_linkage (decl, old != NULL_TREE); - if (old == error_mark_node) old = NULL_TREE; @@ -3115,6 +3070,16 @@ do_pushdecl (tree decl, bool hiding) /* We need to check and register the decl now. */ check_extern_c_conflict (match); } + else if (slot && !hiding + && STAT_HACK_P (*slot) && STAT_DECL_HIDDEN_P (*slot)) + { + /* Unhide the non-function. 
*/ + gcc_checking_assert (old == match); + if (!STAT_TYPE (*slot)) + *slot = match; + else + STAT_DECL (*slot) = match; + } return match; } @@ -3190,12 +3155,21 @@ do_pushdecl (tree decl, bool hiding) if (!instantiating_current_function_p ()) record_locally_defined_typedef (decl); } - else if (VAR_P (decl)) - maybe_register_incomplete_var (decl); + else + { + if (VAR_P (decl) && !DECL_LOCAL_DECL_P (decl)) + maybe_register_incomplete_var (decl); + + if (VAR_OR_FUNCTION_DECL_P (decl)) + { + if (DECL_LOCAL_DECL_P (decl) + && TREE_CODE (CP_DECL_CONTEXT (decl)) == NAMESPACE_DECL) + push_local_extern_decl_alias (decl); - if ((VAR_P (decl) || TREE_CODE (decl) == FUNCTION_DECL) - && DECL_EXTERN_C_P (decl)) - check_extern_c_conflict (decl); + if (DECL_EXTERN_C_P (decl)) + check_extern_c_conflict (decl); + } + } } else add_decl_to_level (level, decl); @@ -6871,20 +6845,6 @@ lookup_elaborated_type (tree name, TAG_how how) return ret; } -/* Returns true iff DECL is a block-scope extern declaration of a function - or variable. We will already have determined validity of the decl - when pushing it. So we do not have to redo that lookup. */ - -bool -is_local_extern (tree decl) -{ - if ((TREE_CODE (decl) == FUNCTION_DECL - || TREE_CODE (decl) == VAR_DECL)) - return DECL_LOCAL_DECL_P (decl); - - return false; -} - /* The type TYPE is being declared. If it is a class template, or a specialization of a class template, do any processing required and perform error-checking. 
If IS_FRIEND is nonzero, this TYPE is diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h index 01643fb..d63ff10 100644 --- a/gcc/cp/name-lookup.h +++ b/gcc/cp/name-lookup.h @@ -342,7 +342,6 @@ extern tree lookup_qualified_name (tree scope, tree name, extern tree lookup_qualified_name (tree scope, const char *name, LOOK_want = LOOK_want::NORMAL, bool = true); -extern bool is_local_extern (tree); extern bool pushdecl_class_level (tree); extern tree pushdecl_namespace_level (tree, bool hiding = false); extern bool push_class_level_binding (tree, tree); diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 2002c05..7a61abf 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -41176,6 +41176,10 @@ cp_parser_oacc_declare (cp_parser *parser, cp_token *pragma_tok) } if (!found_in_scope) + /* This seems to ignore the existence of cleanup scopes? + What is the meaning for local extern decls? The local + extern is in this scope, but it is referring to a decl that + is namespace scope. */ for (tree d = current_binding_level->names; d; d = TREE_CHAIN (d)) if (d == decl) { @@ -41205,6 +41209,16 @@ cp_parser_oacc_declare (cp_parser *parser, cp_token *pragma_tok) { tree id; + if (DECL_LOCAL_DECL_P (decl)) + /* We need to mark the aliased decl, as that is the entity + that is being referred to. This won't work for + dependent variables, but it didn't work for them before + DECL_LOCAL_DECL_P was a thing either. But then + dependent local extern variable decls are as rare as + hen's teeth. */ + if (auto alias = DECL_LOCAL_DECL_ALIAS (decl)) + decl = alias; + if (OMP_CLAUSE_MAP_KIND (t) == GOMP_MAP_LINK) id = get_identifier ("omp declare target link"); else diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 1ab5435..3755aab 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -18104,13 +18104,15 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl, } else if (DECL_IMPLICIT_TYPEDEF_P (t)) /* We already did a pushtag. 
*/; - else if (TREE_CODE (decl) == FUNCTION_DECL - && DECL_LOCAL_DECL_P (decl) - && DECL_OMP_DECLARE_REDUCTION_P (decl)) + else if (VAR_OR_FUNCTION_DECL_P (decl) + && DECL_LOCAL_DECL_P (decl)) { - DECL_CONTEXT (decl) = current_function_decl; - pushdecl (decl); - if (cp_check_omp_declare_reduction (decl)) + if (TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL) + DECL_CONTEXT (decl) = NULL_TREE; + decl = pushdecl (decl); + if (TREE_CODE (decl) == FUNCTION_DECL + && DECL_OMP_DECLARE_REDUCTION_P (decl) + && cp_check_omp_declare_reduction (decl)) instantiate_body (pattern_decl, args, decl, true); } else diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-sfinae1.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-sfinae1.C index 5928894..a9acc20 100644 --- a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-sfinae1.C +++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-sfinae1.C @@ -9,7 +9,7 @@ struct AddRvalueReferenceImpl { typedef T type; }; template struct AddRvalueReferenceImpl::type> { typedef T &&type; }; diff --git a/gcc/testsuite/g++.dg/init/pr42844.C b/gcc/testsuite/g++.dg/init/pr42844.C index 9b7ed05..d616f45 100644 --- a/gcc/testsuite/g++.dg/init/pr42844.C +++ b/gcc/testsuite/g++.dg/init/pr42844.C @@ -49,7 +49,7 @@ template void g () { T const t; // { dg-error "uninitialized 'const" } - extern T const text; + extern T const text; // { dg-error "conflicting declaration" } } template void g (); diff --git a/gcc/testsuite/g++.dg/lookup/extern-redecl1.C b/gcc/testsuite/g++.dg/lookup/extern-redecl1.C index 18e675b..f3e9b69 100644 --- a/gcc/testsuite/g++.dg/lookup/extern-redecl1.C +++ b/gcc/testsuite/g++.dg/lookup/extern-redecl1.C @@ -1,18 +1,18 @@ extern int X; // { dg-message "previous declaration" } -extern int Y (int); // { dg-message "previous declaration" } +extern int Y (int); // { dg-message "old declaration" } extern int Y (float); -static int Z (int s) +static int Z (int s) // { dg-message "old declaration" } { return s; } void Foo () { - extern char X; // { dg-error "local 
external declaration" } - extern char Y (int); // { dg-error "local external declaration" } + extern char X; // { dg-error "conflicting declaration" } + extern char Y (int); // { dg-error "ambiguating new declaration" } extern int Y (float); extern void Y (double); - extern char Z (int); + extern char Z (int); // { dg-error "ambiguating new declaration" } } diff --git a/gcc/testsuite/g++.dg/lookup/koenig15.C b/gcc/testsuite/g++.dg/lookup/koenig15.C index f317c01..6bf916a 100644 --- a/gcc/testsuite/g++.dg/lookup/koenig15.C +++ b/gcc/testsuite/g++.dg/lookup/koenig15.C @@ -4,10 +4,12 @@ namespace N { struct S { }; void f(S); + void g(S); } namespace M { void f(int); + void g(int); } void @@ -40,6 +42,6 @@ void fn3 () { N::S s; - extern void (*f)(char); - f (s); // { dg-error "cannot convert" } + extern void (*g)(char); + g (s); // { dg-error "cannot convert" } } diff --git a/gcc/testsuite/g++.dg/lto/pr95677.C b/gcc/testsuite/g++.dg/lto/pr95677.C new file mode 100644 index 0000000..520ef04 --- /dev/null +++ b/gcc/testsuite/g++.dg/lto/pr95677.C @@ -0,0 +1,19 @@ +// PR c++/95677 + +// { dg-do link } +// { dg-require-effective-target lto } +// { dg-options "-flto" } + + + +namespace { + void foo() { + extern int xx; // injects a *static* + xx = 0; + } + int xx = 1; +} + +int main() { + xx = 2; +} diff --git a/gcc/testsuite/g++.dg/other/nested-extern-1.C b/gcc/testsuite/g++.dg/other/nested-extern-1.C index 6533a2a..6c879e5 100644 --- a/gcc/testsuite/g++.dg/other/nested-extern-1.C +++ b/gcc/testsuite/g++.dg/other/nested-extern-1.C @@ -1,17 +1,21 @@ /* { dg-do run } */ -// { dg-additional-sources "nested-extern.cc" } +// { dg-additional-options "-fpermissive" } +// { dg-additional-sources "nested-extern-1.cc" } /* PR 31775 */ -extern "C" void abort(); +extern int *p_otheri; extern int *p; int main() { - extern int i; + extern int i; // { dg-message "previous declaration" } i = 1; *p = 2; - if (i == 2) - abort (); + if (i != 2) + return 1; + if (p_otheri != p) + return 2; 
return 0; } -static int i; +// This is extern because of the injection above. +static int i; // { dg-warning ".extern. and later .static" } int *p = &i; diff --git a/gcc/testsuite/g++.dg/other/nested-extern-1.cc b/gcc/testsuite/g++.dg/other/nested-extern-1.cc new file mode 100644 index 0000000..7d70072 --- /dev/null +++ b/gcc/testsuite/g++.dg/other/nested-extern-1.cc @@ -0,0 +1,3 @@ +extern int i; + +int *p_otheri = &i; diff --git a/gcc/testsuite/g++.dg/other/nested-extern-2.C b/gcc/testsuite/g++.dg/other/nested-extern-2.C index 58f53e0..acd78ff 100644 --- a/gcc/testsuite/g++.dg/other/nested-extern-2.C +++ b/gcc/testsuite/g++.dg/other/nested-extern-2.C @@ -1,18 +1,27 @@ /* { dg-do run } */ -// { dg-additional-sources "nested-extern.cc" } /* PR 31775 */ -extern "C" void abort(); -static int i; -int *p = &i; +// { dg-additional-sources "nested-extern-2.cc" } +extern int *p_otheri; +static int i; // #1 +int *p_si = &i; int main() { int i; - { + int *p_ai = &i; + { + // This is an alias of #1, not a different object extern int i; - i = 1; - *p = 2; - if (i == 2) - abort (); + int *p_ei = &i; + + *p_si = 1; + *p_ai = 2; + *p_ei = 3; + if (*p_si != 3) + return 1; + if (*p_ai != 2) + return 2; + if (*p_otheri != 17) + return 3; } return 0; } diff --git a/gcc/testsuite/g++.dg/other/nested-extern-2.cc b/gcc/testsuite/g++.dg/other/nested-extern-2.cc new file mode 100644 index 0000000..da43380 --- /dev/null +++ b/gcc/testsuite/g++.dg/other/nested-extern-2.cc @@ -0,0 +1,3 @@ +int i = 17; // never touched + +int *p_otheri = &i; diff --git a/gcc/testsuite/g++.dg/other/nested-extern.cc b/gcc/testsuite/g++.dg/other/nested-extern.cc deleted file mode 100644 index 048f715..0000000 --- a/gcc/testsuite/g++.dg/other/nested-extern.cc +++ /dev/null @@ -1 +0,0 @@ -int i; diff --git a/gcc/testsuite/g++.dg/template/scope5.C b/gcc/testsuite/g++.dg/template/scope5.C index 629225c..cf23a08 100644 --- a/gcc/testsuite/g++.dg/template/scope5.C +++ b/gcc/testsuite/g++.dg/template/scope5.C @@ 
-57,10 +57,12 @@ enum { a = b::e<0>::f }; template class au; template struct ac : ao { typedef c::e aq; }; template void ay(aw, i, ax) { - au::o>::f> > az(); + // Not sure if this has been creduced from an initialization of a + // variable to a block-scope extern function decl + au::o>::f> > az2(); } void v() { ad a; - void az(); - ay(az, a, v); + void az1(); + ay(az1, a, v); } diff --git a/gcc/testsuite/g++.old-deja/g++.law/missed-error2.C b/gcc/testsuite/g++.old-deja/g++.law/missed-error2.C index eaf8c01..7ae494b 100644 --- a/gcc/testsuite/g++.old-deja/g++.law/missed-error2.C +++ b/gcc/testsuite/g++.old-deja/g++.law/missed-error2.C @@ -1,4 +1,5 @@ // { dg-do assemble } +// { dg-additional-options -fpermissive } // GROUPS passed missed-error // missed-error file // From: ndc!don@csvax.cs.caltech.edu (Don Erway) @@ -20,14 +21,14 @@ inline double max(double a, double b) {return a > b ? a : b;}; // { dg-message " // { dg-error "extra ';'" "extra ;" { target c++98_only } .-1 } int main() { + // we treat this as-if extern static void foo(int i, int j, double x, double y) ;// { dg-error "" } .* foo(4, -37, 14.39, 14.38); } -// 971006 we no longer give an error for this since we emit a hard error -// about the declaration above -static void foo(int i, int j, double x, double y) { +static void foo(int i, int j, double x, double y) // { dg-warning ".extern." 
} +{ std::cout << "Max(int): " << max(i,j) << " Max(double): " << max(x,y) << '\n'; diff --git a/gcc/testsuite/g++.old-deja/g++.pt/crash3.C b/gcc/testsuite/g++.old-deja/g++.pt/crash3.C index e5b3f25..52701b7 100644 --- a/gcc/testsuite/g++.old-deja/g++.pt/crash3.C +++ b/gcc/testsuite/g++.old-deja/g++.pt/crash3.C @@ -5,12 +5,13 @@ class CVector { public: CVector f() const { - CVector v(); + // local-extern :) + CVector v(); // { dg-message "old declaration" } return v; // { dg-error "convert" } } CVector g() const { - CVector v(); + CVector v(); // { dg-error "ambiguating new" } return v; // { dg-error "convert" } } }; -- cgit v1.1 From 7345c89ecb1a31ce96c6789bffc7183268a040b3 Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 6 Oct 2020 16:58:00 -0400 Subject: Add -fdiagnostics-path-format=separate-events to -fdiagnostics-plain-output The path-printing default of -fdiagnostics-path-format=inline-events interacted poorly with -fdiagnostics-plain-output, so it makes most sense to add -fdiagnostics-path-format=separate-events to -fdiagnostics-plain-output. Seen when adding an experimental analyzer plugin to gcc.dg/plugin.exp. gcc/ChangeLog: * doc/invoke.texi (-fdiagnostics-plain-output): Add -fdiagnostics-path-format=separate-events to list of options injected by -fdiagnostics-plain-output. * opts-common.c (decode_cmdline_options_to_array): Likewise. gcc/testsuite/ChangeLog: * g++.dg/analyzer/analyzer.exp (DEFAULT_CXXFLAGS): Remove -fdiagnostics-path-format=separate-events. * gcc.dg/analyzer/analyzer.exp (DEFAULT_CFLAGS): Likewise. * gcc.dg/plugin/diagnostic-path-format-default.c: Rename to... * gcc.dg/plugin/diagnostic-path-format-plain.c: ...this. Remove dg-options directive. Copy remainder of test from diagnostic-path-format-separate-events.c. * gcc.dg/plugin/diagnostic-test-paths-2.c: Add -fdiagnostics-path-format=inline-events to options. Fix expected output for location of conditional within "for" loop. 
* gcc.dg/plugin/plugin.exp (plugin_test_list): Update for renaming. * gfortran.dg/analyzer/analyzer.exp (DEFAULT_FFLAGS): Remove -fdiagnostics-path-format=separate-events. --- gcc/doc/invoke.texi | 3 +- gcc/opts-common.c | 1 + gcc/testsuite/g++.dg/analyzer/analyzer.exp | 2 +- gcc/testsuite/gcc.dg/analyzer/analyzer.exp | 2 +- .../gcc.dg/plugin/diagnostic-path-format-default.c | 142 --------------------- .../gcc.dg/plugin/diagnostic-path-format-plain.c | 42 ++++++ .../gcc.dg/plugin/diagnostic-test-paths-2.c | 8 +- gcc/testsuite/gcc.dg/plugin/plugin.exp | 2 +- gcc/testsuite/gfortran.dg/analyzer/analyzer.exp | 2 +- 9 files changed, 53 insertions(+), 151 deletions(-) delete mode 100644 gcc/testsuite/gcc.dg/plugin/diagnostic-path-format-default.c create mode 100644 gcc/testsuite/gcc.dg/plugin/diagnostic-path-format-plain.c (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 7c81d7f..c177496 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -4345,7 +4345,8 @@ options: @gccoptlist{-fno-diagnostics-show-caret @gol -fno-diagnostics-show-line-numbers @gol -fdiagnostics-color=never @gol --fdiagnostics-urls=never} +-fdiagnostics-urls=never @gol +-fdiagnostics-path-format=separate-events} In the future, if GCC changes the default appearance of its diagnostics, the corresponding option to disable the new behavior will be added to this list. 
diff --git a/gcc/opts-common.c b/gcc/opts-common.c index 237e4ce..8ec8c1e 100644 --- a/gcc/opts-common.c +++ b/gcc/opts-common.c @@ -1000,6 +1000,7 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv, "-fno-diagnostics-show-line-numbers", "-fdiagnostics-color=never", "-fdiagnostics-urls=never", + "-fdiagnostics-path-format=separate-events", }; const int num_expanded = ARRAY_SIZE (expanded_args); opt_array_len += num_expanded - 1; diff --git a/gcc/testsuite/g++.dg/analyzer/analyzer.exp b/gcc/testsuite/g++.dg/analyzer/analyzer.exp index 60262f6..78edff2 100644 --- a/gcc/testsuite/g++.dg/analyzer/analyzer.exp +++ b/gcc/testsuite/g++.dg/analyzer/analyzer.exp @@ -29,7 +29,7 @@ if [info exists DEFAULT_CXXFLAGS] then { } # If a testcase doesn't have special options, use these. -set DEFAULT_CXXFLAGS " -fanalyzer -fdiagnostics-path-format=separate-events -Wanalyzer-too-complex -fanalyzer-call-summaries" +set DEFAULT_CXXFLAGS " -fanalyzer -Wanalyzer-too-complex -fanalyzer-call-summaries" # Initialize `dg'. dg-init diff --git a/gcc/testsuite/gcc.dg/analyzer/analyzer.exp b/gcc/testsuite/gcc.dg/analyzer/analyzer.exp index ac9c495..d72fef3 100644 --- a/gcc/testsuite/gcc.dg/analyzer/analyzer.exp +++ b/gcc/testsuite/gcc.dg/analyzer/analyzer.exp @@ -30,7 +30,7 @@ if [info exists DEFAULT_CFLAGS] then { } # If a testcase doesn't have special options, use these. -set DEFAULT_CFLAGS "-fanalyzer -fdiagnostics-path-format=separate-events -Wanalyzer-too-complex -fanalyzer-call-summaries" +set DEFAULT_CFLAGS "-fanalyzer -Wanalyzer-too-complex -fanalyzer-call-summaries" # Initialize `dg'. 
dg-init diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-path-format-default.c b/gcc/testsuite/gcc.dg/plugin/diagnostic-path-format-default.c deleted file mode 100644 index 5712dbd..0000000 --- a/gcc/testsuite/gcc.dg/plugin/diagnostic-path-format-default.c +++ /dev/null @@ -1,142 +0,0 @@ -/* { dg-do compile } */ -/* { dg-options "-fdiagnostics-show-caret" } */ - -#include - -void *wrapped_malloc (size_t size) -{ - return malloc (size); -} - -void wrapped_free (void *ptr) -{ - free (ptr); /* { dg-warning "double-free of 'ptr' \\\[CWE-415\\]" } */ - /* { dg-begin-multiline-output "" } - free (ptr); - ^~~~~~~~~~ - 'test': events 1-2 - | - | { - | ^ - | | - | (1) entering 'test' - | boxed_int *obj = make_boxed_int (i); - | ~~~~~~~~~~~~~~~~~~ - | | - | (2) calling 'make_boxed_int' - | - +--> 'make_boxed_int': events 3-4 - | - | { - | ^ - | | - | (3) entering 'make_boxed_int' - | boxed_int *result = (boxed_int *)wrapped_malloc (sizeof (boxed_int)); - | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - | | - | (4) calling 'wrapped_malloc' - | - +--> 'wrapped_malloc': events 5-6 - | - | { - | ^ - | | - | (5) entering 'wrapped_malloc' - | return malloc (size); - | ~~~~~~~~~~~~~ - | | - | (6) calling 'malloc' - | - <-------------+ - | - 'test': event 7 - | - | free_boxed_int (obj); - | ^~~~~~~~~~~~~~~~~~~~ - | | - | (7) calling 'free_boxed_int' - | - +--> 'free_boxed_int': events 8-9 - | - | { - | ^ - | | - | (8) entering 'free_boxed_int' - | wrapped_free (bi); - | ~~~~~~~~~~~~~~~~~ - | | - | (9) calling 'wrapped_free' - | - +--> 'wrapped_free': events 10-11 - | - | { - | ^ - | | - | (10) entering 'wrapped_free' - | free (ptr); - | ~~~~~~~~~~ - | | - | (11) calling 'free' - | - <-------------+ - | - 'test': event 12 - | - | free_boxed_int (obj); - | ^~~~~~~~~~~~~~~~~~~~ - | | - | (12) calling 'free_boxed_int' - | - +--> 'free_boxed_int': events 13-14 - | - | { - | ^ - | | - | (13) entering 'free_boxed_int' - | wrapped_free (bi); - | ~~~~~~~~~~~~~~~~~ - | | - | (14) calling 
'wrapped_free' - | - +--> 'wrapped_free': events 15-16 - | - | { - | ^ - | | - | (15) entering 'wrapped_free' - | free (ptr); - | ~~~~~~~~~~ - | | - | (16) calling 'free' - | - { dg-end-multiline-output "" } */ -} - -typedef struct boxed_int -{ - int i; -} boxed_int; - -boxed_int * -make_boxed_int (int i) -{ - boxed_int *result = (boxed_int *)wrapped_malloc (sizeof (boxed_int)); - result->i = i; - return result; -} - -void -free_boxed_int (boxed_int *bi) -{ - wrapped_free (bi); -} - -void test (int i) -{ - boxed_int *obj = make_boxed_int (i); - - free_boxed_int (obj); - - free_boxed_int (obj); -} - diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-path-format-plain.c b/gcc/testsuite/gcc.dg/plugin/diagnostic-path-format-plain.c new file mode 100644 index 0000000..75acd25 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-path-format-plain.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ + +#include + +void *wrapped_malloc (size_t size) +{ + return malloc (size); +} + +void wrapped_free (void *ptr) +{ + free (ptr); /* { dg-warning "double-free of 'ptr' \\\[CWE-415\\]" } */ +} + +typedef struct boxed_int +{ + int i; +} boxed_int; + +boxed_int * +make_boxed_int (int i) +{ + boxed_int *result = (boxed_int *)wrapped_malloc (sizeof (boxed_int)); + result->i = i; + return result; +} + +void +free_boxed_int (boxed_int *bi) +{ + wrapped_free (bi); +} + +void test (int i) +{ /* { dg-message "\\(1\\) entering 'test'" } */ + boxed_int *obj = make_boxed_int (i); /* { dg-message "\\(2\\) calling 'make_boxed_int'" } */ + /* etc */ + + free_boxed_int (obj); + + free_boxed_int (obj); +} diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-2.c b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-2.c index 946a234..b2b269a 100644 --- a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-2.c +++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-paths-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fdiagnostics-show-caret -fdiagnostics-show-line-numbers" } */ +/* 
{ dg-options "-fdiagnostics-show-caret -fdiagnostics-show-line-numbers -fdiagnostics-path-format=inline-events" } */ #include #include @@ -43,9 +43,9 @@ make_a_list_of_random_ints_badly(PyObject *self, | | (1) when 'PyList_New' fails, returning NULL | 26 | | 27 | for (i = 0; i < count; i++) { - | | ~~~ - | | | - | | (2) when 'i < count' + | | ~~~~~~~~~ + | | | + | | (2) when 'i < count' | 28 | item = PyLong_FromLong(random()); | 29 | PyList_Append(list, item); | | ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp index c02b008..5dd102a 100644 --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp @@ -100,7 +100,7 @@ set plugin_test_list [list \ diagnostic-test-paths-2.c \ diagnostic-test-paths-3.c \ diagnostic-test-paths-4.c \ - diagnostic-path-format-default.c \ + diagnostic-path-format-plain.c \ diagnostic-path-format-none.c \ diagnostic-path-format-separate-events.c \ diagnostic-path-format-inline-events-1.c \ diff --git a/gcc/testsuite/gfortran.dg/analyzer/analyzer.exp b/gcc/testsuite/gfortran.dg/analyzer/analyzer.exp index 00edfa5..462395c 100644 --- a/gcc/testsuite/gfortran.dg/analyzer/analyzer.exp +++ b/gcc/testsuite/gfortran.dg/analyzer/analyzer.exp @@ -33,7 +33,7 @@ if [info exists DEFAULT_FFLAGS] then { } # If a testcase doesn't have special options, use these. -set DEFAULT_FFLAGS "-fanalyzer -fdiagnostics-path-format=separate-events -Wanalyzer-too-complex -fanalyzer-call-summaries" +set DEFAULT_FFLAGS "-fanalyzer -Wanalyzer-too-complex -fanalyzer-call-summaries" # Initialize `dg'. dg-init -- cgit v1.1 From 2f7c50b7091c09d665aaf27173aacf34c9904e4c Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Tue, 6 Oct 2020 17:59:07 -0400 Subject: analyzer: handle C++ argument numbers and "this" [PR97116] gcc/analyzer/ChangeLog: PR analyzer/97116 * sm-malloc.cc (method_p): New. (describe_argument_index): New. 
(inform_nonnull_attribute): Use describe_argument_index. (possible_null_arg::describe_final_event): Likewise. (null_arg::describe_final_event): Likewise. gcc/testsuite/ChangeLog: PR analyzer/97116 * g++.dg/analyzer/pr97116.C: New test. --- gcc/analyzer/sm-malloc.cc | 61 +++++++++++++++++++++++++-------- gcc/testsuite/g++.dg/analyzer/pr97116.C | 39 +++++++++++++++++++++ 2 files changed, 86 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/g++.dg/analyzer/pr97116.C (limited to 'gcc') diff --git a/gcc/analyzer/sm-malloc.cc b/gcc/analyzer/sm-malloc.cc index 6293d78..fd12a35 100644 --- a/gcc/analyzer/sm-malloc.cc +++ b/gcc/analyzer/sm-malloc.cc @@ -562,15 +562,40 @@ public: }; +/* Return true if FNDECL is a C++ method. */ + +static bool +method_p (tree fndecl) +{ + return TREE_CODE (TREE_TYPE (fndecl)) == METHOD_TYPE; +} + +/* Return a 1-based description of ARG_IDX (0-based) of FNDECL. + Compare with %P in the C++ FE (implemented in cp/error.c: parm_to_string + as called from cp_printer). */ + +static label_text +describe_argument_index (tree fndecl, int arg_idx) +{ + if (method_p (fndecl)) + if (arg_idx == 0) + return label_text::borrow ("'this'"); + pretty_printer pp; + pp_printf (&pp, "%u", arg_idx + 1 - method_p (fndecl)); + return label_text::take (xstrdup (pp_formatted_text (&pp))); +} + /* Subroutine for use by possible_null_arg::emit and null_arg::emit. Issue a note informing that the pertinent argument must be non-NULL. */ static void inform_nonnull_attribute (tree fndecl, int arg_idx) { + label_text arg_desc = describe_argument_index (fndecl, arg_idx); inform (DECL_SOURCE_LOCATION (fndecl), - "argument %u of %qD must be non-null", - arg_idx + 1, fndecl); + "argument %s of %qD must be non-null", + arg_desc.m_buffer, fndecl); + arg_desc.maybe_free (); /* Ideally we would use the location of the parm and underline the attribute also - but we don't have the location_t values at this point in the middle-end. 
@@ -618,15 +643,19 @@ public: label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE { + label_text arg_desc = describe_argument_index (m_fndecl, m_arg_idx); + label_text result; if (m_origin_of_unchecked_event.known_p ()) - return ev.formatted_print ("argument %u (%qE) from %@ could be NULL" - " where non-null expected", - m_arg_idx + 1, ev.m_expr, - &m_origin_of_unchecked_event); + result = ev.formatted_print ("argument %s (%qE) from %@ could be NULL" + " where non-null expected", + arg_desc.m_buffer, ev.m_expr, + &m_origin_of_unchecked_event); else - return ev.formatted_print ("argument %u (%qE) could be NULL" - " where non-null expected", - m_arg_idx + 1, ev.m_expr); + result = ev.formatted_print ("argument %s (%qE) could be NULL" + " where non-null expected", + arg_desc.m_buffer, ev.m_expr); + arg_desc.maybe_free (); + return result; } private: @@ -714,13 +743,17 @@ public: label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE { + label_text arg_desc = describe_argument_index (m_fndecl, m_arg_idx); + label_text result; if (zerop (ev.m_expr)) - return ev.formatted_print ("argument %u NULL where non-null expected", - m_arg_idx + 1); + result = ev.formatted_print ("argument %s NULL where non-null expected", + arg_desc.m_buffer); else - return ev.formatted_print ("argument %u (%qE) NULL" - " where non-null expected", - m_arg_idx + 1, ev.m_expr); + result = ev.formatted_print ("argument %s (%qE) NULL" + " where non-null expected", + arg_desc.m_buffer, ev.m_expr); + arg_desc.maybe_free (); + return result; } private: diff --git a/gcc/testsuite/g++.dg/analyzer/pr97116.C b/gcc/testsuite/g++.dg/analyzer/pr97116.C new file mode 100644 index 0000000..d8e08a7 --- /dev/null +++ b/gcc/testsuite/g++.dg/analyzer/pr97116.C @@ -0,0 +1,39 @@ +#include +#include + +struct foo +{ + foo (int i) : m_i (i) {} // { dg-message "argument 'this' of 'foo::foo\\(int\\)' must be non-null" "note" } + + int get () const { return m_i; } // { 
dg-message "argument 'this' of '\[^\n\]*' must be non-null" "note" } + + int meth_1 (int, void *ptr) __attribute__((nonnull)); // { dg-message "argument 2 of '\[^\n\]*' must be non-null" "note" } + int meth_2 (int, void *ptr) __attribute__((nonnull(3))); // { dg-message "argument 2 of '\[^\n\]*' must be non-null" "note" } + + int m_i; +}; + +void test_1 (void) +{ + foo *p = new(NULL) foo (42); // { dg-warning "non-null expected" "warning" } + // { dg-message "argument 'this' \\(\[^\n\]*\\) NULL where non-null expected" "final event" { target *-*-* } .-1 } +} + +int test_2 (void) +{ + foo *p = NULL; + return p->get (); // { dg-warning "non-null expected" "warning" } + // { dg-message "argument 'this' \\('p'\\) NULL where non-null expected" "final event" { target *-*-* } .-1 } +} + +int test_meth_1 (foo *f) +{ + return f->meth_1 (42, NULL); // { dg-warning "non-null expected" "warning" } + // { dg-message "argument 2 NULL where non-null expected" "final event" { target *-*-* } .-1 } +} + +int test_meth_2 (foo *f) +{ + return f->meth_2 (42, NULL); // { dg-warning "non-null expected" "warning" } + // { dg-message "argument 2 NULL where non-null expected" "final event" { target *-*-* } .-1 } +} -- cgit v1.1 From 7c7e841806aecf4187c69fc2ff07813c7be09582 Mon Sep 17 00:00:00 2001 From: Paul Thomas Date: Wed, 7 Oct 2020 14:59:10 +0100 Subject: This patch fixes PR47469 - a trivial bit of tidying up. 2020-07-10 Paul Thomas gcc/fortran PR fortran/47469 * trans-expr.c (arrayfunc_assign_needs_temporary): Tidy detection of pointer and allocatable functions. 
--- gcc/fortran/trans-expr.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index 2c31ec9..2167de4 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -9810,12 +9810,8 @@ arrayfunc_assign_needs_temporary (gfc_expr * expr1, gfc_expr * expr2) return true; /* Functions returning pointers or allocatables need temporaries. */ - c = expr2->value.function.esym - ? (expr2->value.function.esym->attr.pointer - || expr2->value.function.esym->attr.allocatable) - : (expr2->symtree->n.sym->attr.pointer - || expr2->symtree->n.sym->attr.allocatable); - if (c) + if (gfc_expr_attr (expr2).pointer + || gfc_expr_attr (expr2).allocatable) return true; /* Character array functions need temporaries unless the -- cgit v1.1 From 4e9213027587b807ca7e4bbde706b19102342d37 Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Tue, 6 Oct 2020 16:52:03 -0400 Subject: Off by one final fix. Allocate the memory in an approved portable way. gcc/ChangeLog: 2020-10-06 Andrew MacLeod * value-range.h (irange_allocator::allocate): Allocate in two hunks instead of using the variably-sized trailing array approach. --- gcc/value-range.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/value-range.h b/gcc/value-range.h index 7031a823..63c9620 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -668,13 +668,12 @@ irange_allocator::allocate (unsigned num_pairs) if (num_pairs < 2) num_pairs = 2; - struct newir { - irange range; - tree mem[2]; - }; - size_t nbytes = (sizeof (newir) + sizeof (tree) * 2 * (num_pairs - 1)); - struct newir *r = (newir *) obstack_alloc (&m_obstack, nbytes); - return new (r) irange (r->mem, num_pairs); + size_t nbytes = sizeof (tree) * 2 * num_pairs; + + // Allocate the irange and required memory for the vector. 
+ void *r = obstack_alloc (&m_obstack, sizeof (irange)); + tree *mem = (tree *) obstack_alloc (&m_obstack, nbytes); + return new (r) irange (mem, num_pairs); } inline irange * -- cgit v1.1 From 592fe221735bdaa375b1834dd49ce125d0b600d8 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Wed, 7 Oct 2020 10:49:00 -0400 Subject: c++: Distinguish alignof and __alignof__ in cp_tree_equal [PR97273] cp_tree_equal currently considers alignof the same as __alignof__, but these operators are semantically different ever since r8-7957. In the testcase below, this causes the second static_assert to fail on targets where alignof(double) != __alignof__(double) because the specialization table (which uses cp_tree_equal as its equality predicate) conflates the two dependent specializations integral_constant<__alignof__(T)> and integral_constant<alignof(T)>. This patch makes cp_tree_equal distinguish between these two operators by inspecting the ALIGNOF_EXPR_STD_P flag. gcc/cp/ChangeLog: PR c++/88115 PR libstdc++/97273 * tree.c (cp_tree_equal) <case ALIGNOF_EXPR>: Return false if ALIGNOF_EXPR_STD_P differ. gcc/testsuite/ChangeLog: PR c++/88115 PR libstdc++/97273 * g++.dg/template/alignof3.C: New test. 
--- gcc/cp/tree.c | 2 ++ gcc/testsuite/g++.dg/template/alignof3.C | 13 +++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 gcc/testsuite/g++.dg/template/alignof3.C (limited to 'gcc') diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c index 074fa0c..9bc37ac 100644 --- a/gcc/cp/tree.c +++ b/gcc/cp/tree.c @@ -3803,6 +3803,8 @@ cp_tree_equal (tree t1, tree t2) if (SIZEOF_EXPR_TYPE_P (t2)) o2 = TREE_TYPE (o2); } + else if (ALIGNOF_EXPR_STD_P (t1) != ALIGNOF_EXPR_STD_P (t2)) + return false; if (TREE_CODE (o1) != TREE_CODE (o2)) return false; diff --git a/gcc/testsuite/g++.dg/template/alignof3.C b/gcc/testsuite/g++.dg/template/alignof3.C new file mode 100644 index 0000000..e573727 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/alignof3.C @@ -0,0 +1,13 @@ +// PR c++/88115 +// { dg-do compile { target c++11 } } + +template +struct integral_constant { + static constexpr int value = __v; +}; + +template using StdAlignOf = integral_constant; +template using GCCAlignOf = integral_constant<__alignof__(T)>; + +static_assert(StdAlignOf::value == alignof(double), ""); +static_assert(GCCAlignOf::value == __alignof__(double), ""); -- cgit v1.1 From dae673abd37d400408959497e50fe1f3fbef5533 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 7 Oct 2020 10:42:12 +0200 Subject: tree-optimization/97307 - improve sinking of loads This improves the heuristics finding a sink location for loads that does not cross any store. 2020-10-07 Richard Biener PR tree-optimization/97307 * tree-ssa-sink.c (statement_sink_location): Change heuristic for not skipping stores to look for virtual definitions rather than uses. * gcc.dg/tree-ssa/ssa-sink-17.c: New testcase. * gcc.dg/vect/pr65947-3.c: XFAIL. 
--- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-17.c | 15 ++++++++++ gcc/testsuite/gcc.dg/vect/pr65947-3.c | 5 +++- gcc/tree-ssa-sink.c | 45 +++++++++++++++-------------- 3 files changed, 43 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-17.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-17.c new file mode 100644 index 0000000..cf2e2a0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-17.c @@ -0,0 +1,15 @@ +/* PR tree-optimization/97307 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-sink-details" } */ + +int pure_f(int a, int b) __attribute__((pure)); +int my_f(int a, int b) +{ + int x = pure_f(a, b); + if (a > 0) + return x; + return a; +} + +/* We should sink the call to pure_f to the if block. */ +/* { dg-final { scan-tree-dump "Sinking # VUSE" "sink" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c index 6b4077e..8a2608c 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c @@ -51,6 +51,9 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +/* XFAILed because of the fix for PR97307 which sinks the load of a[i], preventing + if-conversion to happen. */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." 
"vect" } } */ diff --git a/gcc/tree-ssa-sink.c b/gcc/tree-ssa-sink.c index 4cc5195..ba8e557 100644 --- a/gcc/tree-ssa-sink.c +++ b/gcc/tree-ssa-sink.c @@ -369,10 +369,9 @@ statement_sink_location (gimple *stmt, basic_block frombb, return false; /* If this is a load then do not sink past any stores. - ??? This is overly simple but cheap. We basically look - for an existing load with the same VUSE in the path to one - of the sink candidate blocks and we adjust commondom to the - nearest to commondom. */ + Look for virtual definitions in the path from frombb to the sink + location computed from the real uses and if found, adjust + that it a common dominator. */ if (gimple_vuse (stmt)) { /* Do not sink loads from hard registers. */ @@ -383,29 +382,33 @@ statement_sink_location (gimple *stmt, basic_block frombb, imm_use_iterator imm_iter; use_operand_p use_p; - basic_block found = NULL; FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_vuse (stmt)) { gimple *use_stmt = USE_STMT (use_p); basic_block bb = gimple_bb (use_stmt); - /* For PHI nodes the block we know sth about - is the incoming block with the use. */ + /* For PHI nodes the block we know sth about is the incoming block + with the use. */ if (gimple_code (use_stmt) == GIMPLE_PHI) - bb = EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src; - /* Any dominator of commondom would be ok with - adjusting commondom to that block. */ - bb = nearest_common_dominator (CDI_DOMINATORS, bb, commondom); - if (!found) - found = bb; - else if (dominated_by_p (CDI_DOMINATORS, bb, found)) - found = bb; - /* If we can't improve, stop. */ - if (found == commondom) - break; + { + /* In case the PHI node post-dominates the current insert location + we can disregard it. 
*/ + if (commondom != bb + && dominated_by_p (CDI_POST_DOMINATORS, commondom, bb)) + continue; + bb = EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src; + } + else if (!gimple_vdef (use_stmt)) + continue; + /* If the use is not dominated by the path entry it is not on + the path. */ + if (!dominated_by_p (CDI_DOMINATORS, bb, frombb)) + continue; + /* There is no easy way to disregard defs not on the path from + frombb to commondom so just consider them all. */ + commondom = nearest_common_dominator (CDI_DOMINATORS, bb, commondom); + if (commondom == frombb) + return false; } - commondom = found; - if (commondom == frombb) - return false; } /* Our common dominator has to be dominated by frombb in order to be a -- cgit v1.1 From 1e247c60df52e93c9814a3a1789a63bc07aa4542 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Wed, 7 Oct 2020 16:46:34 +0200 Subject: Rename -fevrp-mode= to --param=evrp-mode=. * common.opt (-fevrp-mode): Rename and move... * params.opt (--param=evrp-mode): ...here. * gimple-range.h (DEBUG_RANGE_CACHE): Use param_evrp_mode instead of flag_evrp_mode. * gimple-ssa-evrp.c (rvrp_folder): Same. (hybrid_folder): Same. (execute_early_vrp): Same. --- gcc/common.opt | 31 ------------------------------- gcc/gimple-range.h | 2 +- gcc/gimple-ssa-evrp.c | 8 ++++---- gcc/params.opt | 31 +++++++++++++++++++++++++++++++ 4 files changed, 36 insertions(+), 36 deletions(-) (limited to 'gcc') diff --git a/gcc/common.opt b/gcc/common.opt index e2bd90c..7e789d1 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2870,37 +2870,6 @@ ftree-vrp Common Report Var(flag_tree_vrp) Init(0) Optimization Perform Value Range Propagation on trees. -fevrp-mode= -Common Undocumented Joined RejectNegative Enum(evrp_mode) Var(flag_evrp_mode) Init(EVRP_MODE_EVRP_FIRST) Optimization --fevrp-mode=[legacy|ranger|legacy-first|ranger-first|ranger-trace|ranger-debug|trace|debug] Specifies the mode Early VRP should operate in. 
- -Enum -Name(evrp_mode) Type(enum evrp_mode) UnknownError(unknown evrp mode %qs) - -EnumValue -Enum(evrp_mode) String(legacy) Value(EVRP_MODE_EVRP_ONLY) - -EnumValue -Enum(evrp_mode) String(ranger) Value(EVRP_MODE_RVRP_ONLY) - -EnumValue -Enum(evrp_mode) String(legacy-first) Value(EVRP_MODE_EVRP_FIRST) - -EnumValue -Enum(evrp_mode) String(ranger-first) Value(EVRP_MODE_RVRP_FIRST) - -EnumValue -Enum(evrp_mode) String(ranger-trace) Value(EVRP_MODE_RVRP_TRACE) - -EnumValue -Enum(evrp_mode) String(ranger-debug) Value(EVRP_MODE_RVRP_DEBUG) - -EnumValue -Enum(evrp_mode) String(trace) Value(EVRP_MODE_TRACE) - -EnumValue -Enum(evrp_mode) String(debug) Value(EVRP_MODE_DEBUG) - fsplit-paths Common Report Var(flag_split_paths) Init(0) Optimization Split paths leading to loop backedges. diff --git a/gcc/gimple-range.h b/gcc/gimple-range.h index 4d35e72..041dc7c 100644 --- a/gcc/gimple-range.h +++ b/gcc/gimple-range.h @@ -165,6 +165,6 @@ private: }; // Flag to enable debugging the various internal Caches. 
-#define DEBUG_RANGE_CACHE (dump_file && (flag_evrp_mode & EVRP_MODE_DEBUG)) +#define DEBUG_RANGE_CACHE (dump_file && (param_evrp_mode & EVRP_MODE_DEBUG)) #endif // GCC_GIMPLE_RANGE_STMT_H diff --git a/gcc/gimple-ssa-evrp.c b/gcc/gimple-ssa-evrp.c index 6be32d7a..363e2ab 100644 --- a/gcc/gimple-ssa-evrp.c +++ b/gcc/gimple-ssa-evrp.c @@ -118,7 +118,7 @@ public: rvrp_folder () : substitute_and_fold_engine (), m_simplifier () { - if (flag_evrp_mode & EVRP_MODE_TRACE) + if (param_evrp_mode & EVRP_MODE_TRACE) m_ranger = new trace_ranger (); else m_ranger = new gimple_ranger (); @@ -175,7 +175,7 @@ class hybrid_folder : public evrp_folder public: hybrid_folder (bool evrp_first) { - if (flag_evrp_mode & EVRP_MODE_TRACE) + if (param_evrp_mode & EVRP_MODE_TRACE) m_ranger = new trace_ranger (); else m_ranger = new gimple_ranger (); @@ -307,8 +307,8 @@ execute_early_vrp () scev_initialize (); calculate_dominance_info (CDI_DOMINATORS); - // only the last 2 bits matter for choosing the folder. - switch (flag_evrp_mode & EVRP_MODE_RVRP_FIRST) + // Only the last 2 bits matter for choosing the folder. + switch (param_evrp_mode & EVRP_MODE_RVRP_FIRST) { case EVRP_MODE_EVRP_ONLY: { diff --git a/gcc/params.opt b/gcc/params.opt index 6f308a1..d770c55 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -102,6 +102,37 @@ Maximum size (in bytes) of objects tracked bytewise by dead store elimination. Common Joined UInteger Var(param_early_inlining_insns) Init(6) Optimization Param Maximal estimated growth of function body caused by early inlining of single call. +-param=evrp-mode= +Common Joined Var(param_evrp_mode) Enum(evrp_mode) Init(EVRP_MODE_EVRP_FIRST) Param Optimization +--param=evrp-mode=[legacy|ranger|legacy-first|ranger-first|ranger-trace|ranger-debug|trace|debug] Specifies the mode Early VRP should operate in. 
+ +Enum +Name(evrp_mode) Type(enum evrp_mode) UnknownError(unknown evrp mode %qs) + +EnumValue +Enum(evrp_mode) String(legacy) Value(EVRP_MODE_EVRP_ONLY) + +EnumValue +Enum(evrp_mode) String(ranger) Value(EVRP_MODE_RVRP_ONLY) + +EnumValue +Enum(evrp_mode) String(legacy-first) Value(EVRP_MODE_EVRP_FIRST) + +EnumValue +Enum(evrp_mode) String(ranger-first) Value(EVRP_MODE_RVRP_FIRST) + +EnumValue +Enum(evrp_mode) String(ranger-trace) Value(EVRP_MODE_RVRP_TRACE) + +EnumValue +Enum(evrp_mode) String(ranger-debug) Value(EVRP_MODE_RVRP_DEBUG) + +EnumValue +Enum(evrp_mode) String(trace) Value(EVRP_MODE_TRACE) + +EnumValue +Enum(evrp_mode) String(debug) Value(EVRP_MODE_DEBUG) + -param=fsm-maximum-phi-arguments= Common Joined UInteger Var(param_fsm_maximum_phi_arguments) Init(100) IntegerRange(1, 999999) Param Optimization Maximum number of arguments a PHI may have before the FSM threader will not try to thread through its block. -- cgit v1.1 From 89d3af5ef7e313efd7a5e88a4c629b4704cfe976 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Wed, 7 Oct 2020 01:14:49 -0400 Subject: libgo: handle go1.10+ correctly in match.sh match.sh was not correctly handling build constraints for Go versions that have a two-digit suffix, like "go1.10". The same issue will arise with Go 1.100, but that is a long ways off. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/260077 --- gcc/go/gofrontend/MERGE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index c5c02aa..15f01a0 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -613e530547549f4220c4571ea913acbe5fa56f72 +762b74a56f7ca102a5b8da1c9d0ffce592caa46b The first line of this file holds the git revision number of the last merge done from the gofrontend repository. 
-- cgit v1.1 From 45376dc0f426c0fc39c5ee77937c928c27fab77a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Chigot?= Date: Thu, 24 Sep 2020 13:40:40 +0200 Subject: libgo/configure: remove -fno-section-anchors for AIX This option is no longer needed. There is no crash without it since at least gcc-9. Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/260157 --- gcc/go/gofrontend/MERGE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 15f01a0..930339e 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -762b74a56f7ca102a5b8da1c9d0ffce592caa46b +2563706e4ead80d6906d66ae23c8915c360583ad The first line of this file holds the git revision number of the last merge done from the gofrontend repository. -- cgit v1.1 From 635072248a426c933c74ef4431e82401249b6218 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Thu, 10 Sep 2020 17:27:43 -0400 Subject: c++: Fix P0846 (ADL and function templates) in template [PR97010] To quickly recap, P0846 says that a name is also considered to refer to a template if it is an unqualified-id followed by a < and name lookup finds either one or more functions or finds nothing. In a template, when parsing a function call that has type-dependent arguments, we can't perform ADL right away so we set KOENIG_LOOKUP_P in the call to remember to do it when instantiating the call (tsubst_copy_and_build/CALL_EXPR). When the called function is a function template, we represent the call with a TEMPLATE_ID_EXPR; usually the operand is an OVERLOAD. In the P0846 case though, the operand can be an IDENTIFIER_NODE, when name lookup found nothing when parsing the template name. But we weren't handling this correctly in tsubst_copy_and_build. First we need to pass the FUNCTION_P argument from <case TEMPLATE_ID_EXPR> to <case IDENTIFIER_NODE>, otherwise we give a bogus error. And then in <case CALL_EXPR> we need to perform ADL. 
The rest of the changes is to give better errors when ADL didn't find anything. gcc/cp/ChangeLog: PR c++/97010 * pt.c (tsubst_copy_and_build) : Call tsubst_copy_and_build explicitly instead of using the RECUR macro. Handle a TEMPLATE_ID_EXPR with an IDENTIFIER_NODE as its operand. : Perform ADL for a TEMPLATE_ID_EXPR with an IDENTIFIER_NODE as its operand. gcc/testsuite/ChangeLog: PR c++/97010 * g++.dg/cpp2a/fn-template21.C: New test. * g++.dg/cpp2a/fn-template22.C: New test. --- gcc/cp/pt.c | 37 ++++++++++++++++++++++-------- gcc/testsuite/g++.dg/cpp2a/fn-template21.C | 24 +++++++++++++++++++ gcc/testsuite/g++.dg/cpp2a/fn-template22.C | 25 ++++++++++++++++++++ 3 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/fn-template21.C create mode 100644 gcc/testsuite/g++.dg/cpp2a/fn-template22.C (limited to 'gcc') diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 3755aab..d9cc776 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -19278,7 +19278,8 @@ out: } /* Like tsubst but deals with expressions and performs semantic - analysis. FUNCTION_P is true if T is the "F" in "F (ARGS)". */ + analysis. FUNCTION_P is true if T is the "F" in "F (ARGS)" or + "F (ARGS)". 
*/ tree tsubst_copy_and_build (tree t, @@ -19360,7 +19361,10 @@ tsubst_copy_and_build (tree t, case TEMPLATE_ID_EXPR: { tree object; - tree templ = RECUR (TREE_OPERAND (t, 0)); + tree templ = tsubst_copy_and_build (TREE_OPERAND (t, 0), args, + complain, in_decl, + function_p, + integral_constant_expression_p); tree targs = TREE_OPERAND (t, 1); if (targs) @@ -19407,13 +19411,21 @@ tsubst_copy_and_build (tree t, } else object = NULL_TREE; - templ = lookup_template_function (templ, targs); + + tree tid = lookup_template_function (templ, targs); if (object) - RETURN (build3 (COMPONENT_REF, TREE_TYPE (templ), - object, templ, NULL_TREE)); + RETURN (build3 (COMPONENT_REF, TREE_TYPE (tid), + object, tid, NULL_TREE)); + else if (identifier_p (templ)) + { + /* C++20 P0846: we can encounter an IDENTIFIER_NODE here when + name lookup found nothing when parsing the template name. */ + gcc_assert (cxx_dialect >= cxx20 || seen_error ()); + RETURN (tid); + } else - RETURN (baselink_for_fns (templ)); + RETURN (baselink_for_fns (tid)); } case INDIRECT_REF: @@ -20004,14 +20016,17 @@ tsubst_copy_and_build (tree t, /* We do not perform argument-dependent lookup if normal lookup finds a non-function, in accordance with the - expected resolution of DR 218. */ + resolution of DR 218. */ if (koenig_p && ((is_overloaded_fn (function) /* If lookup found a member function, the Koenig lookup is not appropriate, even if an unqualified-name was used to denote the function. */ && !DECL_FUNCTION_MEMBER_P (get_first_fn (function))) - || identifier_p (function)) + || identifier_p (function) + /* C++20 P0846: Lookup found nothing. */ + || (TREE_CODE (function) == TEMPLATE_ID_EXPR + && identifier_p (TREE_OPERAND (function, 0)))) /* Only do this when substitution turns a dependent call into a non-dependent call. 
*/ && type_dependent_expression_p_push (t) @@ -20019,9 +20034,13 @@ tsubst_copy_and_build (tree t, function = perform_koenig_lookup (function, call_args, tf_none); if (function != NULL_TREE - && identifier_p (function) + && (identifier_p (function) + || (TREE_CODE (function) == TEMPLATE_ID_EXPR + && identifier_p (TREE_OPERAND (function, 0)))) && !any_type_dependent_arguments_p (call_args)) { + if (TREE_CODE (function) == TEMPLATE_ID_EXPR) + function = TREE_OPERAND (function, 0); if (koenig_p && (complain & tf_warning_or_error)) { /* For backwards compatibility and good diagnostics, try diff --git a/gcc/testsuite/g++.dg/cpp2a/fn-template21.C b/gcc/testsuite/g++.dg/cpp2a/fn-template21.C new file mode 100644 index 0000000..7b3f26b --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/fn-template21.C @@ -0,0 +1,24 @@ +// PR c++/97010 +// { dg-do compile { target c++20 } } + +namespace M { + struct S { int x; }; + S foo (); + + template + void get (S); +} + +template +void bar (const T& t) +{ + get(t); +} + +int +main () +{ + auto a = M::foo (); + get(a); + bar (a); +} diff --git a/gcc/testsuite/g++.dg/cpp2a/fn-template22.C b/gcc/testsuite/g++.dg/cpp2a/fn-template22.C new file mode 100644 index 0000000..62cc81d --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/fn-template22.C @@ -0,0 +1,25 @@ +// PR c++/97010 +// { dg-do compile { target c++20 } } + +namespace M { + struct S { int x; }; + S foo (); + +// Test not-found-by-ADL scenario. +// template +// void get (S); +} + +template +void bar (const T& t) +{ + get(t); // { dg-error ".get. was not declared in this scope" } +} + +int +main () +{ + auto a = M::foo (); + get(a); // { dg-error ".get. was not declared in this scope" } + bar (a); +} -- cgit v1.1 From 6caec77e3be8e2989f723ac7f522885dded6bcfc Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 8 Oct 2020 00:16:30 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 90 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/analyzer/ChangeLog | 9 +++++ gcc/cp/ChangeLog | 46 +++++++++++++++++++++++++ gcc/fortran/ChangeLog | 6 ++++ gcc/testsuite/ChangeLog | 66 ++++++++++++++++++++++++++++++++++++ 6 files changed, 218 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5f9e8ac..3074f90 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,93 @@ +2020-10-07 Aldy Hernandez + + * common.opt (-fevrp-mode): Rename and move... + * params.opt (--param=evrp-mode): ...here. + * gimple-range.h (DEBUG_RANGE_CACHE): Use param_evrp_mode instead + of flag_evrp_mode. + * gimple-ssa-evrp.c (rvrp_folder): Same. + (hybrid_folder): Same. + (execute_early_vrp): Same. + +2020-10-07 Richard Biener + + PR tree-optimization/97307 + * tree-ssa-sink.c (statement_sink_location): Change heuristic + for not skipping stores to look for virtual definitions + rather than uses. + +2020-10-07 Andrew MacLeod + + * value-range.h (irange_allocator::allocate): Allocate in two hunks + instead of using the variably-sized trailing array approach. + +2020-10-07 David Malcolm + + * doc/invoke.texi (-fdiagnostics-plain-output): Add + -fdiagnostics-path-format=separate-events to list of + options injected by -fdiagnostics-plain-output. + * opts-common.c (decode_cmdline_options_to_array): Likewise. + +2020-10-07 Martin Jambor + + PR ipa/96394 + * ipa-prop.c (update_indirect_edges_after_inlining): Do not add + resolved speculation edges to vector of new direct edges even in + presence of multiple speculative direct edges for a single call. + +2020-10-07 Andrew Stubbs + + * config/gcn/gcn.md (unspec): Add UNSPEC_ADDPTR. + (addptrdi3): Add SGPR alternative. + +2020-10-07 Mark Wielaard + + * dwarf2out.c (add_filepath_AT_string): New function. + (asm_outputs_debug_line_str): Likewise. + (add_filename_attribute): Likewise. + (add_comp_dir_attribute): Call add_filepath_AT_string. 
+ (gen_compile_unit_die): Call add_filename_attribute for name. + (init_sections_and_labels): Init debug_line_str_section when + asm_outputs_debug_line_str return true. + (dwarf2out_early_finish): Remove DW_AT_name and DW_AT_comp_dir + hack and call add_filename_attribute for the remap_debug_filename. + +2020-10-07 Jakub Jelinek + + * configure.ac (HAVE_AS_GDWARF_5_DEBUG_FLAG, + HAVE_AS_WORKING_DWARF_4_FLAG): New tests. + * gcc.c (ASM_DEBUG_DWARF_OPTION): Define. + (ASM_DEBUG_SPEC): Use ASM_DEBUG_DWARF_OPTION instead of + "--gdwarf2". Use %{cond:opt1;:opt2} style. + (ASM_DEBUG_OPTION_DWARF_OPT): Define. + (ASM_DEBUG_OPTION_SPEC): Define. + (asm_debug_option): New variable. + (asm_options): Add "%(asm_debug_option)". + (static_specs): Add asm_debug_option entry. + (static_spec_functions): Add dwarf-version-gt. + (debug_level_greater_than_spec_func): New function. + * config/darwin.h (ASM_DEBUG_OPTION_SPEC): Define. + * config/darwin9.h (ASM_DEBUG_OPTION_SPEC): Redefine. + * config.in: Regenerated. + * configure: Regenerated. + +2020-10-07 Jakub Jelinek + + PR bootstrap/97305 + * optc-save-gen.awk: Don't declare mask variable if explicit_mask + array is not present. + +2020-10-07 Jakub Jelinek + + * omp-expand.c (expand_omp_simd): Don't emit MIN_EXPR and PLUS_EXPR + at the end of entry_bb and innermost init_bb, instead force arguments + for MIN_EXPR into temporaries in both cases and jump to a new bb that + performs MIN_EXPR and PLUS_EXPR. + +2020-10-07 Tom de Vries + + * tree-ssa-loop-ch.c (ch_base::copy_headers): Add missing NULL test + for dump_file. + 2020-10-06 Andrew MacLeod * flag-types.h (enum evrp_mode): New enumerated type EVRP_MODE_*. 
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 8478d98..df50455 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20201007 +20201008 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 2e40e81..d8453cb 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,12 @@ +2020-10-07 David Malcolm + + PR analyzer/97116 + * sm-malloc.cc (method_p): New. + (describe_argument_index): New. + (inform_nonnull_attribute): Use describe_argument_index. + (possible_null_arg::describe_final_event): Likewise. + (null_arg::describe_final_event): Likewise. + 2020-09-29 David Malcolm PR analyzer/95188 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index dcd5fd3..ebf1fb2 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,49 @@ +2020-10-07 Marek Polacek + + PR c++/97010 + * pt.c (tsubst_copy_and_build) : Call + tsubst_copy_and_build explicitly instead of using the RECUR macro. + Handle a TEMPLATE_ID_EXPR with an IDENTIFIER_NODE as its operand. + : Perform ADL for a TEMPLATE_ID_EXPR with an + IDENTIFIER_NODE as its operand. + +2020-10-07 Patrick Palka + + PR c++/88115 + PR libstdc++/97273 + * tree.c (cp_tree_equal) : Return false if + ALIGNOF_EXPR_STD_P differ. + +2020-10-07 Nathan Sidwell + + * cp-tree.h (struct language_function): Delete extern_decl_map. + (DECL_LOCAL_DECL_ALIAS): New. + * name-lookup.h (is_local_extern): Delete. + * name-lookup.c (set_local_extern_decl_linkage): Replace with ... + (push_local_extern_decl): ... this new function. + (do_pushdecl): Call new function after pushing new decl. Unhide + hidden non-functions. + (is_local_extern): Delete. + * decl.c (layout_var_decl): Do not allow VLA local externs. + * decl2.c (mark_used): Also mark DECL_LOCAL_DECL_ALIAS. Drop old + local-extern treatment. + * parser.c (cp_parser_oacc_declare): Deal with local extern aliases. + * pt.c (tsubst_expr): Adjust local extern instantiation. + * cp-gimplify.c (cp_genericize_r): Remap DECL_LOCAL_DECLs. 
+ +2020-10-07 Nathan Sidwell + + * cp-tree.h (DECL_BUILTIN_P): Rename to ... + (DECL_UNDECLARED_BUILTIN_P): ... here. + * decl.c (duplicate_decls): Adjust. + * name-lookup.c (anticipated_builtin_p): Adjust. + (do_nonmember_using_decl): Likewise. + +2020-10-07 Nathan Sidwell + + * tree.c (build_cp_fntype_variant): Clear + TYPE_DEPENDENT_P_VALID if necessary. + 2020-10-06 Marek Polacek PR c++/97297 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 19ad11b..8de4b38 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,9 @@ +2020-10-07 Paul Thomas + + PR fortran/47469 + * trans-expr.c (arrayfunc_assign_needs_temporary): Tidy detection + of pointer and allocatable functions. + 2020-10-04 Harald Anlauf PR fortran/97272 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6466356..8ab7fe0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,69 @@ +2020-10-07 Marek Polacek + + PR c++/97010 + * g++.dg/cpp2a/fn-template21.C: New test. + * g++.dg/cpp2a/fn-template22.C: New test. + +2020-10-07 Richard Biener + + PR tree-optimization/97307 + * gcc.dg/tree-ssa/ssa-sink-17.c: New testcase. + * gcc.dg/vect/pr65947-3.c: XFAIL. + +2020-10-07 Patrick Palka + + PR c++/88115 + PR libstdc++/97273 + * g++.dg/template/alignof3.C: New test. + +2020-10-07 David Malcolm + + PR analyzer/97116 + * g++.dg/analyzer/pr97116.C: New test. + +2020-10-07 David Malcolm + + * g++.dg/analyzer/analyzer.exp (DEFAULT_CXXFLAGS): Remove + -fdiagnostics-path-format=separate-events. + * gcc.dg/analyzer/analyzer.exp (DEFAULT_CFLAGS): Likewise. + * gcc.dg/plugin/diagnostic-path-format-default.c: Rename to... + * gcc.dg/plugin/diagnostic-path-format-plain.c: ...this. Remove + dg-options directive. Copy remainder of test from + diagnostic-path-format-separate-events.c. + * gcc.dg/plugin/diagnostic-test-paths-2.c: Add + -fdiagnostics-path-format=inline-events to options. + Fix expected output for location of conditional within "for" loop. 
+ * gcc.dg/plugin/plugin.exp (plugin_test_list): Update for + renaming. + * gfortran.dg/analyzer/analyzer.exp (DEFAULT_FFLAGS): Remove + -fdiagnostics-path-format=separate-events. + +2020-10-07 Nathan Sidwell + + * g++.dg/cpp0x/lambda/lambda-sfinae1.C: Avoid ill-formed local extern. + * g++.dg/init/pr42844.C: Add expected error. + * g++.dg/lookup/extern-redecl1.C: Likewise. + * g++.dg/lookup/koenig15.C: Avoid ill-formed. + * g++.dg/lto/pr95677.C: New. + * g++.dg/other/nested-extern-1.C: Correct expected behaviour. + * g++.dg/other/nested-extern-2.C: Likewise. + * g++.dg/other/nested-extern.cc: Split ... + * g++.dg/other/nested-extern-1.cc: ... here ... + * g++.dg/other/nested-extern-2.cc: ... here. + * g++.dg/template/scope5.C: Avoid ill-formed. + * g++.old-deja/g++.law/missed-error2.C: Allow extension. + * g++.old-deja/g++.pt/crash3.C: Add expected error. + +2020-10-07 Martin Jambor + + PR ipa/96394 + * gcc.dg/tree-prof/pr96394.c: New test. + +2020-10-07 Jakub Jelinek + + * gcc.dg/gomp/simd-2.c: New test. + * gcc.dg/gomp/simd-3.c: New test. + 2020-10-06 Marek Polacek PR c++/97297 -- cgit v1.1 From 6b1b5c255f859e75a2d74ae58a011e846d87a277 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Thu, 8 Oct 2020 00:05:36 -0400 Subject: c++: Set the constraints of a class type sooner [PR96229] In the testcase below, during processing (at parse time) of Y's base class X, convert_template_argument calls is_compatible_template_arg to check if the template argument Y is no more constrained than the parameter P. But at this point we haven't yet set Y's constraints, so get_normalized_constraints_from_decl yields NULL_TREE as the normal form and caches this result into the normalized_map. We set Y's constraints later in cp_parser_class_specifier_1 but the stale normal form in the normalized_map remains. This ultimately causes us to miss the constraint failure for Y because according to the cached normal form, Y is not constrained. 
This patch fixes this issue by moving up the call to associate_classtype_constraints so that we set constraints before we start processing a class's bases. gcc/cp/ChangeLog: PR c++/96229 * parser.c (cp_parser_class_specifier_1): Move call to associate_classtype_constraints from here to ... (cp_parser_class_head): ... here. * pt.c (is_compatible_template_arg): Correct documentation to say "argument is _no_ more constrained than the parameter". gcc/testsuite/ChangeLog: PR c++/96229 * g++.dg/cpp2a/concepts-class2.C: New test. --- gcc/cp/parser.c | 8 ++++---- gcc/cp/pt.c | 7 ++++--- gcc/testsuite/g++.dg/cpp2a/concepts-class2.C | 11 +++++++++++ 3 files changed, 19 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-class2.C (limited to 'gcc') diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index 7a61abf..592ce95 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -24044,10 +24044,6 @@ cp_parser_class_specifier_1 (cp_parser* parser) = parser->in_unbraced_linkage_specification_p; parser->in_unbraced_linkage_specification_p = false; - // Associate constraints with the type. - if (flag_concepts) - type = associate_classtype_constraints (type); - /* Start the class. */ if (nested_name_specifier_p) { @@ -24815,6 +24811,10 @@ cp_parser_class_head (cp_parser* parser, fixup_attribute_variants (type); } + /* Associate constraints with the type. */ + if (flag_concepts) + type = associate_classtype_constraints (type); + /* We will have entered the scope containing the class; the names of base classes should be looked up in that context. For example: diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index d9cc776..fc4b9bb 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -8127,9 +8127,10 @@ canonicalize_expr_argument (tree arg, tsubst_flags_t complain) return canon; } -// A template declaration can be substituted for a constrained -// template template parameter only when the argument is more -// constrained than the parameter. 
+/* A template declaration can be substituted for a constrained + template template parameter only when the argument is no more + constrained than the parameter. */ + static bool is_compatible_template_arg (tree parm, tree arg) { diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-class2.C b/gcc/testsuite/g++.dg/cpp2a/concepts-class2.C new file mode 100644 index 0000000..0ed9eb0 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-class2.C @@ -0,0 +1,11 @@ +// PR c++/96229 +// { dg-do compile { target c++20 } } + +template concept Int = requires { T{0}; }; +template