From 705ed42a1ad950860f46c51216ff69dbe0f4857a Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Tue, 4 Oct 2022 23:04:06 +0200 Subject: Fortran: error recovery for invalid types in array constructors [PR107000] gcc/fortran/ChangeLog: PR fortran/107000 * arith.cc (gfc_arith_error): Define error message for ARITH_INVALID_TYPE. (reduce_unary): Catch arithmetic expressions with invalid type. (reduce_binary_ac): Likewise. (reduce_binary_ca): Likewise. (reduce_binary_aa): Likewise. (eval_intrinsic): Likewise. (gfc_real2complex): Source expression must be of type REAL. * gfortran.h (enum arith): Add ARITH_INVALID_TYPE. gcc/testsuite/ChangeLog: PR fortran/107000 * gfortran.dg/pr107000.f90: New test. Co-authored-by: Mikael Morin --- gcc/fortran/arith.cc | 30 ++++++++++++++++---- gcc/fortran/gfortran.h | 2 +- gcc/testsuite/gfortran.dg/pr107000.f90 | 50 ++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/pr107000.f90 (limited to 'gcc') diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc index d57059a..086b1f8 100644 --- a/gcc/fortran/arith.cc +++ b/gcc/fortran/arith.cc @@ -118,6 +118,9 @@ gfc_arith_error (arith code) case ARITH_WRONGCONCAT: p = G_("Illegal type in character concatenation at %L"); break; + case ARITH_INVALID_TYPE: + p = G_("Invalid type in arithmetic operation at %L"); + break; default: gfc_internal_error ("gfc_arith_error(): Bad error code"); @@ -1268,7 +1271,10 @@ reduce_unary (arith (*eval) (gfc_expr *, gfc_expr **), gfc_expr *op, head = gfc_constructor_copy (op->value.constructor); for (c = gfc_constructor_first (head); c; c = gfc_constructor_next (c)) { - rc = reduce_unary (eval, c->expr, &r); + if (c->expr->expr_type == EXPR_OP && c->expr->ts.type == BT_UNKNOWN) + rc = ARITH_INVALID_TYPE; + else + rc = reduce_unary (eval, c->expr, &r); if (rc != ARITH_OK) break; @@ -1309,6 +1315,8 @@ reduce_binary_ac (arith (*eval) (gfc_expr *, gfc_expr *, gfc_expr **), if 
(c->expr->expr_type == EXPR_CONSTANT) rc = eval (c->expr, op2, &r); + else if (c->expr->expr_type == EXPR_OP && c->expr->ts.type == BT_UNKNOWN) + rc = ARITH_INVALID_TYPE; else rc = reduce_binary_ac (eval, c->expr, op2, &r); @@ -1361,6 +1369,8 @@ reduce_binary_ca (arith (*eval) (gfc_expr *, gfc_expr *, gfc_expr **), if (c->expr->expr_type == EXPR_CONSTANT) rc = eval (op1, c->expr, &r); + else if (c->expr->expr_type == EXPR_OP && c->expr->ts.type == BT_UNKNOWN) + rc = ARITH_INVALID_TYPE; else rc = reduce_binary_ca (eval, op1, c->expr, &r); @@ -1420,14 +1430,19 @@ reduce_binary_aa (arith (*eval) (gfc_expr *, gfc_expr *, gfc_expr **), c && d; c = gfc_constructor_next (c), d = gfc_constructor_next (d)) { + if ((c->expr->expr_type == EXPR_OP && c->expr->ts.type == BT_UNKNOWN) + || (d->expr->expr_type == EXPR_OP && d->expr->ts.type == BT_UNKNOWN)) + rc = ARITH_INVALID_TYPE; + else rc = reduce_binary (eval, c->expr, d->expr, &r); - if (rc != ARITH_OK) - break; - gfc_replace_expr (c->expr, r); + if (rc != ARITH_OK) + break; + + gfc_replace_expr (c->expr, r); } - if (c || d) + if (rc == ARITH_OK && (c || d)) rc = ARITH_INCOMMENSURATE; if (rc != ARITH_OK) @@ -1638,6 +1653,8 @@ eval_intrinsic (gfc_intrinsic_op op, else rc = reduce_binary (eval.f3, op1, op2, &result); + if (rc == ARITH_INVALID_TYPE) + goto runtime; /* Something went wrong. 
*/ if (op == INTRINSIC_POWER && rc == ARITH_PROHIBIT) @@ -2238,6 +2255,9 @@ gfc_real2complex (gfc_expr *src, int kind) arith rc; bool did_warn = false; + if (src->ts.type != BT_REAL) + return NULL; + result = gfc_get_constant_expr (BT_COMPLEX, kind, &src->where); mpc_set_fr (result->value.complex, src->value.real, GFC_MPC_RND_MODE); diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 608dda4..10bb098 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -226,7 +226,7 @@ enum gfc_intrinsic_op enum arith { ARITH_OK = 1, ARITH_OVERFLOW, ARITH_UNDERFLOW, ARITH_NAN, ARITH_DIV0, ARITH_INCOMMENSURATE, ARITH_ASYMMETRIC, ARITH_PROHIBIT, - ARITH_WRONGCONCAT + ARITH_WRONGCONCAT, ARITH_INVALID_TYPE }; /* Statements. */ diff --git a/gcc/testsuite/gfortran.dg/pr107000.f90 b/gcc/testsuite/gfortran.dg/pr107000.f90 new file mode 100644 index 0000000..3028907 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr107000.f90 @@ -0,0 +1,50 @@ +! { dg-do compile } +! PR fortran/107000 - ICE in gfc_real2complex, reduce_unary, reduce_binary_* +! Contributed by G.Steinmetz + +program p + real :: y(1) + complex :: x(1) + x = (1.0, 2.0) * [real :: -'1'] ! { dg-error "Operand of unary numeric operator" } + x = (1.0, 2.0) * [complex :: +'1'] ! { dg-error "Operand of unary numeric operator" } + x = [complex :: -'1'] * (1.0, 2.0) ! { dg-error "Operand of unary numeric operator" } + y = [complex :: -'1'] * 2 ! { dg-error "Operand of unary numeric operator" } + y = 2 * [complex :: -'1'] ! { dg-error "Operand of unary numeric operator" } + y = 2 * [complex :: -(.true.)] ! { dg-error "Operand of unary numeric operator" } + y = [complex :: -(.true.)] * 2 ! { dg-error "Operand of unary numeric operator" } + print *, - [real :: -'1' ] ! { dg-error "Operand of unary numeric operator" } + print *, - [real :: [-'1']] ! { dg-error "Operand of unary numeric operator" } + print *, - [real :: +(.true.) ] ! 
{ dg-error "Operand of unary numeric operator" } + print *, - [real :: [+(.true.)]] ! { dg-error "Operand of unary numeric operator" } + print *, 2 * [real :: -'1' ] ! { dg-error "Operand of unary numeric operator" } + print *, 2 * [real :: (-'1')] ! { dg-error "Operand of unary numeric operator" } + print *, [real :: -'1' ] * 2 ! { dg-error "Operand of unary numeric operator" } + print *, [real :: (-'1')] * 2 ! { dg-error "Operand of unary numeric operator" } + print *, 2 * [integer :: -('1')] ! { dg-error "Operand of unary numeric operator" } + print *, [integer :: -('1')] * 2 ! { dg-error "Operand of unary numeric operator" } + print *, 2 * [real :: 0, (-'1')] ! { dg-error "Operand of unary numeric operator" } + print *, [real :: 0, (-'1')] * 2 ! { dg-error "Operand of unary numeric operator" } + print *, 2 * [real :: 0, -'1'] ! { dg-error "Operand of unary numeric operator" } + print *, [real :: 0, -'1'] * 2 ! { dg-error "Operand of unary numeric operator" } + print *, 2 * [real :: 0, 1+'1'] ! { dg-error "Operands of binary numeric operator" } + print *, [real :: 0, 1+'1'] * 2 ! { dg-error "Operands of binary numeric operator" } + print *, [real :: 1, +(.true.)] ! { dg-error "Operand of unary numeric operator" } + print *, [real :: 1, -(.true.)] ! { dg-error "Operand of unary numeric operator" } + print *, 2 * [real :: 1, +(.true.)] ! { dg-error "Operand of unary numeric operator" } + print *, [real :: 1, +(.true.)] * 2 ! { dg-error "Operand of unary numeric operator" } + print *, [1, 2] * [real :: 1, +(.true.)] ! { dg-error "Operand of unary numeric operator" } + print *, [real :: 1, +(.true.)] * [1, 2] ! { dg-error "Operand of unary numeric operator" } + print *, [real :: 1, 2] * [real :: 1, +(.true.)] ! { dg-error "Operand of unary numeric operator" } + print *, [real :: 1, +(.true.)] * [real :: 1, 2] ! { dg-error "Operand of unary numeric operator" } + print *, [real :: 0, -'1'] * [real :: 1, +(+(.true.))] ! 
{ dg-error "Operand of unary numeric operator" } + print *, [real :: 1, [(+(.true.))]] * [real :: 0, [(-'1')]] ! { dg-error "Operand of unary numeric operator" } + + ! Legal: + print *, 2 * [real :: 1, [2], 3] + print *, [real :: 1, [2], 3] * 2 + print *, [real :: 1, [2], 3] * [real :: 1, [2], 3] + print *, [real :: 1, [2], 3] * [integer :: 1, [2], 3] + print *, [real :: 1, [2], 3] * [1, [2], 3] + print *, [real :: 1, huge(2.0)] * [real :: 1, real(1.0)] + print *, [real :: 1, -(huge(2.0))] * [real :: 1, +(real(1))] +end -- cgit v1.1 From d7346a3bf6554ddaef9853c1b0fb770c4a3cd9d2 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sun, 9 Oct 2022 00:17:55 +0000 Subject: Daily bump. --- gcc/DATESTAMP | 2 +- gcc/cp/ChangeLog | 34 ++++++++++++++++++++++++++++++++++ gcc/fortran/ChangeLog | 14 ++++++++++++++ gcc/testsuite/ChangeLog | 6 ++++++ 4 files changed, 55 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index ab0c096..15f928c 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20221008 +20221009 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 3aa9f03..e892628 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,37 @@ +2022-10-08 Jason Merrill + + * cp-tree.h (TARGET_EXPR_ELIDING_P): New. + (unsafe_copy_elision_p, set_target_expr_eliding) + (cp_build_init_expr): Declare. + * call.cc (unsafe_copy_elision_p): No longer static. + (build_over_call, build_special_member_call) + (build_new_method_call): Use cp_build_init_expr. + * coroutines.cc (expand_one_await_expression) + (build_actor_fn, flatten_await_stmt, handle_nested_conditionals) + (await_statement_walker, morph_fn_to_coro): Use cp_build_init_expr. + * cp-gimplify.cc (cp_gimplify_init_expr) + (cp_gimplify_expr): Check TARGET_EXPR_ELIDING_P. + (cp_fold_r): Propagate it. + (cp_fold): Use cp_build_init_expr. + * decl.cc (check_initializer): Use cp_build_init_expr. + * except.cc (build_throw): Use cp_build_init_expr. 
+ * init.cc (get_nsdmi): Call set_target_expr_eliding. + (perform_member_init, expand_default_init, expand_aggr_init_1) + (build_new_1, build_vec_init): Use cp_build_init_expr. + * method.cc (do_build_copy_constructor): Use cp_build_init_expr. + * semantics.cc (simplify_aggr_init_expr, finalize_nrv_r) + (finish_omp_reduction_clause): Use cp_build_init_expr. + * tree.cc (build_target_expr): Call set_target_expr_eliding. + (bot_manip): Copy TARGET_EXPR_ELIDING_P. + * typeck.cc (cp_build_modify_expr): Call set_target_expr_eliding. + (check_return_expr): Use cp_build_modify_expr. + * typeck2.cc (split_nonconstant_init_1) + (split_nonconstant_init): Use cp_build_init_expr. + (massage_init_elt): Call set_target_expr_eliding. + (process_init_constructor_record): Clear TARGET_EXPR_ELIDING_P on + unsafe copy elision. + (set_target_expr_eliding, cp_build_init_expr): New. + 2022-10-07 Marek Polacek PR c++/107085 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 535b9ae..573cbaf 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,17 @@ +2022-10-08 Harald Anlauf + Mikael Morin + + PR fortran/107000 + * arith.cc (gfc_arith_error): Define error message for + ARITH_INVALID_TYPE. + (reduce_unary): Catch arithmetic expressions with invalid type. + (reduce_binary_ac): Likewise. + (reduce_binary_ca): Likewise. + (reduce_binary_aa): Likewise. + (eval_intrinsic): Likewise. + (gfc_real2complex): Source expression must be of type REAL. + * gfortran.h (enum arith): Add ARITH_INVALID_TYPE. + 2022-10-06 Tobias Burnus * trans-openmp.cc (gfc_trans_omp_assume): New. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index af8fc3f..a27b524 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2022-10-08 Harald Anlauf + Mikael Morin + + PR fortran/107000 + * gfortran.dg/pr107000.f90: New test. 
+ 2022-10-07 Marek Polacek PR c++/107085 -- cgit v1.1 From 66c48be23e0fa5ee7474b4b078e013f901c71eed Mon Sep 17 00:00:00 2001 From: YunQiang Su Date: Tue, 2 Aug 2022 10:57:18 +0000 Subject: MIPS: improve -march=native arch detection If we cannot get info from options and cpuinfo, we try to get from: 1. getauxval(AT_BASE_PLATFORM), introduced since Linux 5.7 2. _MIPS_ARCH from host compiler. The -mnan=2008 option is also passed if __mips_nan2008 is defined. This can fix the wrong loader usage on r5/r6 platform with -march=native. gcc/ChangeLog: * config.gcc: set with_arch to default_mips_arch if not defined. * config/mips/driver-native.cc (host_detect_local_cpu): try getauxval(AT_BASE_PLATFORM) and _MIPS_ARCH, too. pass -mnan=2008 if __mips_nan2008 is defined. * config.in: define HAVE_SYS_AUXV_H and HAVE_GETAUXVAL. * configure.ac: detect sys/auxv.h and getauxval. * configure: regenerated. --- gcc/config.gcc | 2 ++ gcc/config.in | 10 ++++++++++ gcc/config/mips/driver-native.cc | 25 ++++++++++++++++++++++--- gcc/configure | 4 ++-- gcc/configure.ac | 4 ++-- 5 files changed, 38 insertions(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/config.gcc b/gcc/config.gcc index e73cb84..eec544f 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -5608,6 +5608,8 @@ case ${target} in esac if test x$with_arch != x; then default_mips_arch=$with_arch + else + with_arch=$default_mips_arch fi if test x$with_abi != x; then default_mips_abi=$with_abi diff --git a/gcc/config.in b/gcc/config.in index 6ac17be..cc217b9 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -1939,6 +1939,12 @@ #endif +/* Define to 1 if you have the <sys/auxv.h> header file. */ +#ifndef USED_FOR_TARGET +#undef HAVE_SYS_AUXV_H +#endif + + /* Define to 1 if you have the <sys/file.h> header file. */ #ifndef USED_FOR_TARGET #undef HAVE_SYS_FILE_H @@ -2672,3 +2678,7 @@ #undef vfork #endif +/* Define to 1 if you have the `getauxval' function. 
*/ +#ifndef USED_FOR_TARGET +#undef HAVE_GETAUXVAL +#endif diff --git a/gcc/config/mips/driver-native.cc b/gcc/config/mips/driver-native.cc index 47627f8..327ad25 100644 --- a/gcc/config/mips/driver-native.cc +++ b/gcc/config/mips/driver-native.cc @@ -23,6 +23,9 @@ along with GCC; see the file COPYING3. If not see #include "system.h" #include "coretypes.h" #include "tm.h" +#ifdef HAVE_SYS_AUXV_H +#include <sys/auxv.h> +#endif /* This will be called by the spec parser in gcc.cc when it sees a %:local_cpu_detect(args) construct. Currently it will be called @@ -41,6 +44,7 @@ const char * host_detect_local_cpu (int argc, const char **argv) { const char *cpu = NULL; + char *ret = NULL; char buf[128]; FILE *f; bool arch; @@ -54,7 +58,7 @@ host_detect_local_cpu (int argc, const char **argv) f = fopen ("/proc/cpuinfo", "r"); if (f == NULL) - return NULL; + goto fallback_cpu; while (fgets (buf, sizeof (buf), f) != NULL) if (startswith (buf, "cpu model")) @@ -84,8 +88,23 @@ host_detect_local_cpu (int argc, const char **argv) fclose (f); +fallback_cpu: +#if defined (__mips_nan2008) + ret = reconcat (ret, " -mnan=2008 ", NULL); +#endif + +#ifdef HAVE_GETAUXVAL if (cpu == NULL) - return NULL; + cpu = (const char *) getauxval (AT_BASE_PLATFORM); +#endif + +#if defined (_MIPS_ARCH) + if (cpu == NULL) + cpu = _MIPS_ARCH; +#endif + + if (cpu) + ret = reconcat (ret, ret, "-m", argv[0], "=", cpu, NULL); - return concat ("-m", argv[0], "=", cpu, NULL); + return ret; } diff --git a/gcc/configure b/gcc/configure index ce4e185..b512580 100755 --- a/gcc/configure +++ b/gcc/configure @@ -9327,7 +9327,7 @@ $as_echo "#define GWINSZ_IN_SYS_IOCTL 1" >>confdefs.h fi for ac_header in limits.h stddef.h string.h strings.h stdlib.h time.h iconv.h \ - fcntl.h ftw.h unistd.h sys/file.h sys/time.h sys/mman.h \ + fcntl.h ftw.h unistd.h sys/auxv.h sys/file.h sys/time.h sys/mman.h \ sys/resource.h sys/param.h sys/times.h sys/stat.h sys/locking.h \ direct.h malloc.h langinfo.h ldfcn.h locale.h wchar.h do : @@ -10622,7 
+10622,7 @@ fi for ac_func in times clock kill getrlimit setrlimit atoq \ popen sysconf strsignal getrusage nl_langinfo \ gettimeofday mbstowcs wcswidth mmap posix_fallocate setlocale \ - clearerr_unlocked feof_unlocked ferror_unlocked fflush_unlocked fgetc_unlocked fgets_unlocked fileno_unlocked fprintf_unlocked fputc_unlocked fputs_unlocked fread_unlocked fwrite_unlocked getchar_unlocked getc_unlocked putchar_unlocked putc_unlocked madvise mallinfo mallinfo2 fstatat + clearerr_unlocked feof_unlocked ferror_unlocked fflush_unlocked fgetc_unlocked fgets_unlocked fileno_unlocked fprintf_unlocked fputc_unlocked fputs_unlocked fread_unlocked fwrite_unlocked getchar_unlocked getc_unlocked putchar_unlocked putc_unlocked madvise mallinfo mallinfo2 fstatat getauxval do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` ac_fn_cxx_check_func "$LINENO" "$ac_func" "$as_ac_var" diff --git a/gcc/configure.ac b/gcc/configure.ac index b6bafa8..e48fcbf 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -1307,7 +1307,7 @@ ACX_HEADER_STRING AC_HEADER_SYS_WAIT AC_HEADER_TIOCGWINSZ AC_CHECK_HEADERS(limits.h stddef.h string.h strings.h stdlib.h time.h iconv.h \ - fcntl.h ftw.h unistd.h sys/file.h sys/time.h sys/mman.h \ + fcntl.h ftw.h unistd.h sys/auxv.h sys/file.h sys/time.h sys/mman.h \ sys/resource.h sys/param.h sys/times.h sys/stat.h sys/locking.h \ direct.h malloc.h langinfo.h ldfcn.h locale.h wchar.h) @@ -1525,7 +1525,7 @@ define(gcc_UNLOCKED_FUNCS, clearerr_unlocked feof_unlocked dnl AC_CHECK_FUNCS(times clock kill getrlimit setrlimit atoq \ popen sysconf strsignal getrusage nl_langinfo \ gettimeofday mbstowcs wcswidth mmap posix_fallocate setlocale \ - gcc_UNLOCKED_FUNCS madvise mallinfo mallinfo2 fstatat) + gcc_UNLOCKED_FUNCS madvise mallinfo mallinfo2 fstatat getauxval) if test x$ac_cv_func_mbstowcs = xyes; then AC_CACHE_CHECK(whether mbstowcs works, gcc_cv_func_mbstowcs_works, -- cgit v1.1 From 73137f365a01327ae245fa0a9f0b127cb06e0cb3 Mon Sep 17 00:00:00 2001 
From: Dimitar Dimitrov Date: Thu, 22 Sep 2022 23:08:43 +0300 Subject: pru: Optimize DI shifts If the number of shift positions is a constant, then the DI shift operation is expanded to a sequence of 2 to 4 machine instructions. That is more efficient than the default action to call libgcc. gcc/ChangeLog: * config/pru/pru.md (lshrdi3): New expand pattern. (ashldi3): Ditto. gcc/testsuite/ChangeLog: * gcc.target/pru/ashiftdi-1.c: New test. * gcc.target/pru/lshiftrtdi-1.c: New test. Signed-off-by: Dimitar Dimitrov --- gcc/config/pru/pru.md | 196 ++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/pru/ashiftdi-1.c | 53 ++++++++ gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c | 53 ++++++++ 3 files changed, 302 insertions(+) create mode 100644 gcc/testsuite/gcc.target/pru/ashiftdi-1.c create mode 100644 gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c (limited to 'gcc') diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md index 144cd35..53ffff0 100644 --- a/gcc/config/pru/pru.md +++ b/gcc/config/pru/pru.md @@ -703,6 +703,202 @@ [(set_attr "type" "alu") (set_attr "length" "12")]) + +; 64-bit LSHIFTRT with a constant shift count can be expanded into +; a more efficient code sequence than a variable register shift. +; +; 1. For shift >= 32: +; dst_lo = (src_hi >> (shift - 32)) +; dst_hi = 0 +; +; 2. For shift==1 there is no need for a temporary: +; dst_lo = (src_lo >> 1) +; if (src_hi & 1) +; dst_lo |= (1 << 31) +; dst_hi = (src_hi >> 1) +; +; 3. For shift < 32: +; dst_lo = (src_lo >> shift) +; tmp = (src_hi << (32 - shift)) +; dst_lo |= tmp +; dst_hi = (src_hi >> shift) +; +; 4. For shift in a register: +; Fall back to calling libgcc. 
+(define_expand "lshrdi3" + [(set (match_operand:DI 0 "register_operand") + (lshiftrt:DI + (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand")))] + "" +{ + gcc_assert (CONST_INT_P (operands[2])); + + const int nshifts = INTVAL (operands[2]); + rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0); + rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4); + rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0); + rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4); + + if (nshifts >= 32) + { + emit_insn (gen_rtx_SET (dst_lo, + gen_rtx_LSHIFTRT (SImode, + src_hi, + GEN_INT (nshifts - 32)))); + emit_insn (gen_rtx_SET (dst_hi, const0_rtx)); + DONE; + } + + gcc_assert (can_create_pseudo_p ()); + + /* The expansions which follow are safe only if DST_LO and SRC_HI + do not overlap. If they do, then fix by using a temporary register. + Overlapping of DST_HI and SRC_LO is safe because by the time DST_HI + is set, SRC_LO is no longer live. */ + if (reg_overlap_mentioned_p (dst_lo, src_hi)) + { + rtx new_src_hi = gen_reg_rtx (SImode); + + emit_move_insn (new_src_hi, src_hi); + src_hi = new_src_hi; + } + + if (nshifts == 1) + { + rtx_code_label *skip_hiset_label; + rtx j; + + emit_insn (gen_rtx_SET (dst_lo, + gen_rtx_LSHIFTRT (SImode, src_lo, const1_rtx))); + + /* The code generated by `genemit' would create a LABEL_REF. 
*/ + skip_hiset_label = gen_label_rtx (); + j = emit_jump_insn (gen_cbranch_qbbx_const (EQ, + SImode, + src_hi, + GEN_INT (0), + skip_hiset_label)); + JUMP_LABEL (j) = skip_hiset_label; + LABEL_NUSES (skip_hiset_label)++; + + emit_insn (gen_iorsi3 (dst_lo, dst_lo, GEN_INT (1 << 31))); + emit_label (skip_hiset_label); + emit_insn (gen_rtx_SET (dst_hi, + gen_rtx_LSHIFTRT (SImode, src_hi, const1_rtx))); + DONE; + } + + if (nshifts < 32) + { + rtx tmpval = gen_reg_rtx (SImode); + + emit_insn (gen_rtx_SET (dst_lo, + gen_rtx_LSHIFTRT (SImode, + src_lo, + GEN_INT (nshifts)))); + emit_insn (gen_rtx_SET (tmpval, + gen_rtx_ASHIFT (SImode, + src_hi, + GEN_INT (32 - nshifts)))); + emit_insn (gen_iorsi3 (dst_lo, dst_lo, tmpval)); + emit_insn (gen_rtx_SET (dst_hi, + gen_rtx_LSHIFTRT (SImode, + src_hi, + GEN_INT (nshifts)))); + DONE; + } + gcc_unreachable (); +}) + +; 64-bit ASHIFT with a constant shift count can be expanded into +; more efficient code sequence than the libgcc call required by +; a variable shift in a register. + +(define_expand "ashldi3" + [(set (match_operand:DI 0 "register_operand") + (ashift:DI + (match_operand:DI 1 "register_operand") + (match_operand:QI 2 "const_int_operand")))] + "" +{ + gcc_assert (CONST_INT_P (operands[2])); + + const int nshifts = INTVAL (operands[2]); + rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0); + rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4); + rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0); + rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4); + + if (nshifts >= 32) + { + emit_insn (gen_rtx_SET (dst_hi, + gen_rtx_ASHIFT (SImode, + src_lo, + GEN_INT (nshifts - 32)))); + emit_insn (gen_rtx_SET (dst_lo, const0_rtx)); + DONE; + } + + gcc_assert (can_create_pseudo_p ()); + + /* The expansions which follow are safe only if DST_HI and SRC_LO + do not overlap. If they do, then fix by using a temporary register. 
+ Overlapping of DST_LO and SRC_HI is safe because by the time DST_LO + is set, SRC_HI is no longer live. */ + if (reg_overlap_mentioned_p (dst_hi, src_lo)) + { + rtx new_src_lo = gen_reg_rtx (SImode); + + emit_move_insn (new_src_lo, src_lo); + src_lo = new_src_lo; + } + + if (nshifts == 1) + { + rtx_code_label *skip_hiset_label; + rtx j; + + emit_insn (gen_rtx_SET (dst_hi, + gen_rtx_ASHIFT (SImode, src_hi, const1_rtx))); + + skip_hiset_label = gen_label_rtx (); + j = emit_jump_insn (gen_cbranch_qbbx_const (EQ, + SImode, + src_lo, + GEN_INT (31), + skip_hiset_label)); + JUMP_LABEL (j) = skip_hiset_label; + LABEL_NUSES (skip_hiset_label)++; + + emit_insn (gen_iorsi3 (dst_hi, dst_hi, GEN_INT (1 << 0))); + emit_label (skip_hiset_label); + emit_insn (gen_rtx_SET (dst_lo, + gen_rtx_ASHIFT (SImode, src_lo, const1_rtx))); + DONE; + } + + if (nshifts < 32) + { + rtx tmpval = gen_reg_rtx (SImode); + + emit_insn (gen_rtx_SET (dst_hi, + gen_rtx_ASHIFT (SImode, + src_hi, + GEN_INT (nshifts)))); + emit_insn (gen_rtx_SET (tmpval, + gen_rtx_LSHIFTRT (SImode, + src_lo, + GEN_INT (32 - nshifts)))); + emit_insn (gen_iorsi3 (dst_hi, dst_hi, tmpval)); + emit_insn (gen_rtx_SET (dst_lo, + gen_rtx_ASHIFT (SImode, + src_lo, + GEN_INT (nshifts)))); + DONE; + } + gcc_unreachable (); +}) ;; Include ALU patterns with zero-extension of operands. That's where ;; the real insns are defined. diff --git a/gcc/testsuite/gcc.target/pru/ashiftdi-1.c b/gcc/testsuite/gcc.target/pru/ashiftdi-1.c new file mode 100644 index 0000000..516e5a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/ashiftdi-1.c @@ -0,0 +1,53 @@ +/* Functional test for DI left shift. 
*/ + +/* { dg-do run } */ +/* { dg-options "-pedantic-errors" } */ + +#include +#include + +extern void abort (void); + +uint64_t __attribute__((noinline)) ashift_1 (uint64_t a) +{ + return a << 1; +} + +uint64_t __attribute__((noinline)) ashift_10 (uint64_t a) +{ + return a << 10; +} + +uint64_t __attribute__((noinline)) ashift_32 (uint64_t a) +{ + return a << 32; +} + +uint64_t __attribute__((noinline)) ashift_36 (uint64_t a) +{ + return a << 36; +} + +int +main (int argc, char** argv) +{ + if (ashift_1 (0xaaaa5555aaaa5555ull) != 0x5554aaab5554aaaaull) + abort(); + if (ashift_10 (0xaaaa5555aaaa5555ull) != 0xa95556aaa9555400ull) + abort(); + if (ashift_32 (0xaaaa5555aaaa5555ull) != 0xaaaa555500000000ull) + abort(); + if (ashift_36 (0xaaaa5555aaaa5555ull) != 0xaaa5555000000000ull) + abort(); + + if (ashift_1 (0x1234567822334455ull) != 0x2468acf0446688aaull) + abort(); + if (ashift_10 (0x1234567822334455ull) != 0xd159e088cd115400ull) + abort(); + if (ashift_32 (0x1234567822334455ull) != 0x2233445500000000ull) + abort(); + if (ashift_36 (0x1234567822334455ull) != 0x2334455000000000ull) + abort(); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c b/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c new file mode 100644 index 0000000..7adae6c --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c @@ -0,0 +1,53 @@ +/* Functional test for DI right shift. 
*/ + +/* { dg-do run } */ +/* { dg-options "-pedantic-errors" } */ + +#include +#include + +extern void abort (void); + +uint64_t __attribute__((noinline)) lshift_1 (uint64_t a) +{ + return a >> 1; +} + +uint64_t __attribute__((noinline)) lshift_10 (uint64_t a) +{ + return a >> 10; +} + +uint64_t __attribute__((noinline)) lshift_32 (uint64_t a) +{ + return a >> 32; +} + +uint64_t __attribute__((noinline)) lshift_36 (uint64_t a) +{ + return a >> 36; +} + +int +main (int argc, char** argv) +{ + if (lshift_1 (0xaaaa5555aaaa5555ull) != 0x55552aaad5552aaaull) + abort(); + if (lshift_10 (0xaaaa5555aaaa5555ull) != 0x002aaa95556aaa95ull) + abort(); + if (lshift_32 (0xaaaa5555aaaa5555ull) != 0x00000000aaaa5555ull) + abort(); + if (lshift_36 (0xaaaa5555aaaa5555ull) != 0x000000000aaaa555ull) + abort(); + + if (lshift_1 (0x1234567822334455ull) != 0x091a2b3c1119a22aull) + abort(); + if (lshift_10 (0x1234567822334455ull) != 0x00048d159e088cd1ull) + abort(); + if (lshift_32 (0x1234567822334455ull) != 0x0000000012345678ull) + abort(); + if (lshift_36 (0x1234567822334455ull) != 0x0000000001234567ull) + abort(); + + return 0; +} -- cgit v1.1 From e95e91eccd022a4a3a86da2749809fbad9afd20e Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Sun, 18 Sep 2022 16:27:18 +0300 Subject: pru: Add cbranchdi4 pattern Manually expanding into 32-bit comparisons is much more efficient than the default expansion into word-size comparisons. Note that word for PRU is 8-bit. PR target/106562 gcc/ChangeLog: * config/pru/pru-protos.h (pru_noteq_condition): New function declaration. * config/pru/pru.cc (pru_noteq_condition): New function. * config/pru/pru.md (cbranchdi4): Define new pattern. gcc/testsuite/ChangeLog: * gcc.target/pru/pr106562-1.c: New test. * gcc.target/pru/pr106562-2.c: New test. * gcc.target/pru/pr106562-3.c: New test. * gcc.target/pru/pr106562-4.c: New test. 
Signed-off-by: Dimitar Dimitrov --- gcc/config/pru/pru-protos.h | 1 + gcc/config/pru/pru.cc | 21 ++++ gcc/config/pru/pru.md | 180 ++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/pru/pr106562-1.c | 9 ++ gcc/testsuite/gcc.target/pru/pr106562-2.c | 9 ++ gcc/testsuite/gcc.target/pru/pr106562-3.c | 9 ++ gcc/testsuite/gcc.target/pru/pr106562-4.c | 159 ++++++++++++++++++++++++++ 7 files changed, 388 insertions(+) create mode 100644 gcc/testsuite/gcc.target/pru/pr106562-1.c create mode 100644 gcc/testsuite/gcc.target/pru/pr106562-2.c create mode 100644 gcc/testsuite/gcc.target/pru/pr106562-3.c create mode 100644 gcc/testsuite/gcc.target/pru/pr106562-4.c (limited to 'gcc') diff --git a/gcc/config/pru/pru-protos.h b/gcc/config/pru/pru-protos.h index 4b190c9..517fa02 100644 --- a/gcc/config/pru/pru-protos.h +++ b/gcc/config/pru/pru-protos.h @@ -52,6 +52,7 @@ extern const char *pru_output_signed_cbranch (rtx *, bool); extern const char *pru_output_signed_cbranch_ubyteop2 (rtx *, bool); extern const char *pru_output_signed_cbranch_zeroop2 (rtx *, bool); +extern enum rtx_code pru_noteq_condition (enum rtx_code code); extern rtx pru_expand_fp_compare (rtx comparison, machine_mode mode); extern void pru_emit_doloop (rtx *, int); diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc index 04eca90..0029dcb 100644 --- a/gcc/config/pru/pru.cc +++ b/gcc/config/pru/pru.cc @@ -895,6 +895,27 @@ pru_init_libfuncs (void) set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull"); } +/* Given a comparison CODE, return a similar comparison but without + the "equals" condition. In other words, it strips GE/GEU/LE/LEU + and instead returns GT/GTU/LT/LTU. 
*/ + +enum rtx_code +pru_noteq_condition (enum rtx_code code) +{ + switch (code) + { + case GT: return GT; + case GTU: return GTU; + case GE: return GT; + case GEU: return GTU; + case LT: return LT; + case LTU: return LTU; + case LE: return LT; + case LEU: return LTU; + default: + gcc_unreachable (); + } +} /* Emit comparison instruction if necessary, returning the expression that holds the compare result in the proper mode. Return the comparison diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md index 53ffff0..bdc5ad7 100644 --- a/gcc/config/pru/pru.md +++ b/gcc/config/pru/pru.md @@ -1309,6 +1309,186 @@ operands[2] = XEXP (t, 1); }) +;; Expand the cbranchdi pattern in order to avoid the default +;; expansion into word_mode operations, which is not efficient for PRU. +;; In pseudocode this expansion outputs: +;; +;; /* EQ */ +;; if (OP1_hi {reverse_condition (cmp)} OP2_hi) +;; goto fallthrough +;; if (OP1_lo {cmp} OP2_lo) +;; goto label3 +;; fallthrough: +;; +;; /* NE */ +;; if (OP1_hi {cmp} OP2_hi) +;; goto label3 +;; if (OP1_lo {cmp} OP2_lo) +;; goto label3 +;; +;; The LT comparisons with zero take one machine instruction to simply +;; check the sign bit. The GT comparisons with zero take two - one +;; to check the sign bit, and one to check for zero. Hence arrange +;; the expand such that only LT comparison is used for OP1_HI, because +;; OP2_HI is const0_rtx. +;; +;; The LTU comparisons with zero will be removed by subsequent passes. +;; +;; /* LT/LTU/LE/LEU */ +;; if (OP1_hi {noteq_condition (cmp)} OP2_hi) +;; goto label3 /* DI comparison obviously true. */ +;; if (OP1_hi != OP2_hi) +;; goto fallthrough /* DI comparison obviously not true. */ +;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo) +;; goto label3 /* Comparison was deferred to lo parts. */ +;; fallthrough: + +;; /* GT/GTU/GE/GEU */ +;; if (OP1_hi {noteq_condition (reverse_condition (cmp))} OP2_hi) +;; goto fallthrough /* DI comparison obviously not true. 
*/ +;; if (OP1_hi != OP2_hi) +;; goto label3 /* DI comparison obviously true. */ +;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo) +;; goto label3 /* Comparison was deferred to lo parts. */ +;; fallthrough: + +(define_expand "cbranchdi4" + [(set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" + [(match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_ubyte_operand")]) + (label_ref (match_operand 3 "")) + (pc)))] + "" +{ + const enum rtx_code code = GET_CODE (operands[0]); + rtx label3 = operands[3]; + rtx op1_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0); + rtx op1_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4); + rtx op2_lo = simplify_gen_subreg (SImode, operands[2], DImode, 0); + rtx op2_hi = simplify_gen_subreg (SImode, operands[2], DImode, 4); + rtx j; + + if (code == EQ) + { + rtx label_fallthrough = gen_label_rtx (); + rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough); + + rtx cond_hi = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label_fallthrough_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label_fallthrough; + LABEL_NUSES (label_fallthrough)++; + + rtx label3_ref = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_lo = gen_rtx_fmt_ee (EQ, VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + emit_label (label_fallthrough); + DONE; + } + if (code == NE) + { + rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_hi = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label3_ref1, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, 
label3); + rtx cond_lo = gen_rtx_fmt_ee (NE, VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref2, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + DONE; + } + + if (code == LT || code == LTU || code == LE || code == LEU) + { + /* Check for "DI comparison obviously true". */ + rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_hi = gen_rtx_fmt_ee (pru_noteq_condition (code), + VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label3_ref1, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + /* Check for "DI comparison obviously not true". */ + rtx label_fallthrough = gen_label_rtx (); + rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough); + rtx cond_hine = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hine = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hine, + label_fallthrough_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hine)); + JUMP_LABEL (j) = label_fallthrough; + LABEL_NUSES (label_fallthrough)++; + + /* Comparison deferred to the lo parts. */ + rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_lo = gen_rtx_fmt_ee (unsigned_condition (code), + VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref2, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + emit_label (label_fallthrough); + DONE; + } + + if (code == GT || code == GTU || code == GE || code == GEU) + { + /* Check for "DI comparison obviously not true". 
*/ + const enum rtx_code reversed_code = reverse_condition (code); + rtx label_fallthrough = gen_label_rtx (); + rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough); + rtx cond_hi = gen_rtx_fmt_ee (pru_noteq_condition (reversed_code), + VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label_fallthrough_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label_fallthrough; + LABEL_NUSES (label_fallthrough)++; + + /* Check for "DI comparison obviously true". */ + rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_hine = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hine = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hine, + label3_ref1, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hine)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + /* Comparison deferred to the lo parts. */ + rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_lo = gen_rtx_fmt_ee (unsigned_condition (code), + VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref2, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + emit_label (label_fallthrough); + DONE; + } + gcc_unreachable (); +}) + ; ; Bit test branch diff --git a/gcc/testsuite/gcc.target/pru/pr106562-1.c b/gcc/testsuite/gcc.target/pru/pr106562-1.c new file mode 100644 index 0000000..5bfbe52 --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/pr106562-1.c @@ -0,0 +1,9 @@ +/* { dg-do assemble } */ +/* { dg-options "-Os" } */ +/* { dg-final { object-size text <= 40 } } */ + + +char test(unsigned long long a, unsigned long long b) +{ + return a && b; +} diff --git a/gcc/testsuite/gcc.target/pru/pr106562-2.c b/gcc/testsuite/gcc.target/pru/pr106562-2.c new file mode 100644 index 0000000..ec5f82a --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/pr106562-2.c @@ -0,0 +1,9 @@ +/* { dg-do assemble } 
*/ +/* { dg-options "-Os" } */ +/* { dg-final { object-size text <= 32 } } */ + + +char test(long long a) +{ + return a > 10; +} diff --git a/gcc/testsuite/gcc.target/pru/pr106562-3.c b/gcc/testsuite/gcc.target/pru/pr106562-3.c new file mode 100644 index 0000000..d098058 --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/pr106562-3.c @@ -0,0 +1,9 @@ +/* { dg-do assemble } */ +/* { dg-options "-Os" } */ +/* { dg-final { object-size text <= 32 } } */ + + +char test(long long a) +{ + return a < 10; +} diff --git a/gcc/testsuite/gcc.target/pru/pr106562-4.c b/gcc/testsuite/gcc.target/pru/pr106562-4.c new file mode 100644 index 0000000..b29e426 --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/pr106562-4.c @@ -0,0 +1,159 @@ +/* Functional test for DI comparisons. */ + +/* { dg-do run } */ +/* { dg-options "-pedantic-errors" } */ + +/* The default test suite options use "-ansi", which + generates spurious errors by enabling "-Wlong-long". + Thus override the options and drop "-ansi", in order + to freely use 64-bit (long long) types for PRU. 
*/ + +#include <stddef.h> +#include <stdint.h> + +extern void abort (void); + +char __attribute__((noinline)) test_lt (int64_t a, int64_t b) +{ + return a < b; +} + +char __attribute__((noinline)) test_ltu (uint64_t a, uint64_t b) +{ + return a < b; +} + +char __attribute__((noinline)) test_le (int64_t a, int64_t b) +{ + return a <= b; +} + +char __attribute__((noinline)) test_leu (uint64_t a, uint64_t b) +{ + return a <= b; +} + +char __attribute__((noinline)) test_gt (int64_t a, int64_t b) +{ + return a > b; +} + +char __attribute__((noinline)) test_gtu (uint64_t a, uint64_t b) +{ + return a > b; +} + +char __attribute__((noinline)) test_ge (int64_t a, int64_t b) +{ + return a >= b; +} + +char __attribute__((noinline)) test_geu (uint64_t a, uint64_t b) +{ + return a >= b; +} + +char __attribute__((noinline)) test_eq (uint64_t a, uint64_t b) +{ + return a == b; +} + +char __attribute__((noinline)) test_ne (uint64_t a, uint64_t b) +{ + return a != b; +} + +struct test_case { + uint64_t a; + uint64_t b; + char lt; + char ltu; + char le; + char leu; + char gt; + char gtu; + char ge; + char geu; + char eq; + char ne; +}; + +const struct test_case cases[] = { + /* LT,LTU,LE,LEU,GT,GTU,GE,GEU,EQ,NE */ + { 0x1234567800112233ULL, + 0x1234567800112233ULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, + { 0x0000000000000000ULL, + 0x0000000000000000ULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, + { 0xffffffffffffffffULL, + 0xffffffffffffffffULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, + + { 0xffffffffffffffefULL, + 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + { 0x8000000000000000ULL, + 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + { 0x80000000ffffffffULL, + 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + { 0x80000000ffffffffULL, + 0xffffffff00000000ULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + { 0xffefffffffffffffULL, + 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + + { 0x0000000000000000ULL, + 0xffffffffffffffffULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, + { 0x0000000000000001ULL, + 
0xffffffffffffffffULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, + { 0x0000000000000001ULL, + 0x8000000000000000ULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, + { 0x7fffffffffffffffULL, + 0x8000000000000000ULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, + + /* Ensure lo uses unsigned comparison if hi parts are same. */ + { 0x12345678ffffffffULL, + 0x1234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 }, + { 0xf23456780fffffffULL, + 0xf234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 }, + { 0xf2345678ffffffffULL, + 0xf234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 }, + { 0x1234567800000002ULL, + 0x1234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 }, + { 0x1234567800000002ULL, + 0x1234567800000003ULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, +}; + +int +main (void) +{ + size_t i; + + for (i = 0; i < (sizeof (cases)/sizeof (cases[0])); i++) + { + const int64_t sa = (int64_t)cases[i].a; + const int64_t sb = (int64_t)cases[i].b; + const uint64_t ua = cases[i].a; + const uint64_t ub = cases[i].b; + + if (cases[i].lt != test_lt (sa, sb)) + abort (); + if (cases[i].ltu != test_ltu (ua, ub)) + abort (); + if (cases[i].le != test_le (sa, sb)) + abort (); + if (cases[i].leu != test_leu (ua, ub)) + abort (); + if (cases[i].gt != test_gt (sa, sb)) + abort (); + if (cases[i].gtu != test_gtu (ua, ub)) + abort (); + if (cases[i].ge != test_ge (sa, sb)) + abort (); + if (cases[i].geu != test_geu (ua, ub)) + abort (); + if (cases[i].eq != test_eq (ua, ub)) + abort (); + if (cases[i].ne != test_ne (ua, ub)) + abort (); + } + + return 0; +} + -- cgit v1.1 From 570eb458c64a15d33817994f8e4640b63c81d6d4 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Mon, 10 Oct 2022 00:16:29 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 23 +++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/testsuite/ChangeLog | 13 +++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5086463..8f729dc 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,26 @@ +2022-10-09 Dimitar Dimitrov + + PR target/106562 + * config/pru/pru-protos.h (pru_noteq_condition): New + function declaration. + * config/pru/pru.cc (pru_noteq_condition): New function. + * config/pru/pru.md (cbranchdi4): Define new pattern. + +2022-10-09 Dimitar Dimitrov + + * config/pru/pru.md (lshrdi3): New expand pattern. + (ashldi3): Ditto. + +2022-10-09 YunQiang Su + + * config.gcc: set with_arch to default_mips_arch if no defined. + * config/mips/driver-native.cc (host_detect_local_cpu): + try getauxval(AT_BASE_PLATFORM) and _MIPS_ARCH, too. + pass -mnan=2008 if __mips_nan2008__ is defined. + * config.in: define HAVE_SYS_AUXV_H and HAVE_GETAUXVAL. + * configure.ac: detect sys/auxv.h and getauxval. + * configure: regenerated. + 2022-10-07 Eugene Rozenfeld * tree-cfg.cc (assign_discriminators): Set discriminators for call stmts diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 15f928c..836fdac 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20221009 +20221010 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a27b524..1b40443 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,16 @@ +2022-10-09 Dimitar Dimitrov + + PR target/106562 + * gcc.target/pru/pr106562-1.c: New test. + * gcc.target/pru/pr106562-2.c: New test. + * gcc.target/pru/pr106562-3.c: New test. + * gcc.target/pru/pr106562-4.c: New test. + +2022-10-09 Dimitar Dimitrov + + * gcc.target/pru/ashiftdi-1.c: New test. + * gcc.target/pru/lshiftrtdi-1.c: New test. 
+ 2022-10-08 Harald Anlauf Mikael Morin -- cgit v1.1 From 5e81df88cae713bf867c6260df79994d59cac5e5 Mon Sep 17 00:00:00 2001 From: Claudiu Zissulescu Date: Mon, 10 Oct 2022 10:27:12 +0300 Subject: arc: Use negative enter pattern instruction's offsets The enter pattern instruction contains the necessary information for the dwarf machinery to generate the appropriate dwarf code. This patch is fixing the register offsets related to CFA, and adds a test. gcc/ * config/arc/arc.cc (arc_save_callee_enter): Use negative offsets. gcc/testsuite * gcc.target/arc/enter-dw2-1.c: New file. Signed-off-by: Claudiu Zissulescu --- gcc/config/arc/arc.cc | 6 +++--- gcc/testsuite/gcc.target/arc/enter-dw2-1.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arc/enter-dw2-1.c (limited to 'gcc') diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index db4b56b..7be27e0 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -3356,7 +3356,7 @@ arc_save_callee_enter (uint64_t gmask, reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx, - off)); + -off)); XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg); RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1; off -= UNITS_PER_WORD; @@ -3370,7 +3370,7 @@ arc_save_callee_enter (uint64_t gmask, reg = gen_rtx_REG (SImode, regno); mem = gen_frame_mem (SImode, plus_constant (Pmode, stack_pointer_rtx, - off)); + -off)); XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, reg); RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx)) = 1; gmask = gmask & ~(1ULL << regno); @@ -3380,7 +3380,7 @@ arc_save_callee_enter (uint64_t gmask, { mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx, - off)); + -off)); XVECEXP (insn, 0, indx) = gen_rtx_SET (mem, hard_frame_pointer_rtx); RTX_FRAME_RELATED_P (XVECEXP (insn, 0, indx++)) = 1; off -= UNITS_PER_WORD; diff --git a/gcc/testsuite/gcc.target/arc/enter-dw2-1.c 
b/gcc/testsuite/gcc.target/arc/enter-dw2-1.c new file mode 100644 index 0000000..25d0356 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/enter-dw2-1.c @@ -0,0 +1,28 @@ +/* Verify that we generate appropriate CFI offsets in the case of enter + instruction. */ +/* { dg-skip-if "Not having enter_s insn." { arc700 || arc6xx } } */ +/* { dg-do compile } */ +/* { dg-options "-g -Os" } */ + +extern void bar (void); + +void foo (void) +{ + asm volatile (";my clobber list" + : : : "r13", "r14", "r15", "r16", "r17", "r18", "r19"); + bar (); + asm volatile (";my clobber list" + : : : "r13", "r14", "r15", "r16", "r17", "r18", "r19"); +} + + +/* { dg-final { scan-assembler-times "enter_s" 1 } } */ +/* { dg-final { scan-assembler-times "\.cfi_def_cfa_offset 32" 1 } } */ +/* { dg-final { scan-assembler-times "\.cfi_offset 31, -32" 1 } } */ +/* { dg-final { scan-assembler-times "\.cfi_offset 13, -28" 1 } } */ +/* { dg-final { scan-assembler-times "\.cfi_offset 14, -24" 1 } } */ +/* { dg-final { scan-assembler-times "\.cfi_offset 15, -20" 1 } } */ +/* { dg-final { scan-assembler-times "\.cfi_offset 16, -16" 1 } } */ +/* { dg-final { scan-assembler-times "\.cfi_offset 17, -12" 1 } } */ +/* { dg-final { scan-assembler-times "\.cfi_offset 18, -8" 1 } } */ +/* { dg-final { scan-assembler-times "\.cfi_offset 19, -4" 1 } } */ -- cgit v1.1 From af681e948fb67978a7c97a7fc2bd0d226be6a53b Mon Sep 17 00:00:00 2001 From: Claudiu Zissulescu Date: Mon, 10 Oct 2022 10:27:12 +0300 Subject: arc: Remove Rcr constraint gcc/ChangeLog: * config/arc/arc.md(mulsi3_700): Remove Rcr. (mulsi3_highpart): Likewise. (umulsi3_highpart_i): Likewise. (umulsi3_highpart_int): Likewise. (macd): Likewise. (macdu): Likewise. * config/arc/constraints.md (Rcr): Remove it. gcc/testsuite/ChangeLog: * gcc.target/arc/tmac-2.c: Update test. 
Signed-off-by: Claudiu Zissulescu --- gcc/config/arc/arc.md | 36 +++++++++++++++++------------------ gcc/config/arc/constraints.md | 10 ---------- gcc/testsuite/gcc.target/arc/tmac-2.c | 2 +- 3 files changed, 19 insertions(+), 29 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 7170445..90ce66d 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -2336,11 +2336,11 @@ archs4x, archs4xd" ; registers, since it cannot be the destination of a multi-cycle insn ; like MPY or MPYU. (define_insn "mulsi3_700" - [(set (match_operand:SI 0 "mpy_dest_reg_operand" "=Rcr,r,r,Rcr,r") - (mult:SI (match_operand:SI 1 "register_operand" "%0,c,0,0,c") - (match_operand:SI 2 "nonmemory_operand" "cL,cL,I,Cal,Cal")))] + [(set (match_operand:SI 0 "mpy_dest_reg_operand" "=r, r,r, r,r") + (mult:SI (match_operand:SI 1 "register_operand" "%0, r,0, 0,r") + (match_operand:SI 2 "nonmemory_operand" "rL,rL,I,Cal,Cal")))] "TARGET_ARC700_MPY" - "mpyu%? %0,%1,%2" + "mpyu%?\\t%0,%1,%2" [(set_attr "length" "4,4,4,8,8") (set_attr "type" "umulti") (set_attr "predicable" "yes,no,no,yes,no") @@ -2501,15 +2501,15 @@ archs4x, archs4xd" (set_attr "length" "8")]) (define_insn "mulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r") + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") (truncate:SI (lshiftrt:DI (mult:DI - (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c")) - (sign_extend:DI (match_operand:SI 2 "extend_operand" "c,c, i,i"))) + (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,r,0,r")) + (sign_extend:DI (match_operand:SI 2 "extend_operand" "r,r,i,i"))) (const_int 32))))] "TARGET_MPY" - "mpy%+%? %0,%1,%2" + "mpy%+%?\\t%0,%1,%2" [(set_attr "length" "4,4,8,8") (set_attr "type" "multi") (set_attr "predicable" "yes,no,yes,no") @@ -2518,15 +2518,15 @@ archs4x, archs4xd" ; Note that mpyhu has the same latency as mpy / mpyh, ; thus we use the type multi. 
(define_insn "*umulsi3_highpart_i" - [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r") + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") (truncate:SI (lshiftrt:DI (mult:DI - (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c")) - (zero_extend:DI (match_operand:SI 2 "extend_operand" "c,c, i,i"))) + (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,r,0,r")) + (zero_extend:DI (match_operand:SI 2 "extend_operand" "r,r,i,i"))) (const_int 32))))] "TARGET_MPY" - "mpy%+u%? %0,%1,%2" + "mpy%+u%?\\t%0,%1,%2" [(set_attr "length" "4,4,8,8") (set_attr "type" "multi") (set_attr "predicable" "yes,no,yes,no") @@ -2536,15 +2536,15 @@ archs4x, archs4xd" ;; need a separate pattern for immediates ;; ??? This is fine for combine, but not for reload. (define_insn "umulsi3_highpart_int" - [(set (match_operand:SI 0 "register_operand" "=Rcr, r, r,Rcr, r") + [(set (match_operand:SI 0 "register_operand" "=r, r, r,r, r") (truncate:SI (lshiftrt:DI (mult:DI - (zero_extend:DI (match_operand:SI 1 "register_operand" " 0, c, 0, 0, c")) - (match_operand:DI 2 "immediate_usidi_operand" "L, L, I, Cal, Cal")) + (zero_extend:DI (match_operand:SI 1 "register_operand" " 0, r, 0, 0, r")) + (match_operand:DI 2 "immediate_usidi_operand" "L, L, I,Cal,Cal")) (const_int 32))))] "TARGET_MPY" - "mpy%+u%? 
%0,%1,%2" + "mpy%+u%?\\t%0,%1,%2" [(set_attr "length" "4,4,4,8,8") (set_attr "type" "multi") (set_attr "predicable" "yes,no,no,yes,no") @@ -6141,7 +6141,7 @@ archs4x, archs4xd" (set_attr "length" "36")]) (define_insn "macd" - [(set (match_operand:DI 0 "even_register_operand" "=Rcr,r,r") + [(set (match_operand:DI 0 "even_register_operand" "=r,r,r") (plus:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,r,r")) @@ -6243,7 +6243,7 @@ archs4x, archs4xd" (set_attr "length" "36")]) (define_insn "macdu" - [(set (match_operand:DI 0 "even_register_operand" "=Rcr,r,r") + [(set (match_operand:DI 0 "even_register_operand" "=r,r,r") (plus:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,r,r")) diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md index 02aa37f..039954e 100644 --- a/gcc/config/arc/constraints.md +++ b/gcc/config/arc/constraints.md @@ -466,16 +466,6 @@ && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], REGNO (op))"))) -(define_constraint "Rcr" - "@internal - Cryptic r - for use in early alternatives with matching constraint" - (and (match_code "reg") - (match_test - "TARGET_Rcw - && REGNO (op) < FIRST_PSEUDO_REGISTER - && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], - REGNO (op))"))) - (define_constraint "Rcb" "@internal Stack Pointer register @code{r28} - do not reload into its class" diff --git a/gcc/testsuite/gcc.target/arc/tmac-2.c b/gcc/testsuite/gcc.target/arc/tmac-2.c index ee1339a..2bd051b 100644 --- a/gcc/testsuite/gcc.target/arc/tmac-2.c +++ b/gcc/testsuite/gcc.target/arc/tmac-2.c @@ -7,5 +7,5 @@ /* { dg-final { scan-assembler "mac " } } */ /* { dg-final { scan-assembler "macu" } } */ -/* { dg-final { scan-assembler "mpym " } } */ +/* { dg-final { scan-assembler "mpym\\t" } } */ /* { dg-final { scan-assembler "mpymu" } } */ -- cgit v1.1 From 5ebbdf9c4ea62327b983de4aff96e6846cf218b4 Mon Sep 17 00:00:00 2001 From: Claudiu Zissulescu Date: Mon, 10 Oct 2022 10:27:12 +0300 Subject: 
arc: Remove Rcw constraint gcc/Changelog: * config/arc/arc.md (smaxsi3): Remove Rcw. (sminsi3): Likewise. (addsi3_mixed): Likewise. (add_f_2): Likewise. (subsi3_insn): Likewise. (sub_f): Likewise. (sub_n): Likewise. (bset): Likewise. (bxor): Likewise. (bclr): Likewise. (bset_insn): Likewise. (bxor_insn): Likewise. (bclr_insn): Likewise. (bmsk_insn): Likewise. (bicsi3_insn): Likewise. (xorsi3): Likewise. (negsi2): Likewise. (lshrsi3_insn): Likewise. (abssf2): Likewise. (negsf2): Likewise. * config/arc/constraints.md(Rcw): Remove it. Signed-off-by: Claudiu Zissulescu --- gcc/config/arc/arc.md | 220 +++++++++++++++++++++--------------------- gcc/config/arc/constraints.md | 14 --- 2 files changed, 110 insertions(+), 124 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 90ce66d..e6fa2a1 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -1991,22 +1991,22 @@ archs4x, archs4xd" ;; Maximum and minimum insns (define_insn "smaxsi3" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w") - (smax:SI (match_operand:SI 1 "register_operand" "%0, c, c") - (match_operand:SI 2 "nonmemory_operand" "cL,cL,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=r, r, r") + (smax:SI (match_operand:SI 1 "register_operand" "%0, r, r") + (match_operand:SI 2 "nonmemory_operand" "rL,rL,Cal")))] "" - "max%? %0,%1,%2" + "max%?\\t%0,%1,%2" [(set_attr "type" "two_cycle_core") (set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no")] ) (define_insn "sminsi3" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w") - (smin:SI (match_operand:SI 1 "register_operand" "%0, c, c") - (match_operand:SI 2 "nonmemory_operand" "cL,cL,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=r, r, r") + (smin:SI (match_operand:SI 1 "register_operand" "%0, r, r") + (match_operand:SI 2 "nonmemory_operand" "rL,rL,Cal")))] "" - "min%? 
%0,%1,%2" + "min%?\\t%0,%1,%2" [(set_attr "type" "two_cycle_core") (set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no")] @@ -2028,10 +2028,10 @@ archs4x, archs4xd" ; We avoid letting this pattern use LP_COUNT as a register by specifying ; register class 'W' instead of 'w'. (define_insn_and_split "*addsi3_mixed" - ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12 - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,Rcq, h,!*Rsd,Rcq,Rcb,Rcq, Rcqq,Rcqq,Rcw,Rcw, Rcw, W, W,W, W,Rcqq,Rcw, W") - (plus:SI (match_operand:SI 1 "register_operand" "%0, c, 0, Rcqq, 0, 0,Rcb, Rcqq, 0, 0, c, 0, c, c,0, 0, 0, 0, c") - (match_operand:SI 2 "nonmemory_operand" "cL, 0, Cm1, L,CL2,Csp,CM4,RcqqK, cO, cL, 0,cCca,cLCmL,Cca,I,C2a, Cal,Cal,Cal")))] + ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12 + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,Rcq, h,!*Rsd,Rcq,Rcb,Rcq, Rcqq,Rcqq, r,r, r, W, W,W, W,Rcqq, r, W") + (plus:SI (match_operand:SI 1 "register_operand" "%0, c, 0, Rcqq, 0, 0,Rcb, Rcqq, 0, 0,r, 0, c, c,0, 0, 0, 0, c") + (match_operand:SI 2 "nonmemory_operand" "cL, 0, Cm1, L,CL2,Csp,CM4,RcqqK, cO,rL,0,rCca,cLCmL,Cca,I,C2a, Cal,Cal,Cal")))] "" { arc_output_addsi (operands, arc_ccfsm_cond_exec_p (), true); @@ -2792,13 +2792,13 @@ archs4x, archs4xd" (define_insn "*add_f_2" [(set (reg:CC_C CC_REG) (compare:CC_C - (plus:SI (match_operand:SI 1 "register_operand" "c,0,c") - (match_operand:SI 2 "nonmemory_operand" "cL,I,cCal")) + (plus:SI (match_operand:SI 1 "register_operand" "r ,0,r") + (match_operand:SI 2 "nonmemory_operand" "rL,I,rCal")) (match_dup 2))) - (set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w") + (set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") (plus:SI (match_dup 1) (match_dup 2)))] "" - "add.f %0,%1,%2" + "add.f\\t%0,%1,%2" [(set_attr "cond" "set") (set_attr "type" "compare") (set_attr "length" "4,4,8")]) @@ -2895,22 +2895,22 @@ archs4x, archs4xd" ; the casesi expander might generate a sub of zero, so we have to recognize it. 
; combine should make such an insn go away. (define_insn_and_split "subsi3_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,Rcw,Rcw,w,w,w, w, w, w") - (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,Rcqq, 0, cL,c,L,I,Cal,Cal, c") - (match_operand:SI 2 "nonmemory_operand" "Rcqq,Rcqq, c, 0,c,c,0, 0, c,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,r, r,r,r,r, r, r, r") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,Rcqq,0,rL,r,L,I,Cal,Cal, r") + (match_operand:SI 2 "nonmemory_operand" "Rcqq,Rcqq,r, 0,r,r,0, 0, r,Cal")))] "register_operand (operands[1], SImode) || register_operand (operands[2], SImode)" "@ - sub%? %0,%1,%2%& - sub%? %0,%1,%2%& - sub%? %0,%1,%2 - rsub%? %0,%2,%1 - sub %0,%1,%2 - rsub %0,%2,%1 - rsub %0,%2,%1 - rsub%? %0,%2,%1 - rsub %0,%2,%1 - sub %0,%1,%2" + sub%?\\t%0,%1,%2%& + sub%?\\t%0,%1,%2%& + sub%?\\t%0,%1,%2 + rsub%?\\t%0,%2,%1 + sub\\t%0,%1,%2 + rsub\\t%0,%2,%1 + rsub\\t%0,%2,%1 + rsub%?\\t%0,%2,%1 + rsub\\t%0,%2,%1 + sub\\t%0,%1,%2" "reload_completed && get_attr_length (insn) == 8 && satisfies_constraint_I (operands[1]) && GET_CODE (PATTERN (insn)) != COND_EXEC" @@ -2990,19 +2990,19 @@ archs4x, archs4xd" (define_insn "sub_f" [(set (reg:CC CC_REG) - (compare:CC (match_operand:SI 1 "nonmemory_operand" " c,L,0,I,c,Cal") - (match_operand:SI 2 "nonmemory_operand" "cL,c,I,0,Cal,c"))) - (set (match_operand:SI 0 "dest_reg_operand" "=w,w,Rcw,Rcw,w,w") + (compare:CC (match_operand:SI 1 "nonmemory_operand" " r,L,0,I,r,Cal") + (match_operand:SI 2 "nonmemory_operand" "rL,r,I,0,Cal,r"))) + (set (match_operand:SI 0 "dest_reg_operand" "=r,r,r,r,r,r") (minus:SI (match_dup 1) (match_dup 2)))] "register_operand (operands[1], SImode) || register_operand (operands[2], SImode)" "@ - sub.f %0,%1,%2 - rsub.f %0,%2,%1 - sub.f %0,%1,%2 - rsub.f %0,%2,%1 - sub.f %0,%1,%2 - sub.f %0,%1,%2" + sub.f\\t%0,%1,%2 + rsub.f\\t%0,%2,%1 + sub.f\\t%0,%1,%2 + rsub.f\\t%0,%2,%1 + sub.f\\t%0,%1,%2 + sub.f\\t%0,%1,%2" [(set_attr 
"type" "compare") (set_attr "length" "4,4,4,4,8,8")]) @@ -3051,12 +3051,12 @@ archs4x, archs4xd" ;; N.B. sub[123] has the operands of the MINUS in the opposite order from ;; what synth_mult likes. (define_insn "*sub_n" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") - (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,c,?Cal") - (ashift:SI (match_operand:SI 2 "register_operand" "c,c,c") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,r,?Cal") + (ashift:SI (match_operand:SI 2 "register_operand" "r,r,r") (match_operand:SI 3 "_1_2_3_operand" ""))))] "" - "sub%c3%? %0,%1,%2" + "sub%c3%?\\t%0,%1,%2" [(set_attr "type" "shift") (set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") @@ -3064,12 +3064,12 @@ archs4x, archs4xd" (set_attr "iscompact" "false")]) (define_insn "*sub_n" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") - (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,c,?Cal") - (mult:SI (match_operand:SI 2 "register_operand" "c,c,c") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,r,?Cal") + (mult:SI (match_operand:SI 2 "register_operand" "r,r,r") (match_operand:SI 3 "_2_4_8_operand" ""))))] "" - "sub%z3%? %0,%1,%2" + "sub%z3%?\\t%0,%1,%2" [(set_attr "type" "shift") (set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") @@ -3078,12 +3078,12 @@ archs4x, archs4xd" ; ??? check if combine matches this. (define_insn "*bset" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") (ior:SI (ashift:SI (const_int 1) - (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")) - (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + (match_operand:SI 1 "nonmemory_operand" "rL,rL,r")) + (match_operand:SI 2 "nonmemory_operand" "0,r,Cal")))] "" - "bset%? 
%0,%2,%1" + "bset%?\\t%0,%2,%1" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3091,12 +3091,12 @@ archs4x, archs4xd" ; ??? check if combine matches this. (define_insn "*bxor" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") (xor:SI (ashift:SI (const_int 1) - (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")) - (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + (match_operand:SI 1 "nonmemory_operand" "rL,rL,r")) + (match_operand:SI 2 "nonmemory_operand" "0,r,Cal")))] "" - "bxor%? %0,%2,%1" + "bxor%?\\t%0,%2,%1" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3104,12 +3104,12 @@ archs4x, archs4xd" ; ??? check if combine matches this. (define_insn "*bclr" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") (and:SI (not:SI (ashift:SI (const_int 1) - (match_operand:SI 1 "nonmemory_operand" "cL,cL,c"))) - (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))] + (match_operand:SI 1 "nonmemory_operand" "rL,rL,r"))) + (match_operand:SI 2 "nonmemory_operand" "0,r,Cal")))] "" - "bclr%? %0,%2,%1" + "bclr%?\\t%0,%2,%1" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3121,15 +3121,15 @@ archs4x, archs4xd" ; see also iorsi3 for use with constant bit number. (define_insn "*bset_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") - (ior:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") + (ior:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal") (ashift:SI (const_int 1) - (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ] + (match_operand:SI 2 "nonmemory_operand" "rL,rL,r"))) ) ] "" "@ - bset%? 
%0,%1,%2 ;;peep2, constr 1 - bset %0,%1,%2 ;;peep2, constr 2 - bset %0,%1,%2 ;;peep2, constr 3" + bset%?\\t%0,%1,%2 ;;peep2, constr 1 + bset\\t%0,%1,%2 ;;peep2, constr 2 + bset\\t%0,%1,%2 ;;peep2, constr 3" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3137,15 +3137,15 @@ archs4x, archs4xd" ; see also xorsi3 for use with constant bit number. (define_insn "*bxor_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") - (xor:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") + (xor:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal") (ashift:SI (const_int 1) - (match_operand:SI 2 "nonmemory_operand" "cL,cL,c"))) ) ] + (match_operand:SI 2 "nonmemory_operand" "rL,rL,r"))) ) ] "" "@ - bxor%? %0,%1,%2 - bxor %0,%1,%2 - bxor %0,%1,%2" + bxor%?\\t%0,%1,%2 + bxor\\t%0,%1,%2 + bxor\\t%0,%1,%2" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3153,15 +3153,15 @@ archs4x, archs4xd" ; see also andsi3 for use with constant bit number. (define_insn "*bclr_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") (and:SI (not:SI (ashift:SI (const_int 1) - (match_operand:SI 2 "nonmemory_operand" "cL,rL,r"))) - (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")))] + (match_operand:SI 2 "nonmemory_operand" "rL,rL,r"))) + (match_operand:SI 1 "nonmemory_operand" "0,r,Cal")))] "" "@ - bclr%? %0,%1,%2 - bclr %0,%1,%2 - bclr %0,%1,%2" + bclr%?\\t%0,%1,%2 + bclr\\t%0,%1,%2 + bclr\\t%0,%1,%2" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3169,17 +3169,17 @@ archs4x, archs4xd" ; see also andsi3 for use with constant bit number. 
(define_insn "*bmsk_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w") - (and:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal") + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r") + (and:SI (match_operand:SI 1 "nonmemory_operand" "0,r,Cal") (plus:SI (ashift:SI (const_int 1) (plus:SI (match_operand:SI 2 "nonmemory_operand" "rL,rL,r") (const_int 1))) (const_int -1))))] "" "@ - bmsk%? %0,%1,%2 - bmsk %0,%1,%2 - bmsk %0,%1,%2" + bmsk%?\\t%0,%1,%2 + bmsk\\t%0,%1,%2 + bmsk\\t%0,%1,%2" [(set_attr "length" "4,4,8") (set_attr "predicable" "yes,no,no") (set_attr "cond" "canuse,nocond,nocond")] @@ -3282,18 +3282,18 @@ archs4x, archs4xd" ;;bic define_insn that allows limm to be the first operand (define_insn "*bicsi3_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,Rcw,Rcw,w,w,w") - (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "Rcqq,Lc,I,Cal,Lc,Cal,c")) - (match_operand:SI 2 "nonmemory_operand" "0,0,0,0,c,c,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,r,r,r,r,r,r") + (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "Rcqq,Lr,I,Cal,Lr,Cal,r")) + (match_operand:SI 2 "nonmemory_operand" "0,0,0,0,r,r,Cal")))] "" "@ - bic%? %0, %2, %1%& ;;constraint 0 - bic%? %0,%2,%1 ;;constraint 1 - bic %0,%2,%1 ;;constraint 2, FIXME: will it ever get generated ??? - bic%? %0,%2,%1 ;;constraint 3, FIXME: will it ever get generated ??? - bic %0,%2,%1 ;;constraint 4 - bic %0,%2,%1 ;;constraint 5, FIXME: will it ever get generated ??? - bic %0,%2,%1 ;;constraint 6" + bic%?\\t%0, %2, %1%& ;;constraint 0 + bic%?\\t%0,%2,%1 ;;constraint 1 + bic\\t%0,%2,%1 ;;constraint 2, FIXME: will it ever get generated ??? + bic%?\\t%0,%2,%1 ;;constraint 3, FIXME: will it ever get generated ??? + bic\\t%0,%2,%1 ;;constraint 4 + bic\\t%0,%2,%1 ;;constraint 5, FIXME: will it ever get generated ??? 
+ bic\\t%0,%2,%1 ;;constraint 6" [(set_attr "length" "*,4,4,8,4,8,8") (set_attr "iscompact" "maybe, false, false, false, false, false, false") (set_attr "predicable" "no,yes,no,yes,no,no,no") @@ -3334,19 +3334,19 @@ archs4x, archs4xd" (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,nocond,canuse,nocond")]) (define_insn "xorsi3" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcw,Rcw,Rcw,Rcw, w, w,w, w, w") - (xor:SI (match_operand:SI 1 "register_operand" "%0, Rcq, 0, c, 0, 0, c, c,0, 0, c") - (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq, r,r, r,r, r, r,r, r, r") + (xor:SI (match_operand:SI 1 "register_operand" "%0, Rcq, 0,r, 0,0, r, r,0, 0, r") + (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0,rL,0,C0p,I,rL,C0p,I,Cal,Cal")))] "" "* switch (which_alternative) { case 0: case 2: case 5: case 6: case 8: case 9: case 10: - return \"xor%? %0,%1,%2%&\"; + return \"xor%?\\t%0,%1,%2%&\"; case 1: case 3: - return \"xor%? %0,%2,%1%&\"; + return \"xor%?\\t%0,%2,%1%&\"; case 4: case 7: - return \"bxor%? %0,%1,%z2\"; + return \"bxor%?\\t%0,%1,%z2\"; default: gcc_unreachable (); } @@ -3358,10 +3358,10 @@ archs4x, archs4xd" (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")]) (define_insn "negsi2" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,Rcw,w") - (neg:SI (match_operand:SI 1 "register_operand" "0,Rcqq,0,c")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,r,r") + (neg:SI (match_operand:SI 1 "register_operand" "0,Rcqq,0,r")))] "" - "neg%? 
%0,%1%&" + "neg%?\\t%0,%1%&" [(set_attr "type" "unary") (set_attr "iscompact" "maybe,true,false,false") (set_attr "predicable" "no,no,yes,no")]) @@ -3498,14 +3498,14 @@ archs4x, archs4xd" (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) (define_insn "*lshrsi3_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w") - (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal") - (match_operand:SI 2 "nonmemory_operand" "N, N,RcqqM, cL,cL,cCal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq, r, r, r") + (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, r,rCal") + (match_operand:SI 2 "nonmemory_operand" "N, N,RcqqM,rL,rL,rCal")))] "TARGET_BARREL_SHIFTER && (register_operand (operands[1], SImode) || register_operand (operands[2], SImode))" "*return (which_alternative <= 1 && !arc_ccfsm_cond_exec_p () - ? \"lsr%? %0,%1%&\" : \"lsr%? %0,%1,%2%&\");" + ? \"lsr%?\\t%0,%1%&\" : \"lsr%?\\t%0,%1,%2%&\");" [(set_attr "type" "shift") (set_attr "iscompact" "maybe,maybe,maybe,false,false,false") (set_attr "predicable" "no,no,no,yes,no,no") @@ -5153,20 +5153,20 @@ archs4x, archs4xd" (set_attr "predicable" "yes")]) (define_insn "abssf2" - [(set (match_operand:SF 0 "dest_reg_operand" "=Rcq#q,Rcw,w") - (abs:SF (match_operand:SF 1 "register_operand" "0, 0,c")))] + [(set (match_operand:SF 0 "dest_reg_operand" "=Rcq#q,r,r") + (abs:SF (match_operand:SF 1 "register_operand" "0,0,r")))] "" - "bclr%? %0,%1,31%&" + "bclr%?\\t%0,%1,31%&" [(set_attr "type" "unary") (set_attr "iscompact" "maybe,false,false") (set_attr "length" "2,4,4") (set_attr "predicable" "no,yes,no")]) (define_insn "negsf2" - [(set (match_operand:SF 0 "dest_reg_operand" "=Rcw,w") - (neg:SF (match_operand:SF 1 "register_operand" "0,c")))] + [(set (match_operand:SF 0 "dest_reg_operand" "=r,r") + (neg:SF (match_operand:SF 1 "register_operand" "0,r")))] "" - "bxor%? 
%0,%1,31" + "bxor%?\\t%0,%1,31" [(set_attr "type" "unary") (set_attr "predicable" "yes,no")]) diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md index 039954e..69ec4d5 100644 --- a/gcc/config/arc/constraints.md +++ b/gcc/config/arc/constraints.md @@ -452,20 +452,6 @@ && !arc_ccfsm_cond_exec_p () && IN_RANGE (REGNO (op) ^ 4, 4, 11)"))) -; If we need a reload, we generally want to steer reload to use three-address -; alternatives in preference of two-address alternatives, unless the -; three-address alternative introduces a LIMM that is unnecessary for the -; two-address alternative. -(define_constraint "Rcw" - "@internal - Cryptic w - for use in early alternatives with matching constraint" - (and (match_code "reg") - (match_test - "TARGET_Rcw - && REGNO (op) < FIRST_PSEUDO_REGISTER - && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], - REGNO (op))"))) - (define_constraint "Rcb" "@internal Stack Pointer register @code{r28} - do not reload into its class" -- cgit v1.1 From e8f5074f486a2423355703483a8626a4ffe9fe47 Mon Sep 17 00:00:00 2001 From: Claudiu Zissulescu Date: Mon, 10 Oct 2022 10:27:12 +0300 Subject: arc: Remove Rcq constraint. gcc/ * config/arc/arc.cc (arc_check_short_reg_p): New function. (arc_address_cost): Replace satisfies_constraint_Rcq with the above new function. (arc_output_addsi): Likewise. (split_addsi): Likewise. (split_subsi): Likewise. * config/arc/arc.md (movqi_insn): Remove Rcq constraint. (movhi_insn): Likewise. (movsi_insn): Likewise. (tst_movb): Likewise. (tst): Likewise. (tst_bitfield): Likewise. (abssi2): Likewise. (addsi3_mixed): Likewise. (mulhisi3_reg): Likewise. (umulhisi3_reg): Likewise. (mulsi_600): Likewise. (mul64): Likewise. (subsi3_insn): Likewise. (bicsi3_insn): Likewise. (xorsi3): Likewise. (negsi2): Likewise. (one_cmplsi2): Likewise. (lshrsi3_insn): Likewise. (cmpsi_cc_insn_mixed): Likewise. (cmpsi_cc_zn_insn): Likewise. (btst): Likewise. (cmpsi_cc_z_insn): Likewise. (cmpsi_cc_c_insn): Likewise. 
(indirect_jump): Likewise. (casesi_jump): Likewise. (call_i): Likewise. (call_value_i): Likewise. (bbit): Likewise. (abssf2): Likewise. (ashlsi2_cnt1): Likewise. (lshrsi3_cnt1): Likewise. (ashrsi3_cnt1): Likewise. * config/arc/constraints.md (Rcq): Remove. Signed-off-by: Claudiu Zissulescu --- gcc/config/arc/arc.cc | 48 ++++++++----- gcc/config/arc/arc.md | 152 ++++++++++++++++++++---------------------- gcc/config/arc/constraints.md | 20 ------ 3 files changed, 104 insertions(+), 116 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index 7be27e0..e6f52d8 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -2474,6 +2474,20 @@ arc_setup_incoming_varargs (cumulative_args_t args_so_far, } } +/* Return TRUE if reg is ok for short instrcutions. */ + +static bool +arc_check_short_reg_p (rtx op) +{ + if (!REG_P (op)) + return false; + + if (IN_RANGE (REGNO (op) ^ 4, 4, 11)) + return true; + + return false; +} + /* Cost functions. */ /* Provide the costs of an addressing mode that contains ADDR. @@ -2485,7 +2499,7 @@ arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed) switch (GET_CODE (addr)) { case REG : - return speed || satisfies_constraint_Rcq (addr) ? 0 : 1; + return speed || arc_check_short_reg_p (addr) ? 0 : 1; case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: case PRE_MODIFY: case POST_MODIFY: return !speed; @@ -2517,14 +2531,14 @@ arc_address_cost (rtx addr, machine_mode, addr_space_t, bool speed) ? COSTS_N_INSNS (1) : speed ? 0 - : (satisfies_constraint_Rcq (plus0) + : (arc_check_short_reg_p (plus0) && satisfies_constraint_O (plus1)) ? 0 : 1); case REG: return (speed < 1 ? 0 - : (satisfies_constraint_Rcq (plus0) - && satisfies_constraint_Rcq (plus1)) + : (arc_check_short_reg_p (plus0) + && arc_check_short_reg_p (plus1)) ? 0 : 1); case CONST : case SYMBOL_REF : @@ -9003,8 +9017,8 @@ arc_output_addsi (rtx *operands, bool cond_p, bool output_p) int intval = (REG_P (operands[2]) ? 
1 : CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057); int neg_intval = -intval; - int short_0 = satisfies_constraint_Rcq (operands[0]); - int short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1])); + int short_0 = arc_check_short_reg_p (operands[0]); + int short_p = (!cond_p && short_0 && arc_check_short_reg_p (operands[1])); int ret = 0; #define REG_H_P(OP) (REG_P (OP) && ((TARGET_V2 && REGNO (OP) <= 31 \ @@ -9037,7 +9051,7 @@ arc_output_addsi (rtx *operands, bool cond_p, bool output_p) patterns. */ if (short_p && ((REG_H_P (operands[2]) - && (match || satisfies_constraint_Rcq (operands[2]))) + && (match || arc_check_short_reg_p (operands[2]))) || (CONST_INT_P (operands[2]) && ((unsigned) intval <= (match ? 127 : 7))))) ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;1"); @@ -9064,7 +9078,7 @@ arc_output_addsi (rtx *operands, bool cond_p, bool output_p) /* Generate add_s r0,b,u6; add_s r1,b,u6 patterns. */ if (TARGET_CODE_DENSITY && REG_P (operands[0]) && REG_P (operands[1]) && ((REGNO (operands[0]) == 0) || (REGNO (operands[0]) == 1)) - && satisfies_constraint_Rcq (operands[1]) + && arc_check_short_reg_p (operands[1]) && satisfies_constraint_L (operands[2])) ADDSI_OUTPUT1 ("add%? %0,%1,%2 ;6"); } @@ -10033,7 +10047,7 @@ split_addsi (rtx *operands) /* Try for two short insns first. Lengths being equal, we prefer expansions with shorter register lifetimes. */ if (val > 127 && val <= 255 - && satisfies_constraint_Rcq (operands[0])) + && arc_check_short_reg_p (operands[0])) { operands[3] = operands[2]; operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]); @@ -10057,8 +10071,8 @@ split_subsi (rtx *operands) /* Try for two short insns first. Lengths being equal, we prefer expansions with shorter register lifetimes. 
*/ - if (satisfies_constraint_Rcq (operands[0]) - && satisfies_constraint_Rcq (operands[2])) + if (arc_check_short_reg_p (operands[0]) + && arc_check_short_reg_p (operands[2])) { if (val >= -31 && val <= 127) { @@ -10436,12 +10450,12 @@ arc_lra_p (void) return arc_lra_flag; } -/* ??? Should we define TARGET_REGISTER_PRIORITY? We might perfer to use - Rcq registers, because some insn are shorter with them. OTOH we already - have separate alternatives for this purpose, and other insns don't - mind, so maybe we should rather prefer the other registers? - We need more data, and we can only get that if we allow people to - try all options. */ +/* ??? Should we define TARGET_REGISTER_PRIORITY? We might perfer to + use q registers, because some insn are shorter with them. OTOH we + already have separate alternatives for this purpose, and other + insns don't mind, so maybe we should rather prefer the other + registers? We need more data, and we can only get that if we allow + people to try all options. */ static int arc_register_priority (int r) { diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index e6fa2a1..458d3ed 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -696,15 +696,13 @@ archs4x, archs4xd" ; In order to allow the ccfsm machinery to do its work, the leading compact ; alternatives say 'canuse' - there is another alternative that will match ; when the condition codes are used. -; Rcq won't match if the condition is actually used; to avoid a spurious match -; via q, q is inactivated as constraint there. ; Likewise, the length of an alternative that might be shifted to conditional ; execution must reflect this, lest out-of-range branches are created. ; The iscompact attribute allows the epilogue expander to know for which ; insns it should lengthen the return insn. 
(define_insn "*movqi_insn" - [(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w, w,???w,h, w,Rcq, S,!*x, r,r, Ucm,m,???m, m,Usc") - (match_operand:QI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL, I,?Rac,i,?i, T,Rcq,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))] + [(set (match_operand:QI 0 "move_dest_operand" "=q, q,r,q, h, w, w,???w,h, w,q,S,!*x, r,r, Ucm,m,???m, m,Usc") + (match_operand:QI 1 "move_src_operand" "rL,rP,q,P,hCm1,cL, I,?Rac,i,?i,T,q,Usd,Ucm,m,?Rac,c,?Rac,Cm3,i"))] "register_operand (operands[0], QImode) || register_operand (operands[1], QImode) || (satisfies_constraint_Cm3 (operands[1]) @@ -742,8 +740,8 @@ archs4x, archs4xd" "if (prepare_move_operands (operands, HImode)) DONE;") (define_insn "*movhi_insn" - [(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q, w,Rcq#q, h, w, w,???w,Rcq#q,h, w,Rcq, S, r,r, Ucm,m,???m, m,VUsc") - (match_operand:HI 1 "move_src_operand" " cL, cP,Rcq#q, P,hCm1,cL, I,?Rac, i,i,?i, T,Rcq,Ucm,m,?Rac,c,?Rac,Cm3,i"))] + [(set (match_operand:HI 0 "move_dest_operand" "=q, q,r,q, h, w, w,???w,q,h, w,q,S, r,r, Ucm,m,???m, m,VUsc") + (match_operand:HI 1 "move_src_operand" " rL,rP,q,P,hCm1,cL, I,?Rac,i,i,?i,T,q,Ucm,m,?Rac,c,?Rac,Cm3,i"))] "register_operand (operands[0], HImode) || register_operand (operands[1], HImode) || (CONSTANT_P (operands[1]) @@ -793,8 +791,8 @@ archs4x, archs4xd" ; the iscompact attribute allows the epilogue expander to know for which ; insns it should lengthen the return insn. 
(define_insn_and_split "*movsi_insn" ; 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 - [(set (match_operand:SI 0 "move_dest_operand" "=q, q,r,q, h, rl,r, r, r, r, ?r, r, q, h, rl, q, S, Us<,RcqRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m, m,VUsc") - (match_operand:SI 1 "move_src_operand" "rL,rP,q,P,hCm1,rLl,I,Clo,Chi,Cbi,Cpc,Clb,Cax,Cal,Cal,Uts,Rcq,RcqRck, Us>,Usd,Ucm, Usd, Ucd,m, r,!*Rzd,r,Cm3, C32"))] + [(set (match_operand:SI 0 "move_dest_operand" "=q, q,r,q, h, rl,r, r, r, r, ?r, r, q, h, rl, q, S, Us<,qRck,!*x, r,!*Rsd,!*Rcd,r,Ucm, Usd,m, m,VUsc") + (match_operand:SI 1 "move_src_operand" "rL,rP,q,P,hCm1,rLl,I,Clo,Chi,Cbi,Cpc,Clb,Cax,Cal,Cal,Uts,q,qRck, Us>,Usd,Ucm, Usd, Ucd,m, r,!*Rzd,r,Cm3, C32"))] "register_operand (operands[0], SImode) || register_operand (operands[1], SImode) || (CONSTANT_P (operands[1]) @@ -998,8 +996,8 @@ archs4x, archs4xd" (match_operand 0 "cc_register" "") (match_operator 4 "zn_compare_operator" [(and:SI - (match_operand:SI 1 "register_operand" "%Rcq,Rcq, c, c, c, c,Rrq,Rrq, c") - (match_operand:SI 2 "nonmemory_operand" "Rcq,C0p,cI,C1p,Ccp,Chs,Cbf,Cbf,???Cal")) + (match_operand:SI 1 "register_operand" "%q, q, c, c, c, c, q, q, c") + (match_operand:SI 2 "nonmemory_operand" "q,C0p,cI,C1p,Ccp,Chs,Cbf,Cbf,???Cal")) (const_int 0)])) (clobber (match_scratch:SI 3 "=X,X,X,X,X,X,Rrq,1,c"))] "TARGET_NPS_BITOPS" @@ -1014,9 +1012,9 @@ archs4x, archs4xd" (match_operator 3 "zn_compare_operator" [(and:SI (match_operand:SI 1 "register_operand" - "%Rcq,Rcq, c, c, c, c, c, c") + "%q, q, c, c, c, c, c, c") (match_operand:SI 2 "nonmemory_operand" - " Rcq,C0p,cI,cL,C1p,Ccp,Chs,Cal")) + " q,C0p,cI,cL,C1p,Ccp,Chs,Cal")) (const_int 0)]))] "reload_completed || !satisfies_constraint_Cbf (operands[2]) @@ -1092,9 +1090,9 @@ archs4x, archs4xd" [(set (match_operand:CC_ZN 0 "cc_set_register" "") (match_operator 5 "zn_compare_operator" [(zero_extract:SI - (match_operand:SI 1 "register_operand" "%Rcqq,c, c,Rrq,c") - (match_operand:SI 2 
"const_int_operand" "N,N, n,Cbn,n") - (match_operand:SI 3 "const_int_operand" "n,n,C_0,Cbn,n")) + (match_operand:SI 1 "register_operand" "%q,c, c,Rrq,c") + (match_operand:SI 2 "const_int_operand" "N,N, n,Cbn,n") + (match_operand:SI 3 "const_int_operand" "n,n,C_0,Cbn,n")) (const_int 0)])) (clobber (match_scratch:SI 4 "=X,X,X,Rrq,X"))] "" @@ -1678,7 +1676,7 @@ archs4x, archs4xd" "" { if (rtx_equal_p (operands[1], const0_rtx) && GET_CODE (operands[3]) == NE - && satisfies_constraint_Rcq (operands[0])) + && IN_RANGE (REGNO (operands[0]) ^ 4, 4, 11)) return "sub%?.ne %0,%0,%0"; /* ??? might be good for speed on ARC600 too, *if* properly scheduled. */ if ((optimize_size && (!TARGET_ARC600_FAMILY)) @@ -1980,8 +1978,8 @@ archs4x, archs4xd" ;; Absolute instructions (define_insn "abssi2" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,w,w") - (abs:SI (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,w,w") + (abs:SI (match_operand:SI 1 "nonmemory_operand" "q,cL,Cal")))] "" "abs%? %0,%1%&" [(set_attr "type" "two_cycle_core") @@ -2028,10 +2026,10 @@ archs4x, archs4xd" ; We avoid letting this pattern use LP_COUNT as a register by specifying ; register class 'W' instead of 'w'. 
(define_insn_and_split "*addsi3_mixed" - ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12 - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,Rcq, h,!*Rsd,Rcq,Rcb,Rcq, Rcqq,Rcqq, r,r, r, W, W,W, W,Rcqq, r, W") - (plus:SI (match_operand:SI 1 "register_operand" "%0, c, 0, Rcqq, 0, 0,Rcb, Rcqq, 0, 0,r, 0, c, c,0, 0, 0, 0, c") - (match_operand:SI 2 "nonmemory_operand" "cL, 0, Cm1, L,CL2,Csp,CM4,RcqqK, cO,rL,0,rCca,cLCmL,Cca,I,C2a, Cal,Cal,Cal")))] + ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12 + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q, h,!*Rsd, q,Rcb, q, q, q, r,r, r, W, W,W, W, q, r, W") + (plus:SI (match_operand:SI 1 "register_operand" "%0,c, 0, q, 0, 0,Rcb, q, 0, 0,r, 0, c, c,0, 0, 0, 0, c") + (match_operand:SI 2 "nonmemory_operand" "cL,0, Cm1, L,CL2,Csp,CM4,qK,cO,rL,0,rCca,cLCmL,Cca,I,C2a,Cal,Cal,Cal")))] "" { arc_output_addsi (operands, arc_ccfsm_cond_exec_p (), true); @@ -2083,9 +2081,9 @@ archs4x, archs4xd" ]) (define_insn "mulhisi3_reg" - [(set (match_operand:SI 0 "register_operand" "=Rcqq,r,r") - (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" " 0,0,r")) - (sign_extend:SI (match_operand:HI 2 "nonmemory_operand" "Rcqq,r,r"))))] + [(set (match_operand:SI 0 "register_operand" "=q,r,r") + (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" "0,0,r")) + (sign_extend:SI (match_operand:HI 2 "nonmemory_operand" "q,r,r"))))] "TARGET_MPYW" "mpyw%? %0,%1,%2" [(set_attr "length" "*,4,4") @@ -2123,9 +2121,9 @@ archs4x, archs4xd" ]) (define_insn "umulhisi3_reg" - [(set (match_operand:SI 0 "register_operand" "=Rcqq, r, r") - (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" " %0, 0, r")) - (zero_extend:SI (match_operand:HI 2 "register_operand" " Rcqq, r, r"))))] + [(set (match_operand:SI 0 "register_operand" "=q, r, r") + (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" "%0, 0, r")) + (zero_extend:SI (match_operand:HI 2 "register_operand" "q, r, r"))))] "TARGET_MPYW" "mpyuw%? 
%0,%1,%2" [(set_attr "length" "*,4,4") @@ -2246,8 +2244,8 @@ archs4x, archs4xd" (define_insn "mulsi_600" [(set (match_operand:SI 2 "mlo_operand" "") - (mult:SI (match_operand:SI 0 "register_operand" "%Rcq#q,c,c,c") - (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,I,Cal"))) + (mult:SI (match_operand:SI 0 "register_operand" "%q,c,c,c") + (match_operand:SI 1 "nonmemory_operand" "q,cL,I,Cal"))) (clobber (match_operand:SI 3 "mhi_operand" ""))] "TARGET_MUL64_SET" "mul64%?\\t0,%0,%1" @@ -2282,8 +2280,8 @@ archs4x, archs4xd" (define_insn "mul64" [(set (reg:DI MUL64_OUT_REG) (mult:DI - (sign_extend:DI (match_operand:SI 0 "register_operand" "%Rcq#q, c,c, c")) - (sign_extend:DI (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,L,C32"))))] + (sign_extend:DI (match_operand:SI 0 "register_operand" "%q, c,c, c")) + (sign_extend:DI (match_operand:SI 1 "nonmemory_operand" "q,cL,L,C32"))))] "TARGET_MUL64_SET" "mul64%? \t0, %0, %1%&" [(set_attr "length" "*,4,4,8") @@ -2895,9 +2893,9 @@ archs4x, archs4xd" ; the casesi expander might generate a sub of zero, so we have to recognize it. ; combine should make such an insn go away. 
(define_insn_and_split "subsi3_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,r, r,r,r,r, r, r, r") - (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,Rcqq,0,rL,r,L,I,Cal,Cal, r") - (match_operand:SI 2 "nonmemory_operand" "Rcqq,Rcqq,r, 0,r,r,0, 0, r,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q,r, r,r,r,r, r, r, r") + (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,q,0,rL,r,L,I,Cal,Cal, r") + (match_operand:SI 2 "nonmemory_operand" "q,q,r, 0,r,r,0, 0, r,Cal")))] "register_operand (operands[1], SImode) || register_operand (operands[2], SImode)" "@ @@ -3282,8 +3280,8 @@ archs4x, archs4xd" ;;bic define_insn that allows limm to be the first operand (define_insn "*bicsi3_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,r,r,r,r,r,r") - (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "Rcqq,Lr,I,Cal,Lr,Cal,r")) + [(set (match_operand:SI 0 "dest_reg_operand" "=q,r,r,r,r,r,r") + (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "q,Lr,I,Cal,Lr,Cal,r")) (match_operand:SI 2 "nonmemory_operand" "0,0,0,0,r,r,Cal")))] "" "@ @@ -3334,9 +3332,9 @@ archs4x, archs4xd" (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,nocond,canuse,nocond")]) (define_insn "xorsi3" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq, r,r, r,r, r, r,r, r, r") - (xor:SI (match_operand:SI 1 "register_operand" "%0, Rcq, 0,r, 0,0, r, r,0, 0, r") - (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0,rL,0,C0p,I,rL,C0p,I,Cal,Cal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q, r,r, r,r, r, r,r, r, r") + (xor:SI (match_operand:SI 1 "register_operand" "%0,q, 0,r, 0,0, r, r,0, 0, r") + (match_operand:SI 2 "nonmemory_operand" "q,0,rL,0,C0p,I,rL,C0p,I,Cal,Cal")))] "" "* switch (which_alternative) @@ -3358,8 +3356,8 @@ archs4x, archs4xd" (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")]) (define_insn "negsi2" - [(set 
(match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,r,r") - (neg:SI (match_operand:SI 1 "register_operand" "0,Rcqq,0,r")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q,r,r") + (neg:SI (match_operand:SI 1 "register_operand" "0,q,0,r")))] "" "neg%?\\t%0,%1%&" [(set_attr "type" "unary") @@ -3367,8 +3365,8 @@ archs4x, archs4xd" (set_attr "predicable" "no,no,yes,no")]) (define_insn "one_cmplsi2" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w") - (not:SI (match_operand:SI 1 "register_operand" "Rcqq,c")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,w") + (not:SI (match_operand:SI 1 "register_operand" "q,c")))] "" "not%? %0,%1%&" [(set_attr "type" "unary,unary") @@ -3498,9 +3496,9 @@ archs4x, archs4xd" (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")]) (define_insn "*lshrsi3_insn" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq, r, r, r") - (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, r,rCal") - (match_operand:SI 2 "nonmemory_operand" "N, N,RcqqM,rL,rL,rCal")))] + [(set (match_operand:SI 0 "dest_reg_operand" "=q,q, q, r, r, r") + (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,q, 0, 0, r,rCal") + (match_operand:SI 2 "nonmemory_operand" "N,N,qM,rL,rL,rCal")))] "TARGET_BARREL_SHIFTER && (register_operand (operands[1], SImode) || register_operand (operands[2], SImode))" @@ -3546,8 +3544,8 @@ archs4x, archs4xd" ;; modifed cc user if second, but not first operand is a compact register. (define_insn "cmpsi_cc_insn_mixed" [(set (reg:CC CC_REG) - (compare:CC (match_operand:SI 0 "register_operand" "Rcq#q,Rcqq, h, c, c,qRcq,c") - (match_operand:SI 1 "nonmemory_operand" "cO, hO,Cm1,cI,cL, Cal,Cal")))] + (compare:CC (match_operand:SI 0 "register_operand" "q, q, h, c, c, q,c") + (match_operand:SI 1 "nonmemory_operand" "cO,hO,Cm1,cI,cL,Cal,Cal")))] "" "cmp%? 
%0,%B1%&" [(set_attr "type" "compare") @@ -3559,7 +3557,7 @@ archs4x, archs4xd" (define_insn "*cmpsi_cc_zn_insn" [(set (reg:CC_ZN CC_REG) - (compare:CC_ZN (match_operand:SI 0 "register_operand" "qRcq,c") + (compare:CC_ZN (match_operand:SI 0 "register_operand" "q,c") (const_int 0)))] "" "tst%? %0,%0%&" @@ -3573,7 +3571,7 @@ archs4x, archs4xd" (define_insn "*btst" [(set (reg:CC_ZN CC_REG) (compare:CC_ZN - (zero_extract:SI (match_operand:SI 0 "register_operand" "Rcqq,c") + (zero_extract:SI (match_operand:SI 0 "register_operand" "q,c") (const_int 1) (match_operand:SI 1 "nonmemory_operand" "L,Lc")) (const_int 0)))] @@ -3618,7 +3616,7 @@ archs4x, archs4xd" (define_insn "*cmpsi_cc_z_insn" [(set (reg:CC_Z CC_REG) - (compare:CC_Z (match_operand:SI 0 "register_operand" "qRcq,c") + (compare:CC_Z (match_operand:SI 0 "register_operand" "q,c") (match_operand:SI 1 "p2_immediate_operand" "O,n")))] "" "@ @@ -3631,8 +3629,8 @@ archs4x, archs4xd" (define_insn "*cmpsi_cc_c_insn" [(set (reg:CC_C CC_REG) - (compare:CC_C (match_operand:SI 0 "register_operand" "Rcqq,Rcqq, h, c,Rcqq, c") - (match_operand:SI 1 "nonmemory_operand" "cO, hO,Cm1,cI, Cal,Cal")))] + (compare:CC_C (match_operand:SI 0 "register_operand" "q, q, h, c, q, c") + (match_operand:SI 1 "nonmemory_operand" "cO,hO,Cm1,cI,Cal,Cal")))] "" "cmp%? %0,%1%&" [(set_attr "type" "compare") @@ -3944,7 +3942,7 @@ archs4x, archs4xd" (const_int 2)))]) (define_insn "indirect_jump" - [(set (pc) (match_operand:SI 0 "nonmemory_operand" "L,I,Cal,Rcqq,r"))] + [(set (pc) (match_operand:SI 0 "nonmemory_operand" "L,I,Cal,q,r"))] "" "@ j%!%* %0%& @@ -4076,7 +4074,7 @@ archs4x, archs4xd" ; Unlike the canonical tablejump, this pattern always uses a jump address, ; even for CASE_VECTOR_PC_RELATIVE. 
(define_insn "casesi_jump" - [(set (pc) (match_operand:SI 0 "register_operand" "Cal,Rcqq,c")) + [(set (pc) (match_operand:SI 0 "register_operand" "Cal,q,c")) (use (label_ref (match_operand 1 "" "")))] "" "j%!%* [%0]%&" @@ -4106,18 +4104,16 @@ archs4x, archs4xd" } ") -; Rcq, which is used in alternative 0, checks for conditional execution. ; At instruction output time, if it doesn't match and we end up with ; alternative 1 ("q"), that means that we can't use the short form. (define_insn "*call_i" [(call (mem:SI (match_operand:SI 0 - "call_address_operand" "Rcq,q,c,Cji,Csc,Cbp,Cbr,L,I,Cal")) + "call_address_operand" "q,c,Cji,Csc,Cbp,Cbr,L,I,Cal")) (match_operand 1 "" "")) (clobber (reg:SI 31))] "" "@ jl%!%* [%0]%& - jl%!%* [%0]%& jl%!%* [%0] jli_s %S0 sjli %S0 @@ -4126,10 +4122,10 @@ archs4x, archs4xd" jl%!%* %0 jl%* %0 jl%! %0" - [(set_attr "type" "call,call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot") - (set_attr "iscompact" "maybe,false,*,true,*,*,*,*,*,*") - (set_attr "predicable" "no,no,yes,no,no,yes,no,yes,no,yes") - (set_attr "length" "*,*,4,2,4,4,4,4,4,8")]) + [(set_attr "type" "call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot") + (set_attr "iscompact" "maybe,*,true,*,*,*,*,*,*") + (set_attr "predicable" "no,yes,no,no,yes,no,yes,no,yes") + (set_attr "length" "*,4,2,4,4,4,4,4,8")]) (define_expand "call_value" ;; operand 2 is stack_size_rtx @@ -4151,19 +4147,17 @@ archs4x, archs4xd" XEXP (operands[1], 0) = force_reg (Pmode, callee); }") -; Rcq, which is used in alternative 0, checks for conditional execution. ; At instruction output time, if it doesn't match and we end up with ; alternative 1 ("q"), that means that we can't use the short form. 
(define_insn "*call_value_i" - [(set (match_operand 0 "dest_reg_operand" "=Rcq,q,w, w, w, w, w,w,w, w") + [(set (match_operand 0 "dest_reg_operand" "=q,w, w, w, w, w,w,w, w") (call (mem:SI (match_operand:SI 1 - "call_address_operand" "Rcq,q,c,Cji,Csc,Cbp,Cbr,L,I,Cal")) + "call_address_operand" "q,c,Cji,Csc,Cbp,Cbr,L,I,Cal")) (match_operand 2 "" ""))) (clobber (reg:SI 31))] "" "@ jl%!%* [%1]%& - jl%!%* [%1]%& jl%!%* [%1] jli_s %S1 sjli %S1 @@ -4172,10 +4166,10 @@ archs4x, archs4xd" jl%!%* %1 jl%* %1 jl%! %1" - [(set_attr "type" "call,call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot") - (set_attr "iscompact" "maybe,false,*,true,false,*,*,*,*,*") - (set_attr "predicable" "no,no,yes,no,no,yes,no,yes,no,yes") - (set_attr "length" "*,*,4,2,4,4,4,4,4,8")]) + [(set_attr "type" "call,call,call_no_delay_slot,call_no_delay_slot,call,call,call,call,call_no_delay_slot") + (set_attr "iscompact" "maybe,*,true,false,*,*,*,*,*") + (set_attr "predicable" "no,yes,no,no,yes,no,yes,no,yes") + (set_attr "length" "*,4,2,4,4,4,4,4,8")]) ; There is a bl_s instruction (16 bit opcode branch-and-link), but we can't ; use it for lack of inter-procedural branch shortening. 
@@ -4943,7 +4937,7 @@ archs4x, archs4xd" [(set (pc) (if_then_else (match_operator 3 "equality_comparison_operator" - [(zero_extract:SI (match_operand:SI 1 "register_operand" "Rcqq,c") + [(zero_extract:SI (match_operand:SI 1 "register_operand" "q,c") (const_int 1) (match_operand:SI 2 "nonmemory_operand" "L,Lc")) (const_int 0)]) @@ -5153,7 +5147,7 @@ archs4x, archs4xd" (set_attr "predicable" "yes")]) (define_insn "abssf2" - [(set (match_operand:SF 0 "dest_reg_operand" "=Rcq#q,r,r") + [(set (match_operand:SF 0 "dest_reg_operand" "=q,r,r") (abs:SF (match_operand:SF 1 "register_operand" "0,0,r")))] "" "bclr%?\\t%0,%1,31%&" @@ -5966,8 +5960,8 @@ archs4x, archs4xd" (set_attr "length" "4")]) (define_insn "*ashlsi2_cnt1" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w") - (ashift:SI (match_operand:SI 1 "register_operand" "Rcqq,c") + [(set (match_operand:SI 0 "dest_reg_operand" "=q,w") + (ashift:SI (match_operand:SI 1 "register_operand" "q,c") (const_int 1)))] "" "asl%? %0,%1%&" @@ -5999,8 +5993,8 @@ archs4x, archs4xd" (set_attr "predicable" "no")]) (define_insn "*lshrsi3_cnt1" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w") - (lshiftrt:SI (match_operand:SI 1 "register_operand" "Rcqq,c") + [(set (match_operand:SI 0 "dest_reg_operand" "=q,w") + (lshiftrt:SI (match_operand:SI 1 "register_operand" "q,c") (const_int 1)))] "" "lsr%? %0,%1%&" @@ -6009,8 +6003,8 @@ archs4x, archs4xd" (set_attr "predicable" "no,no")]) (define_insn "*ashrsi3_cnt1" - [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w") - (ashiftrt:SI (match_operand:SI 1 "register_operand" "Rcqq,c") + [(set (match_operand:SI 0 "dest_reg_operand" "=q,w") + (ashiftrt:SI (match_operand:SI 1 "register_operand" "q,c") (const_int 1)))] "" "asr%? 
%0,%1%&" diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md index 69ec4d5..38bda12 100644 --- a/gcc/config/arc/constraints.md +++ b/gcc/config/arc/constraints.md @@ -432,26 +432,6 @@ && !arc_legitimate_pic_addr_p (op) && !(satisfies_constraint_I (op) && optimize_size)")) -; Note that the 'cryptic' register constraints will not make reload use the -; associated class to reload into, but this will not penalize reloading of any -; other operands, or using an alternate part of the same alternative. - -; Rcq is different in three important ways from a register class constraint: -; - It does not imply a register class, hence reload will not use it to drive -; reloads. -; - It matches even when there is no register class to describe its accepted -; set; not having such a set again lessens the impact on register allocation. -; - It won't match when the instruction is conditionalized by the ccfsm. -(define_constraint "Rcq" - "@internal - Cryptic q - for short insn generation while not affecting register allocation - Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3}, - @code{r12}-@code{r15}" - (and (match_code "reg") - (match_test "TARGET_Rcq - && !arc_ccfsm_cond_exec_p () - && IN_RANGE (REGNO (op) ^ 4, 4, 11)"))) - (define_constraint "Rcb" "@internal Stack Pointer register @code{r28} - do not reload into its class" -- cgit v1.1 From 59d27853f10fdefa32779d74056fb7d16a6bb965 Mon Sep 17 00:00:00 2001 From: Claudiu Zissulescu Date: Mon, 10 Oct 2022 10:27:12 +0300 Subject: arc: Remove obsolete mRcq and mRcw options. gcc/ * common/config/arc/arc-common.cc (arc_option_optimization_table): Remove Rcq and Rcw options. * config/arc/arc.opt (mRcq): Ignore option, preserve it for backwards compatibility. (mRcw): Likewise. * doc/invoke.texi (mRcw, mRcq): Update document. 
Signed-off-by: Claudiu Zissulescu --- gcc/common/config/arc/arc-common.cc | 2 -- gcc/config/arc/arc.opt | 10 ++++++---- gcc/doc/invoke.texi | 8 ++------ 3 files changed, 8 insertions(+), 12 deletions(-) (limited to 'gcc') diff --git a/gcc/common/config/arc/arc-common.cc b/gcc/common/config/arc/arc-common.cc index e69c4a4..62fe542 100644 --- a/gcc/common/config/arc/arc-common.cc +++ b/gcc/common/config/arc/arc-common.cc @@ -44,8 +44,6 @@ arc_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED) #define OPT_LEVELS_3_PLUS_SPEED_ONLY OPT_LEVELS_3_PLUS static const struct default_options arc_option_optimization_table[] = { - { OPT_LEVELS_ALL, OPT_mRcq, NULL, 1 }, - { OPT_LEVELS_ALL, OPT_mRcw, NULL, 1 }, { OPT_LEVELS_ALL, OPT_msize_level_, NULL, 1 }, { OPT_LEVELS_ALL, OPT_mearly_cbranchsi, NULL, 1 }, { OPT_LEVELS_ALL, OPT_mbbit_peephole, NULL, 1 }, diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt index 0add5a2..b5827325 100644 --- a/gcc/config/arc/arc.opt +++ b/gcc/config/arc/arc.opt @@ -308,12 +308,14 @@ Target Ignore Does nothing. Preserved for backward compatibility. mRcq -Target Var(TARGET_Rcq) -Enable Rcq constraint handling - most short code generation depends on this. +Target Ignore +Does nothing. Preserved for backward compatibility. + mRcw -Target Var(TARGET_Rcw) -Enable Rcw constraint handling - ccfsm condexec mostly depends on this. +Target Ignore +Does nothing. Preserved for backward compatibility. + mearly-cbranchsi Target Var(TARGET_EARLY_CBRANCHSI) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1eeaec1..278c55d 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -20896,15 +20896,11 @@ This is the default for @option{-Os}. @item -mRcq @opindex mRcq -Enable @samp{Rcq} constraint handling. -Most short code generation depends on this. -This is the default. +Does nothing. Preserved for backward compatibility. @item -mRcw @opindex mRcw -Enable @samp{Rcw} constraint handling. 
-Most ccfsm condexec mostly depends on this. -This is the default. +Does nothing. Preserved for backward compatibility. @item -msize-level=@var{level} @opindex msize-level -- cgit v1.1 From 70f66a47fdf7d5f3837e475175b622abb5b985ad Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 10 Oct 2022 09:34:35 +0200 Subject: openmp, fortran: Fix up IFN_ASSUME call Like in other spots in trans-openmp.cc that create a TARGET_EXPR, the slot has to be created with create_tmp_var_raw, because gfc_create_var adds the var to BLOCK_VARS and that ICEs during expansion because gimple_add_tmp_var_fn has: gcc_assert (!DECL_CHAIN (tmp) && !DECL_SEEN_IN_BIND_EXPR_P (tmp)); assertion. Also, both C/C++ ensure the argument to IFN_ASSUME has boolean_type_node, it is easier if Fortran does that too. 2022-10-10 Jakub Jelinek * trans-openmp.cc (gfc_trans_omp_assume): Use create_tmp_var_raw instead of gfc_create_var for TARGET_EXPR slot creation. Create it with boolean_type_node and convert. --- gcc/fortran/trans-openmp.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 8ea573f..9bd4e6c 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -4588,11 +4588,14 @@ gfc_trans_omp_assume (gfc_code *code) t = se.expr; else { - tree var = gfc_create_var (TREE_TYPE (se.expr), NULL); + tree var = create_tmp_var_raw (boolean_type_node); + DECL_CONTEXT (var) = current_function_decl; stmtblock_t block2; gfc_init_block (&block2); gfc_add_block_to_block (&block2, &se.pre); - gfc_add_modify_loc (loc, &block2, var, se.expr); + gfc_add_modify_loc (loc, &block2, var, + fold_convert_loc (loc, boolean_type_node, + se.expr)); gfc_add_block_to_block (&block2, &se.post); t = gfc_finish_block (&block2); t = build4 (TARGET_EXPR, boolean_type_node, var, t, NULL, NULL); -- cgit v1.1 From 9b8520fa9d745b3a974d5eb98cb4b9a9021b215d Mon Sep 17 00:00:00 2001 From: liuhongt Date: Sun, 9 Oct 2022 
15:30:10 +0800 Subject: Fix unrecognizable insn of cvtss2si. Adjust lrintmn2 operand predicates according to real instructions. gcc/ChangeLog: PR target/107185 * config/i386/i386.md (lrint2): Swap predicate of operands[0] and operands[1]. gcc/testsuite/ChangeLog: * gcc.target/i386/pr107185.c: New test. --- gcc/config/i386/i386.md | 4 ++-- gcc/testsuite/gcc.target/i386/pr107185.c | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr107185.c (limited to 'gcc') diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 1be9b66..9475137 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -20145,8 +20145,8 @@ (set_attr "mode" "")]) (define_expand "lrint2" - [(set (match_operand:SWI48 0 "nonimmediate_operand") - (unspec:SWI48 [(match_operand:MODEF 1 "register_operand")] + [(set (match_operand:SWI48 0 "register_operand") + (unspec:SWI48 [(match_operand:MODEF 1 "nonimmediate_operand")] UNSPEC_FIX_NOTRUNC))] "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH") diff --git a/gcc/testsuite/gcc.target/i386/pr107185.c b/gcc/testsuite/gcc.target/i386/pr107185.c new file mode 100644 index 0000000..333191a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr107185.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-ffast-math" } */ + +void +foo (float f) +{ + long p = __builtin_lrintf (f); +} -- cgit v1.1 From 98ad452728f51d6f44c7ff71e3d20c6c1dca20fd Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Wed, 5 Oct 2022 20:23:15 +0200 Subject: Add op1_range for __builtin_signbit. This is the op1_range range-op entry for __builtin_signbit. It allows us to wind back through a call to signbit. For example, on the true side of if (__builtin_signbit(x_5) != 0) we can crop down the range of x_5 to: [frange] float [-Inf, -0.0 (-0x0.0p+0)] -NAN Similarly on the false side, we can crop to: [frange] float [0.0 (0x0.0p+0), +Inf] +NAN Tested on x86-64 Linux.
gcc/ChangeLog: * gimple-range-op.cc: Add op1_range entry for __builtin_signbit. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/vrp-float-signbit-3.c: New test. --- gcc/gimple-range-op.cc | 20 ++++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/vrp-float-signbit-3.c | 15 +++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/vrp-float-signbit-3.c (limited to 'gcc') diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc index 42ebc7d..abc33e7 100644 --- a/gcc/gimple-range-op.cc +++ b/gcc/gimple-range-op.cc @@ -306,6 +306,7 @@ class cfn_signbit : public range_operator_float { public: using range_operator_float::fold_range; + using range_operator_float::op1_range; virtual bool fold_range (irange &r, tree type, const frange &lh, const irange &, relation_kind) const { @@ -320,6 +321,25 @@ public: } return false; } + virtual bool op1_range (frange &r, tree type, const irange &lhs, + const frange &, relation_kind) const override + { + if (lhs.zero_p ()) + { + r.set (type, dconst0, frange_val_max (type)); + r.update_nan (false); + return true; + } + if (!lhs.contains_p (build_zero_cst (lhs.type ()))) + { + REAL_VALUE_TYPE dconstm0 = dconst0; + dconstm0.sign = 1; + r.set (type, frange_val_min (type), dconstm0); + r.update_nan (true); + return true; + } + return false; + } } op_cfn_signbit; // Implement range operator for CFN_BUILT_IN_TOUPPER and CFN_BUILT_IN_TOLOWER. 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp-float-signbit-3.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp-float-signbit-3.c new file mode 100644 index 0000000..182a33f --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp-float-signbit-3.c @@ -0,0 +1,15 @@ +// { dg-do compile } +// { dg-options "-O2 -ffinite-math-only -fdump-tree-evrp" } + +void link_error(); + +void foo(float x) +{ + if (__builtin_signbit (x)) + { + if (x > 0.0) + link_error(); + } +} + +// { dg-final { scan-tree-dump-not "link_error" "evrp" } } -- cgit v1.1 From a99f511c57b5b02edfd5969148c580b4a8737ee8 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 10 Oct 2022 12:04:56 +0200 Subject: Require fgraphite effective target for pr107153.c test [PR107153] The test uses -floop-parallelize-all which emits a sorry when graphite isn't configured in. 2022-10-10 Jakub Jelinek PR tree-optimization/107153 * gcc.dg/autopar/pr107153.c: Require fgraphite effective target. --- gcc/testsuite/gcc.dg/autopar/pr107153.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/autopar/pr107153.c b/gcc/testsuite/gcc.dg/autopar/pr107153.c index 2391a67..dbdc1c1 100644 --- a/gcc/testsuite/gcc.dg/autopar/pr107153.c +++ b/gcc/testsuite/gcc.dg/autopar/pr107153.c @@ -1,4 +1,4 @@ -/* { dg-do compile } */ +/* { dg-do compile { target fgraphite } } */ /* { dg-options "-O1 -floop-parallelize-all -ftree-parallelize-loops=2 -fno-tree-dominator-opts" } */ void -- cgit v1.1 From 8b6bcedc88d54415cbc018b3f7fc2bc20dcd4800 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 10 Oct 2022 13:43:56 +0200 Subject: Return non-legacy ranges in range.h. int_range<1> is a legacy range (think anti ranges, legacy VRP, etc). There is a penalty for converting anything built with <1> to non-legacy. Since most of the uses of these functions are now ranger, we can save a minuscule amount of time by converting them to non-legacy. gcc/ChangeLog: * range.h (range_true): Return int_range<2>.
(range_false): Same. (range_true_and_false): Same. --- gcc/range.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/range.h b/gcc/range.h index 5c70c66..8138d6f 100644 --- a/gcc/range.h +++ b/gcc/range.h @@ -32,7 +32,7 @@ static inline int_range<1> range_true (tree type) { unsigned prec = TYPE_PRECISION (type); - return int_range<1> (type, wi::one (prec), wi::one (prec)); + return int_range<2> (type, wi::one (prec), wi::one (prec)); } // Return an irange instance that is a boolean FALSE. @@ -41,7 +41,7 @@ static inline int_range<1> range_false (tree type) { unsigned prec = TYPE_PRECISION (type); - return int_range<1> (type, wi::zero (prec), wi::zero (prec)); + return int_range<2> (type, wi::zero (prec), wi::zero (prec)); } // Return an irange that covers both true and false. @@ -50,7 +50,7 @@ static inline int_range<1> range_true_and_false (tree type) { unsigned prec = TYPE_PRECISION (type); - return int_range<1> (type, wi::zero (prec), wi::one (prec)); + return int_range<2> (type, wi::zero (prec), wi::one (prec)); } #endif // GCC_RANGE_H -- cgit v1.1 From 58fa21622b01d6064024c92bb09fb60fa5b75977 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 10 Oct 2022 11:01:48 +0200 Subject: x UNORD x should set NAN on the TRUE side (and !NAN on the FALSE side). gcc/ChangeLog: * range-op-float.cc (foperator_unordered::op1_range): Set NAN when operands are equal and result is TRUE. 
--- gcc/range-op-float.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index 68578aa..91833d3 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -1026,23 +1026,27 @@ bool foperator_unordered::op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind) const + relation_kind rel) const { switch (get_bool_state (r, lhs, type)) { case BRS_TRUE: + if (rel == VREL_EQ) + r.set_nan (type); // Since at least one operand must be NAN, if one of them is // not, the other must be. - if (!op2.maybe_isnan ()) + else if (!op2.maybe_isnan ()) r.set_nan (type); else r.set_varying (type); break; case BRS_FALSE: + if (rel == VREL_EQ) + r.clear_nan (); // A false UNORDERED means both operands are !NAN, so it's // impossible for op2 to be a NAN. - if (op2.known_isnan ()) + else if (op2.known_isnan ()) r.set_undefined (); else { -- cgit v1.1 From 69988cd58becc115b236f88627fe92436baaa674 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 10 Oct 2022 11:15:43 +0200 Subject: The true side of x != x should set NAN. gcc/ChangeLog: * range-op-float.cc (foperator_not_equal::op1_range): Set NAN on TRUE side for x != x. --- gcc/range-op-float.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index 91833d3..5ffe38d 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -497,14 +497,17 @@ bool foperator_not_equal::op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind) const + relation_kind rel) const { switch (get_bool_state (r, lhs, type)) { case BRS_TRUE: + // The TRUE side of op1 != op1 implies op1 is NAN. + if (rel == VREL_EQ) + r.set_nan (type); // If the result is true, the only time we know anything is if // OP2 is a constant. 
- if (op2.singleton_p ()) + else if (op2.singleton_p ()) { // This is correct even if op1 is NAN, because the following // range would be ~[tmp, tmp] with the NAN property set to -- cgit v1.1 From e23f7dabe6f7ce1d92bfc07d2b62cc7bcdc0caae Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 10 Oct 2022 13:43:16 +0200 Subject: Add frange::maybe_isnan (bool sign). It is useful to know if there's the possibility of a NAN with a given sign. This is to complement maybe_isnan(void) which returns TRUE for a NAN of any sign. A follow-up patch implementing ABS will make use of this. gcc/ChangeLog: * value-range.h (frange::maybe_isnan): New. --- gcc/value-range.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'gcc') diff --git a/gcc/value-range.h b/gcc/value-range.h index 484f911..07a2067 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -323,6 +323,7 @@ public: bool known_isnan () const; bool known_isinf () const; bool maybe_isnan () const; + bool maybe_isnan (bool sign) const; bool maybe_isinf () const; bool signbit_p (bool &signbit) const; private: @@ -1295,6 +1296,18 @@ frange::maybe_isnan () const return m_pos_nan || m_neg_nan; } +// Return TRUE if range is possibly a NAN with SIGN. + +inline bool +frange::maybe_isnan (bool sign) const +{ + if (undefined_p ()) + return false; + if (sign) + return m_neg_nan; + return m_pos_nan; +} + // Return TRUE if range is a +NAN or -NAN. inline bool -- cgit v1.1 From 80cb09d4f773a366bd8f9593729a5a80b5aa8850 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 10 Oct 2022 13:46:32 +0200 Subject: Make range-op-float entries public. gcc/ChangeLog: * range-op-float.cc (class foperator_identity): Make members public. (class foperator_equal): Same. (class foperator_not_equal): Same. (class foperator_lt): Same. (class foperator_le): Same. (class foperator_gt): Same. (class foperator_ge): Same. (class foperator_unordered): Same. (class foperator_ordered): Same.
--- gcc/range-op-float.cc | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'gcc') diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index 5ffe38d..3cf117d 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -315,7 +315,7 @@ class foperator_identity : public range_operator_float { using range_operator_float::fold_range; using range_operator_float::op1_range; - +public: bool fold_range (frange &r, tree type ATTRIBUTE_UNUSED, const frange &op1, const frange &op2 ATTRIBUTE_UNUSED, relation_kind) const final override @@ -338,7 +338,7 @@ class foperator_equal : public range_operator_float using range_operator_float::fold_range; using range_operator_float::op1_range; using range_operator_float::op2_range; - +public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, relation_kind rel) const final override; @@ -444,7 +444,7 @@ class foperator_not_equal : public range_operator_float { using range_operator_float::fold_range; using range_operator_float::op1_range; - +public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, relation_kind rel) const final override; @@ -545,7 +545,7 @@ class foperator_lt : public range_operator_float using range_operator_float::fold_range; using range_operator_float::op1_range; using range_operator_float::op2_range; - +public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, relation_kind rel) const final override; @@ -660,7 +660,7 @@ class foperator_le : public range_operator_float using range_operator_float::fold_range; using range_operator_float::op1_range; using range_operator_float::op2_range; - +public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, relation_kind rel) const final override; @@ -767,7 +767,7 @@ class foperator_gt : public range_operator_float using range_operator_float::fold_range; using range_operator_float::op1_range; using range_operator_float::op2_range; - 
+public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, relation_kind rel) const final override; @@ -882,7 +882,7 @@ class foperator_ge : public range_operator_float using range_operator_float::fold_range; using range_operator_float::op1_range; using range_operator_float::op2_range; - +public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, relation_kind rel) const final override; @@ -993,7 +993,6 @@ class foperator_unordered : public range_operator_float using range_operator_float::fold_range; using range_operator_float::op1_range; using range_operator_float::op2_range; - public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, @@ -1071,7 +1070,6 @@ class foperator_ordered : public range_operator_float using range_operator_float::fold_range; using range_operator_float::op1_range; using range_operator_float::op2_range; - public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, -- cgit v1.1 From 684d238b8cd7e8222d9e66457815f2a63178730b Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Mon, 10 Oct 2022 21:05:50 +0800 Subject: RISC-V: Add newline to the end of file [NFC] gcc/ChangeLog: * config/riscv/riscv-c.cc: Add newline to the end of file. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pragma-1.c: Add newline to the end of file. * gcc.target/riscv/rvv/base/pragma-2.c: Ditto. * gcc.target/riscv/rvv/base/pragma-3.c: Ditto. * gcc.target/riscv/rvv/base/user-1.c: Ditto. * gcc.target/riscv/rvv/base/user-2.c: Ditto. * gcc.target/riscv/rvv/base/user-3.c: Ditto. * gcc.target/riscv/rvv/base/user-4.c: Ditto. * gcc.target/riscv/rvv/base/user-5.c: Ditto. * gcc.target/riscv/rvv/base/user-6.c: Ditto. * gcc.target/riscv/rvv/base/vread_csr.c: Ditto. * gcc.target/riscv/rvv/base/vwrite_csr.c: Ditto. 
--- gcc/config/riscv/riscv-c.cc | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/pragma-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/pragma-2.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/pragma-3.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/user-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/user-2.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/user-3.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/user-4.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/user-5.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/user-6.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/vread_csr.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/vwrite_csr.c | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc index cac0043..78f6eac 100644 --- a/gcc/config/riscv/riscv-c.cc +++ b/gcc/config/riscv/riscv-c.cc @@ -190,4 +190,4 @@ void riscv_register_pragmas (void) { c_register_pragma ("riscv", "intrinsic", riscv_pragma_intrinsic); -} \ No newline at end of file +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-1.c index 79b1159..3d81b17 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-1.c @@ -1,4 +1,4 @@ /* { dg-do compile } */ /* { dg-options "-O3 -march=rv32gc -mabi=ilp32d" } */ -#pragma riscv intrinsic "vector" /* { dg-error {#pragma riscv intrinsic' option 'vector' needs 'V' extension enabled} } */ \ No newline at end of file +#pragma riscv intrinsic "vector" /* { dg-error {#pragma riscv intrinsic' option 'vector' needs 'V' extension enabled} } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-2.c index fa790b1..fd2aa30 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-2.c @@ -1,4 +1,4 @@ /* { dg-do compile } */ /* 
{ dg-skip-if "test rvv intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } */ -#pragma riscv intrinsic "vector" \ No newline at end of file +#pragma riscv intrinsic "vector" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-3.c index 86da678..96a0e05 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pragma-3.c @@ -1,4 +1,4 @@ /* { dg-do compile } */ /* { dg-skip-if "test rvv intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } */ -#pragma riscv intrinsic "report-error" /* { dg-error {unknown '#pragma riscv intrinsic' option 'report-error'} } */ \ No newline at end of file +#pragma riscv intrinsic "report-error" /* { dg-error {unknown '#pragma riscv intrinsic' option 'report-error'} } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/user-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/user-1.c index 299e393..fa1f0f3 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/user-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/user-1.c @@ -62,4 +62,4 @@ void foo61 () {vfloat32m8_t t;} void foo62 () {vfloat64m1_t t;} void foo63 () {vfloat64m2_t t;} void foo64 () {vfloat64m4_t t;} -void foo65 () {vfloat64m8_t t;} \ No newline at end of file +void foo65 () {vfloat64m8_t t;} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/user-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/user-2.c index 2a88467..92f4ee0 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/user-2.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/user-2.c @@ -62,4 +62,4 @@ void foo61 () {vfloat32m8_t t;} /* { dg-error {unknown type name 'vfloat32m8_t'} void foo62 () {vfloat64m1_t t;} /* { dg-error {unknown type name 'vfloat64m1_t'} } */ void foo63 () {vfloat64m2_t t;} /* { dg-error {unknown type name 'vfloat64m2_t'} } */ void foo64 () {vfloat64m4_t t;} /* { dg-error {unknown type name 'vfloat64m4_t'} } */ -void foo65 () {vfloat64m8_t t;} /* { dg-error {unknown type name 'vfloat64m8_t'} 
} */ \ No newline at end of file +void foo65 () {vfloat64m8_t t;} /* { dg-error {unknown type name 'vfloat64m8_t'} } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/user-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/user-3.c index 85a6d04..3a42572 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/user-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/user-3.c @@ -62,4 +62,4 @@ void foo61 () {vfloat32m8_t t;} void foo62 () {vfloat64m1_t t;} /* { dg-error {unknown type name 'vfloat64m1_t'} } */ void foo63 () {vfloat64m2_t t;} /* { dg-error {unknown type name 'vfloat64m2_t'} } */ void foo64 () {vfloat64m4_t t;} /* { dg-error {unknown type name 'vfloat64m4_t'} } */ -void foo65 () {vfloat64m8_t t;} /* { dg-error {unknown type name 'vfloat64m8_t'} } */ \ No newline at end of file +void foo65 () {vfloat64m8_t t;} /* { dg-error {unknown type name 'vfloat64m8_t'} } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/user-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/user-4.c index c51c03e..76c5e60 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/user-4.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/user-4.c @@ -62,4 +62,4 @@ void foo61 () {vfloat32m8_t t;} void foo62 () {vfloat64m1_t t;} void foo63 () {vfloat64m2_t t;} void foo64 () {vfloat64m4_t t;} -void foo65 () {vfloat64m8_t t;} \ No newline at end of file +void foo65 () {vfloat64m8_t t;} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/user-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/user-5.c index fb1c684..de850e5 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/user-5.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/user-5.c @@ -62,4 +62,4 @@ void foo61 () {vfloat32m8_t t;} /* { dg-error {unknown type name 'vfloat32m8_t'} void foo62 () {vfloat64m1_t t;} /* { dg-error {unknown type name 'vfloat64m1_t'} } */ void foo63 () {vfloat64m2_t t;} /* { dg-error {unknown type name 'vfloat64m2_t'} } */ void foo64 () {vfloat64m4_t t;} /* { dg-error {unknown type name 'vfloat64m4_t'} } */ -void foo65 
() {vfloat64m8_t t;} /* { dg-error {unknown type name 'vfloat64m8_t'} } */ \ No newline at end of file +void foo65 () {vfloat64m8_t t;} /* { dg-error {unknown type name 'vfloat64m8_t'} } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/user-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/user-6.c index 5361fbd..1d79b6b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/user-6.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/user-6.c @@ -62,4 +62,4 @@ void foo61 () {vfloat32m8_t t;} void foo62 () {vfloat64m1_t t;} /* { dg-error {unknown type name 'vfloat64m1_t'} } */ void foo63 () {vfloat64m2_t t;} /* { dg-error {unknown type name 'vfloat64m2_t'} } */ void foo64 () {vfloat64m4_t t;} /* { dg-error {unknown type name 'vfloat64m4_t'} } */ -void foo65 () {vfloat64m8_t t;} /* { dg-error {unknown type name 'vfloat64m8_t'} } */ \ No newline at end of file +void foo65 () {vfloat64m8_t t;} /* { dg-error {unknown type name 'vfloat64m8_t'} } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vread_csr.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vread_csr.c index 9151349f..fa643c5 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/vread_csr.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vread_csr.c @@ -23,4 +23,4 @@ unsigned long vread_csr_vcsr(void) { /* { dg-final { scan-assembler-times {csrr\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*vstart} 1 } } */ /* { dg-final { scan-assembler-times {csrr\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*vxsat} 1 } } */ /* { dg-final { scan-assembler-times {csrr\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*vxrm} 1 } } */ -/* { dg-final { scan-assembler-times {csrr\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*vcsr} 1 } } */ \ No newline at end of file +/* { dg-final { scan-assembler-times {csrr\s+(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7]),\s*vcsr} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vwrite_csr.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vwrite_csr.c index a50eba7..e23da4b 100644 --- 
a/gcc/testsuite/gcc.target/riscv/rvv/base/vwrite_csr.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vwrite_csr.c @@ -23,4 +23,4 @@ void vwrite_csr_vcsr(unsigned long value) { /* { dg-final { scan-assembler-times {csrw\s+vstart,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])} 1 } } */ /* { dg-final { scan-assembler-times {csrw\s+vxsat,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])} 1 } } */ /* { dg-final { scan-assembler-times {csrw\s+vxrm,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])} 1 } } */ -/* { dg-final { scan-assembler-times {csrw\s+vcsr,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])} 1 } } */ \ No newline at end of file +/* { dg-final { scan-assembler-times {csrw\s+vcsr,\s*(?:ra|[sgtf]p|t[0-6]|s[0-9]|s10|s11|a[0-7])} 1 } } */ -- cgit v1.1 From 99e9796ca1e9afcf820214353756a9a140d89be3 Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Fri, 7 Oct 2022 16:54:05 +0800 Subject: RISC-V: Adjust testcase for rvv/base/user-1.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The -march option check isn't precise enough, -march=rv*v* also matches any zve extensions. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/user-1.c: Add dg-options and drop dg-skip-if.
Reported-by: Christoph Müllner Tested-by: Christoph Müllner Reviewed-by: Ju-Zhe Zhong --- gcc/testsuite/gcc.target/riscv/rvv/base/user-1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/user-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/user-1.c index fa1f0f3..00fb73f 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/user-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/user-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-skip-if "test rvv intrinsic" { *-*-* } { "*" } { "-march=rv*v*" } } */ +/* { dg-options "-O3 -march=rv32gcv -mabi=ilp32d" } */ #include "riscv_vector.h" -- cgit v1.1 From d0bbecb1c418b680505faa998fe420f0fd4bbfc1 Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Fri, 7 Oct 2022 16:55:14 +0800 Subject: RISC-V: Add riscv_vector.h wrapper in testsuite to prevent pull in stdint.h from C library MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A RISC-V linux/glibc toolchain will get a header-file-not-found error when including stdint.h if multilib is not enabled, because some header files will try to include gnu/stubs-.h from the system; however, that file is only generated when multilib is enabled. In order to prevent that, we introduce a wrapper for riscv_vector.h that includes stdint-gcc.h rather than the default stdint.h. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/riscv_vector.h: New.
Reported-by: Christoph Müllner Tested-by: Christoph Müllner Reviewed-by: Ju-Zhe Zhong --- gcc/testsuite/gcc.target/riscv/rvv/base/riscv_vector.h | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/riscv_vector.h (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/riscv_vector.h b/gcc/testsuite/gcc.target/riscv/rvv/base/riscv_vector.h new file mode 100644 index 0000000..fbb4858f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/riscv_vector.h @@ -0,0 +1,11 @@ +/* Wrapper of riscv_vector.h, prevent riscv_vector.h including stdint.h from + C library, that might cause problem on testing RV32 related testcase when + we disable multilib. */ +#ifndef _RISCV_VECTOR_WRAP_H + +#define _GCC_WRAP_STDINT_H +#include "stdint-gcc.h" +#include_next <riscv_vector.h> +#define _RISCV_VECTOR_WRAP_H + +#endif -- cgit v1.1 From 248c8aeebc49aae3fd96bd587367d12e7c8b3c3a Mon Sep 17 00:00:00 2001 From: Andrea Corallo Date: Tue, 27 Sep 2022 16:20:28 +0200 Subject: Don't ICE running selftests if errors were raised [PR99723] Hi all, this is to address PR 99723. In the PR GCC crashes as the initialization of common trees is not performed as no compilation is happening, this is because we raise an error earlier while processing the arch flags. This patch changes the code to execute selftests only if no errors were raised before. Bootstrapped on aarch64, okay for trunk? Best Regards Andrea 2022-09-27 Andrea Corallo PR other/99723 * toplev.cc (toplev::main): Don't run self tests in case of previous error. 
--- gcc/toplev.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/toplev.cc b/gcc/toplev.cc index 924871f..b53a78b 100644 --- a/gcc/toplev.cc +++ b/gcc/toplev.cc @@ -2276,7 +2276,7 @@ toplev::main (int argc, char **argv) start_timevars (); do_compile (no_backend); - if (flag_self_test) + if (flag_self_test && !seen_error ()) { if (no_backend) error_at (UNKNOWN_LOCATION, "self-tests incompatible with %<-E%>"); -- cgit v1.1 From 67efffec943656a509e036cd3c785a5c3d6885e1 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Thu, 29 Sep 2022 17:49:32 -0400 Subject: c-family: ICE with [[gnu::nocf_check]] [PR106937] When getting the name of an attribute, we ought to use get_attribute_name, which handles both [[]] and __attribute__(()) forms. Failure to do so may result in an ICE, like here. pp_c_attributes_display wasn't able to print the [[]] form of attributes, so this patch teaches it to. When printing a pointer to function with a standard attribute, the attribute should be printed after the parameter-list. With this patch we print: aka 'void (*)(int) [[gnu::nocf_check]]' or, in C++ with noexcept: aka 'void (*)(int) noexcept [[gnu::nocf_check]]' pp_c_attributes has been unused since its introduction in r56273 so this patch removes it. PR c++/106937 gcc/c-family/ChangeLog: * c-pretty-print.cc (pp_c_specifier_qualifier_list): Print only GNU attributes here. (c_pretty_printer::direct_abstract_declarator): Print the standard [[]] attributes here. (pp_c_attributes): Remove. (pp_c_attributes_display): Print the [[]] form if appropriate. Use get_attribute_name. Don't print a trailing space when printing the [[]] form. * c-pretty-print.h (pp_c_attributes): Remove. gcc/cp/ChangeLog: * error.cc: Include "attribs.h". (dump_type_prefix): Print only GNU attributes here. (dump_type_suffix): Print standard attributes here. gcc/testsuite/ChangeLog: * c-c++-common/pointer-to-fn1.c: New test. 
--- gcc/c-family/c-pretty-print.cc | 96 ++++++++++++++++------------- gcc/c-family/c-pretty-print.h | 1 - gcc/cp/error.cc | 16 ++++- gcc/testsuite/c-c++-common/pointer-to-fn1.c | 18 ++++++ 4 files changed, 86 insertions(+), 45 deletions(-) create mode 100644 gcc/testsuite/c-c++-common/pointer-to-fn1.c (limited to 'gcc') diff --git a/gcc/c-family/c-pretty-print.cc b/gcc/c-family/c-pretty-print.cc index efa1768..c99b2ce 100644 --- a/gcc/c-family/c-pretty-print.cc +++ b/gcc/c-family/c-pretty-print.cc @@ -466,7 +466,12 @@ pp_c_specifier_qualifier_list (c_pretty_printer *pp, tree t) { pp_c_whitespace (pp); pp_c_left_paren (pp); - pp_c_attributes_display (pp, TYPE_ATTRIBUTES (pointee)); + /* If we're dealing with the GNU form of attributes, print this: + void (__attribute__((noreturn)) *f) (); + If it is the standard [[]] attribute, we'll print the attribute + in c_pretty_printer::direct_abstract_declarator/FUNCTION_TYPE. */ + if (!cxx11_attribute_p (TYPE_ATTRIBUTES (pointee))) + pp_c_attributes_display (pp, TYPE_ATTRIBUTES (pointee)); } else if (!c_dialect_cxx ()) pp_c_whitespace (pp); @@ -595,6 +600,13 @@ c_pretty_printer::direct_abstract_declarator (tree t) case FUNCTION_TYPE: pp_c_parameter_type_list (this, t); direct_abstract_declarator (TREE_TYPE (t)); + /* If this is the standard [[]] attribute, print + void (*)() [[noreturn]]; */ + if (cxx11_attribute_p (TYPE_ATTRIBUTES (t))) + { + pp_space (this); + pp_c_attributes_display (this, TYPE_ATTRIBUTES (t)); + } break; case ARRAY_TYPE: @@ -850,32 +862,7 @@ c_pretty_printer::declaration (tree t) pp_c_init_declarator (this, t); } -/* Pretty-print ATTRIBUTES using GNU C extension syntax. 
*/ - -void -pp_c_attributes (c_pretty_printer *pp, tree attributes) -{ - if (attributes == NULL_TREE) - return; - - pp_c_ws_string (pp, "__attribute__"); - pp_c_left_paren (pp); - pp_c_left_paren (pp); - for (; attributes != NULL_TREE; attributes = TREE_CHAIN (attributes)) - { - pp_tree_identifier (pp, TREE_PURPOSE (attributes)); - if (TREE_VALUE (attributes)) - pp_c_call_argument_list (pp, TREE_VALUE (attributes)); - - if (TREE_CHAIN (attributes)) - pp_separate_with (pp, ','); - } - pp_c_right_paren (pp); - pp_c_right_paren (pp); -} - -/* Pretty-print ATTRIBUTES using GNU C extension syntax for attributes - marked to be displayed on disgnostic. */ +/* Pretty-print ATTRIBUTES marked to be displayed on diagnostic. */ void pp_c_attributes_display (c_pretty_printer *pp, tree a) @@ -885,10 +872,12 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a) if (a == NULL_TREE) return; + const bool std_p = cxx11_attribute_p (a); + for (; a != NULL_TREE; a = TREE_CHAIN (a)) { - const struct attribute_spec *as; - as = lookup_attribute_spec (TREE_PURPOSE (a)); + const struct attribute_spec *as + = lookup_attribute_spec (get_attribute_name (a)); if (!as || as->affects_type_identity == false) continue; if (c_dialect_cxx () @@ -896,26 +885,47 @@ pp_c_attributes_display (c_pretty_printer *pp, tree a) /* In C++ transaction_safe is printed at the end of the declarator. 
*/ continue; if (is_first) - { - pp_c_ws_string (pp, "__attribute__"); - pp_c_left_paren (pp); - pp_c_left_paren (pp); - is_first = false; - } + { + if (std_p) + { + pp_c_left_bracket (pp); + pp_c_left_bracket (pp); + } + else + { + pp_c_ws_string (pp, "__attribute__"); + pp_c_left_paren (pp); + pp_c_left_paren (pp); + } + is_first = false; + } else - { - pp_separate_with (pp, ','); - } - pp_tree_identifier (pp, TREE_PURPOSE (a)); + pp_separate_with (pp, ','); + tree ns; + if (std_p && (ns = get_attribute_namespace (a))) + { + pp_tree_identifier (pp, ns); + pp_colon (pp); + pp_colon (pp); + } + pp_tree_identifier (pp, get_attribute_name (a)); if (TREE_VALUE (a)) - pp_c_call_argument_list (pp, TREE_VALUE (a)); + pp_c_call_argument_list (pp, TREE_VALUE (a)); } if (!is_first) { - pp_c_right_paren (pp); - pp_c_right_paren (pp); - pp_c_whitespace (pp); + if (std_p) + { + pp_c_right_bracket (pp); + pp_c_right_bracket (pp); + } + else + { + pp_c_right_paren (pp); + pp_c_right_paren (pp); + pp_c_whitespace (pp); + } } } diff --git a/gcc/c-family/c-pretty-print.h b/gcc/c-family/c-pretty-print.h index be86bed..92674ab 100644 --- a/gcc/c-family/c-pretty-print.h +++ b/gcc/c-family/c-pretty-print.h @@ -119,7 +119,6 @@ void pp_c_space_for_pointer_operator (c_pretty_printer *, tree); /* Declarations. */ void pp_c_tree_decl_identifier (c_pretty_printer *, tree); void pp_c_function_definition (c_pretty_printer *, tree); -void pp_c_attributes (c_pretty_printer *, tree); void pp_c_attributes_display (c_pretty_printer *, tree); void pp_c_cv_qualifiers (c_pretty_printer *pp, int qualifiers, bool func_type); void pp_c_type_qualifier_list (c_pretty_printer *, tree); diff --git a/gcc/cp/error.cc b/gcc/cp/error.cc index 4514c8b..da8c95c 100644 --- a/gcc/cp/error.cc +++ b/gcc/cp/error.cc @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. 
If not see #include "internal-fn.h" #include "gcc-rich-location.h" #include "cp-name-hint.h" +#include "attribs.h" #define pp_separate_with_comma(PP) pp_cxx_separate_with (PP, ',') #define pp_separate_with_semicolon(PP) pp_cxx_separate_with (PP, ';') @@ -897,7 +898,12 @@ dump_type_prefix (cxx_pretty_printer *pp, tree t, int flags) { pp_cxx_whitespace (pp); pp_cxx_left_paren (pp); - pp_c_attributes_display (pp, TYPE_ATTRIBUTES (sub)); + /* If we're dealing with the GNU form of attributes, print this: + void (__attribute__((noreturn)) *f) (); + If it is the standard [[]] attribute, we'll print the attribute + in dump_type_suffix. */ + if (!cxx11_attribute_p (TYPE_ATTRIBUTES (sub))) + pp_c_attributes_display (pp, TYPE_ATTRIBUTES (sub)); } if (TYPE_PTR_P (t)) pp_star (pp); @@ -1030,6 +1036,14 @@ dump_type_suffix (cxx_pretty_printer *pp, tree t, int flags) if (tx_safe_fn_type_p (t)) pp_cxx_ws_string (pp, "transaction_safe"); dump_exception_spec (pp, TYPE_RAISES_EXCEPTIONS (t), flags); + /* If this is the standard [[]] attribute, print + void (*)() [[noreturn]]; */ + if (cxx11_attribute_p (TYPE_ATTRIBUTES (t))) + { + pp_space (pp); + pp_c_attributes_display (pp, TYPE_ATTRIBUTES (t)); + pp->padding = pp_before; + } dump_type_suffix (pp, TREE_TYPE (t), flags); break; } diff --git a/gcc/testsuite/c-c++-common/pointer-to-fn1.c b/gcc/testsuite/c-c++-common/pointer-to-fn1.c new file mode 100644 index 0000000..9758854 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pointer-to-fn1.c @@ -0,0 +1,18 @@ +/* PR c++/106937 */ +/* { dg-options "-fcf-protection" } */ +/* { dg-additional-options "-std=c++11 -fpermissive" { target c++ } } */ +/* Test printing a pointer to function with attribute. 
*/ + +__attribute__((nocf_check)) typedef void (*FPA1)(); +[[gnu::nocf_check]] typedef void (*FPA2)(int); +typedef void (*FP1)(); +typedef void (*FP2)(int); + +void +g (FP1 f1, FP2 f2) +{ + FPA1 p1 = f1; // { dg-warning {aka 'void \(__attribute__\(\(nocf_check\)\) \*\)\(\)'} } + FPA2 p2 = f2; // { dg-warning {aka 'void \(\*\)\(int\) \[\[gnu::nocf_check\]\]'} } + FP1 p3 = p1; // { dg-warning {aka 'void \(__attribute__\(\(nocf_check\)\) \*\)\(\)'} } + FP2 p4 = p2; // { dg-warning {aka 'void \(\*\)\(int\) \[\[gnu::nocf_check\]\]'} } +} -- cgit v1.1 From 0f6efd34c21ab6452aa846fd7e59acbccf15fbef Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Sat, 8 Oct 2022 11:47:56 -0700 Subject: compiler: only build thunk struct type when it is needed Instead of building the thunk struct type in the determine_types pass, build it when we need it. That ensures that we are consistent in determining whether an argument is constant. We no longer need to add a field for a call to recover, as the simplify_thunk_statements pass runs after the build_recover_thunks pass, so the additional argument will already have been added to the call. The test case is https://go.dev/cl/440297. Fixes golang/go#56109 Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/440298 --- gcc/go/gofrontend/MERGE | 2 +- gcc/go/gofrontend/statements.cc | 34 +++++++++------------------------- gcc/go/gofrontend/statements.h | 5 +---- 3 files changed, 11 insertions(+), 30 deletions(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 10ed3fe..1c24660 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -50707b4b51266166ce9bcf9de187e35760ec50f9 +164f2aeb1deec4c11e55b8bfb152ff7ff4c1dd4c The first line of this file holds the git revision number of the last merge done from the gofrontend repository. 
diff --git a/gcc/go/gofrontend/statements.cc b/gcc/go/gofrontend/statements.cc index b442830..af8c7d1 100644 --- a/gcc/go/gofrontend/statements.cc +++ b/gcc/go/gofrontend/statements.cc @@ -2349,7 +2349,7 @@ Thunk_statement::Thunk_statement(Statement_classification classification, Call_expression* call, Location location) : Statement(classification, location), - call_(call), struct_type_(NULL) + call_(call) { } @@ -2430,15 +2430,6 @@ void Thunk_statement::do_determine_types() { this->call_->determine_type_no_context(); - - // Now that we know the types of the call, build the struct used to - // pass parameters. - Call_expression* ce = this->call_->call_expression(); - if (ce == NULL) - return; - Function_type* fntype = ce->get_function_type(); - if (fntype != NULL && !this->is_simple(fntype)) - this->struct_type_ = this->build_struct(fntype); } // Check types in a thunk statement. @@ -2581,6 +2572,8 @@ Thunk_statement::simplify_statement(Gogo* gogo, Named_object* function, if (this->is_simple(fntype)) return false; + Struct_type* struct_type = this->build_struct(fntype); + Expression* fn = ce->fn(); Interface_field_reference_expression* interface_method = fn->interface_field_reference_expression(); @@ -2600,7 +2593,7 @@ Thunk_statement::simplify_statement(Gogo* gogo, Named_object* function, std::string thunk_name = gogo->thunk_name(); // Build the thunk. - this->build_thunk(gogo, thunk_name); + this->build_thunk(gogo, thunk_name, struct_type); // Generate code to call the thunk. @@ -2630,8 +2623,7 @@ Thunk_statement::simplify_statement(Gogo* gogo, Named_object* function, // Build the struct. Expression* constructor = - Expression::make_struct_composite_literal(this->struct_type_, vals, - location); + Expression::make_struct_composite_literal(struct_type, vals, location); // Allocate the initialized struct on the heap. 
constructor = Expression::make_heap_expression(constructor, location); @@ -2745,15 +2737,6 @@ Thunk_statement::build_struct(Function_type* fntype) fields->push_back(Struct_field(tid)); } - // The predeclared recover function has no argument. However, we - // add an argument when building recover thunks. Handle that here. - if (ce->is_recover_call()) - { - fields->push_back(Struct_field(Typed_identifier("can_recover", - Type::lookup_bool_type(), - location))); - } - const Expression_list* args = ce->args(); if (args != NULL) { @@ -2781,7 +2764,8 @@ Thunk_statement::build_struct(Function_type* fntype) // artificial, function. void -Thunk_statement::build_thunk(Gogo* gogo, const std::string& thunk_name) +Thunk_statement::build_thunk(Gogo* gogo, const std::string& thunk_name, + Struct_type* struct_type) { Location location = this->location(); @@ -2807,7 +2791,7 @@ Thunk_statement::build_thunk(Gogo* gogo, const std::string& thunk_name) // which is a pointer to the special structure we build. 
const char* const parameter_name = "__go_thunk_parameter"; Typed_identifier_list* thunk_parameters = new Typed_identifier_list(); - Type* pointer_to_struct_type = Type::make_pointer_type(this->struct_type_); + Type* pointer_to_struct_type = Type::make_pointer_type(struct_type); thunk_parameters->push_back(Typed_identifier(parameter_name, pointer_to_struct_type, location)); @@ -2914,7 +2898,7 @@ Thunk_statement::build_thunk(Gogo* gogo, const std::string& thunk_name) } Expression_list* call_params = new Expression_list(); - const Struct_field_list* fields = this->struct_type_->fields(); + const Struct_field_list* fields = struct_type->fields(); Struct_field_list::const_iterator p = fields->begin(); for (unsigned int i = 0; i < next_index; ++i) ++p; diff --git a/gcc/go/gofrontend/statements.h b/gcc/go/gofrontend/statements.h index c08b493..3d1ee33 100644 --- a/gcc/go/gofrontend/statements.h +++ b/gcc/go/gofrontend/statements.h @@ -1411,7 +1411,7 @@ class Thunk_statement : public Statement // Build the thunk. void - build_thunk(Gogo*, const std::string&); + build_thunk(Gogo*, const std::string&, Struct_type*); // Set the name to use for thunk field N. void @@ -1420,9 +1420,6 @@ class Thunk_statement : public Statement // The function call to be executed in a separate thread (go) or // later (defer). Expression* call_; - // The type used for a struct to pass to a thunk, if this is not a - // simple call. - Struct_type* struct_type_; }; // A go statement. -- cgit v1.1 From 29b0fe393859729215b0db5d28f2faea30c6ec32 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Sun, 9 Oct 2022 12:52:17 -0700 Subject: compiler: treat S("") as a string constant The compiler neglected to notice that a conversion from a string constant to a string type was a valid string constant. No test case because this only caused a compiler failure when compiling without optimization, which is not the normal case, and is not a case that we test. 
Fixes golang/go#56113 Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/441555 --- gcc/go/gofrontend/MERGE | 2 +- gcc/go/gofrontend/expressions.cc | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 1c24660..5b95b38 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -164f2aeb1deec4c11e55b8bfb152ff7ff4c1dd4c +6c188108858e3ae8c8ea8e4cc55427d8cf01bbc8 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc index 247ae1b..71838b1 100644 --- a/gcc/go/gofrontend/expressions.cc +++ b/gcc/go/gofrontend/expressions.cc @@ -4092,6 +4092,9 @@ Type_conversion_expression::do_numeric_constant_value( bool Type_conversion_expression::do_string_constant_value(std::string* val) const { + if (this->type_->is_string_type() && this->expr_->type()->is_string_type()) + return this->expr_->string_constant_value(val); + if (this->type_->is_string_type() && this->expr_->type()->integer_type() != NULL) { -- cgit v1.1 From c7cb239f51788dbe3148368942b208934707b6a7 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Mon, 10 Oct 2022 17:55:04 -0400 Subject: c++: Lambda context mangling VAR and FIELD decls can become part of a lambda context, when the lambda is 'attached' to that entity (It's a C++20 ODR thing that was discovered with modules, but is actually separate.) We were not marking those decls as substitution candidates, leading to demangling failures and variance from other compilers. This patch bumps the ABI, and adds these contexts to the substitution table. This is the intent of the ABI. gcc/ * common.opt (-fabi-version=): Document 18. * doc/invoke.texi (-fabi-version): Document 18. gcc/c-family/ * c-opts.cc (c_common_post_options): Bump abi to 18. 
gcc/cp/ * mangle.cc (write_prefix): Add VAR_DECL & FIELD_DECL to substitution table under abi=18. Note possible mismatch. gcc/testsuite/ * g++.dg/abi/lambda-ctx1-17.C: New. * g++.dg/abi/lambda-ctx1-18.C: New. * g++.dg/abi/lambda-ctx1-18vs17.C: New. * g++.dg/abi/lambda-ctx1.h: New. * g++.dg/abi/lambda-vis.C: Adjust expected mangles. * g++.dg/abi/macro0.C: Adjust. --- gcc/c-family/c-opts.cc | 2 +- gcc/common.opt | 3 +++ gcc/cp/mangle.cc | 9 ++++++++- gcc/doc/invoke.texi | 3 +++ gcc/testsuite/g++.dg/abi/lambda-ctx1-17.C | 10 ++++++++++ gcc/testsuite/g++.dg/abi/lambda-ctx1-18.C | 11 +++++++++++ gcc/testsuite/g++.dg/abi/lambda-ctx1-18vs17.C | 9 +++++++++ gcc/testsuite/g++.dg/abi/lambda-ctx1.h | 20 ++++++++++++++++++++ gcc/testsuite/g++.dg/abi/lambda-vis.C | 8 +++++--- gcc/testsuite/g++.dg/abi/macro0.C | 2 +- 10 files changed, 71 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/g++.dg/abi/lambda-ctx1-17.C create mode 100644 gcc/testsuite/g++.dg/abi/lambda-ctx1-18.C create mode 100644 gcc/testsuite/g++.dg/abi/lambda-ctx1-18vs17.C create mode 100644 gcc/testsuite/g++.dg/abi/lambda-ctx1.h (limited to 'gcc') diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc index babaa2f..55cebf6 100644 --- a/gcc/c-family/c-opts.cc +++ b/gcc/c-family/c-opts.cc @@ -975,7 +975,7 @@ c_common_post_options (const char **pfilename) /* Change flag_abi_version to be the actual current ABI level, for the benefit of c_cpp_builtins, and to make comparison simpler. */ - const int latest_abi_version = 17; + const int latest_abi_version = 18; /* Generate compatibility aliases for ABI v13 (8.2) by default. */ const int abi_compat_default = 13; diff --git a/gcc/common.opt b/gcc/common.opt index 3a97e67..bce3e51 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1004,6 +1004,9 @@ Driver Undocumented ; member initializers in C++14 and up. ; Default in G++ 12. ; +; 18: Corrects errors in mangling of lambdas with additional context. +; Default in G++ 13. 
+; ; Additional positive integers will be assigned as new versions of ; the ABI become the default version of the ABI. fabi-version= diff --git a/gcc/cp/mangle.cc b/gcc/cp/mangle.cc index f051e76..1215463 100644 --- a/gcc/cp/mangle.cc +++ b/gcc/cp/mangle.cc @@ -1252,7 +1252,14 @@ write_prefix (const tree node) { /* := M */ write_char ('M'); - return; + + /* Before ABI 18, we did not count these as substitution + candidates. This leads to incorrect demanglings (and + ABI divergence to other compilers). */ + if (abi_warn_or_compat_version_crosses (18)) + G.need_abi_warning = true; + if (!abi_version_at_least (18)) + return; } } diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 278c55d..271c8bb8 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -2973,6 +2973,9 @@ Version 17, which first appeared in G++ 12, fixes layout of classes that inherit from aggregate classes with default member initializers in C++14 and up. +Version 18, which first appeared in G++ 13, fixes manglings of lambdas +that have additional context. + See also @option{-Wabi}. 
@item -fabi-compat-version=@var{n} diff --git a/gcc/testsuite/g++.dg/abi/lambda-ctx1-17.C b/gcc/testsuite/g++.dg/abi/lambda-ctx1-17.C new file mode 100644 index 0000000..42f277a --- /dev/null +++ b/gcc/testsuite/g++.dg/abi/lambda-ctx1-17.C @@ -0,0 +1,10 @@ +// { dg-do compile { target c++20 } } +// { dg-options -fabi-version=17 } + +#include "lambda-ctx1.h" + +// These demangle incorrectly, due to a missed substitution candidate +// { dg-final { scan-assembler {_ZNK1C1fMUlT_E_clIMS_iEEDaS0_:} } } +// { dg-final { scan-assembler {_ZNK2L2MUlT_T0_E_clIifEEvS_S0_:} } } +// { dg-final { scan-assembler {_ZNK1B2L3MUlT_T0_E_clIjdEEvS0_S1_:} } } +// { dg-final { scan-assembler {_Z3fooIN1qMUlvE_EN1qMUlvE0_EEiOT_OT0_:} } } diff --git a/gcc/testsuite/g++.dg/abi/lambda-ctx1-18.C b/gcc/testsuite/g++.dg/abi/lambda-ctx1-18.C new file mode 100644 index 0000000..c1c9e27 --- /dev/null +++ b/gcc/testsuite/g++.dg/abi/lambda-ctx1-18.C @@ -0,0 +1,11 @@ +// { dg-do compile { target c++20 } } +// { dg-options -fabi-version=18 } + +#include "lambda-ctx1.h" + +// These correctly include the lambda's extra context as a +// substitution candidate, and thus demangle as expected +// { dg-final { scan-assembler {_ZNK1C1fMUlT_E_clIMS_iEEDaS1_:} } } +// { dg-final { scan-assembler {_ZNK2L2MUlT_T0_E_clIifEEvS0_S1_:} } } +// { dg-final { scan-assembler {_ZNK1B2L3MUlT_T0_E_clIjdEEvS1_S2_:} } } +// { dg-final { scan-assembler {_Z3fooIN1qMUlvE_ENS0_UlvE0_EEiOT_OT0_:} } } diff --git a/gcc/testsuite/g++.dg/abi/lambda-ctx1-18vs17.C b/gcc/testsuite/g++.dg/abi/lambda-ctx1-18vs17.C new file mode 100644 index 0000000..f5ec905 --- /dev/null +++ b/gcc/testsuite/g++.dg/abi/lambda-ctx1-18vs17.C @@ -0,0 +1,9 @@ +// { dg-do compile { target c++20 } } +// { dg-options {-fabi-version=18 -Wabi=17} } + +#include "lambda-ctx1.h" + +// { dg-regexp {[^\n]*lambda-ctx1.h:[:0-9]* warning: the mangled name [^\n]* \('_ZNK1B2L3MUlT_T0_E_clIjdEEvS0_S1_'\) and '-fabi-version=18' \('_ZNK1B2L3MUlT_T0_E_clIjdEEvS1_S2_'\) [^\n]*\n} } 
+// { dg-regexp {[^\n]*lambda-ctx1.h:[:0-9]* warning: the mangled name [^\n]* \('_ZNK2L2MUlT_T0_E_clIifEEvS_S0_'\) and '-fabi-version=18' \('_ZNK2L2MUlT_T0_E_clIifEEvS0_S1_'\) [^\n]*\n} } +// { dg-regexp {[^\n]*lambda-ctx1.h:[:0-9]* warning: the mangled name [^\n]* \('_ZNK1C1fMUlT_E_clIMS_iEEDaS0_'\) and '-fabi-version=18' \('_ZNK1C1fMUlT_E_clIMS_iEEDaS1_'\) [^\n]*\n} } +// { dg-regexp {[^\n]*lambda-ctx1.h:[:0-9]* warning: the mangled name [^\n]* \('_Z3fooIN1qMUlvE_EN1qMUlvE0_EEiOT_OT0_'\) and '-fabi-version=18' \('_Z3fooIN1qMUlvE_ENS0_UlvE0_EEiOT_OT0_'\) [^\n]*\n} } diff --git a/gcc/testsuite/g++.dg/abi/lambda-ctx1.h b/gcc/testsuite/g++.dg/abi/lambda-ctx1.h new file mode 100644 index 0000000..9afb66a --- /dev/null +++ b/gcc/testsuite/g++.dg/abi/lambda-ctx1.h @@ -0,0 +1,20 @@ +inline auto L2 = [] (T, U) -> void {}; +namespace B +{ + inline auto L3 = [] (T, U) -> void {}; +} + +struct C +{ + int f = [] (auto){ return 1;}(&C::f); + C (); +}; + +C::C () +{ + L2 (1, 1.2f); + B::L3 (1u, 1.2); +} + +template int foo (A&&, B&&) {return 0;} +inline int q = foo ([](){}, [](){}); diff --git a/gcc/testsuite/g++.dg/abi/lambda-vis.C b/gcc/testsuite/g++.dg/abi/lambda-vis.C index c1033f5..81cffcb 100644 --- a/gcc/testsuite/g++.dg/abi/lambda-vis.C +++ b/gcc/testsuite/g++.dg/abi/lambda-vis.C @@ -13,9 +13,11 @@ int gvar = gfoo (capture ([]{})); inline int ivar = ifoo (capture ([]{})); -// { dg-final { scan-assembler {_?_Z7captureINL4svarMUlvE_EE7WrapperIT_EOS2_:} } } -// { dg-final { scan-assembler {_?_Z7captureIN4gvarMUlvE_EE7WrapperIT_EOS2_:} } } -// { dg-final { scan-assembler {_?_Z7captureIN4ivarMUlvE_EE7WrapperIT_EOS2_:} } } +// These manglings change between ABIs 17 and 18 (the final +// substitution number). 
+// { dg-final { scan-assembler {_?_Z7captureINL4svarMUlvE_EE7WrapperIT_EOS3_:} } } +// { dg-final { scan-assembler {_?_Z7captureIN4gvarMUlvE_EE7WrapperIT_EOS3_:} } } +// { dg-final { scan-assembler {_?_Z7captureIN4ivarMUlvE_EE7WrapperIT_EOS3_:} } } // Calls to the foos are emitted. // { dg-final { scan-assembler {call[ \t]*_?_Z4sfooI7WrapperINL4svarMUlvE_EEEiT_} { target { i?86-*-* x86_64-*-* } } } } diff --git a/gcc/testsuite/g++.dg/abi/macro0.C b/gcc/testsuite/g++.dg/abi/macro0.C index 2d07fcd..4a0e9d0 100644 --- a/gcc/testsuite/g++.dg/abi/macro0.C +++ b/gcc/testsuite/g++.dg/abi/macro0.C @@ -1,6 +1,6 @@ // This testcase will need to be kept in sync with c_common_post_options. // { dg-options "-fabi-version=0" } -#if __GXX_ABI_VERSION != 1017 +#if __GXX_ABI_VERSION != 1018 #error "Incorrect value of __GXX_ABI_VERSION" #endif -- cgit v1.1 From 27bfe54e975d12aac750f0702f716a4c1c0a81fe Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 11 Oct 2022 00:17:00 +0000 Subject: Daily bump. --- gcc/ChangeLog | 146 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/c-family/ChangeLog | 17 ++++++ gcc/cp/ChangeLog | 12 ++++ gcc/fortran/ChangeLog | 6 ++ gcc/testsuite/ChangeLog | 58 +++++++++++++++++++ 6 files changed, 240 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8f729dc..bc10014 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,149 @@ +2022-10-10 Nathan Sidwell + + * common.opt (-fabi-version=): Document 18. + * doc/invoke.texi (-fabi-version): Document 18. + +2022-10-10 Andrea Corallo + + PR other/99723 + * toplev.cc (toplev::main): Don't run self tests in case of + previous error. + +2022-10-10 Kito Cheng + + * config/riscv/riscv-c.cc: Add newline to the end of file. + +2022-10-10 Aldy Hernandez + + * range-op-float.cc (class foperator_identity): Make members public. + (class foperator_equal): Same. + (class foperator_not_equal): Same. + (class foperator_lt): Same. 
+ (class foperator_le): Same. + (class foperator_gt): Same. + (class foperator_ge): Same. + (class foperator_unordered): Same. + (class foperator_ordered): Same. + +2022-10-10 Aldy Hernandez + + * value-range.h (frange::maybe_isnan): New. + +2022-10-10 Aldy Hernandez + + * range-op-float.cc (foperator_not_equal::op1_range): Set NAN on + TRUE side for x != x. + +2022-10-10 Aldy Hernandez + + * range-op-float.cc (foperator_unordered::op1_range): Set NAN when + operands are equal and result is TRUE. + +2022-10-10 Aldy Hernandez + + * range.h (range_true): Return int_range<2>. + (range_false): Same. + (range_true_and_false): Same. + +2022-10-10 Aldy Hernandez + + * gimple-range-op.cc: Add op1_range entry for __builtin_signbit. + +2022-10-10 liuhongt + + PR target/107185 + * config/i386/i386.md (lrint2): Swap + predicate of operands[0] and operands[1]. + +2022-10-10 Claudiu Zissulescu + + * common/config/arc/arc-common.cc (arc_option_optimization_table): + Remove Rcq and Rcw options. + * config/arc/arc.opt (mRcq): Ignore option, preserve it for + backwards compatibility. + (mRcw): Likewise. + * doc/invoke.texi (mRcw, mRcq): Update document. + +2022-10-10 Claudiu Zissulescu + + * config/arc/arc.cc (arc_check_short_reg_p): New function. + (arc_address_cost): Replace satisfies_constraint_Rcq with the + above new function. + (arc_output_addsi): Likewise. + (split_addsi): Likewise. + (split_subsi): Likewise. + * config/arc/arc.md (movqi_insn): Remove Rcq constraint. + (movhi_insn): Likewise. + (movsi_insn): Likewise. + (tst_movb): Likewise. + (tst): Likewise. + (tst_bitfield): Likewise. + (abssi2): Likewise. + (addsi3_mixed): Likewise. + (mulhisi3_reg): Likewise. + (umulhisi3_reg): Likewise. + (mulsi_600): Likewise. + (mul64): Likewise. + (subsi3_insn): Likewise. + (bicsi3_insn): Likewise. + (xorsi3): Likewise. + (negsi2): Likewise. + (one_cmplsi2): Likewise. + (lshrsi3_insn): Likewise. + (cmpsi_cc_insn_mixed): Likewise. + (cmpsi_cc_zn_insn): Likewise. + (btst): Likewise. 
+ (cmpsi_cc_z_insn): Likewise. + (cmpsi_cc_c_insn): Likewise. + (indirect_jump): Likewise. + (casesi_jump): Likewise. + (call_i): Likewise. + (call_value_i): Likewise. + (bbit): Likewise. + (abssf2): Likewise. + (ashlsi2_cnt1): Likewise. + (lshrsi3_cnt1): Likewise. + (ashrsi3_cnt1): Likewise. + * config/arc/constraints.md (Rcq): Remove. + +2022-10-10 Claudiu Zissulescu + + * config/arc/arc.md (smaxsi3): Remove Rcw. + (sminsi3): Likewise. + (addsi3_mixed): Likewise. + (add_f_2): Likewise. + (subsi3_insn): Likewise. + (sub_f): Likewise. + (sub_n): Likewise. + (bset): Likewise. + (bxor): Likewise. + (bclr): Likewise. + (bset_insn): Likewise. + (bxor_insn): Likewise. + (bclr_insn): Likewise. + (bmsk_insn): Likewise. + (bicsi3_insn): Likewise. + (xorsi3): Likewise. + (negsi2): Likewise. + (lshrsi3_insn): Likewise. + (abssf2): Likewise. + (negsf2): Likewise. + * config/arc/constraints.md(Rcw): Remove it. + +2022-10-10 Claudiu Zissulescu + + * config/arc/arc.md(mulsi3_700): Remove Rcr. + (mulsi3_highpart): Likewise. + (umulsi3_highpart_i): Likewise. + (umulsi3_highpart_int): Likewise. + (macd): Likewise. + (macdu): Likewise. + * config/arc/constraints.md (Rcr): Remove it. + +2022-10-10 Claudiu Zissulescu + + * config/arc/arc.cc (arc_save_callee_enter): Use negative offsets. + 2022-10-09 Dimitar Dimitrov PR target/106562 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 836fdac..ee52440 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20221010 +20221011 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 7bae1d5..f6176a7 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,20 @@ +2022-10-10 Nathan Sidwell + + * c-opts.cc (c_common_post_options): Bump abi to 18. + +2022-10-10 Marek Polacek + + PR c++/106937 + * c-pretty-print.cc (pp_c_specifier_qualifier_list): Print only GNU + attributes here. + (c_pretty_printer::direct_abstract_declarator): Print the standard [[]] + attributes here. + (pp_c_attributes): Remove. 
+ (pp_c_attributes_display): Print the [[]] form if appropriate. Use + get_attribute_name. Don't print a trailing space when printing the + [[]] form. + * c-pretty-print.h (pp_c_attributes): Remove. + 2022-10-07 Qing Zhao * c-attribs.cc (handle_strict_flex_array_attribute): New function. diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index e892628..50e5f3e 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,15 @@ +2022-10-10 Nathan Sidwell + + * mangle.cc (write_prefix): Add VAR_DECL & FIELD_DECL to + substitution table under abi=18. Note possible mismatch. + +2022-10-10 Marek Polacek + + PR c++/106937 + * error.cc: Include "attribs.h". + (dump_type_prefix): Print only GNU attributes here. + (dump_type_suffix): Print standard attributes here. + 2022-10-08 Jason Merrill * cp-tree.h (TARGET_EXPR_ELIDING_P): New. diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 573cbaf..094a11d 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,9 @@ +2022-10-10 Jakub Jelinek + + * trans-openmp.cc (gfc_trans_omp_assume): Use create_tmp_var_raw + instead of gfc_create_var for TARGET_EXPR slot creation. Create it + with boolean_type_node and convert. + 2022-10-08 Harald Anlauf Mikael Morin diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 1b40443..7230773 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,61 @@ +2022-10-10 Nathan Sidwell + + * g++.dg/abi/lambda-ctx1-17.C: New. + * g++.dg/abi/lambda-ctx1-18.C: New. + * g++.dg/abi/lambda-ctx1-18vs17.C: New. + * g++.dg/abi/lambda-ctx1.h: New. + * g++.dg/abi/lambda-vis.C: Adjust expected mangles. + * g++.dg/abi/macro0.C: Adjust. + +2022-10-10 Marek Polacek + + PR c++/106937 + * c-c++-common/pointer-to-fn1.c: New test. + +2022-10-10 Kito Cheng + + * gcc.target/riscv/rvv/base/riscv_vector.h: New. + +2022-10-10 Kito Cheng + + * gcc.target/riscv/rvv/base/user-1.c: Add dg-options and drop + dg-skip-if. 
+ +2022-10-10 Kito Cheng + + * gcc.target/riscv/rvv/base/pragma-1.c: Add newline to the end of file. + * gcc.target/riscv/rvv/base/pragma-2.c: Ditto. + * gcc.target/riscv/rvv/base/pragma-3.c: Ditto. + * gcc.target/riscv/rvv/base/user-1.c: Ditto. + * gcc.target/riscv/rvv/base/user-2.c: Ditto. + * gcc.target/riscv/rvv/base/user-3.c: Ditto. + * gcc.target/riscv/rvv/base/user-4.c: Ditto. + * gcc.target/riscv/rvv/base/user-5.c: Ditto. + * gcc.target/riscv/rvv/base/user-6.c: Ditto. + * gcc.target/riscv/rvv/base/vread_csr.c: Ditto. + * gcc.target/riscv/rvv/base/vwrite_csr.c: Ditto. + +2022-10-10 Jakub Jelinek + + PR tree-optimization/107153 + * gcc.dg/autopar/pr107153.c: Require fgraphite effective target. + +2022-10-10 Aldy Hernandez + + * gcc.dg/tree-ssa/vrp-float-signbit-3.c: New test. + +2022-10-10 liuhongt + + * gcc.target/i386/pr107185.c: New test. + +2022-10-10 Claudiu Zissulescu + + * gcc.target/arc/tmac-2.c: Update test. + +2022-10-10 Claudiu Zissulescu + + * gcc.target/arc/enter-dw2-1.c: New file. + 2022-10-09 Dimitar Dimitrov PR target/106562 -- cgit v1.1 From 1627d05240da3b1a985b1b2006b7a9f562fe9d43 Mon Sep 17 00:00:00 2001 From: Ju-Zhe Zhong Date: Mon, 10 Oct 2022 21:43:22 +0800 Subject: RISC-V: Add missing vsetvl instruction type. When implementing built-in framework, I notice I missed vsetvl instruction type, so add it in a single patch preparing for the following patches. gcc/ChangeLog: * config/riscv/riscv.md: Add vsetvl instruction type. Reviewed-by: Kito Cheng --- gcc/config/riscv/riscv.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 014206f..2d1cda2 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -229,6 +229,7 @@ ;; Classification of RVV instructions which will be added to each RVV .md pattern and used by scheduler. 
;; rdvlenb vector byte length vlenb csrr read ;; rdvl vector length vl csrr read +;; vsetvl vector configuration-setting instrucions ;; 7. Vector Loads and Stores ;; vlde vector unit-stride load instructions ;; vste vector unit-stride store instructions @@ -316,7 +317,7 @@ "unknown,branch,jump,call,load,fpload,store,fpstore, mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul, fmadd,fdiv,fcmp,fcvt,fsqrt,multi,auipc,sfb_alu,nop,ghost,bitmanip,rotate, - rdvlenb,rdvl,vlde,vste,vldm,vstm,vlds,vsts, + rdvlenb,rdvl,vsetvl,vlde,vste,vldm,vstm,vlds,vsts, vldux,vldox,vstux,vstox,vldff,vldr,vstr, vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp, vimul,vidiv,viwmul,vimuladd,viwmuladd,vimerge,vimov, -- cgit v1.1 From d2efb10a19b3948e48a2d9273b294db4e1d65296 Mon Sep 17 00:00:00 2001 From: Ju-Zhe Zhong Date: Mon, 10 Oct 2022 21:57:21 +0800 Subject: RISC-V: move struct vector_type_info from *.h to *.cc and change "user_name" into "name". gcc/ChangeLog: * config/riscv/riscv-vector-builtins.cc (struct vector_type_info): Move from config/riscv/riscv-vector-builtins.h. (DEF_RVV_TYPE): Change USER_NAME to NAME. (register_vector_type): Change user_name to name. * config/riscv/riscv-vector-builtins.def (DEF_RVV_TYPE): Change USER_NAME to NAME. * config/riscv/riscv-vector-builtins.h (struct vector_type_info): Move to riscv-vector-builtins.cc. (DEF_RVV_TYPE): Change USER_NAME to NAME. 
Reviewed-by: Kito Cheng --- gcc/config/riscv/riscv-vector-builtins.cc | 28 ++++++++++++++++++++++------ gcc/config/riscv/riscv-vector-builtins.def | 2 +- gcc/config/riscv/riscv-vector-builtins.h | 20 ++------------------ 3 files changed, 25 insertions(+), 25 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index 0096e32..7033b1f 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -50,10 +50,26 @@ using namespace riscv_vector; namespace riscv_vector { +/* Static information about each vector type. */ +struct vector_type_info +{ + /* The name of the type as declared by riscv_vector.h + which is recommend to use. For example: 'vint32m1_t'. */ + const char *name; + + /* ABI name of vector type. The type is always available + under this name, even when riscv_vector.h isn't included. + For example: '__rvv_int32m1_t'. */ + const char *abi_name; + + /* The C++ mangling of ABI_NAME. */ + const char *mangled_name; +}; + /* Information about each RVV type. */ static CONSTEXPR const vector_type_info vector_types[] = { -#define DEF_RVV_TYPE(USER_NAME, NCHARS, ABI_NAME, ARGS...) \ - {#USER_NAME, #ABI_NAME, "u" #NCHARS #ABI_NAME}, +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, ARGS...) \ + {#NAME, #ABI_NAME, "u" #NCHARS #ABI_NAME}, #include "riscv-vector-builtins.def" }; @@ -151,14 +167,14 @@ register_builtin_types () = TARGET_64BIT ? unsigned_intSI_type_node : long_unsigned_type_node; machine_mode mode; -#define DEF_RVV_TYPE(USER_NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ VECTOR_MODE_MIN_VLEN_32) \ mode = TARGET_MIN_VLEN > 32 ? VECTOR_MODE##mode \ : VECTOR_MODE_MIN_VLEN_32##mode; \ - scalar_types[VECTOR_TYPE_##USER_NAME] \ + scalar_types[VECTOR_TYPE_##NAME] \ = riscv_v_ext_enabled_vector_mode_p (mode) ? 
SCALAR_TYPE##_type_node \ : NULL_TREE; \ - vector_modes[VECTOR_TYPE_##USER_NAME] \ + vector_modes[VECTOR_TYPE_##NAME] \ = riscv_v_ext_enabled_vector_mode_p (mode) ? mode : VOIDmode; #include "riscv-vector-builtins.def" @@ -198,7 +214,7 @@ register_vector_type (vector_type_index type) is disabled according to '-march'. */ if (!vectype) return; - tree id = get_identifier (vector_types[type].user_name); + tree id = get_identifier (vector_types[type].name); tree decl = build_decl (input_location, TYPE_DECL, id, vectype); decl = lang_hooks.decls.pushdecl (decl); diff --git a/gcc/config/riscv/riscv-vector-builtins.def b/gcc/config/riscv/riscv-vector-builtins.def index a9001b3..664734b 100644 --- a/gcc/config/riscv/riscv-vector-builtins.def +++ b/gcc/config/riscv/riscv-vector-builtins.def @@ -32,7 +32,7 @@ along with GCC; see the file COPYING3. If not see TARGET_MIN_VLEN > 32. Otherwise the machine mode is VNx1SImode. */ #ifndef DEF_RVV_TYPE -#define DEF_RVV_TYPE(USER_NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ VECTOR_MODE_MIN_VLEN_32) #endif diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h index 6ca0b07..ec85e0b 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -26,28 +26,12 @@ namespace riscv_vector { /* This is for segment instructions. */ const unsigned int MAX_TUPLE_SIZE = 8; -/* Static information about each vector type. */ -struct vector_type_info -{ - /* The name of the type as declared by riscv_vector.h - which is recommend to use. For example: 'vint32m1_t'. */ - const char *user_name; - - /* ABI name of vector type. The type is always available - under this name, even when riscv_vector.h isn't included. - For example: '__rvv_int32m1_t'. */ - const char *abi_name; - - /* The C++ mangling of ABI_NAME. 
*/ - const char *mangled_name; -}; - /* Enumerates the RVV types, together called "vector types" for brevity. */ enum vector_type_index { -#define DEF_RVV_TYPE(USER_NAME, ABI_NAME, NCHARS, ARGS...) \ - VECTOR_TYPE_##USER_NAME, +#define DEF_RVV_TYPE(NAME, ABI_NAME, NCHARS, ARGS...) \ + VECTOR_TYPE_##NAME, #include "riscv-vector-builtins.def" NUM_VECTOR_TYPES }; -- cgit v1.1 From db24bdc743cf23ea12d2dcf8254d86ab366bb46d Mon Sep 17 00:00:00 2001 From: Jeff Law Date: Tue, 11 Oct 2022 00:44:26 -0400 Subject: [PR rtl-optimization/107182] Clear EDGE_CROSSING for jump->ret optimization When turning a jump to a return into a return, we need to clear EDGE_CROSSING of the fallthru edge to prevent a checking failure. I considered not applying the transformation when the edge has EDGE_CROSSING set, but it still seems like we ought to eliminate the unnecessary jump in that case. gcc/ PR rtl-optimization/107182 * cfgrtl.cc (fixup_reorder_chain): When optimizing a jump to a return, clear EDGE_CROSSING on the appropriate edge. --- gcc/cfgrtl.cc | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/cfgrtl.cc b/gcc/cfgrtl.cc index 281a432..f31941a 100644 --- a/gcc/cfgrtl.cc +++ b/gcc/cfgrtl.cc @@ -4055,6 +4055,7 @@ fixup_reorder_chain (void) ret_label = PATTERN (ret); dest = EXIT_BLOCK_PTR_FOR_FN (cfun); + e_fall->flags &= ~EDGE_CROSSING; /* E_FALL->dest might become unreachable as a result of replacing the jump with a return. So arrange to remove unreachable blocks. */ -- cgit v1.1 From b88adba751da635c6f0c353c5bc51bbe2ecf4c89 Mon Sep 17 00:00:00 2001 From: Liwei Xu Date: Fri, 23 Sep 2022 13:46:02 +0800 Subject: Optimize nested permutation to single VEC_PERM_EXPR [PR54346] This patch implemented the optimization in PR 54346, which merges c = VEC_PERM_EXPR <a, b, VCST0>; d = VEC_PERM_EXPR <c, c, VCST1>; to d = VEC_PERM_EXPR <a, b, NEW_VCST>; Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} tree-ssa/forwprop-19.c fails to pass but I'm not sure whether it is ok to remove it.
gcc/ChangeLog: PR tree-optimization/54346 * match.pd: Merge the index of VCST then generates the new vec_perm. gcc/testsuite/ChangeLog: * gcc.dg/pr54346.c: New test. Co-authored-by: liuhongt --- gcc/match.pd | 41 +++++++++++++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/pr54346.c | 13 +++++++++++++ 2 files changed, 54 insertions(+) create mode 100755 gcc/testsuite/gcc.dg/pr54346.c (limited to 'gcc') diff --git a/gcc/match.pd b/gcc/match.pd index 345bcb7..3550c16 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -8086,6 +8086,47 @@ and, (minus (mult (vec_perm @1 @1 @3) @2) @4))) +/* Merge + c = VEC_PERM_EXPR <a, b, VCST0>; + d = VEC_PERM_EXPR <c, c, VCST1>; + to + d = VEC_PERM_EXPR <a, b, NEW_VCST>; */ + +(simplify + (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4) + (with + { + if (!TYPE_VECTOR_SUBPARTS (type).is_constant ()) + return NULL_TREE; + + tree op0; + machine_mode result_mode = TYPE_MODE (type); + machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); + int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); + vec_perm_builder builder0; + vec_perm_builder builder1; + vec_perm_builder builder2 (nelts, nelts, 1); + + if (!tree_to_vec_perm_builder (&builder0, @3) + || !tree_to_vec_perm_builder (&builder1, @4)) + return NULL_TREE; + + vec_perm_indices sel0 (builder0, 2, nelts); + vec_perm_indices sel1 (builder1, 1, nelts); + + for (int i = 0; i < nelts; i++) + builder2.quick_push (sel0[sel1[i].to_constant ()]); + + vec_perm_indices sel2 (builder2, 2, nelts); + + if (!can_vec_perm_const_p (result_mode, op_mode, sel2, false)) + return NULL_TREE; + + op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); + } + (vec_perm @1 @2 { op0; }))) + + /* Match count trailing zeroes for simplify_count_trailing_zeroes in fwprop.
The canonical form is array[((x & -x) * C) >> SHIFT] where C is a magic constant which when multiplied by a power of 2 contains a unique value diff --git a/gcc/testsuite/gcc.dg/pr54346.c b/gcc/testsuite/gcc.dg/pr54346.c new file mode 100755 index 0000000..63611ab --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr54346.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-dse1" } */ + +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); + +void fun (veci a, veci b, veci *i) +{ + veci c = __builtin_shuffle (a, b, __extension__ (veci) {1, 4, 2, 7}); + *i = __builtin_shuffle (c, __extension__ (veci) { 7, 2, 1, 5 }); +} + +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 3, 6, 0, 0 }" "dse1" } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "dse1" } } */ -- cgit v1.1 From 80f414e6d73f9f1683f93d83ce63a6a482e54bee Mon Sep 17 00:00:00 2001 From: Eugene Rozenfeld Date: Mon, 10 Oct 2022 14:10:31 -0700 Subject: Fix PR107193. The bug was introduced in f30e9fd33e56a5a721346ea6140722e1b193db42. A variable (cur_locus_e) was incorrectly declared inside a loop. I also moved two other declarations (last and locus) down to make the code more clear. Tested on x86_64-pc-linux-gnu. gcc/ChangeLog: PR debug/107193 * tree-cfg.cc (assign_discriminators): Move declaration of cur_locus_e out of the loop. --- gcc/tree-cfg.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index 41f2925..ae78187 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -1204,9 +1204,8 @@ assign_discriminators (void) edge e; edge_iterator ei; gimple_stmt_iterator gsi; - gimple *last = last_stmt (bb); - location_t locus = last ? 
gimple_location (last) : UNKNOWN_LOCATION; location_t curr_locus = UNKNOWN_LOCATION; + expanded_location curr_locus_e = {}; int curr_discr = 0; /* Traverse the basic block, if two function calls within a basic block @@ -1215,7 +1214,7 @@ assign_discriminators (void) for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { gimple *stmt = gsi_stmt (gsi); - expanded_location curr_locus_e; + if (curr_locus == UNKNOWN_LOCATION) { curr_locus = gimple_location (stmt); @@ -1238,6 +1237,8 @@ assign_discriminators (void) curr_discr = next_discriminator_for_locus (curr_locus); } + gimple *last = last_stmt (bb); + location_t locus = last ? gimple_location (last) : UNKNOWN_LOCATION; if (locus == UNKNOWN_LOCATION) continue; -- cgit v1.1 From 70d81e3a782ca8451c1c6b2c57ed154d20906aa3 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Tue, 11 Oct 2022 08:52:28 +0200 Subject: ranger: add override keyword Fixes the following clang warning: gcc/gimple-range-op.cc:310:16: warning: 'fold_range' overrides a member function but is not marked 'override' [-Winconsistent-missing-override] gcc/ChangeLog: * gimple-range-op.cc: Add override keyword. --- gcc/gimple-range-op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc index abc33e7..bc4389e 100644 --- a/gcc/gimple-range-op.cc +++ b/gcc/gimple-range-op.cc @@ -308,7 +308,7 @@ public: using range_operator_float::fold_range; using range_operator_float::op1_range; virtual bool fold_range (irange &r, tree type, const frange &lh, - const irange &, relation_kind) const + const irange &, relation_kind) const override { bool signbit; if (lh.signbit_p (signbit)) -- cgit v1.1 From 46325c16324b06fa1b3ea7f8cb05f2109ed66cf6 Mon Sep 17 00:00:00 2001 From: Olivier Hainque Date: Fri, 18 Feb 2022 22:44:53 +0000 Subject: Tigthen the addition of -lgcc_eh to vxworks_libgcc_spec This change refines VXWORKS_LIBGCC_SPEC wrt the inclusion of -lgcc_eh. 
Unless the compiler features support for dual sjlj and table based eh, libgcc_eh.a is available only with multilib variants for which we build a shared lib (mrtp on VxWorks). Rework logic to handle absence of libgcc_s for -mrtp -mcmodel=large, using a conditional expr kind of spec. The gthread support in libgcc_eh might resort to libgcc functions on some targets, e.g. cas synchronisation routines on aarch64. Arrange to append -lgcc also after -lgcc_eh in VXWORKS_LIBGCC_SPEC. 2022-10-09 Olivier Hainque gcc/ * config/vxworks.h (VX_LGCC_EH_SO0, VX_LGCC_EH_SO1): New internal macros. (VXWORKS_LIBGCC_SPEC): Use them and document. --- gcc/config/vxworks.h | 48 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h index 075a451..e7e5ffe 100644 --- a/gcc/config/vxworks.h +++ b/gcc/config/vxworks.h @@ -224,14 +224,54 @@ extern void vxworks_driver_init (unsigned int *, struct cl_decoded_option **); #undef VXWORKS_LINK_SPEC #define VXWORKS_LINK_SPEC VXWORKS_BASE_LINK_SPEC " " VXWORKS_EXTRA_LINK_SPEC +/* Control how to include libgcc in the link closure, handling both "shared" + and "non-static" in addition to "static-libgcc" when shared lib support is + enabled. */ + #undef VXWORKS_LIBGCC_SPEC + +/* libgcc_eh control; libgcc_eh.a is available either together with libgcc_s + (mrtp and mcmodel!=large when configured with --enable-shared) or when the + compiler is specially setup to support dual sjlj/table-based eh. */ + +/* VX_LGCC_EH_SO1: The "-lgcc_eh" part we need in situations where we know a + shared libgcc is available (ENABLE_SHARED_LIBGCC + mrtp multilib). */ + +#define VX_LGCC_EH_SO1 " -lgcc_eh -lgcc" +/* Extra -lgcc to handle functions from libgcc_eh that refer to symbols + exposed by libgcc and not guaranteed to be dragged in before -lgcc_eh + appears. 
*/ + +/* VX_LGCC_EH_SO0: The "-lgcc_eh" part we need in situations where we know a + shared libgcc is not available (!ENABLE_SHARED_LIBGCC or !mrtp multlib). */ + +#if !defined(CONFIG_DUAL_EXCEPTIONS) + +/* No shared lib && !DUAL_EH -> no libgcc_eh available at all. */ +#define VX_LGCC_EH_SO0 + +#else /* CONFIG_DUAL_EXCEPTIONS */ + +/* No shared lib but DUAL_EH -> libgcc_eh around and spec handled by the driver + depending on ENABLE_SHARED_LIBGCC. If defined, the driver expects a regular + sequence. Otherwise, the driver is expected to turn -lgcc into -lgcc_eh on + its own and just add an instance to address possible cross refs. */ + +#if defined(ENABLE_SHARED_LIBGCC) +#define VX_LGCC_EH_SO0 " -lgcc_eh -lgcc" +#else +#define VX_LGCC_EH_SO0 " -lgcc" +#endif + +#endif /* CONFIG_DUAL_EXCEPTIONS */ + #if defined(ENABLE_SHARED_LIBGCC) #define VXWORKS_LIBGCC_SPEC \ -"%{!mrtp:-lgcc -lgcc_eh} \ - %{mrtp:%{!static-libgcc:%{shared|non-static:-lgcc_s;:-lgcc -lgcc_eh}} \ - %{static-libgcc:-lgcc -lgcc_eh}}" + "%{!mrtp|mcmodel=large:-lgcc" VX_LGCC_EH_SO0 ";" \ + " :%{!static-libgcc:%{shared|non-static:-lgcc_s;:-lgcc" VX_LGCC_EH_SO1 "}} \ + %{static-libgcc:-lgcc" VX_LGCC_EH_SO1 "}}" #else -#define VXWORKS_LIBGCC_SPEC "-lgcc" +#define VXWORKS_LIBGCC_SPEC "-lgcc" VX_LGCC_EH_SO0 #endif /* Setup the crtstuff begin/end we might need for dwarf EH registration -- cgit v1.1 From 0ecd0f1cc6f8f3ba818946a42b22c2ab61f46825 Mon Sep 17 00:00:00 2001 From: Olivier Hainque Date: Fri, 3 Dec 2021 17:50:56 +0000 Subject: Generic configury support for shared libs on VxWorks This change adds the configury bits to activate the build of shared libs on VxWorks ports configured with --enable-shared, for libraries variants where this is generally supported (rtp, code model !large - currently not compatible with -fPIC). Set lt_cv_deplibs_check_method in libtool.m4, so the build of libraries know how to establish dependencies. 
This is useful in configurations such as aarch64 where proper support of LSE relies on accurate dependency information between libstdc++ and libgcc_s to begin with. Regenerate configure scripts to reflect libtool.m4 change. 2022-10-09 Olivier Hainque * libtool.m4 (*vxworks*): When enable_shared, set dynamic_linker and friends for rtp !large. Assume the linker has the required abilities and set lt_cv_deplibs_check_method. gcc/ * config.gcc (*vxworks*): Add t-slibgcc fragment if enable_shared. libgcc/ * config.host (*vxworks*): When enable_shared, add libgcc and crtstuff "shared" fragments for rtp except large code model. (aarch64*-wrs-vxworks7*): Remove t-slibgcc-libgcc from the list of fragments. 2022-10-09 Olivier Hainque gcc/ * configure: Regenerate. libatomic/ * configure: Regenerate. libbacktrace/ * configure: Regenerate. libcc1/ * configure: Regenerate. libffi/ * configure: Regenerate. libgfortran/ * configure: Regenerate. libgomp/ * configure: Regenerate. libitm/ * configure: Regenerate. libobjc/ * configure: Regenerate. liboffloadmic/ * configure: Regenerate. liboffloadmic/ * plugin/configure: Regenerate. libphobos/ * configure: Regenerate. libquadmath/ * configure: Regenerate. libsanitizer/ * configure: Regenerate. libssp/ * configure: Regenerate. libstdc++-v3/ * configure: Regenerate. libvtv/ * configure: Regenerate. lto-plugin/ * configure: Regenerate. zlib/ * configure: Regenerate. 
--- gcc/config.gcc | 9 +++++++++ gcc/configure | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/config.gcc b/gcc/config.gcc index eec544f..8d5972f 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -5518,6 +5518,15 @@ case "${target}" in ;; esac +# VxWorks support for shared libraries has to be requested explicitly, +# so we can factor this part here: + +case "${target}-${enable_shared}" in + *-*-vxworks*-yes) + tmake_file="${tmake_file} t-slibgcc" + ;; +esac + # Targets for which there is at least one VxWorks port should include # vxworks-dummy.h to allow safe references to various TARGET_VXWORKS kinds # of markers from other files in the port, including the vxworks*.h files to diff --git a/gcc/configure b/gcc/configure index b512580..db36681 100755 --- a/gcc/configure +++ b/gcc/configure @@ -14737,6 +14737,11 @@ sysv4 | sysv4.3*) tpf*) lt_cv_deplibs_check_method=pass_all ;; +vxworks*) + # Assume VxWorks cross toolchains are built on Linux, possibly + # as canadian for Windows hosts. + lt_cv_deplibs_check_method=pass_all + ;; esac fi @@ -19248,6 +19253,25 @@ uts4*) shlibpath_var=LD_LIBRARY_PATH ;; +# Shared libraries for VwWorks, >= 7 only at this stage +# and (fpic) still incompatible with "large" code models +# in a few configurations. Only for RTP mode in any case, +# and upon explicit request at configure time. 
+vxworks7*) + dynamic_linker=no + case ${with_multisubdir}-${enable_shared} in + *large*) + ;; + *mrtp*-yes) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker="$host_os module_loader" + ;; + esac + ;; *) dynamic_linker=no ;; @@ -21354,8 +21378,9 @@ if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi ;; vxworks*) - # FIXME: insert proper C++ library support - ld_shlibs_CXX=no + # For VxWorks ports, we assume the use of a GNU linker with + # standard elf conventions. + ld_shlibs_CXX=yes ;; *) @@ -22903,6 +22928,25 @@ uts4*) shlibpath_var=LD_LIBRARY_PATH ;; +# Shared libraries for VwWorks, >= 7 only at this stage +# and (fpic) still incompatible with "large" code models +# in a few configurations. Only for RTP mode in any case, +# and upon explicit request at configure time. +vxworks7*) + dynamic_linker=no + case ${with_multisubdir}-${enable_shared} in + *large*) + ;; + *mrtp*-yes) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker="$host_os module_loader" + ;; + esac + ;; *) dynamic_linker=no ;; -- cgit v1.1 From c4d15dddf6b9eacb36f535807ad2ee364af46e04 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 10 Oct 2022 20:42:10 +0200 Subject: [PR107195] Set range to zero when nonzero mask is 0. When solving 0 = _15 & 1, we calculate _15 as: [irange] int [-INF, -2][0, +INF] NONZERO 0xfffffffe The known value of _15 is [0, 1] NONZERO 0x1 which is intersected with the above, yielding: [0, 1] NONZERO 0x0 This eventually gets copied to a _Bool [0, 1] NONZERO 0x0. 
This is problematic because here we have a bool which is zero, but returns false for irange::zero_p, since the latter does not look at nonzero bits. This causes logical_combine to assume the range is not-zero, and all hell breaks loose. I think we should just normalize a nonzero mask of 0 to [0, 0] at creation, thus avoiding all this. PR tree-optimization/107195 gcc/ChangeLog: * value-range.cc (irange::set_range_from_nonzero_bits): Set range to [0,0] when nonzero mask is 0. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/pr107195-1.c: New test. * gcc.dg/tree-ssa/pr107195-2.c: New test. --- gcc/testsuite/gcc.dg/tree-ssa/pr107195-1.c | 15 +++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/pr107195-2.c | 16 ++++++++++++++++ gcc/value-range.cc | 5 +++++ 3 files changed, 36 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr107195-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr107195-2.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr107195-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr107195-1.c new file mode 100644 index 0000000..a0c20db --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr107195-1.c @@ -0,0 +1,15 @@ +// { dg-do run } +// { dg-options "-O1 -fno-tree-ccp" } + +int a, b; +int main() { + int c = 0; + if (a) + c = 1; + c = 1 & (a && c) && b; + if (a) { + b = c; + __builtin_abort (); + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr107195-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr107195-2.c new file mode 100644 index 0000000..d447c78 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr107195-2.c @@ -0,0 +1,16 @@ +// { dg-do run } +// { dg-options "-O1" } + +int a, b; +int main() { + int c = 0; + long d; + for (; b < 1; b++) { + (c && d) & 3 || a; + d = c; + c = -1; + if (d) + __builtin_abort(); + } + return 0; +} diff --git a/gcc/value-range.cc b/gcc/value-range.cc index a14f9bc..e07d2aa 100644 --- a/gcc/value-range.cc +++ b/gcc/value-range.cc @@ -2903,6 +2903,11 @@ irange::set_range_from_nonzero_bits () } return true; } + 
else if (popcount == 0) + { + set_zero (type ()); + return true; + } return false; } -- cgit v1.1 From 498ad738690b3c464f901d63dcd4d0f49a50dd00 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Mon, 10 Oct 2022 11:31:48 +0800 Subject: Add define_insn_and_split to support general version of "kxnor". For general_reg_operand, it will be split into xor + not. For mask_reg_operand, it will be split with UNSPEC_MASK_OP just like what we did for other logic operations. The patch will optimize xor+not to kxnor when possible. gcc/ChangeLog: PR target/107093 * config/i386/i386.md (*notxor_1): New post_reload define_insn_and_split. (*notxorqi_1): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr107093.c: New test. --- gcc/config/i386/i386.md | 71 ++++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr107093.c | 38 +++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr107093.c (limited to 'gcc') diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9475137..9390dd5 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -10826,6 +10826,39 @@ (set_attr "type" "alu, alu, msklog") (set_attr "mode" "")]) +(define_insn_and_split "*notxor_1" + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k") + (not:SWI248 + (xor:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k") + (match_operand:SWI248 2 "" "r,,k")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (XOR, mode, operands)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (xor:SWI248 (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) + (not:SWI248 (match_dup 0)))] +{ + if (MASK_REGNO_P (REGNO (operands[0]))) + { + emit_insn (gen_kxnor (operands[0], operands[1], operands[2])); + DONE; + } +} + [(set (attr "isa") + (cond [(eq_attr "alternative" "2") + (if_then_else (eq_attr "mode" "SI,DI") + (const_string "avx512bw") + (const_string "avx512f")) + ] +
(const_string "*"))) + (set_attr "type" "alu, alu, msklog") + (set_attr "mode" "")]) + (define_insn_and_split "*iordi_1_bts" [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") (ior:DI @@ -10959,6 +10992,44 @@ (symbol_ref "!TARGET_PARTIAL_REG_STALL")] (symbol_ref "true")))]) +(define_insn_and_split "*notxorqi_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r,?k") + (not:QI + (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0,k") + (match_operand:QI 2 "general_operand" "qn,m,rn,k")))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (XOR, QImode, operands)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (xor:QI (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))]) + (set (match_dup 0) + (not:QI (match_dup 0)))] +{ + if (mask_reg_operand (operands[0], QImode)) + { + emit_insn (gen_kxnorqi (operands[0], operands[1], operands[2])); + DONE; + } +} + [(set_attr "isa" "*,*,*,avx512f") + (set_attr "type" "alu,alu,alu,msklog") + (set (attr "mode") + (cond [(eq_attr "alternative" "2") + (const_string "SI") + (and (eq_attr "alternative" "3") + (match_test "!TARGET_AVX512DQ")) + (const_string "HI") + ] + (const_string "QI"))) + ;; Potential partial reg stall on alternative 2. + (set (attr "preferred_for_speed") + (cond [(eq_attr "alternative" "2") + (symbol_ref "!TARGET_PARTIAL_REG_STALL")] + (symbol_ref "true")))]) + ;; Alternative 1 is needed to work around LRA limitation, see PR82524. (define_insn_and_split "*_1_slp" [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+,&")) diff --git a/gcc/testsuite/gcc.target/i386/pr107093.c b/gcc/testsuite/gcc.target/i386/pr107093.c new file mode 100644 index 0000000..23e30cb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr107093.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -O2 -mavx512vl" } */ +/* { dg-final { scan-assembler-times {(?n)kxnor[bwqd]} 4 { target { ! 
ia32 } } } } */ +/* { dg-final { scan-assembler-times {(?n)kxnor[bwdq]} 3 { target ia32 } } } */ + +#include <immintrin.h> + +__m512i +foo (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask32 k1 = _mm512_cmp_epi16_mask (a, b, 1); + __mmask32 k2 = _mm512_cmp_epi16_mask (c, d, 2); + return _mm512_mask_mov_epi16 (a, ~(k1 ^ k2), c); +} + +__m512i +foo1 (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask16 k1 = _mm512_cmp_epi32_mask (a, b, 1); + __mmask16 k2 = _mm512_cmp_epi32_mask (c, d, 2); + return _mm512_mask_mov_epi32 (a, ~(k1 ^ k2), c); +} + +__m512i +foo2 (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask64 k1 = _mm512_cmp_epi8_mask (a, b, 1); + __mmask64 k2 = _mm512_cmp_epi8_mask (c, d, 2); + return _mm512_mask_mov_epi8 (a, ~(k1 ^ k2), c); +} + +__m512i +foo3 (__m512i a, __m512i b, __m512i c, __m512i d) +{ + __mmask8 k1 = _mm512_cmp_epi64_mask (a, b, 1); + __mmask8 k2 = _mm512_cmp_epi64_mask (c, d, 2); + return _mm512_mask_mov_epi64 (a, ~(k1 ^ k2), c); +} -- cgit v1.1 From 25413fdb2ac24933214123e24ba165026452a6f2 Mon Sep 17 00:00:00 2001 From: Andre Vieira Date: Tue, 11 Oct 2022 10:49:27 +0100 Subject: vect: Teach vectorizer how to handle bitfield accesses gcc/ChangeLog: * tree-if-conv.cc (if_convertible_loop_p_1): Move ordering of loop bb's from here... (tree_if_conversion): ... to here. Also call bitfield lowering when appropriate. (version_loop_for_if_conversion): Adapt to enable loop versioning when we only need to lower bitfields. (ifcvt_split_critical_edges): Relax condition of expected loop form as this is checked earlier. (get_bitfield_rep): New function. (lower_bitfield): Likewise. (bitfields_to_lower_p): Likewise. (need_to_lower_bitfields): New global boolean. (need_to_ifcvt): Likewise. * tree-vect-data-refs.cc (vect_find_stmt_data_reference): Improve diagnostic message. * tree-vect-patterns.cc (vect_recog_temp_ssa_var): Add default value for last parameter. (vect_recog_bitfield_ref_pattern): New. (vect_recog_bit_insert_pattern): New.
gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-bitfield-read-1.c: New test. * gcc.dg/vect/vect-bitfield-read-2.c: New test. * gcc.dg/vect/vect-bitfield-read-3.c: New test. * gcc.dg/vect/vect-bitfield-read-4.c: New test. * gcc.dg/vect/vect-bitfield-read-5.c: New test. * gcc.dg/vect/vect-bitfield-read-6.c: New test. * gcc.dg/vect/vect-bitfield-write-1.c: New test. * gcc.dg/vect/vect-bitfield-write-2.c: New test. * gcc.dg/vect/vect-bitfield-write-3.c: New test. * gcc.dg/vect/vect-bitfield-write-4.c: New test. * gcc.dg/vect/vect-bitfield-write-5.c: New test. --- gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c | 40 +++ gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c | 43 +++ gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c | 44 +++ gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c | 45 +++ gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c | 42 +++ gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c | 42 +++ gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c | 39 +++ gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c | 42 +++ gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c | 43 +++ gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c | 42 +++ gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c | 42 +++ gcc/tree-if-conv.cc | 313 ++++++++++++++++++-- gcc/tree-vect-data-refs.cc | 3 +- gcc/tree-vect-patterns.cc | 330 +++++++++++++++++++++- 14 files changed, 1079 insertions(+), 31 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c create mode 100644 
gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c create mode 100644 gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c new file mode 100644 index 0000000..01cf34f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c @@ -0,0 +1,40 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +extern void abort(void); + +struct s { int i : 31; }; + +#define ELT0 {0} +#define ELT1 {1} +#define ELT2 {2} +#define ELT3 {3} +#define N 32 +#define RES 48 +struct s A[N] + = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3}; + +int __attribute__ ((noipa)) +f(struct s *ptr, unsigned n) { + int res = 0; + for (int i = 0; i < n; ++i) + res += ptr[i].i; + return res; +} + +int main (void) +{ + check_vect (); + + if (f(&A[0], N) != RES) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c new file mode 100644 index 0000000..1a4a157 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c @@ -0,0 +1,43 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +extern void abort(void); + +struct s { + unsigned i : 31; + char a : 4; +}; + +#define N 32 +#define ELT0 {0x7FFFFFFFUL, 0} +#define ELT1 {0x7FFFFFFFUL, 1} +#define ELT2 {0x7FFFFFFFUL, 2} +#define ELT3 {0x7FFFFFFFUL, 3} +#define RES 48 +struct s A[N] + = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, 
ELT1, ELT2, ELT3}; + +int __attribute__ ((noipa)) +f(struct s *ptr, unsigned n) { + int res = 0; + for (int i = 0; i < n; ++i) + res += ptr[i].a; + return res; +} + +int main (void) +{ + check_vect (); + + if (f(&A[0], N) != RES) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c new file mode 100644 index 0000000..849f4a0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c @@ -0,0 +1,44 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" +#include + +extern void abort(void); + +typedef struct { + int c; + int b; + bool a : 1; + int d : 31; +} struct_t; + +#define N 16 +#define ELT_F { 0xFFFFFFFF, 0xFFFFFFFF, 0, 0x7FFFFFFF } +#define ELT_T { 0xFFFFFFFF, 0xFFFFFFFF, 1, 0x7FFFFFFF } + +struct_t vect_false[N] = { ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, + ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F }; +struct_t vect_true[N] = { ELT_F, ELT_F, ELT_T, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F, + ELT_F, ELT_F, ELT_T, ELT_F, ELT_F, ELT_F, ELT_F, ELT_F }; +int main (void) +{ + unsigned ret = 0; + for (unsigned i = 0; i < N; i++) + { + ret |= vect_false[i].a; + } + if (ret) + abort (); + + for (unsigned i = 0; i < N; i++) + { + ret |= vect_true[i].a; + } + if (!ret) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c new file mode 100644 index 0000000..5bc9c41 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c @@ -0,0 +1,45 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +extern void abort(void); + +struct s { + unsigned i : 31; + char x : 2; + char a : 4; +}; + +#define N 32 +#define ELT0 {0x7FFFFFFFUL, 3, 0} +#define 
ELT1 {0x7FFFFFFFUL, 3, 1} +#define ELT2 {0x7FFFFFFFUL, 3, 2} +#define ELT3 {0x7FFFFFFFUL, 3, 3} +#define RES 48 +struct s A[N] + = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3}; + +int __attribute__ ((noipa)) +f(struct s *ptr, unsigned n) { + int res = 0; + for (int i = 0; i < n; ++i) + res += ptr[i].a; + return res; +} + +int main (void) +{ + check_vect (); + + if (f(&A[0], N) != RES) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c new file mode 100644 index 0000000..1dc24d3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c @@ -0,0 +1,42 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +extern void abort(void); + +struct s { + unsigned a : 23; unsigned b : 9; +}; + +#define N 32 +#define ELT0 {0x7FFFFFUL, 0} +#define ELT1 {0x7FFFFFUL, 1} +#define ELT2 {0x7FFFFFUL, 2} +#define ELT3 {0x7FFFFFUL, 3} +#define RES 48 +struct s A[N] + = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3}; + +int __attribute__ ((noipa)) +f(struct s *ptr, unsigned n) { + int res = 0; + for (int i = 0; i < n; ++i) + res += ptr[i].b; + return res; +} + +int main (void) +{ + check_vect (); + + if (f(&A[0], N) != RES) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c new file mode 100644 index 0000000..7d24c29 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-6.c @@ -0,0 +1,42 @@ +/* { 
dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +extern void abort(void); + +struct s { + unsigned a : 23; unsigned b : 8; +}; + +#define N 32 +#define ELT0 {0x7FFFFFUL, 0} +#define ELT1 {0x7FFFFFUL, 1} +#define ELT2 {0x7FFFFFUL, 2} +#define ELT3 {0x7FFFFFUL, 3} +#define RES 48 +struct s A[N] + = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3, + ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3}; + +int __attribute__ ((noipa)) +f(struct s *ptr, unsigned n) { + int res = 0; + for (int i = 0; i < n; ++i) + res += ptr[i].b; + return res; +} + +int main (void) +{ + check_vect (); + + if (f(&A[0], N) != RES) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c new file mode 100644 index 0000000..19683d2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-1.c @@ -0,0 +1,39 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +extern void abort(void); + +struct s { int i : 31; }; + +#define N 32 +#define V 5 +struct s A[N]; + +void __attribute__ ((noipa)) +f(struct s *ptr, unsigned n) { + for (int i = 0; i < n; ++i) + ptr[i].i = V; +} + +void __attribute__ ((noipa)) +check_f(struct s *ptr) { + for (unsigned i = 0; i < N; ++i) + if (ptr[i].i != V) + abort (); +} + +int main (void) +{ + check_vect (); + __builtin_memset (&A[0], 0, sizeof(struct s) * N); + + f(&A[0], N); + check_f (&A[0]); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c new file mode 100644 index 0000000..d550dd3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-2.c @@ -0,0 +1,42 @@ +/* { 
dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +extern void abort(void); + +struct s { + unsigned i : 31; + char a : 4; +}; + +#define N 32 +#define V 5 +struct s A[N]; + +void __attribute__ ((noipa)) +f(struct s *ptr, unsigned n) { + for (int i = 0; i < n; ++i) + ptr[i].a = V; +} + +void __attribute__ ((noipa)) +check_f(struct s *ptr) { + for (unsigned i = 0; i < N; ++i) + if (ptr[i].a != V) + abort (); +} + +int main (void) +{ + check_vect (); + __builtin_memset (&A[0], 0, sizeof(struct s) * N); + + f(&A[0], N); + check_f (&A[0]); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c new file mode 100644 index 0000000..3303d26 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-3.c @@ -0,0 +1,43 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +extern void abort(void); + +struct s { + unsigned i : 31; + char x : 2; + char a : 4; +}; + +#define N 32 +#define V 5 +struct s A[N]; + +void __attribute__ ((noipa)) +f(struct s *ptr, unsigned n) { + for (int i = 0; i < n; ++i) + ptr[i].a = V; +} + +void __attribute__ ((noipa)) +check_f(struct s *ptr) { + for (unsigned i = 0; i < N; ++i) + if (ptr[i].a != V) + abort (); +} + +int main (void) +{ + check_vect (); + __builtin_memset (&A[0], 0, sizeof(struct s) * N); + + f(&A[0], N); + check_f (&A[0]); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c new file mode 100644 index 0000000..fae6ea3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-4.c @@ -0,0 +1,42 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +extern void abort(void); + +struct s { + unsigned b : 23; + 
unsigned a : 9; +}; + +#define N 32 +#define V 5 +struct s A[N]; + +void __attribute__ ((noipa)) +f(struct s *ptr, unsigned n) { + for (int i = 0; i < n; ++i) + ptr[i].a = V; +} + +void __attribute__ ((noipa)) +check_f(struct s *ptr) { + for (unsigned i = 0; i < N; ++i) + if (ptr[i].a != V) + abort (); +} + +int main (void) +{ + check_vect (); + __builtin_memset (&A[0], 0, sizeof(struct s) * N); + + f(&A[0], N); + check_f (&A[0]); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c new file mode 100644 index 0000000..99360c2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-write-5.c @@ -0,0 +1,42 @@ +/* { dg-require-effective-target vect_int } */ + +#include +#include "tree-vect.h" + +extern void abort(void); + +struct s { + unsigned b : 23; + unsigned a : 8; +}; + +#define N 32 +#define V 5 +struct s A[N]; + +void __attribute__ ((noipa)) +f(struct s *ptr, unsigned n) { + for (int i = 0; i < n; ++i) + ptr[i].a = V; +} + +void __attribute__ ((noipa)) +check_f(struct s *ptr) { + for (unsigned i = 0; i < N; ++i) + if (ptr[i].a != V) + abort (); +} + +int main (void) +{ + check_vect (); + __builtin_memset (&A[0], 0, sizeof(struct s) * N); + + f(&A[0], N); + check_f (&A[0]); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index bac29fb..e468a46 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -91,6 +91,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-pass.h" #include "ssa.h" #include "expmed.h" +#include "expr.h" #include "optabs-query.h" #include "gimple-pretty-print.h" #include "alias.h" @@ -123,6 +124,9 @@ along with GCC; see the file COPYING3. If not see #include "tree-vectorizer.h" #include "tree-eh.h" +/* For lang_hooks.types.type_for_mode. 
*/ +#include "langhooks.h" + /* Only handle PHIs with no more arguments unless we are asked to by simd pragma. */ #define MAX_PHI_ARG_NUM \ @@ -145,6 +149,12 @@ static bool need_to_rewrite_undefined; before phi_convertible_by_degenerating_args. */ static bool any_complicated_phi; +/* True if we have bitfield accesses we can lower. */ +static bool need_to_lower_bitfields; + +/* True if there is any ifcvting to be done. */ +static bool need_to_ifcvt; + /* Hash for struct innermost_loop_behavior. It depends on the user to free the memory. */ @@ -1411,15 +1421,6 @@ if_convertible_loop_p_1 (class loop *loop, vec *refs) calculate_dominance_info (CDI_DOMINATORS); - /* Allow statements that can be handled during if-conversion. */ - ifc_bbs = get_loop_body_in_if_conv_order (loop); - if (!ifc_bbs) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Irreducible loop\n"); - return false; - } - for (i = 0; i < loop->num_nodes; i++) { basic_block bb = ifc_bbs[i]; @@ -2899,18 +2900,22 @@ version_loop_for_if_conversion (class loop *loop, vec *preds) class loop *new_loop; gimple *g; gimple_stmt_iterator gsi; - unsigned int save_length; + unsigned int save_length = 0; g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2, build_int_cst (integer_type_node, loop->num), integer_zero_node); gimple_call_set_lhs (g, cond); - /* Save BB->aux around loop_version as that uses the same field. */ - save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes; - void **saved_preds = XALLOCAVEC (void *, save_length); - for (unsigned i = 0; i < save_length; i++) - saved_preds[i] = ifc_bbs[i]->aux; + void **saved_preds = NULL; + if (any_complicated_phi || need_to_predicate) + { + /* Save BB->aux around loop_version as that uses the same field. */ + save_length = loop->inner ? 
loop->inner->num_nodes : loop->num_nodes; + saved_preds = XALLOCAVEC (void *, save_length); + for (unsigned i = 0; i < save_length; i++) + saved_preds[i] = ifc_bbs[i]->aux; + } initialize_original_copy_tables (); /* At this point we invalidate porfile confistency until IFN_LOOP_VECTORIZED @@ -2922,8 +2927,9 @@ version_loop_for_if_conversion (class loop *loop, vec *preds) profile_probability::always (), true); free_original_copy_tables (); - for (unsigned i = 0; i < save_length; i++) - ifc_bbs[i]->aux = saved_preds[i]; + if (any_complicated_phi || need_to_predicate) + for (unsigned i = 0; i < save_length; i++) + ifc_bbs[i]->aux = saved_preds[i]; if (new_loop == NULL) return NULL; @@ -2999,7 +3005,7 @@ ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv) auto_vec critical_edges; /* Loop is not well formed. */ - if (num <= 2 || loop->inner || !single_exit (loop)) + if (loop->inner) return false; body = get_loop_body (loop); @@ -3260,6 +3266,201 @@ ifcvt_hoist_invariants (class loop *loop, edge pe) free (body); } +/* Returns the DECL_BIT_FIELD_REPRESENTATIVE of the bitfield access in STMT iff + its type mode is not BLKmode and DECL_SIZE of the field equals its precision. + If BITPOS is not NULL it will hold the bit offset of the bitfield within that + representative and STRUCT_EXPR, if not NULL, will hold the tree representing + the base struct of this bitfield. */ + +static tree +get_bitfield_rep (gassign *stmt, bool write, tree *bitpos, + tree *struct_expr) +{ + tree comp_ref = write ? gimple_assign_lhs (stmt) + : gimple_assign_rhs1 (stmt); + + tree field_decl = TREE_OPERAND (comp_ref, 1); + tree rep_decl = DECL_BIT_FIELD_REPRESENTATIVE (field_decl); + + /* Bail out if the representative is BLKmode as we will not be able to + vectorize this. */ + if (TYPE_MODE (TREE_TYPE (rep_decl)) == E_BLKmode) + return NULL_TREE; + + /* Bail out if the DECL_SIZE of the field_decl isn't the same as the BF's + precision. 
*/ + unsigned HOST_WIDE_INT bf_prec + = TYPE_PRECISION (TREE_TYPE (gimple_assign_lhs (stmt))); + if (compare_tree_int (DECL_SIZE (field_decl), bf_prec) != 0) + return NULL_TREE; + + if (struct_expr) + *struct_expr = TREE_OPERAND (comp_ref, 0); + + if (bitpos) + *bitpos + = fold_build2 (MINUS_EXPR, bitsizetype, + DECL_FIELD_BIT_OFFSET (field_decl), + DECL_FIELD_BIT_OFFSET (rep_decl)); + + return rep_decl; + +} + +/* Lowers the bitfield access in STMT. + For a write like: + + struct.bf = _1; + + lower to: + + __ifc_1 = struct.<representative>; + __ifc_2 = BIT_INSERT_EXPR (__ifc_1, _1, bitpos); + struct.<representative> = __ifc_2; + + For a read: + + _1 = struct.bf; + + lower to: + + __ifc_1 = struct.<representative>; + _1 = BIT_FIELD_REF (__ifc_1, bitsize, bitpos); + + where representative is a legal load that contains the bitfield value, + bitsize is the size of the bitfield and bitpos the offset to the start of + the bitfield within the representative. */ + +static void +lower_bitfield (gassign *stmt, bool write) +{ + tree struct_expr; + tree bitpos; + tree rep_decl = get_bitfield_rep (stmt, write, &bitpos, &struct_expr); + tree rep_type = TREE_TYPE (rep_decl); + tree bf_type = TREE_TYPE (gimple_assign_lhs (stmt)); + + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Lowering:\n"); + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); + fprintf (dump_file, "to:\n"); + } + + /* REP_COMP_REF is a COMPONENT_REF for the representative. NEW_VAL is its + defining SSA_NAME. 
*/ + tree rep_comp_ref = build3 (COMPONENT_REF, rep_type, struct_expr, rep_decl, + NULL_TREE); + tree new_val = ifc_temp_var (rep_type, rep_comp_ref, &gsi); + + if (dump_file && (dump_flags & TDF_DETAILS)) + print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM); + + if (write) + { + new_val = ifc_temp_var (rep_type, + build3 (BIT_INSERT_EXPR, rep_type, new_val, + unshare_expr (gimple_assign_rhs1 (stmt)), + bitpos), &gsi); + + if (dump_file && (dump_flags & TDF_DETAILS)) + print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (new_val), 0, TDF_SLIM); + + gimple *new_stmt = gimple_build_assign (unshare_expr (rep_comp_ref), + new_val); + gimple_move_vops (new_stmt, stmt); + gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); + + if (dump_file && (dump_flags & TDF_DETAILS)) + print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM); + } + else + { + tree bfr = build3 (BIT_FIELD_REF, bf_type, new_val, + build_int_cst (bitsizetype, TYPE_PRECISION (bf_type)), + bitpos); + new_val = ifc_temp_var (bf_type, bfr, &gsi); + + gimple *new_stmt = gimple_build_assign (gimple_assign_lhs (stmt), + new_val); + gimple_move_vops (new_stmt, stmt); + gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); + + if (dump_file && (dump_flags & TDF_DETAILS)) + print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM); + } + + gsi_remove (&gsi, true); +} + +/* Return TRUE if there are bitfields to lower in this LOOP. Fill READS_TO_LOWER + and WRITES_TO_LOWER with the statements that access these bitfields. 
*/ + +static bool +bitfields_to_lower_p (class loop *loop, + vec &reads_to_lower, + vec &writes_to_lower) +{ + gimple_stmt_iterator gsi; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Analyzing loop %d for bitfields:\n", loop->num); + } + + for (unsigned i = 0; i < loop->num_nodes; ++i) + { + basic_block bb = ifc_bbs[i]; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gassign *stmt = dyn_cast (gsi_stmt (gsi)); + if (!stmt) + continue; + + tree op = gimple_assign_lhs (stmt); + bool write = TREE_CODE (op) == COMPONENT_REF; + + if (!write) + op = gimple_assign_rhs1 (stmt); + + if (TREE_CODE (op) != COMPONENT_REF) + continue; + + if (DECL_BIT_FIELD_TYPE (TREE_OPERAND (op, 1))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); + + if (!INTEGRAL_TYPE_P (TREE_TYPE (op))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\t Bitfield NO OK to lower," + " field type is not Integral.\n"); + return false; + } + + if (!get_bitfield_rep (stmt, write, NULL, NULL)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\t Bitfield NOT OK to lower," + " representative is BLKmode.\n"); + return false; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\tBitfield OK to lower.\n"); + if (write) + writes_to_lower.safe_push (stmt); + else + reads_to_lower.safe_push (stmt); + } + } + } + return !reads_to_lower.is_empty () || !writes_to_lower.is_empty (); +} + + /* If-convert LOOP when it is legal. For the moment this pass has no profitability analysis. Returns non-zero todo flags when something changed. 
*/ @@ -3270,12 +3471,16 @@ tree_if_conversion (class loop *loop, vec *preds) unsigned int todo = 0; bool aggressive_if_conv; class loop *rloop; + auto_vec reads_to_lower; + auto_vec writes_to_lower; bitmap exit_bbs; edge pe; again: rloop = NULL; ifc_bbs = NULL; + need_to_lower_bitfields = false; + need_to_ifcvt = false; need_to_predicate = false; need_to_rewrite_undefined = false; any_complicated_phi = false; @@ -3291,16 +3496,42 @@ tree_if_conversion (class loop *loop, vec *preds) aggressive_if_conv = true; } - if (!ifcvt_split_critical_edges (loop, aggressive_if_conv)) + if (!single_exit (loop)) goto cleanup; - if (!if_convertible_loop_p (loop) - || !dbg_cnt (if_conversion_tree)) + /* If there are more than two BBs in the loop then there is at least one if + to convert. */ + if (loop->num_nodes > 2 + && !ifcvt_split_critical_edges (loop, aggressive_if_conv)) goto cleanup; - if ((need_to_predicate || any_complicated_phi) - && ((!flag_tree_loop_vectorize && !loop->force_vectorize) - || loop->dont_vectorize)) + ifc_bbs = get_loop_body_in_if_conv_order (loop); + if (!ifc_bbs) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Irreducible loop\n"); + goto cleanup; + } + + if (loop->num_nodes > 2) + { + need_to_ifcvt = true; + + if (!if_convertible_loop_p (loop) || !dbg_cnt (if_conversion_tree)) + goto cleanup; + + if ((need_to_predicate || any_complicated_phi) + && ((!flag_tree_loop_vectorize && !loop->force_vectorize) + || loop->dont_vectorize)) + goto cleanup; + } + + if ((flag_tree_loop_vectorize || loop->force_vectorize) + && !loop->dont_vectorize) + need_to_lower_bitfields = bitfields_to_lower_p (loop, reads_to_lower, + writes_to_lower); + + if (!need_to_ifcvt && !need_to_lower_bitfields) goto cleanup; /* The edge to insert invariant stmts on. */ @@ -3311,7 +3542,8 @@ tree_if_conversion (class loop *loop, vec *preds) Either version this loop, or if the pattern is right for outer-loop vectorization, version the outer loop. 
In the latter case we will still if-convert the original inner loop. */ - if (need_to_predicate + if (need_to_lower_bitfields + || need_to_predicate || any_complicated_phi || flag_tree_loop_if_convert != 1) { @@ -3351,10 +3583,31 @@ tree_if_conversion (class loop *loop, vec *preds) pe = single_pred_edge (gimple_bb (preds->last ())); } - /* Now all statements are if-convertible. Combine all the basic - blocks into one huge basic block doing the if-conversion - on-the-fly. */ - combine_blocks (loop); + if (need_to_lower_bitfields) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "-------------------------\n"); + fprintf (dump_file, "Start lowering bitfields\n"); + } + while (!reads_to_lower.is_empty ()) + lower_bitfield (reads_to_lower.pop (), false); + while (!writes_to_lower.is_empty ()) + lower_bitfield (writes_to_lower.pop (), true); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Done lowering bitfields\n"); + fprintf (dump_file, "-------------------------\n"); + } + } + if (need_to_ifcvt) + { + /* Now all statements are if-convertible. Combine all the basic + blocks into one huge basic block doing the if-conversion + on-the-fly. */ + combine_blocks (loop); + } /* Perform local CSE, this esp. helps the vectorizer analysis if loads and stores are involved. 
CSE only the loop body, not the entry @@ -3394,6 +3647,8 @@ tree_if_conversion (class loop *loop, vec *preds) if (rloop != NULL) { loop = rloop; + reads_to_lower.truncate (0); + writes_to_lower.truncate (0); goto again; } diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index e03b504..4a23d61 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -4302,7 +4302,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt, free_data_ref (dr); return opt_result::failure_at (stmt, "not vectorized:" - " statement is bitfield access %G", stmt); + " statement is an unsupported" + " bitfield access %G", stmt); } if (DR_BASE_ADDRESS (dr) diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index d2bd15b..0cc315d 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -35,6 +35,8 @@ along with GCC; see the file COPYING3. If not see #include "tree-eh.h" #include "gimplify.h" #include "gimple-iterator.h" +#include "gimple-fold.h" +#include "gimplify-me.h" #include "cfgloop.h" #include "tree-vectorizer.h" #include "dumpfile.h" @@ -663,7 +665,7 @@ vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code, is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */ static tree -vect_recog_temp_ssa_var (tree type, gimple *stmt) +vect_recog_temp_ssa_var (tree type, gimple *stmt = NULL) { return make_temp_ssa_name (type, stmt, "patt"); } @@ -1829,6 +1831,330 @@ vect_recog_widen_sum_pattern (vec_info *vinfo, return pattern_stmt; } +/* Function vect_recog_bitfield_ref_pattern + + Try to find the following pattern: + + bf_value = BIT_FIELD_REF (container, bitsize, bitpos); + result = (type_out) bf_value; + + where type_out is a non-bitfield type, that is to say, it's precision matches + 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)). + + Input: + + * STMT_VINFO: The stmt from which the pattern search begins. 
+ here it starts with: + result = (type_out) bf_value; + + Output: + + * TYPE_OUT: The vector type of the output of this pattern. + + * Return value: A new stmt that will be used to replace the sequence of + stmts that constitute the pattern. If the precision of type_out is bigger + than the precision of container's type we perform the widening before the shifting, + since the new precision will be large enough to shift the value and moving + widening operations up the statement chain enables the generation of + widening loads. If we are widening and the operation after the pattern is + an addition then we mask first and shift later, to enable the generation of + shifting adds. In the case of narrowing we will always mask first, shift + last and then perform a narrowing operation. This will enable the + generation of narrowing shifts. + + Widening with mask first, shift later: + container = (type_out) container; + masked = container & (((1 << bitsize) - 1) << bitpos); + result = masked >> bitpos; + + Widening with shift first, mask last: + container = (type_out) container; + shifted = container >> bitpos; + result = shifted & ((1 << bitsize) - 1); + + Narrowing: + masked = container & (((1 << bitsize) - 1) << bitpos); + result = masked >> bitpos; + result = (type_out) result; + + The shifting is always optional depending on whether bitpos != 0. 
+ +*/ + +static gimple * +vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info, + tree *type_out) +{ + gassign *first_stmt = dyn_cast (stmt_info->stmt); + + if (!first_stmt) + return NULL; + + gassign *bf_stmt; + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (first_stmt)) + && TREE_CODE (gimple_assign_rhs1 (first_stmt)) == SSA_NAME) + { + gimple *second_stmt + = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (first_stmt)); + bf_stmt = dyn_cast (second_stmt); + if (!bf_stmt + || gimple_assign_rhs_code (bf_stmt) != BIT_FIELD_REF) + return NULL; + } + else + return NULL; + + tree bf_ref = gimple_assign_rhs1 (bf_stmt); + tree container = TREE_OPERAND (bf_ref, 0); + + if (!bit_field_offset (bf_ref).is_constant () + || !bit_field_size (bf_ref).is_constant () + || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container)))) + return NULL; + + if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref)) + || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode) + return NULL; + + gimple *use_stmt, *pattern_stmt; + use_operand_p use_p; + tree ret = gimple_assign_lhs (first_stmt); + tree ret_type = TREE_TYPE (ret); + bool shift_first = true; + tree vectype; + + /* If the first operand of the BIT_FIELD_REF is not an INTEGER type, convert + it to one of the same width so we can perform the necessary masking and + shifting. 
*/ + if (!INTEGRAL_TYPE_P (TREE_TYPE (container))) + { + unsigned HOST_WIDE_INT container_size = + tree_to_uhwi (TYPE_SIZE (TREE_TYPE (container))); + tree int_type = build_nonstandard_integer_type (container_size, true); + pattern_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (int_type), + VIEW_CONVERT_EXPR, container); + vectype = get_vectype_for_scalar_type (vinfo, int_type); + container = gimple_assign_lhs (pattern_stmt); + append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype); + } + else + vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (container)); + + /* We move the conversion earlier if the loaded type is smaller than the + return type to enable the use of widening loads. */ + if (TYPE_PRECISION (TREE_TYPE (container)) < TYPE_PRECISION (ret_type) + && !useless_type_conversion_p (TREE_TYPE (container), ret_type)) + { + pattern_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (ret_type), + NOP_EXPR, container); + container = gimple_get_lhs (pattern_stmt); + append_pattern_def_seq (vinfo, stmt_info, pattern_stmt); + } + else if (!useless_type_conversion_p (TREE_TYPE (container), ret_type)) + /* If we are doing the conversion last then also delay the shift as we may + be able to combine the shift and conversion in certain cases. */ + shift_first = false; + + tree container_type = TREE_TYPE (container); + + /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a + PLUS_EXPR then do the shift last as some targets can combine the shift and + add into a single instruction. 
*/ + if (single_imm_use (gimple_assign_lhs (first_stmt), &use_p, &use_stmt)) + { + if (gimple_code (use_stmt) == GIMPLE_ASSIGN + && gimple_assign_rhs_code (use_stmt) == PLUS_EXPR) + shift_first = false; + } + + unsigned HOST_WIDE_INT shift_n = bit_field_offset (bf_ref).to_constant (); + unsigned HOST_WIDE_INT mask_width = bit_field_size (bf_ref).to_constant (); + unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type)); + if (BYTES_BIG_ENDIAN) + shift_n = prec - shift_n - mask_width; + + /* If we don't have to shift we only generate the mask, so just fix the + code-path to shift_first. */ + if (shift_n == 0) + shift_first = true; + + tree result; + if (shift_first) + { + tree shifted = container; + if (shift_n) + { + pattern_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (container_type), + RSHIFT_EXPR, container, + build_int_cst (sizetype, shift_n)); + shifted = gimple_assign_lhs (pattern_stmt); + append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype); + } + + tree mask = wide_int_to_tree (container_type, + wi::mask (mask_width, false, prec)); + + pattern_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (container_type), + BIT_AND_EXPR, shifted, mask); + result = gimple_assign_lhs (pattern_stmt); + } + else + { + tree mask = wide_int_to_tree (container_type, + wi::shifted_mask (shift_n, mask_width, + false, prec)); + pattern_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (container_type), + BIT_AND_EXPR, container, mask); + tree masked = gimple_assign_lhs (pattern_stmt); + + append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype); + pattern_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (container_type), + RSHIFT_EXPR, masked, + build_int_cst (sizetype, shift_n)); + result = gimple_assign_lhs (pattern_stmt); + } + + if (!useless_type_conversion_p (TREE_TYPE (result), ret_type)) + { + append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype); + pattern_stmt + = gimple_build_assign 
(vect_recog_temp_ssa_var (ret_type), + NOP_EXPR, result); + } + + *type_out = STMT_VINFO_VECTYPE (stmt_info); + vect_pattern_detected ("bitfield_ref pattern", stmt_info->stmt); + + return pattern_stmt; +} + +/* Function vect_recog_bit_insert_pattern + + Try to find the following pattern: + + written = BIT_INSERT_EXPR (container, value, bitpos); + + Input: + + * STMT_VINFO: The stmt we want to replace. + + Output: + + * TYPE_OUT: The vector type of the output of this pattern. + + * Return value: A new stmt that will be used to replace the sequence of + stmts that constitute the pattern. In this case it will be: + value = (container_type) value; // Make sure + shifted = value << bitpos; // Shift value into place + masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in + // the 'to-write value'. + cleared = container & ~(mask << bitpos); // Clearing the bits we want to + // write to from the value we want + // to write to. + written = cleared | masked; // Write bits. + + + where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of + bits corresponding to the real size of the bitfield value we are writing to. + The shifting is always optional depending on whether bitpos != 0. 
+ +*/ + +static gimple * +vect_recog_bit_insert_pattern (vec_info *vinfo, stmt_vec_info stmt_info, + tree *type_out) +{ + gassign *bf_stmt = dyn_cast <gassign *> (stmt_info->stmt); + if (!bf_stmt || gimple_assign_rhs_code (bf_stmt) != BIT_INSERT_EXPR) + return NULL; + + tree container = gimple_assign_rhs1 (bf_stmt); + tree value = gimple_assign_rhs2 (bf_stmt); + tree shift = gimple_assign_rhs3 (bf_stmt); + + tree bf_type = TREE_TYPE (value); + tree container_type = TREE_TYPE (container); + + if (!INTEGRAL_TYPE_P (container_type) + || !tree_fits_uhwi_p (TYPE_SIZE (container_type))) + return NULL; + + gimple *pattern_stmt; + + vect_unpromoted_value unprom; + unprom.set_op (value, vect_internal_def); + value = vect_convert_input (vinfo, stmt_info, container_type, &unprom, + get_vectype_for_scalar_type (vinfo, + container_type)); + + unsigned HOST_WIDE_INT mask_width = TYPE_PRECISION (bf_type); + unsigned HOST_WIDE_INT prec = tree_to_uhwi (TYPE_SIZE (container_type)); + unsigned HOST_WIDE_INT shift_n = tree_to_uhwi (shift); + if (BYTES_BIG_ENDIAN) + { + shift_n = prec - shift_n - mask_width; + shift = build_int_cst (TREE_TYPE (shift), shift_n); + } + + if (!useless_type_conversion_p (TREE_TYPE (value), container_type)) + { + pattern_stmt = + gimple_build_assign (vect_recog_temp_ssa_var (container_type), + NOP_EXPR, value); + append_pattern_def_seq (vinfo, stmt_info, pattern_stmt); + value = gimple_get_lhs (pattern_stmt); + } + + /* Shift VALUE into place. */ + tree shifted = value; + if (shift_n) + { + pattern_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (container_type), + LSHIFT_EXPR, value, shift); + append_pattern_def_seq (vinfo, stmt_info, pattern_stmt); + shifted = gimple_get_lhs (pattern_stmt); + } + + tree mask_t + = wide_int_to_tree (container_type, + wi::shifted_mask (shift_n, mask_width, false, prec)); + + /* Clear bits we don't want to write back from SHIFTED. 
*/ + gimple_seq stmts = NULL; + tree masked = gimple_build (&stmts, BIT_AND_EXPR, container_type, shifted, + mask_t); + if (!gimple_seq_empty_p (stmts)) + { + pattern_stmt = gimple_seq_first_stmt (stmts); + append_pattern_def_seq (vinfo, stmt_info, pattern_stmt); + } + + /* Mask off the bits in the container that we are to write to. */ + mask_t = wide_int_to_tree (container_type, + wi::shifted_mask (shift_n, mask_width, true, prec)); + tree cleared = vect_recog_temp_ssa_var (container_type); + pattern_stmt = gimple_build_assign (cleared, BIT_AND_EXPR, container, mask_t); + append_pattern_def_seq (vinfo, stmt_info, pattern_stmt); + + /* Write MASKED into CLEARED. */ + pattern_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (container_type), + BIT_IOR_EXPR, cleared, masked); + + *type_out = STMT_VINFO_VECTYPE (stmt_info); + vect_pattern_detected ("bit_insert pattern", stmt_info->stmt); + + return pattern_stmt; +} + + /* Recognize cases in which an operation is performed in one type WTYPE but could be done more efficiently in a narrower type NTYPE. For example, if we have: @@ -5622,6 +5948,8 @@ struct vect_recog_func taken which means usually the more complex one needs to preceed the less comples onex (widen_sum only after dot_prod or sad for example). */ static vect_recog_func vect_vect_recog_func_ptrs[] = { + { vect_recog_bitfield_ref_pattern, "bitfield_ref" }, + { vect_recog_bit_insert_pattern, "bit_insert" }, { vect_recog_over_widening_pattern, "over_widening" }, /* Must come after over_widening, which narrows the shift as much as possible beforehand. */ -- cgit v1.1 From 45381d6f9f4e7b5c7b062f5ad8cc9788091c2d07 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Mon, 3 Aug 2020 21:09:36 +0100 Subject: amdgcn: add multiple vector sizes The vectors sizes are simulated using implicit masking, but they make life easier for the autovectorizer and SLP passes. 
gcc/ChangeLog: * config/gcn/gcn-modes.def (VECTOR_MODE): Add new modes V32QI, V32HI, V32SI, V32DI, V32TI, V32HF, V32SF, V32DF, V16QI, V16HI, V16SI, V16DI, V16TI, V16HF, V16SF, V16DF, V8QI, V8HI, V8SI, V8DI, V8TI, V8HF, V8SF, V8DF, V4QI, V4HI, V4SI, V4DI, V4TI, V4HF, V4SF, V4DF, V2QI, V2HI, V2SI, V2DI, V2TI, V2HF, V2SF, V2DF. (ADJUST_ALIGNMENT): Likewise. * config/gcn/gcn-protos.h (gcn_full_exec): Delete. (gcn_full_exec_reg): Delete. (gcn_scalar_exec): Delete. (gcn_scalar_exec_reg): Delete. (vgpr_1reg_mode_p): Use inner mode to identify vector registers. (vgpr_2reg_mode_p): Likewise. (vgpr_vector_mode_p): Use VECTOR_MODE_P. * config/gcn/gcn-valu.md (V_QI, V_HI, V_HF, V_SI, V_SF, V_DI, V_DF, V_QIHI, V_1REG, V_INT_1REG, V_INT_1REG_ALT, V_FP_1REG, V_2REG, V_noQI, V_noHI, V_INT_noQI, V_INT_noHI, V_ALL, V_ALL_ALT, V_INT, V_FP): Add additional vector modes. (V64_SI, V64_DI, V64_ALL, V64_FP): New iterators. (scalar_mode, SCALAR_MODE, vnsi, VnSI, vndi, VnDI, sdwa): Add additional vector mode mappings. (mov<mode>): Implement vector length conversions. (ldexp<mode>3): Use VnSI. (frexp<mode>_exp2): Likewise. (VCVT_MODE, VCVT_FMODE, VCVT_IMODE): Add additional vector modes. (reduc_<reduc_op>_scal_<mode>): Use V64_ALL. (fold_left_plus_<mode>): Use V64_FP. (*<reduc_op>_dpp_shr_<mode>): Use V64_1REG. (*<reduc_op>_dpp_shr_<mode>): Use V64_DI. (*plus_carry_dpp_shr_<mode>): Use V64_INT_1REG. (*plus_carry_in_dpp_shr_<mode>): Use V64_SI. (*plus_carry_dpp_shr_<mode>): Use V64_DI. (mov_from_lane63_<mode>): Use V64_2REG. * config/gcn/gcn.cc (VnMODE): New function. (gcn_can_change_mode_class): Support multiple vector sizes. (gcn_modes_tieable_p): Likewise. (gcn_operand_part): Likewise. (gcn_scalar_exec): Delete function. (gcn_scalar_exec_reg): Delete function. (gcn_full_exec): Delete function. (gcn_full_exec_reg): Delete function. (gcn_inline_fp_constant_p): Support multiple vector sizes. (gcn_fp_constant_p): Likewise. (A): New macro. (GEN_VN_NOEXEC): New macro. (GEN_VNM_NOEXEC): New macro. (GEN_VN): New macro. (GEN_VNM): New macro. (GET_VN_FN): New macro. (CODE_FOR): New macro. 
(CODE_FOR_OP): New macro. (gen_mov_with_exec): Delete function. (gen_duplicate_load): Delete function. (gcn_expand_vector_init): Support multiple vector sizes. (strided_constant): Likewise. (gcn_addr_space_legitimize_address): Likewise. (gcn_expand_scalar_to_vector_address): Likewise. (gcn_expand_scaled_offsets): Likewise. (gcn_secondary_reload): Likewise. (gcn_valid_cvt_p): Likewise. (gcn_expand_builtin_1): Likewise. (gcn_make_vec_perm_address): Likewise. (gcn_vectorize_vec_perm_const): Likewise. (gcn_vector_mode_supported_p): Likewise. (gcn_autovectorize_vector_modes): New hook. (gcn_related_vector_mode): Support multiple vector sizes. (gcn_expand_dpp_shr_insn): Add FIXME comment. (gcn_md_reorg): Support multiple vector sizes. (print_reg): Likewise. (print_operand): Likewise. (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES): New hook. --- gcc/config/gcn/gcn-modes.def | 82 ++++ gcc/config/gcn/gcn-protos.h | 22 +- gcc/config/gcn/gcn-valu.md | 332 +++++++++++++--- gcc/config/gcn/gcn.cc | 927 +++++++++++++++++++++++++++---------------- 4 files changed, 938 insertions(+), 425 deletions(-) (limited to 'gcc') diff --git a/gcc/config/gcn/gcn-modes.def b/gcc/config/gcn/gcn-modes.def index 82585de..1b8a320 100644 --- a/gcc/config/gcn/gcn-modes.def +++ b/gcc/config/gcn/gcn-modes.def @@ -29,6 +29,48 @@ VECTOR_MODE (FLOAT, HF, 64); /* V64HF */ VECTOR_MODE (FLOAT, SF, 64); /* V64SF */ VECTOR_MODE (FLOAT, DF, 64); /* V64DF */ +/* Artificial vector modes, for when vector masking doesn't work (yet). 
*/ +VECTOR_MODE (INT, QI, 32); /* V32QI */ +VECTOR_MODE (INT, HI, 32); /* V32HI */ +VECTOR_MODE (INT, SI, 32); /* V32SI */ +VECTOR_MODE (INT, DI, 32); /* V32DI */ +VECTOR_MODE (INT, TI, 32); /* V32TI */ +VECTOR_MODE (FLOAT, HF, 32); /* V32HF */ +VECTOR_MODE (FLOAT, SF, 32); /* V32SF */ +VECTOR_MODE (FLOAT, DF, 32); /* V32DF */ +VECTOR_MODE (INT, QI, 16); /* V16QI */ +VECTOR_MODE (INT, HI, 16); /* V16HI */ +VECTOR_MODE (INT, SI, 16); /* V16SI */ +VECTOR_MODE (INT, DI, 16); /* V16DI */ +VECTOR_MODE (INT, TI, 16); /* V16TI */ +VECTOR_MODE (FLOAT, HF, 16); /* V16HF */ +VECTOR_MODE (FLOAT, SF, 16); /* V16SF */ +VECTOR_MODE (FLOAT, DF, 16); /* V16DF */ +VECTOR_MODE (INT, QI, 8); /* V8QI */ +VECTOR_MODE (INT, HI, 8); /* V8HI */ +VECTOR_MODE (INT, SI, 8); /* V8SI */ +VECTOR_MODE (INT, DI, 8); /* V8DI */ +VECTOR_MODE (INT, TI, 8); /* V8TI */ +VECTOR_MODE (FLOAT, HF, 8); /* V8HF */ +VECTOR_MODE (FLOAT, SF, 8); /* V8SF */ +VECTOR_MODE (FLOAT, DF, 8); /* V8DF */ +VECTOR_MODE (INT, QI, 4); /* V4QI */ +VECTOR_MODE (INT, HI, 4); /* V4HI */ +VECTOR_MODE (INT, SI, 4); /* V4SI */ +VECTOR_MODE (INT, DI, 4); /* V4DI */ +VECTOR_MODE (INT, TI, 4); /* V4TI */ +VECTOR_MODE (FLOAT, HF, 4); /* V4HF */ +VECTOR_MODE (FLOAT, SF, 4); /* V4SF */ +VECTOR_MODE (FLOAT, DF, 4); /* V4DF */ +VECTOR_MODE (INT, QI, 2); /* V2QI */ +VECTOR_MODE (INT, HI, 2); /* V2HI */ +VECTOR_MODE (INT, SI, 2); /* V2SI */ +VECTOR_MODE (INT, DI, 2); /* V2DI */ +VECTOR_MODE (INT, TI, 2); /* V2TI */ +VECTOR_MODE (FLOAT, HF, 2); /* V2HF */ +VECTOR_MODE (FLOAT, SF, 2); /* V2SF */ +VECTOR_MODE (FLOAT, DF, 2); /* V2DF */ + /* Vector units handle reads independently and thus no large alignment needed. 
*/ ADJUST_ALIGNMENT (V64QI, 1); @@ -39,3 +81,43 @@ ADJUST_ALIGNMENT (V64TI, 16); ADJUST_ALIGNMENT (V64HF, 2); ADJUST_ALIGNMENT (V64SF, 4); ADJUST_ALIGNMENT (V64DF, 8); +ADJUST_ALIGNMENT (V32QI, 1); +ADJUST_ALIGNMENT (V32HI, 2); +ADJUST_ALIGNMENT (V32SI, 4); +ADJUST_ALIGNMENT (V32DI, 8); +ADJUST_ALIGNMENT (V32TI, 16); +ADJUST_ALIGNMENT (V32HF, 2); +ADJUST_ALIGNMENT (V32SF, 4); +ADJUST_ALIGNMENT (V32DF, 8); +ADJUST_ALIGNMENT (V16QI, 1); +ADJUST_ALIGNMENT (V16HI, 2); +ADJUST_ALIGNMENT (V16SI, 4); +ADJUST_ALIGNMENT (V16DI, 8); +ADJUST_ALIGNMENT (V16TI, 16); +ADJUST_ALIGNMENT (V16HF, 2); +ADJUST_ALIGNMENT (V16SF, 4); +ADJUST_ALIGNMENT (V16DF, 8); +ADJUST_ALIGNMENT (V8QI, 1); +ADJUST_ALIGNMENT (V8HI, 2); +ADJUST_ALIGNMENT (V8SI, 4); +ADJUST_ALIGNMENT (V8DI, 8); +ADJUST_ALIGNMENT (V8TI, 16); +ADJUST_ALIGNMENT (V8HF, 2); +ADJUST_ALIGNMENT (V8SF, 4); +ADJUST_ALIGNMENT (V8DF, 8); +ADJUST_ALIGNMENT (V4QI, 1); +ADJUST_ALIGNMENT (V4HI, 2); +ADJUST_ALIGNMENT (V4SI, 4); +ADJUST_ALIGNMENT (V4DI, 8); +ADJUST_ALIGNMENT (V4TI, 16); +ADJUST_ALIGNMENT (V4HF, 2); +ADJUST_ALIGNMENT (V4SF, 4); +ADJUST_ALIGNMENT (V4DF, 8); +ADJUST_ALIGNMENT (V2QI, 1); +ADJUST_ALIGNMENT (V2HI, 2); +ADJUST_ALIGNMENT (V2SI, 4); +ADJUST_ALIGNMENT (V2DI, 8); +ADJUST_ALIGNMENT (V2TI, 16); +ADJUST_ALIGNMENT (V2HF, 2); +ADJUST_ALIGNMENT (V2SF, 4); +ADJUST_ALIGNMENT (V2DF, 8); diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index ca80460..6300c1c 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -34,8 +34,6 @@ extern rtx gcn_expand_scalar_to_vector_address (machine_mode, rtx, rtx, rtx); extern void gcn_expand_vector_init (rtx, rtx); extern bool gcn_flat_address_p (rtx, machine_mode); extern bool gcn_fp_constant_p (rtx, bool); -extern rtx gcn_full_exec (); -extern rtx gcn_full_exec_reg (); extern rtx gcn_gen_undef (machine_mode); extern bool gcn_global_address_p (rtx); extern tree gcn_goacc_adjust_private_decl (location_t, tree var, int level); @@ -67,8 +65,6 @@ 
extern rtx gcn_operand_part (machine_mode, rtx, int); extern bool gcn_regno_mode_code_ok_for_base_p (int, machine_mode, addr_space_t, int, int); extern reg_class gcn_regno_reg_class (int regno); -extern rtx gcn_scalar_exec (); -extern rtx gcn_scalar_exec_reg (); extern bool gcn_scalar_flat_address_p (rtx); extern bool gcn_scalar_flat_mem_p (rtx); extern bool gcn_sgpr_move_p (rtx, rtx); @@ -105,9 +101,11 @@ extern gimple_opt_pass *make_pass_omp_gcn (gcc::context *ctxt); inline bool vgpr_1reg_mode_p (machine_mode mode) { - return (mode == SImode || mode == SFmode || mode == HImode || mode == QImode - || mode == V64QImode || mode == V64HImode || mode == V64SImode - || mode == V64HFmode || mode == V64SFmode || mode == BImode); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + return (mode == SImode || mode == SFmode || mode == HImode || mode == HFmode + || mode == QImode || mode == BImode); } /* Return true if MODE is valid for 1 SGPR register. */ @@ -124,8 +122,10 @@ sgpr_1reg_mode_p (machine_mode mode) inline bool vgpr_2reg_mode_p (machine_mode mode) { - return (mode == DImode || mode == DFmode - || mode == V64DImode || mode == V64DFmode); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + return (mode == DImode || mode == DFmode); } /* Return true if MODE can be handled directly by VGPR operations. 
*/ @@ -133,9 +133,7 @@ vgpr_2reg_mode_p (machine_mode mode) inline bool vgpr_vector_mode_p (machine_mode mode) { - return (mode == V64QImode || mode == V64HImode - || mode == V64SImode || mode == V64DImode - || mode == V64HFmode || mode == V64SFmode || mode == V64DFmode); + return VECTOR_MODE_P (mode); } diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index dec81e8..52d2fcb 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -17,88 +17,243 @@ ;; {{{ Vector iterators ; Vector modes for specific types -; (This will make more sense when there are multiple vector sizes) (define_mode_iterator V_QI - [V64QI]) + [V2QI V4QI V8QI V16QI V32QI V64QI]) (define_mode_iterator V_HI - [V64HI]) + [V2HI V4HI V8HI V16HI V32HI V64HI]) (define_mode_iterator V_HF - [V64HF]) + [V2HF V4HF V8HF V16HF V32HF V64HF]) (define_mode_iterator V_SI - [V64SI]) + [V2SI V4SI V8SI V16SI V32SI V64SI]) (define_mode_iterator V_SF - [V64SF]) + [V2SF V4SF V8SF V16SF V32SF V64SF]) (define_mode_iterator V_DI - [V64DI]) + [V2DI V4DI V8DI V16DI V32DI V64DI]) (define_mode_iterator V_DF - [V64DF]) + [V2DF V4DF V8DF V16DF V32DF V64DF]) + +(define_mode_iterator V64_SI + [V64SI]) +(define_mode_iterator V64_DI + [V64DI]) ; Vector modes for sub-dword modes (define_mode_iterator V_QIHI - [V64QI V64HI]) + [V2QI V2HI + V4QI V4HI + V8QI V8HI + V16QI V16HI + V32QI V32HI + V64QI V64HI]) ; Vector modes for one vector register (define_mode_iterator V_1REG - [V64QI V64HI V64SI V64HF V64SF]) + [V2QI V2HI V2SI V2HF V2SF + V4QI V4HI V4SI V4HF V4SF + V8QI V8HI V8SI V8HF V8SF + V16QI V16HI V16SI V16HF V16SF + V32QI V32HI V32SI V32HF V32SF + V64QI V64HI V64SI V64HF V64SF]) (define_mode_iterator V_INT_1REG - [V64QI V64HI V64SI]) + [V2QI V2HI V2SI + V4QI V4HI V4SI + V8QI V8HI V8SI + V16QI V16HI V16SI + V32QI V32HI V32SI + V64QI V64HI V64SI]) (define_mode_iterator V_INT_1REG_ALT - [V64QI V64HI V64SI]) + [V2QI V2HI V2SI + V4QI V4HI V4SI + V8QI V8HI V8SI + V16QI V16HI V16SI + V32QI V32HI 
V32SI + V64QI V64HI V64SI]) (define_mode_iterator V_FP_1REG - [V64HF V64SF]) + [V2HF V2SF + V4HF V4SF + V8HF V8SF + V16HF V16SF + V32HF V32SF + V64HF V64SF]) + +; V64_* modes are for where more general support is unimplemented +; (e.g. reductions) +(define_mode_iterator V64_1REG + [V64QI V64HI V64SI V64HF V64SF]) +(define_mode_iterator V64_INT_1REG + [V64QI V64HI V64SI]) ; Vector modes for two vector registers (define_mode_iterator V_2REG + [V2DI V2DF + V4DI V4DF + V8DI V8DF + V16DI V16DF + V32DI V32DF + V64DI V64DF]) + +(define_mode_iterator V64_2REG [V64DI V64DF]) ; Vector modes with native support (define_mode_iterator V_noQI - [V64HI V64HF V64SI V64SF V64DI V64DF]) + [V2HI V2HF V2SI V2SF V2DI V2DF + V4HI V4HF V4SI V4SF V4DI V4DF + V8HI V8HF V8SI V8SF V8DI V8DF + V16HI V16HF V16SI V16SF V16DI V16DF + V32HI V32HF V32SI V32SF V32DI V32DF + V64HI V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_noHI - [V64HF V64SI V64SF V64DI V64DF]) + [V2HF V2SI V2SF V2DI V2DF + V4HF V4SI V4SF V4DI V4DF + V8HF V8SI V8SF V8DI V8DF + V16HF V16SI V16SF V16DI V16DF + V32HF V32SI V32SF V32DI V32DF + V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_INT_noQI - [V64HI V64SI V64DI]) + [V2HI V2SI V2DI + V4HI V4SI V4DI + V8HI V8SI V8DI + V16HI V16SI V16DI + V32HI V32SI V32DI + V64HI V64SI V64DI]) (define_mode_iterator V_INT_noHI - [V64SI V64DI]) + [V2SI V2DI + V4SI V4DI + V8SI V8DI + V16SI V16DI + V32SI V32DI + V64SI V64DI]) ; All of above (define_mode_iterator V_ALL - [V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) + [V2QI V2HI V2HF V2SI V2SF V2DI V2DF + V4QI V4HI V4HF V4SI V4SF V4DI V4DF + V8QI V8HI V8HF V8SI V8SF V8DI V8DF + V16QI V16HI V16HF V16SI V16SF V16DI V16DF + V32QI V32HI V32HF V32SI V32SF V32DI V32DF + V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_ALL_ALT - [V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) + [V2QI V2HI V2HF V2SI V2SF V2DI V2DF + V4QI V4HI V4HF V4SI V4SF V4DI V4DF + V8QI V8HI V8HF V8SI V8SF V8DI V8DF + V16QI V16HI V16HF V16SI V16SF 
V16DI V16DF + V32QI V32HI V32HF V32SI V32SF V32DI V32DF + V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_INT - [V64QI V64HI V64SI V64DI]) + [V2QI V2HI V2SI V2DI + V4QI V4HI V4SI V4DI + V8QI V8HI V8SI V8DI + V16QI V16HI V16SI V16DI + V32QI V32HI V32SI V32DI + V64QI V64HI V64SI V64DI]) (define_mode_iterator V_FP + [V2HF V2SF V2DF + V4HF V4SF V4DF + V8HF V8SF V8DF + V16HF V16SF V16DF + V32HF V32SF V32DF + V64HF V64SF V64DF]) + +(define_mode_iterator V64_ALL + [V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) +(define_mode_iterator V64_FP [V64HF V64SF V64DF]) (define_mode_attr scalar_mode - [(V64QI "qi") (V64HI "hi") (V64SI "si") + [(V2QI "qi") (V2HI "hi") (V2SI "si") + (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df") + (V4QI "qi") (V4HI "hi") (V4SI "si") + (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df") + (V8QI "qi") (V8HI "hi") (V8SI "si") + (V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df") + (V16QI "qi") (V16HI "hi") (V16SI "si") + (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df") + (V32QI "qi") (V32HI "hi") (V32SI "si") + (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df") + (V64QI "qi") (V64HI "hi") (V64SI "si") (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")]) (define_mode_attr SCALAR_MODE - [(V64QI "QI") (V64HI "HI") (V64SI "SI") + [(V2QI "QI") (V2HI "HI") (V2SI "SI") + (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF") + (V4QI "QI") (V4HI "HI") (V4SI "SI") + (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF") + (V8QI "QI") (V8HI "HI") (V8SI "SI") + (V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF") + (V16QI "QI") (V16HI "HI") (V16SI "SI") + (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF") + (V32QI "QI") (V32HI "HI") (V32SI "SI") + (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF") + (V64QI "QI") (V64HI "HI") (V64SI "SI") (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")]) (define_mode_attr vnsi - [(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si") + [(V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si") + (V2SF "v2si") (V2DI 
"v2si") (V2DF "v2si") + (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si") + (V4SF "v4si") (V4DI "v4si") (V4DF "v4si") + (V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si") + (V8SF "v8si") (V8DI "v8si") (V8DF "v8si") + (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si") + (V16SF "v16si") (V16DI "v16si") (V16DF "v16si") + (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si") + (V32SF "v32si") (V32DI "v32si") (V32DF "v32si") + (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si") (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")]) (define_mode_attr VnSI - [(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI") + [(V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI") + (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI") + (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI") + (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI") + (V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI") + (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI") + (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI") + (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") + (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI") + (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") + (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI") (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")]) (define_mode_attr vndi - [(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di") + [(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di") + (V2SF "v2di") (V2DI "v2di") (V2DF "v2di") + (V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di") + (V4SF "v4di") (V4DI "v4di") (V4DF "v4di") + (V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di") + (V8SF "v8di") (V8DI "v8di") (V8DF "v8di") + (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di") + (V16SF "v16di") (V16DI "v16di") (V16DF "v16di") + (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di") + (V32SF "v32di") (V32DI "v32di") (V32DF "v32di") + (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") 
(V64SI "v64di") (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")]) (define_mode_attr VnDI - [(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI") + [(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI") + (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI") + (V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI") + (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI") + (V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI") + (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI") + (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI") + (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") + (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI") + (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") + (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI") (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")]) -(define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")]) +(define_mode_attr sdwa + [(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD") + (V4QI "BYTE_0") (V4HI "WORD_0") (V4SI "DWORD") + (V8QI "BYTE_0") (V8HI "WORD_0") (V8SI "DWORD") + (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD") + (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD") + (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")]) ;; }}} ;; {{{ Substitutions @@ -180,6 +335,37 @@ (match_operand:V_ALL 1 "general_operand"))] "" { + /* Bitwise reinterpret casts via SUBREG don't work with GCN vector + registers, but we can convert the MEM to a mode that does work. 
*/ + if (MEM_P (operands[0]) && !SUBREG_P (operands[0]) + && SUBREG_P (operands[1]) + && GET_MODE_SIZE (GET_MODE (operands[1])) + == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1])))) + { + rtx src = SUBREG_REG (operands[1]); + rtx mem = copy_rtx (operands[0]); + PUT_MODE_RAW (mem, GET_MODE (src)); + emit_move_insn (mem, src); + DONE; + } + if (MEM_P (operands[1]) && !SUBREG_P (operands[1]) + && SUBREG_P (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) + == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0])))) + { + rtx dest = SUBREG_REG (operands[0]); + rtx mem = copy_rtx (operands[1]); + PUT_MODE_RAW (mem, GET_MODE (dest)); + emit_move_insn (dest, mem); + DONE; + } + + /* SUBREG of MEM is not supported. */ + gcc_assert ((!SUBREG_P (operands[0]) + || !MEM_P (SUBREG_REG (operands[0]))) + && (!SUBREG_P (operands[1]) + || !MEM_P (SUBREG_REG (operands[1])))); + if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed) { operands[1] = force_reg (<MODE>mode, operands[1]); @@ -2419,10 +2605,10 @@ (set_attr "length" "8")]) (define_insn "ldexp<mode>3" - [(set (match_operand:V_FP 0 "register_operand" "=v") + [(set (match_operand:V_FP 0 "register_operand" "= v") (unspec:V_FP - [(match_operand:V_FP 1 "gcn_alu_operand" "vB") - (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")] + [(match_operand:V_FP 1 "gcn_alu_operand" " vB") + (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")] UNSPEC_LDEXP))] "" "v_ldexp%i0\t%0, %1, %2" @@ -2452,8 +2638,8 @@ (set_attr "length" "8")]) (define_insn "frexp<mode>_exp2" - [(set (match_operand:V64SI 0 "register_operand" "=v") - (unspec:V64SI + [(set (match_operand:<VnSI> 0 "register_operand" "=v") + (unspec:<VnSI> [(match_operand:V_FP 1 "gcn_alu_operand" "vB")] UNSPEC_FREXP_EXP))] "" @@ -2640,9 +2826,27 @@ (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF]) (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF]) -(define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF]) -(define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF]) -(define_mode_iterator VCVT_IMODE 
[V64HI V64SI]) +(define_mode_iterator VCVT_MODE + [V2HI V2SI V2HF V2SF V2DF + V4HI V4SI V4HF V4SF V4DF + V8HI V8SI V8HF V8SF V8DF + V16HI V16SI V16HF V16SF V16DF + V32HI V32SI V32HF V32SF V32DF + V64HI V64SI V64HF V64SF V64DF]) +(define_mode_iterator VCVT_FMODE + [V2HF V2SF V2DF + V4HF V4SF V4DF + V8HF V8SF V8DF + V16HF V16SF V16DF + V32HF V32SF V32DF + V64HF V64SF V64DF]) +(define_mode_iterator VCVT_IMODE + [V2HI V2SI + V4HI V4SI + V8HI V8SI + V16HI V16SI + V32HI V32SI + V64HI V64SI]) (define_code_iterator cvt_op [fix unsigned_fix float unsigned_float @@ -3265,7 +3469,7 @@ (define_expand "reduc__scal_" [(set (match_operand: 0 "register_operand") (unspec: - [(match_operand:V_ALL 1 "register_operand")] + [(match_operand:V64_ALL 1 "register_operand")] REDUC_UNSPEC))] "" { @@ -3284,7 +3488,7 @@ (define_expand "fold_left_plus_" [(match_operand: 0 "register_operand") (match_operand: 1 "gcn_alu_operand") - (match_operand:V_FP 2 "gcn_alu_operand")] + (match_operand:V64_FP 2 "gcn_alu_operand")] "can_create_pseudo_p () && (flag_openacc || flag_openmp || flag_associative_math)" @@ -3300,11 +3504,11 @@ }) (define_insn "*_dpp_shr_" - [(set (match_operand:V_1REG 0 "register_operand" "=v") - (unspec:V_1REG - [(match_operand:V_1REG 1 "register_operand" "v") - (match_operand:V_1REG 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n")] + [(set (match_operand:V64_1REG 0 "register_operand" "=v") + (unspec:V64_1REG + [(match_operand:V64_1REG 1 "register_operand" "v") + (match_operand:V64_1REG 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n")] REDUC_UNSPEC))] ; GCN3 requires a carry out, GCN5 not "!(TARGET_GCN3 && SCALAR_INT_MODE_P (mode) @@ -3317,11 +3521,11 @@ (set_attr "length" "8")]) (define_insn_and_split "*_dpp_shr_" - [(set (match_operand:V_DI 0 "register_operand" "=v") - (unspec:V_DI - [(match_operand:V_DI 1 "register_operand" "v") - (match_operand:V_DI 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n")] + [(set 
(match_operand:V64_DI 0 "register_operand" "=v") + (unspec:V64_DI + [(match_operand:V64_DI 1 "register_operand" "v") + (match_operand:V64_DI 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n")] REDUC_2REG_UNSPEC))] "" "#" @@ -3346,10 +3550,10 @@ ; Special cases for addition. (define_insn "*plus_carry_dpp_shr_" - [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") - (unspec:V_INT_1REG - [(match_operand:V_INT_1REG 1 "register_operand" "v") - (match_operand:V_INT_1REG 2 "register_operand" "v") + [(set (match_operand:V64_INT_1REG 0 "register_operand" "=v") + (unspec:V64_INT_1REG + [(match_operand:V64_INT_1REG 1 "register_operand" "v") + (match_operand:V64_INT_1REG 2 "register_operand" "v") (match_operand:SI 3 "const_int_operand" "n")] UNSPEC_PLUS_CARRY_DPP_SHR)) (clobber (reg:DI VCC_REG))] @@ -3363,12 +3567,12 @@ (set_attr "length" "8")]) (define_insn "*plus_carry_in_dpp_shr_" - [(set (match_operand:V_SI 0 "register_operand" "=v") - (unspec:V_SI - [(match_operand:V_SI 1 "register_operand" "v") - (match_operand:V_SI 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n") - (match_operand:DI 4 "register_operand" "cV")] + [(set (match_operand:V64_SI 0 "register_operand" "=v") + (unspec:V64_SI + [(match_operand:V64_SI 1 "register_operand" "v") + (match_operand:V64_SI 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n") + (match_operand:DI 4 "register_operand" "cV")] UNSPEC_PLUS_CARRY_IN_DPP_SHR)) (clobber (reg:DI VCC_REG))] "" @@ -3381,11 +3585,11 @@ (set_attr "length" "8")]) (define_insn_and_split "*plus_carry_dpp_shr_" - [(set (match_operand:V_DI 0 "register_operand" "=v") - (unspec:V_DI - [(match_operand:V_DI 1 "register_operand" "v") - (match_operand:V_DI 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n")] + [(set (match_operand:V64_DI 0 "register_operand" "=v") + (unspec:V64_DI + [(match_operand:V64_DI 1 "register_operand" "v") + (match_operand:V64_DI 2 "register_operand" "v") + 
(match_operand:SI 3 "const_int_operand" "n")] UNSPEC_PLUS_CARRY_DPP_SHR)) (clobber (reg:DI VCC_REG))] "" @@ -3416,7 +3620,7 @@ (define_insn "mov_from_lane63_" [(set (match_operand: 0 "register_operand" "=Sg,v") (unspec: - [(match_operand:V_1REG 1 "register_operand" " v,v")] + [(match_operand:V64_1REG 1 "register_operand" " v,v")] UNSPEC_MOV_FROM_LANE63))] "" "@ @@ -3429,7 +3633,7 @@ (define_insn "mov_from_lane63_" [(set (match_operand: 0 "register_operand" "=Sg,v") (unspec: - [(match_operand:V_2REG 1 "register_operand" " v,v")] + [(match_operand:V64_2REG 1 "register_operand" " v,v")] UNSPEC_MOV_FROM_LANE63))] "" "@ diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index c27ee91..e1636f6 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -395,6 +395,97 @@ gcn_scalar_mode_supported_p (scalar_mode mode) || mode == TImode); } +/* Return a vector mode with N lanes of MODE. */ + +static machine_mode +VnMODE (int n, machine_mode mode) +{ + switch (mode) + { + case QImode: + switch (n) + { + case 2: return V2QImode; + case 4: return V4QImode; + case 8: return V8QImode; + case 16: return V16QImode; + case 32: return V32QImode; + case 64: return V64QImode; + } + break; + case HImode: + switch (n) + { + case 2: return V2HImode; + case 4: return V4HImode; + case 8: return V8HImode; + case 16: return V16HImode; + case 32: return V32HImode; + case 64: return V64HImode; + } + break; + case HFmode: + switch (n) + { + case 2: return V2HFmode; + case 4: return V4HFmode; + case 8: return V8HFmode; + case 16: return V16HFmode; + case 32: return V32HFmode; + case 64: return V64HFmode; + } + break; + case SImode: + switch (n) + { + case 2: return V2SImode; + case 4: return V4SImode; + case 8: return V8SImode; + case 16: return V16SImode; + case 32: return V32SImode; + case 64: return V64SImode; + } + break; + case SFmode: + switch (n) + { + case 2: return V2SFmode; + case 4: return V4SFmode; + case 8: return V8SFmode; + case 16: return V16SFmode; + case 32: 
return V32SFmode; + case 64: return V64SFmode; + } + break; + case DImode: + switch (n) + { + case 2: return V2DImode; + case 4: return V4DImode; + case 8: return V8DImode; + case 16: return V16DImode; + case 32: return V32DImode; + case 64: return V64DImode; + } + break; + case DFmode: + switch (n) + { + case 2: return V2DFmode; + case 4: return V4DFmode; + case 8: return V8DFmode; + case 16: return V16DFmode; + case 32: return V32DFmode; + case 64: return V64DFmode; + } + break; + default: + break; + } + + return VOIDmode; +} + /* Implement TARGET_CLASS_MAX_NREGS. Return the number of hard registers needed to hold a value of MODE in @@ -556,6 +647,23 @@ gcn_can_change_mode_class (machine_mode from, machine_mode to, { if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to)) return true; + + /* Vector conversions are only valid when changing mode with a fixed number + of lanes, or changing number of lanes with a fixed mode. Anything else + would require actual data movement. */ + if (VECTOR_MODE_P (from) && VECTOR_MODE_P (to) + && GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to) + && GET_MODE_INNER (from) != GET_MODE_INNER (to)) + return false; + + /* Vector/scalar conversions are only permitted when the scalar mode + is the same or smaller than the inner vector mode. */ + if ((VECTOR_MODE_P (from) && !VECTOR_MODE_P (to) + && GET_MODE_SIZE (to) >= GET_MODE_SIZE (GET_MODE_INNER (from))) + || (VECTOR_MODE_P (to) && !VECTOR_MODE_P (from) + && GET_MODE_SIZE (from) >= GET_MODE_SIZE (GET_MODE_INNER (to)))) + return false; + return (gcn_class_max_nregs (regclass, from) == gcn_class_max_nregs (regclass, to)); } @@ -595,6 +703,16 @@ gcn_class_likely_spilled_p (reg_class_t rclass) bool gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2) { + if (VECTOR_MODE_P (mode1) || VECTOR_MODE_P (mode2)) + { + int vf1 = (VECTOR_MODE_P (mode1) ? GET_MODE_NUNITS (mode1) : 1); + int vf2 = (VECTOR_MODE_P (mode2) ? GET_MODE_NUNITS (mode2) : 1); + machine_mode inner1 = (vf1 > 1 ? 
GET_MODE_INNER (mode1) : mode1); + machine_mode inner2 = (vf2 > 1 ? GET_MODE_INNER (mode2) : mode2); + + return (vf1 == vf2 || (inner1 == inner2 && vf2 <= vf1)); + } + return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE); } @@ -616,14 +734,16 @@ gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) rtx gcn_operand_part (machine_mode mode, rtx op, int n) { - if (GET_MODE_SIZE (mode) >= 256) + int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1; + + if (vf > 1) { - /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0); */ + machine_mode vsimode = VnMODE (vf, SImode); if (REG_P (op)) { gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER); - return gen_rtx_REG (V64SImode, REGNO (op) + n); + return gen_rtx_REG (vsimode, REGNO (op) + n); } if (GET_CODE (op) == CONST_VECTOR) { @@ -634,10 +754,10 @@ gcn_operand_part (machine_mode mode, rtx op, int n) RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode), CONST_VECTOR_ELT (op, i), n); - return gen_rtx_CONST_VECTOR (V64SImode, v); + return gen_rtx_CONST_VECTOR (vsimode, v); } if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR) - return gcn_gen_undef (V64SImode); + return gcn_gen_undef (vsimode); gcc_unreachable (); } else if (GET_MODE_SIZE (mode) == 8 && REG_P (op)) @@ -734,38 +854,6 @@ get_exec (int64_t val) return reg; } -/* Return value of scalar exec register. */ - -rtx -gcn_scalar_exec () -{ - return const1_rtx; -} - -/* Return pseudo holding scalar exec register. */ - -rtx -gcn_scalar_exec_reg () -{ - return get_exec (1); -} - -/* Return value of full exec register. */ - -rtx -gcn_full_exec () -{ - return constm1_rtx; -} - -/* Return pseudo holding full exec register. */ - -rtx -gcn_full_exec_reg () -{ - return get_exec (-1); -} - /* }}} */ /* {{{ Immediate constants. */ @@ -802,8 +890,13 @@ int gcn_inline_fp_constant_p (rtx x, bool allow_vector) { machine_mode mode = GET_MODE (x); + int vf = VECTOR_MODE_P (mode) ? 
GET_MODE_NUNITS (mode) : 1; - if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode) + if (vf > 1) + mode = GET_MODE_INNER (mode); + + if (vf > 1 + && (mode == HFmode || mode == SFmode || mode == DFmode) && allow_vector) { int n; @@ -812,7 +905,7 @@ gcn_inline_fp_constant_p (rtx x, bool allow_vector) n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false); if (!n) return 0; - for (int i = 1; i < 64; i++) + for (int i = 1; i < vf; i++) if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) return 0; return 1; @@ -867,8 +960,13 @@ bool gcn_fp_constant_p (rtx x, bool allow_vector) { machine_mode mode = GET_MODE (x); + int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1; - if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode) + if (vf > 1) + mode = GET_MODE_INNER (mode); + + if (vf > 1 + && (mode == HFmode || mode == SFmode || mode == DFmode) && allow_vector) { int n; @@ -877,7 +975,7 @@ gcn_fp_constant_p (rtx x, bool allow_vector) n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false); if (!n) return false; - for (int i = 1; i < 64; i++) + for (int i = 1; i < vf; i++) if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) return false; return true; @@ -1091,6 +1189,244 @@ gcn_gen_undef (machine_mode mode) } /* }}} */ +/* {{{ Utility functions. */ + +/* Generalised accessor functions for instruction patterns. + The machine description '@' prefix does something similar, but as of + GCC 10 is incompatible with define_subst, and anyway it doesn't + auto-handle the exec feature. + + Four macros are provided; each function only needs one: + + GEN_VN - create accessor functions for all sizes of one mode + GEN_VNM - create accessor functions for all sizes of all modes + GEN_VN_NOEXEC - for insns without "_exec" variants + GEN_VNM_NOEXEC - likewise + + E.g. add<mode>3 + GEN_VNM (add, 3, A(rtx dest, rtx s1, rtx s2), A(dest, s1, s2)) + + gen_addvNsi3 (dst, a, b) + -> calls gen_addv64si3, or gen_addv32si3, etc. 
+ + gen_addvNm3 (dst, a, b) + -> calls gen_addv64qi3, or gen_addv2di3, etc. + + The mode is determined from the first parameter, which must be called + "dest" (or else the macro doesn't work). + + Each function has two optional parameters at the end: merge_src and exec. + If exec is non-null, the function will call the "_exec" variant of the + insn. If exec is non-null but merge_src is null then an undef unspec + will be created. + + E.g. cont. + gen_addvNsi3 (v64sidst, a, b, oldval, exec) + -> calls gen_addv64si3_exec (v64sidst, a, b, oldval, exec) + + gen_addvNm3 (v2qidst, a, b, NULL, exec) + -> calls gen_addv2qi3_exec (v2qidst, a, b, + gcn_gen_undef (V2QImode), exec) + */ + +#define A(...) __VA_ARGS__ +#define GEN_VN_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \ +static rtx \ +gen_##PREFIX##vN##SUFFIX (PARAMS) \ +{ \ + machine_mode mode = GET_MODE (dest); \ + int n = GET_MODE_NUNITS (mode); \ + \ + switch (n) \ + { \ + case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \ + case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \ + case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \ + case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \ + case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \ + case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +#define GEN_VNM_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \ +GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \ +static rtx \ +gen_##PREFIX##vNm##SUFFIX (PARAMS) \ +{ \ + machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \ + \ + switch (mode) \ + { \ + case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS); \ + case E_HImode: return 
gen_##PREFIX##vNhi##SUFFIX (ARGS); \ + case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS); \ + case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS); \ + case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS); \ + case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS); \ + case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS); \ + default: \ + break; \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +#define GEN_VN(PREFIX, SUFFIX, PARAMS, ARGS) \ +static rtx \ +gen_##PREFIX##vN##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \ +{ \ + machine_mode mode = GET_MODE (dest); \ + int n = GET_MODE_NUNITS (mode); \ + \ + if (exec && !merge_src) \ + merge_src = gcn_gen_undef (mode); \ + \ + if (exec) \ + switch (n) \ + { \ + case 2: return gen_##PREFIX##v2##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 4: return gen_##PREFIX##v4##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 8: return gen_##PREFIX##v8##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 16: return gen_##PREFIX##v16##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 32: return gen_##PREFIX##v32##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 64: return gen_##PREFIX##v64##SUFFIX##_exec (ARGS, merge_src, exec); \ + } \ + else \ + switch (n) \ + { \ + case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \ + case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \ + case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \ + case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \ + case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \ + case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +#define GEN_VNM(PREFIX, SUFFIX, PARAMS, ARGS) \ +GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, 
df##SUFFIX, A(PARAMS), A(ARGS)) \ +static rtx \ +gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \ +{ \ + machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \ + \ + switch (mode) \ + { \ + case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec); \ + case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec); \ + case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec); \ + case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \ + case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \ + case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \ + case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \ + default: \ + break; \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc), + A(dest, src1, src2, vcc)) +GEN_VN (add,di3_sext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,di3_vcc_zext_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc), + A(dest, src1, src2, vcc)) +GEN_VN (add,di3_zext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,di3_vcc_zext_dup2, A(rtx dest, rtx src1, rtx src2, rtx vcc), + A(dest, src1, src2, vcc)) +GEN_VN (addc,si3, A(rtx dest, rtx src1, rtx src2, rtx vccout, rtx vccin), + A(dest, src1, src2, vccout, vccin)) +GEN_VN (ashl,si3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift)) +GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec), + A(dest, addr, src, exec)) +GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src)) +GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src)) + +#undef GEN_VNM +#undef GEN_VN +#undef GET_VN_FN +#undef A + +/* Get 
icode for vector instructions without an optab. */ + +#define CODE_FOR(PREFIX, SUFFIX) \ +static int \ +get_code_for_##PREFIX##vN##SUFFIX (int nunits) \ +{ \ + switch (nunits) \ + { \ + case 2: return CODE_FOR_##PREFIX##v2##SUFFIX; \ + case 4: return CODE_FOR_##PREFIX##v4##SUFFIX; \ + case 8: return CODE_FOR_##PREFIX##v8##SUFFIX; \ + case 16: return CODE_FOR_##PREFIX##v16##SUFFIX; \ + case 32: return CODE_FOR_##PREFIX##v32##SUFFIX; \ + case 64: return CODE_FOR_##PREFIX##v64##SUFFIX; \ + } \ + \ + gcc_unreachable (); \ + return CODE_FOR_nothing; \ +} + +#define CODE_FOR_OP(PREFIX) \ + CODE_FOR (PREFIX, qi) \ + CODE_FOR (PREFIX, hi) \ + CODE_FOR (PREFIX, hf) \ + CODE_FOR (PREFIX, si) \ + CODE_FOR (PREFIX, sf) \ + CODE_FOR (PREFIX, di) \ + CODE_FOR (PREFIX, df) \ +static int \ +get_code_for_##PREFIX (machine_mode mode) \ +{ \ + int vf = GET_MODE_NUNITS (mode); \ + machine_mode smode = GET_MODE_INNER (mode); \ + \ + switch (smode) \ + { \ + case E_QImode: return get_code_for_##PREFIX##vNqi (vf); \ + case E_HImode: return get_code_for_##PREFIX##vNhi (vf); \ + case E_HFmode: return get_code_for_##PREFIX##vNhf (vf); \ + case E_SImode: return get_code_for_##PREFIX##vNsi (vf); \ + case E_SFmode: return get_code_for_##PREFIX##vNsf (vf); \ + case E_DImode: return get_code_for_##PREFIX##vNdi (vf); \ + case E_DFmode: return get_code_for_##PREFIX##vNdf (vf); \ + default: break; \ + } \ + \ + gcc_unreachable (); \ + return CODE_FOR_nothing; \ +} + +CODE_FOR_OP (reload_in) +CODE_FOR_OP (reload_out) + +#undef CODE_FOR_OP +#undef CODE_FOR + +/* }}} */ /* {{{ Addresses, pointers and moves. */ /* Return true is REG is a valid place to store a pointer, @@ -1644,60 +1980,6 @@ regno_ok_for_index_p (int regno) return regno == M0_REG || VGPR_REGNO_P (regno); } -/* Generate move which uses the exec flags. If EXEC is NULL, then it is - assumed that all lanes normally relevant to the mode of the move are - affected. 
If PREV is NULL, then a sensible default is supplied for - the inactive lanes. */ - -static rtx -gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL) -{ - machine_mode mode = GET_MODE (op0); - - if (vgpr_vector_mode_p (mode)) - { - if (exec && exec != CONSTM1_RTX (DImode)) - { - if (!prev) - prev = op0; - } - else - { - if (!prev) - prev = gcn_gen_undef (mode); - exec = gcn_full_exec_reg (); - } - - rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec)); - - return gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (2, set, - gen_rtx_CLOBBER (VOIDmode, - gen_rtx_SCRATCH (V64DImode)))); - } - - return (gen_rtx_PARALLEL - (VOIDmode, - gen_rtvec (2, gen_rtx_SET (op0, op1), - gen_rtx_USE (VOIDmode, - exec ? exec : gcn_scalar_exec ())))); -} - -/* Generate masked move. */ - -static rtx -gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL) -{ - if (exec) - return (gen_rtx_SET (op0, - gen_rtx_VEC_MERGE (GET_MODE (op0), - gen_rtx_VEC_DUPLICATE (GET_MODE - (op0), op1), - op2, exec))); - else - return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1))); -} - /* Expand vector init of OP0 by VEC. Implements vec_init instruction pattern. 
*/ @@ -1707,10 +1989,11 @@ gcn_expand_vector_init (rtx op0, rtx vec) int64_t initialized_mask = 0; int64_t curr_mask = 1; machine_mode mode = GET_MODE (op0); + int vf = GET_MODE_NUNITS (mode); rtx val = XVECEXP (vec, 0, 0); - for (int i = 1; i < 64; i++) + for (int i = 1; i < vf; i++) if (rtx_equal_p (val, XVECEXP (vec, 0, i))) curr_mask |= (int64_t) 1 << i; @@ -1719,26 +2002,26 @@ gcn_expand_vector_init (rtx op0, rtx vec) else { val = force_reg (GET_MODE_INNER (mode), val); - emit_insn (gen_duplicate_load (op0, val)); + emit_insn (gen_vec_duplicatevNm (op0, val)); } initialized_mask |= curr_mask; - for (int i = 1; i < 64; i++) + for (int i = 1; i < vf; i++) if (!(initialized_mask & ((int64_t) 1 << i))) { curr_mask = (int64_t) 1 << i; rtx val = XVECEXP (vec, 0, i); - for (int j = i + 1; j < 64; j++) + for (int j = i + 1; j < vf; j++) if (rtx_equal_p (val, XVECEXP (vec, 0, j))) curr_mask |= (int64_t) 1 << j; if (gcn_constant_p (val)) - emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val), - get_exec (curr_mask))); + emit_insn (gen_movvNm (op0, gcn_vec_constant (mode, val), op0, + get_exec (curr_mask))); else { val = force_reg (GET_MODE_INNER (mode), val); - emit_insn (gen_duplicate_load (op0, val, op0, - get_exec (curr_mask))); + emit_insn (gen_vec_duplicatevNm (op0, val, op0, + get_exec (curr_mask))); } initialized_mask |= curr_mask; } @@ -1751,18 +2034,18 @@ strided_constant (machine_mode mode, int base, int val) { rtx x = gen_reg_rtx (mode); emit_move_insn (x, gcn_vec_constant (mode, base)); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32), - x, get_exec (0xffffffff00000000))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16), - x, get_exec (0xffff0000ffff0000))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8), - x, get_exec (0xff00ff00ff00ff00))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 4), - x, get_exec (0xf0f0f0f0f0f0f0f0))); - emit_insn 
(gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2), - x, get_exec (0xcccccccccccccccc))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1), - x, get_exec (0xaaaaaaaaaaaaaaaa))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 32), + x, get_exec (0xffffffff00000000))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 16), + x, get_exec (0xffff0000ffff0000))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 8), + x, get_exec (0xff00ff00ff00ff00))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 4), + x, get_exec (0xf0f0f0f0f0f0f0f0))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 2), + x, get_exec (0xcccccccccccccccc))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 1), + x, get_exec (0xaaaaaaaaaaaaaaaa))); return x; } @@ -1792,15 +2075,17 @@ gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode, case ADDR_SPACE_LDS: case ADDR_SPACE_GDS: /* FIXME: LDS support offsets, handle them!. */ - if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode) + if (vgpr_vector_mode_p (mode) + && GET_MODE_INNER (GET_MODE (x)) != SImode) { - rtx addrs = gen_reg_rtx (V64SImode); + machine_mode simode = VnMODE (GET_MODE_NUNITS (mode), SImode); + rtx addrs = gen_reg_rtx (simode); rtx base = force_reg (SImode, x); - rtx offsets = strided_constant (V64SImode, 0, + rtx offsets = strided_constant (simode, 0, GET_MODE_UNIT_SIZE (mode)); - emit_insn (gen_vec_duplicatev64si (addrs, base)); - emit_insn (gen_addv64si3 (addrs, offsets, addrs)); + emit_insn (gen_vec_duplicatevNsi (addrs, base)); + emit_insn (gen_addvNsi3 (addrs, offsets, addrs)); return addrs; } return x; @@ -1808,16 +2093,18 @@ gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode, gcc_unreachable (); } -/* Convert a (mem: (reg:DI)) to (mem: (reg:V64DI)) with the +/* Convert a (mem: (reg:DI)) to (mem: (reg:VnDI)) with the proper vector of stepped addresses. 
MEM will be a DImode address of a vector in an SGPR. - TMP will be a V64DImode VGPR pair or (scratch:V64DI). */ + TMP will be a VnDImode VGPR pair or (scratch:VnDI). */ rtx gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, rtx tmp) { + machine_mode pmode = VnMODE (GET_MODE_NUNITS (mode), DImode); + machine_mode offmode = VnMODE (GET_MODE_NUNITS (mode), SImode); gcc_assert (MEM_P (mem)); rtx mem_base = XEXP (mem, 0); rtx mem_index = NULL_RTX; @@ -1841,22 +2128,18 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, machine_mode inner = GET_MODE_INNER (mode); int shift = exact_log2 (GET_MODE_SIZE (inner)); - rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1)); - rtx undef_v64si = gcn_gen_undef (V64SImode); + rtx ramp = gen_rtx_REG (offmode, VGPR_REGNO (1)); rtx new_base = NULL_RTX; addr_space_t as = MEM_ADDR_SPACE (mem); rtx tmplo = (REG_P (tmp) - ? gcn_operand_part (V64DImode, tmp, 0) - : gen_reg_rtx (V64SImode)); + ? gcn_operand_part (pmode, tmp, 0) + : gen_reg_rtx (offmode)); /* tmplo[:] = ramp[:] << shift */ - if (exec) - emit_insn (gen_ashlv64si3_exec (tmplo, ramp, - gen_int_mode (shift, SImode), - undef_v64si, exec)); - else - emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode))); + emit_insn (gen_ashlvNsi3 (tmplo, ramp, + gen_int_mode (shift, SImode), + NULL, exec)); if (AS_FLAT_P (as)) { @@ -1866,53 +2149,41 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, { rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0); rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1); - rtx tmphi = gcn_operand_part (V64DImode, tmp, 1); + rtx tmphi = gcn_operand_part (pmode, tmp, 1); /* tmphi[:] = mem_base_hi */ - if (exec) - emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi, - undef_v64si, exec)); - else - emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi)); + emit_insn (gen_vec_duplicatevNsi (tmphi, mem_base_hi, NULL, exec)); /* tmp[:] += zext (mem_base) */ if 
(exec) { - emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo, - vcc, undef_v64si, exec)); - emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx, - vcc, vcc, undef_v64si, exec)); + emit_insn (gen_addvNsi3_vcc_dup (tmplo, mem_base_lo, tmplo, + vcc, NULL, exec)); + emit_insn (gen_addcvNsi3 (tmphi, tmphi, const0_rtx, + vcc, vcc, NULL, exec)); } else - emit_insn (gen_addv64di3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc)); + emit_insn (gen_addvNdi3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc)); } else { - tmp = gen_reg_rtx (V64DImode); - if (exec) - emit_insn (gen_addv64di3_vcc_zext_dup2_exec - (tmp, tmplo, mem_base, vcc, gcn_gen_undef (V64DImode), - exec)); - else - emit_insn (gen_addv64di3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc)); + tmp = gen_reg_rtx (pmode); + emit_insn (gen_addvNdi3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc, + NULL, exec)); } new_base = tmp; } else if (AS_ANY_DS_P (as)) { - if (!exec) - emit_insn (gen_addv64si3_dup (tmplo, tmplo, mem_base)); - else - emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base, - gcn_gen_undef (V64SImode), exec)); + emit_insn (gen_addvNsi3_dup (tmplo, tmplo, mem_base, NULL, exec)); new_base = tmplo; } else { - mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base); - new_base = gen_rtx_PLUS (V64DImode, mem_base, - gen_rtx_SIGN_EXTEND (V64DImode, tmplo)); + mem_base = gen_rtx_VEC_DUPLICATE (pmode, mem_base); + new_base = gen_rtx_PLUS (pmode, mem_base, + gen_rtx_SIGN_EXTEND (pmode, tmplo)); } return gen_rtx_PLUS (GET_MODE (new_base), new_base, @@ -1929,42 +2200,33 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, If EXEC is set then _exec patterns will be used, otherwise plain. Return values. - ADDR_SPACE_FLAT - return V64DImode vector of absolute addresses. - ADDR_SPACE_GLOBAL - return V64SImode vector of offsets. */ + ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses. + ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. 
*/ rtx gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale, bool unsigned_p, rtx exec) { - rtx tmpsi = gen_reg_rtx (V64SImode); - rtx tmpdi = gen_reg_rtx (V64DImode); - rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL; - rtx undefdi = exec ? gcn_gen_undef (V64DImode) : NULL; + int vf = GET_MODE_NUNITS (GET_MODE (offsets)); + rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode)); + rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode)); if (CONST_INT_P (scale) && INTVAL (scale) > 0 && exact_log2 (INTVAL (scale)) >= 0) - emit_insn (gen_ashlv64si3 (tmpsi, offsets, - GEN_INT (exact_log2 (INTVAL (scale))))); + emit_insn (gen_ashlvNsi3 (tmpsi, offsets, + GEN_INT (exact_log2 (INTVAL (scale))), + NULL, exec)); else - (exec - ? emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi, - exec)) - : emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale))); + emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec)); /* "Global" instructions do not support negative register offsets. */ if (as == ADDR_SPACE_FLAT || !unsigned_p) { if (unsigned_p) - (exec - ? emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base, - undefdi, exec)) - : emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base))); + emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec)); else - (exec - ? 
emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base, - undefdi, exec)) - : emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base))); + emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec)); return tmpdi; } else if (as == ADDR_SPACE_GLOBAL) @@ -2065,59 +2327,9 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass, || GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT) { if (in_p) - switch (reload_mode) - { - case E_V64SImode: - sri->icode = CODE_FOR_reload_inv64si; - break; - case E_V64SFmode: - sri->icode = CODE_FOR_reload_inv64sf; - break; - case E_V64HImode: - sri->icode = CODE_FOR_reload_inv64hi; - break; - case E_V64HFmode: - sri->icode = CODE_FOR_reload_inv64hf; - break; - case E_V64QImode: - sri->icode = CODE_FOR_reload_inv64qi; - break; - case E_V64DImode: - sri->icode = CODE_FOR_reload_inv64di; - break; - case E_V64DFmode: - sri->icode = CODE_FOR_reload_inv64df; - break; - default: - gcc_unreachable (); - } + sri->icode = get_code_for_reload_in (reload_mode); else - switch (reload_mode) - { - case E_V64SImode: - sri->icode = CODE_FOR_reload_outv64si; - break; - case E_V64SFmode: - sri->icode = CODE_FOR_reload_outv64sf; - break; - case E_V64HImode: - sri->icode = CODE_FOR_reload_outv64hi; - break; - case E_V64HFmode: - sri->icode = CODE_FOR_reload_outv64hf; - break; - case E_V64QImode: - sri->icode = CODE_FOR_reload_outv64qi; - break; - case E_V64DImode: - sri->icode = CODE_FOR_reload_outv64di; - break; - case E_V64DFmode: - sri->icode = CODE_FOR_reload_outv64df; - break; - default: - gcc_unreachable (); - } + sri->icode = get_code_for_reload_out (reload_mode); break; } /* Fallthrough. 
*/ @@ -3428,6 +3640,9 @@ gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op) if (VECTOR_MODE_P (from)) { + if (GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to)) + return false; + from = GET_MODE_INNER (from); to = GET_MODE_INNER (to); } @@ -3926,7 +4141,7 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , rtx mem = gen_rtx_MEM (GET_MODE (target), addrs); /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */ /* FIXME: set attributes. */ - emit_insn (gen_mov_with_exec (target, mem, exec)); + emit_insn (gen_movvNm (target, mem, NULL, exec)); return target; } case GCN_BUILTIN_FLAT_STORE_PTR_INT32: @@ -3961,20 +4176,18 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , rtx mem = gen_rtx_MEM (vmode, addrs); /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */ /* FIXME: set attributes. */ - emit_insn (gen_mov_with_exec (mem, val, exec)); + emit_insn (gen_movvNm (mem, val, NULL, exec)); return target; } case GCN_BUILTIN_SQRTVF: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_sqrtv64sf2_exec - (target, arg, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_sqrtv64sf2 (target, arg)); return target; } case GCN_BUILTIN_SQRTF: @@ -3992,20 +4205,17 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_absv64sf2_exec - (target, arg, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_absv64sf2 (target, arg)); return target; } case GCN_BUILTIN_LDEXPVF: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg1 = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, @@ -4014,15 +4224,13 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , expand_expr 
(CALL_EXPR_ARG (exp, 1), NULL_RTX, V64SImode, EXPAND_NORMAL)); - emit_insn (gen_ldexpv64sf3_exec - (target, arg1, arg2, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_ldexpv64sf3 (target, arg1, arg2)); return target; } case GCN_BUILTIN_LDEXPV: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg1 = force_reg (V64DFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, @@ -4031,60 +4239,51 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64SImode, EXPAND_NORMAL)); - emit_insn (gen_ldexpv64df3_exec - (target, arg1, arg2, gcn_gen_undef (V64DFmode), exec)); + emit_insn (gen_ldexpv64df3 (target, arg1, arg2)); return target; } case GCN_BUILTIN_FREXPVF_EXP: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_frexpv64sf_exp2_exec - (target, arg, gcn_gen_undef (V64SImode), exec)); + emit_insn (gen_frexpv64sf_exp2 (target, arg)); return target; } case GCN_BUILTIN_FREXPVF_MANT: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_frexpv64sf_mant2_exec - (target, arg, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_frexpv64sf_mant2 (target, arg)); return target; } case GCN_BUILTIN_FREXPV_EXP: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64DFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64DFmode, EXPAND_NORMAL)); - emit_insn (gen_frexpv64df_exp2_exec - (target, arg, gcn_gen_undef (V64SImode), exec)); + emit_insn (gen_frexpv64df_exp2 (target, arg)); return target; } case GCN_BUILTIN_FREXPV_MANT: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64DFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64DFmode, EXPAND_NORMAL)); 
- emit_insn (gen_frexpv64df_mant2_exec - (target, arg, gcn_gen_undef (V64DFmode), exec)); + emit_insn (gen_frexpv64df_mant2 (target, arg)); return target; } case GCN_BUILTIN_OMP_DIM_SIZE: @@ -4239,10 +4438,11 @@ gcn_vectorize_get_mask_mode (machine_mode) Helper function for gcn_vectorize_vec_perm_const. */ static rtx -gcn_make_vec_perm_address (unsigned int *perm) +gcn_make_vec_perm_address (unsigned int *perm, int nelt) { - rtx x = gen_reg_rtx (V64SImode); - emit_move_insn (x, gcn_vec_constant (V64SImode, 0)); + machine_mode mode = VnMODE (nelt, SImode); + rtx x = gen_reg_rtx (mode); + emit_move_insn (x, gcn_vec_constant (mode, 0)); /* Permutation addresses use byte addressing. With each vector lane being 4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant, @@ -4258,15 +4458,13 @@ gcn_make_vec_perm_address (unsigned int *perm) { uint64_t exec_mask = 0; uint64_t lane_mask = 1; - for (int j = 0; j < 64; j++, lane_mask <<= 1) - if ((perm[j] * 4) & bit_mask) + for (int j = 0; j < nelt; j++, lane_mask <<= 1) + if (((perm[j] % nelt) * 4) & bit_mask) exec_mask |= lane_mask; if (exec_mask) - emit_insn (gen_addv64si3_exec (x, x, - gcn_vec_constant (V64SImode, - bit_mask), - x, get_exec (exec_mask))); + emit_insn (gen_addvNsi3 (x, x, gcn_vec_constant (mode, bit_mask), + x, get_exec (exec_mask))); } return x; @@ -4336,39 +4534,11 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, src1_lanes |= lane_bit; } - rtx addr = gcn_make_vec_perm_address (perm); - rtx (*ds_bpermute) (rtx, rtx, rtx, rtx); - - switch (vmode) - { - case E_V64QImode: - ds_bpermute = gen_ds_bpermutev64qi; - break; - case E_V64HImode: - ds_bpermute = gen_ds_bpermutev64hi; - break; - case E_V64SImode: - ds_bpermute = gen_ds_bpermutev64si; - break; - case E_V64HFmode: - ds_bpermute = gen_ds_bpermutev64hf; - break; - case E_V64SFmode: - ds_bpermute = gen_ds_bpermutev64sf; - break; - case E_V64DImode: - ds_bpermute = gen_ds_bpermutev64di; - break; - case E_V64DFmode: 
- ds_bpermute = gen_ds_bpermutev64df; - break; - default: - gcc_assert (false); - } + rtx addr = gcn_make_vec_perm_address (perm, nelt); /* Load elements from src0 to dst. */ - gcc_assert (~src1_lanes); - emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ())); + gcc_assert ((~src1_lanes) & (0xffffffffffffffffUL > (64-nelt))); + emit_insn (gen_ds_bpermutevNm (dst, addr, src0, get_exec (vmode))); /* Load elements from src1 to dst. */ if (src1_lanes) @@ -4379,8 +4549,8 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, the two source vectors together. */ rtx tmp = gen_reg_rtx (vmode); - emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ())); - emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes))); + emit_insn (gen_ds_bpermutevNm (tmp, addr, src1, get_exec (vmode))); + emit_insn (gen_movvNm (dst, tmp, dst, get_exec (src1_lanes))); } return true; @@ -4396,7 +4566,22 @@ gcn_vector_mode_supported_p (machine_mode mode) { return (mode == V64QImode || mode == V64HImode || mode == V64SImode || mode == V64DImode - || mode == V64SFmode || mode == V64DFmode); + || mode == V64SFmode || mode == V64DFmode + || mode == V32QImode || mode == V32HImode + || mode == V32SImode || mode == V32DImode + || mode == V32SFmode || mode == V32DFmode + || mode == V16QImode || mode == V16HImode + || mode == V16SImode || mode == V16DImode + || mode == V16SFmode || mode == V16DFmode + || mode == V8QImode || mode == V8HImode + || mode == V8SImode || mode == V8DImode + || mode == V8SFmode || mode == V8DFmode + || mode == V4QImode || mode == V4HImode + || mode == V4SImode || mode == V4DImode + || mode == V4SFmode || mode == V4DFmode + || mode == V2QImode || mode == V2HImode + || mode == V2SImode || mode == V2DImode + || mode == V2SFmode || mode == V2DFmode); } /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE. @@ -4425,23 +4610,74 @@ gcn_vectorize_preferred_simd_mode (scalar_mode mode) } } +/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. 
+ + Try all the vector modes. */ + +unsigned int gcn_autovectorize_vector_modes (vector_modes *modes, + bool ARG_UNUSED (all)) +{ + modes->safe_push (V64QImode); + modes->safe_push (V64HImode); + modes->safe_push (V64SImode); + modes->safe_push (V64SFmode); + modes->safe_push (V64DImode); + modes->safe_push (V64DFmode); + + modes->safe_push (V32QImode); + modes->safe_push (V32HImode); + modes->safe_push (V32SImode); + modes->safe_push (V32SFmode); + modes->safe_push (V32DImode); + modes->safe_push (V32DFmode); + + modes->safe_push (V16QImode); + modes->safe_push (V16HImode); + modes->safe_push (V16SImode); + modes->safe_push (V16SFmode); + modes->safe_push (V16DImode); + modes->safe_push (V16DFmode); + + modes->safe_push (V8QImode); + modes->safe_push (V8HImode); + modes->safe_push (V8SImode); + modes->safe_push (V8SFmode); + modes->safe_push (V8DImode); + modes->safe_push (V8DFmode); + + modes->safe_push (V4QImode); + modes->safe_push (V4HImode); + modes->safe_push (V4SImode); + modes->safe_push (V4SFmode); + modes->safe_push (V4DImode); + modes->safe_push (V4DFmode); + + modes->safe_push (V2QImode); + modes->safe_push (V2HImode); + modes->safe_push (V2SImode); + modes->safe_push (V2SFmode); + modes->safe_push (V2DImode); + modes->safe_push (V2DFmode); + + /* We shouldn't need VECT_COMPARE_COSTS as they should all cost the same. */ + return 0; +} + /* Implement TARGET_VECTORIZE_RELATED_MODE. All GCN vectors are 64-lane, so this is simpler than other architectures. In particular, we do *not* want to match vector bit-size. 
*/ static opt_machine_mode -gcn_related_vector_mode (machine_mode ARG_UNUSED (vector_mode), +gcn_related_vector_mode (machine_mode vector_mode, scalar_mode element_mode, poly_uint64 nunits) { - if (known_ne (nunits, 0U) && known_ne (nunits, 64U)) - return VOIDmode; + int n = nunits.to_constant (); - machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode); - if (!VECTOR_MODE_P (pref_mode)) - return VOIDmode; + if (n == 0) + n = GET_MODE_NUNITS (vector_mode); - return pref_mode; + return VnMODE (n, element_mode); } /* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. @@ -4566,6 +4802,8 @@ gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn, The vector register SRC of mode MODE is reduced using the operation given by UNSPEC, and the scalar result is returned in lane 63 of a vector register. */ +/* FIXME: Implement reductions for sizes other than V64. + (They're currently disabled in the machine description.) */ rtx gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec) @@ -4975,10 +5213,11 @@ gcn_md_reorg (void) { if (VECTOR_MODE_P (GET_MODE (x))) { - new_exec = -1; - break; + int vf = GET_MODE_NUNITS (GET_MODE (x)); + new_exec = MAX ((uint64_t)new_exec, + 0xffffffffffffffffUL >> (64-vf)); } - else + else if (new_exec == 0) new_exec = 1; } } @@ -5693,13 +5932,12 @@ static void print_reg (FILE *file, rtx x) { machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); if (mode == BImode || mode == QImode || mode == HImode || mode == SImode - || mode == HFmode || mode == SFmode - || mode == V64SFmode || mode == V64SImode - || mode == V64QImode || mode == V64HImode) + || mode == HFmode || mode == SFmode) fprintf (file, "%s", reg_names[REGNO (x)]); - else if (mode == DImode || mode == V64DImode - || mode == DFmode || mode == V64DFmode) + else if (mode == DImode || mode == DFmode) { if (SGPR_REGNO_P (REGNO (x))) fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG, @@ -6146,20 +6384,20 @@ 
print_operand (FILE *file, rtx x, int code) case 'o': { const char *s = 0; - switch (GET_MODE_SIZE (GET_MODE (x))) + machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + switch (mode) { - case 1: + case E_QImode: s = "_ubyte"; break; - case 2: + case E_HImode: + case E_HFmode: s = "_ushort"; break; - /* The following are full-vector variants. */ - case 64: - s = "_ubyte"; - break; - case 128: - s = "_ushort"; + default: break; } @@ -6174,43 +6412,31 @@ print_operand (FILE *file, rtx x, int code) } case 's': { - const char *s = ""; - switch (GET_MODE_SIZE (GET_MODE (x))) + const char *s; + machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + switch (mode) { - case 1: + case E_QImode: s = "_byte"; break; - case 2: + case E_HImode: + case E_HFmode: s = "_short"; break; - case 4: + case E_SImode: + case E_SFmode: s = "_dword"; break; - case 8: + case E_DImode: + case E_DFmode: s = "_dwordx2"; break; - case 12: - s = "_dwordx3"; - break; - case 16: + case E_TImode: s = "_dwordx4"; break; - case 32: - s = "_dwordx8"; - break; - case 64: - s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16"; - break; - /* The following are full-vector variants. 
*/ - case 128: - s = "_short"; - break; - case 256: - s = "_dword"; - break; - case 512: - s = "_dwordx2"; - break; default: output_operand_lossage ("invalid operand %%xn code"); return; @@ -6714,6 +6940,9 @@ gcn_dwarf_register_span (rtx rtl) #define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template #undef TARGET_ATTRIBUTE_TABLE #define TARGET_ATTRIBUTE_TABLE gcn_attribute_table +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ + gcn_autovectorize_vector_modes #undef TARGET_BUILTIN_DECL #define TARGET_BUILTIN_DECL gcn_builtin_decl #undef TARGET_CAN_CHANGE_MODE_CLASS -- cgit v1.1 From 0d8753cf30486c4e7fb07455b7cae49aa812c6a4 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Thu, 26 Mar 2020 21:22:45 +0000 Subject: amdgcn: Resolve insn conditions at compile time GET_MODE_NUNITS isn't a compile time constant, so we end up with many impossible insns in the machine description. Adding MODE_VF allows the insns to be eliminated completely. gcc/ChangeLog: * config/gcn/gcn-valu.md (2): Use MODE_VF. (2): Likewise. * config/gcn/gcn.h (MODE_VF): New macro. 
--- gcc/config/gcn/gcn-valu.md | 10 ++++++---- gcc/config/gcn/gcn.h | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 52d2fcb..c7be236 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -2873,8 +2873,9 @@ [(set (match_operand:VCVT_FMODE 0 "register_operand" "= v") (cvt_op:VCVT_FMODE (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))] - "gcn_valid_cvt_p (mode, mode, - _cvt)" + "MODE_VF (mode) == MODE_VF (mode) + && gcn_valid_cvt_p (mode, mode, + _cvt)" "v_cvt\t%0, %1" [(set_attr "type" "vop1") (set_attr "length" "8")]) @@ -2883,8 +2884,9 @@ [(set (match_operand:VCVT_IMODE 0 "register_operand" "= v") (cvt_op:VCVT_IMODE (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))] - "gcn_valid_cvt_p (mode, mode, - _cvt)" + "MODE_VF (mode) == MODE_VF (mode) + && gcn_valid_cvt_p (mode, mode, + _cvt)" "v_cvt\t%0, %1" [(set_attr "type" "vop1") (set_attr "length" "8")]) diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h index 318256c..38f7212 100644 --- a/gcc/config/gcn/gcn.h +++ b/gcc/config/gcn/gcn.h @@ -678,3 +678,27 @@ enum gcn_builtin_codes /* Trampolines */ #define TRAMPOLINE_SIZE 36 #define TRAMPOLINE_ALIGNMENT 64 + +/* MD Optimization. + The following are intended to be obviously constant at compile time to + allow genconditions to eliminate bad patterns at compile time. */ +#define MODE_VF(M) \ + ((M == V64QImode || M == V64HImode || M == V64HFmode || M == V64SImode \ + || M == V64SFmode || M == V64DImode || M == V64DFmode) \ + ? 64 \ + : (M == V32QImode || M == V32HImode || M == V32HFmode || M == V32SImode \ + || M == V32SFmode || M == V32DImode || M == V32DFmode) \ + ? 32 \ + : (M == V16QImode || M == V16HImode || M == V16HFmode || M == V16SImode \ + || M == V16SFmode || M == V16DImode || M == V16DFmode) \ + ? 
16 \ + : (M == V8QImode || M == V8HImode || M == V8HFmode || M == V8SImode \ + || M == V8SFmode || M == V8DImode || M == V8DFmode) \ + ? 8 \ + : (M == V4QImode || M == V4HImode || M == V4HFmode || M == V4SImode \ + || M == V4SFmode || M == V4DImode || M == V4DFmode) \ + ? 4 \ + : (M == V2QImode || M == V2HImode || M == V2HFmode || M == V2SImode \ + || M == V2SFmode || M == V2DImode || M == V2DFmode) \ + ? 2 \ + : 1) -- cgit v1.1 From 5cfe08555034b29f301dcfb99a3691c81b2e2def Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Mon, 29 Jun 2020 15:20:09 +0100 Subject: amdgcn: Add vec_extract for partial vectors Add vec_extract expanders for all valid pairs of vector types. gcc/ChangeLog: * config/gcn/gcn-protos.h (get_exec): Add prototypes for two variants. * config/gcn/gcn-valu.md (vec_extract): New define_expand. * config/gcn/gcn.cc (get_exec): Export the existing function. Add a new overload variant. --- gcc/config/gcn/gcn-protos.h | 2 ++ gcc/config/gcn/gcn-valu.md | 34 ++++++++++++++++++++++++++++++++++ gcc/config/gcn/gcn.cc | 9 ++++++++- 3 files changed, 44 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index 6300c1c..f9a1fc0 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -24,6 +24,8 @@ extern bool gcn_constant64_p (rtx); extern bool gcn_constant_p (rtx); extern rtx gcn_convert_mask_mode (rtx reg); extern unsigned int gcn_dwarf_register_number (unsigned int regno); +extern rtx get_exec (int64_t); +extern rtx get_exec (machine_mode mode); extern char * gcn_expand_dpp_shr_insn (machine_mode, const char *, int, int); extern void gcn_expand_epilogue (); extern rtx gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index c7be236..9ea60e1 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -808,6 +808,40 @@ (set_attr "exec" "none") (set_attr "laneselect" 
"yes")]) +(define_expand "vec_extract" + [(set (match_operand:V_ALL_ALT 0 "register_operand") + (vec_select:V_ALL_ALT + (match_operand:V_ALL 1 "register_operand") + (parallel [(match_operand 2 "immediate_operand")])))] + "MODE_VF (mode) < MODE_VF (mode) + && mode == mode" + { + int numlanes = GET_MODE_NUNITS (mode); + int firstlane = INTVAL (operands[2]) * numlanes; + rtx tmp; + + if (firstlane == 0) + { + /* A plain move will do. */ + tmp = operands[1]; + } else { + /* FIXME: optimize this by using DPP where available. */ + + rtx permutation = gen_reg_rtx (mode); + emit_insn (gen_vec_series (permutation, + GEN_INT (firstlane*4), + GEN_INT (4))); + + tmp = gen_reg_rtx (mode); + emit_insn (gen_ds_bpermute (tmp, permutation, operands[1], + get_exec (mode))); + } + + emit_move_insn (operands[0], + gen_rtx_SUBREG (mode, tmp, 0)); + DONE; + }) + (define_expand "extract_last_" [(match_operand: 0 "register_operand") (match_operand:DI 1 "gcn_alu_operand") diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index e1636f6..fdcf290 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -846,7 +846,7 @@ gcn_ira_change_pseudo_allocno_class (int regno, reg_class_t cl, /* Create a new DImode pseudo reg and emit an instruction to initialize it to VAL. */ -static rtx +rtx get_exec (int64_t val) { rtx reg = gen_reg_rtx (DImode); @@ -854,6 +854,13 @@ get_exec (int64_t val) return reg; } +rtx +get_exec (machine_mode mode) +{ + int vf = (VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1); + return get_exec (0xffffffffffffffffUL >> (64-vf)); +} + /* }}} */ /* {{{ Immediate constants. */ -- cgit v1.1 From 769a10d0fc45e4923d7eb631170a117529ad5e39 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Wed, 11 Mar 2020 16:39:54 +0000 Subject: amdgcn: vec_init for multiple vector sizes Implements vec_init when the input is a vector of smaller vectors, or of vector MEM types, or a smaller vector duplicated several times. gcc/ChangeLog: * config/gcn/gcn-valu.md (vec_init): New. 
* config/gcn/gcn.cc (GEN_VN): Add andvNsi3, subvNsi3. (GEN_VNM): Add gathervNm_expr. (GEN_VN_NOEXEC): Add vec_seriesvNsi. (gcn_expand_vector_init): Add initialization of vectors from smaller vectors. --- gcc/config/gcn/gcn-valu.md | 10 +++ gcc/config/gcn/gcn.cc | 159 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 143 insertions(+), 26 deletions(-) (limited to 'gcc') diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 9ea60e1..f708e58 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -893,6 +893,16 @@ DONE; }) +(define_expand "vec_init" + [(match_operand:V_ALL 0 "register_operand") + (match_operand:V_ALL_ALT 1)] + "mode == mode + && MODE_VF (mode) < MODE_VF (mode)" + { + gcn_expand_vector_init (operands[0], operands[1]); + DONE; + }) + ;; }}} ;; {{{ Scatter / Gather diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index fdcf290..3dc294c 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -1365,12 +1365,17 @@ GEN_VN (add,di3_vcc_zext_dup2, A(rtx dest, rtx src1, rtx src2, rtx vcc), A(dest, src1, src2, vcc)) GEN_VN (addc,si3, A(rtx dest, rtx src1, rtx src2, rtx vccout, rtx vccin), A(dest, src1, src2, vccout, vccin)) +GEN_VN (and,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) GEN_VN (ashl,si3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift)) GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec), A(dest, addr, src, exec)) +GEN_VNM (gather,_expr, A(rtx dest, rtx addr, rtx as, rtx vol), + A(dest, addr, as, vol)) GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src)) GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (sub,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src)) +GEN_VN_NOEXEC (vec_series,si, A(rtx dest, rtx x, rtx c), A(dest, x, c)) #undef GEN_VNM #undef GEN_VN @@ -1993,44 +1998,146 @@ regno_ok_for_index_p (int regno) void gcn_expand_vector_init 
(rtx op0, rtx vec) { - int64_t initialized_mask = 0; - int64_t curr_mask = 1; + rtx val[64]; machine_mode mode = GET_MODE (op0); int vf = GET_MODE_NUNITS (mode); + machine_mode addrmode = VnMODE (vf, DImode); + machine_mode offsetmode = VnMODE (vf, SImode); - rtx val = XVECEXP (vec, 0, 0); + int64_t mem_mask = 0; + int64_t item_mask[64]; + rtx ramp = gen_reg_rtx (offsetmode); + rtx addr = gen_reg_rtx (addrmode); - for (int i = 1; i < vf; i++) - if (rtx_equal_p (val, XVECEXP (vec, 0, i))) - curr_mask |= (int64_t) 1 << i; + int unit_size = GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op0))); + emit_insn (gen_mulvNsi3_dup (ramp, gen_rtx_REG (offsetmode, VGPR_REGNO (1)), + GEN_INT (unit_size))); - if (gcn_constant_p (val)) - emit_move_insn (op0, gcn_vec_constant (mode, val)); - else + bool simple_repeat = true; + + /* Expand nested vectors into one vector. */ + int item_count = XVECLEN (vec, 0); + for (int i = 0, j = 0; i < item_count; i++) + { + rtx item = XVECEXP (vec, 0, i); + machine_mode mode = GET_MODE (item); + int units = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1; + item_mask[j] = (((uint64_t)-1)>>(64-units)) << j; + + if (simple_repeat && i != 0) + simple_repeat = item == XVECEXP (vec, 0, i-1); + + /* If its a vector of values then copy them into the final location. */ + if (GET_CODE (item) == CONST_VECTOR) + { + for (int k = 0; k < units; k++) + val[j++] = XVECEXP (item, 0, k); + continue; + } + /* Otherwise, we have a scalar or an expression that expands... */ + + if (MEM_P (item)) + { + rtx base = XEXP (item, 0); + if (MEM_ADDR_SPACE (item) == DEFAULT_ADDR_SPACE + && REG_P (base)) + { + /* We have a simple vector load. We can put the addresses in + the vector, combine it with any other such MEMs, and load it + all with a single gather at the end. 
*/ + int64_t mask = ((0xffffffffffffffffUL + >> (64-GET_MODE_NUNITS (mode))) + << j); + rtx exec = get_exec (mask); + emit_insn (gen_subvNsi3 + (ramp, ramp, + gcn_vec_constant (offsetmode, j*unit_size), + ramp, exec)); + emit_insn (gen_addvNdi3_zext_dup2 + (addr, ramp, base, + (mem_mask ? addr : gcn_gen_undef (addrmode)), + exec)); + mem_mask |= mask; + } + else + /* The MEM is non-trivial, so let's load it independently. */ + item = force_reg (mode, item); + } + else if (!CONST_INT_P (item) && !CONST_DOUBLE_P (item)) + /* The item may be a symbol_ref, or something else non-trivial. */ + item = force_reg (mode, item); + + /* Duplicate the vector across each item. + It is either a smaller vector register that needs shifting, + or a MEM that needs loading. */ + val[j] = item; + j += units; + } + + int64_t initialized_mask = 0; + rtx prev = NULL; + + if (mem_mask) { - val = force_reg (GET_MODE_INNER (mode), val); - emit_insn (gen_vec_duplicatevNm (op0, val)); + emit_insn (gen_gathervNm_expr + (op0, gen_rtx_PLUS (addrmode, addr, + gen_rtx_VEC_DUPLICATE (addrmode, + const0_rtx)), + GEN_INT (DEFAULT_ADDR_SPACE), GEN_INT (0), + NULL, get_exec (mem_mask))); + prev = op0; + initialized_mask = mem_mask; } - initialized_mask |= curr_mask; - for (int i = 1; i < vf; i++) + + if (simple_repeat && item_count > 1 && !prev) + { + /* Special case for instances of {A, B, A, B, A, B, ....}, etc. 
*/ + rtx src = gen_rtx_SUBREG (mode, val[0], 0); + rtx input_vf_mask = GEN_INT (GET_MODE_NUNITS (GET_MODE (val[0]))-1); + + rtx permutation = gen_reg_rtx (VnMODE (vf, SImode)); + emit_insn (gen_vec_seriesvNsi (permutation, GEN_INT (0), GEN_INT (1))); + rtx mask_dup = gen_reg_rtx (VnMODE (vf, SImode)); + emit_insn (gen_vec_duplicatevNsi (mask_dup, input_vf_mask)); + emit_insn (gen_andvNsi3 (permutation, permutation, mask_dup)); + emit_insn (gen_ashlvNsi3 (permutation, permutation, GEN_INT (2))); + emit_insn (gen_ds_bpermutevNm (op0, permutation, src, get_exec (mode))); + return; + } + + /* Write each value, elementwise, but coalesce matching values into one + instruction, where possible. */ + for (int i = 0; i < vf; i++) if (!(initialized_mask & ((int64_t) 1 << i))) { - curr_mask = (int64_t) 1 << i; - rtx val = XVECEXP (vec, 0, i); - - for (int j = i + 1; j < vf; j++) - if (rtx_equal_p (val, XVECEXP (vec, 0, j))) - curr_mask |= (int64_t) 1 << j; - if (gcn_constant_p (val)) - emit_insn (gen_movvNm (op0, gcn_vec_constant (mode, val), op0, - get_exec (curr_mask))); + if (gcn_constant_p (val[i])) + emit_insn (gen_movvNm (op0, gcn_vec_constant (mode, val[i]), prev, + get_exec (item_mask[i]))); + else if (VECTOR_MODE_P (GET_MODE (val[i])) + && (GET_MODE_NUNITS (GET_MODE (val[i])) == vf + || i == 0)) + emit_insn (gen_movvNm (op0, gen_rtx_SUBREG (mode, val[i], 0), prev, + get_exec (item_mask[i]))); + else if (VECTOR_MODE_P (GET_MODE (val[i]))) + { + rtx permutation = gen_reg_rtx (VnMODE (vf, SImode)); + emit_insn (gen_vec_seriesvNsi (permutation, GEN_INT (-i*4), + GEN_INT (4))); + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_ds_bpermutevNm (tmp, permutation, + gen_rtx_SUBREG (mode, val[i], 0), + get_exec (-1))); + emit_insn (gen_movvNm (op0, tmp, prev, get_exec (item_mask[i]))); + } else { - val = force_reg (GET_MODE_INNER (mode), val); - emit_insn (gen_vec_duplicatevNm (op0, val, op0, - get_exec (curr_mask))); + rtx reg = force_reg (GET_MODE_INNER (mode), val[i]); + 
emit_insn (gen_vec_duplicatevNm (op0, reg, prev, + get_exec (item_mask[i]))); } - initialized_mask |= curr_mask; + + initialized_mask |= item_mask[i]; + prev = op0; } } -- cgit v1.1 From bf6b5c74a6f1927174091c73aa51401895ef92f0 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Thu, 22 Sep 2022 12:48:30 +0100 Subject: amdgcn: Add vector integer negate insn Another example of the vectorizer needing explicit insns where the scalar expander just works. gcc/ChangeLog: * config/gcn/gcn-valu.md (neg2): New define_expand. --- gcc/config/gcn/gcn-valu.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'gcc') diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index f708e58..00c0e3b 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -2391,6 +2391,19 @@ (set_attr "length" "8,8")]) ;; }}} +;; {{{ Int unops + +(define_expand "neg2" + [(match_operand:V_INT 0 "register_operand") + (match_operand:V_INT 1 "register_operand")] + "" + { + emit_insn (gen_sub3 (operands[0], gcn_vec_constant (mode, 0), + operands[1])); + DONE; + }) + +;; }}} ;; {{{ FP binops - special cases ; GCN does not directly provide a DFmode subtract instruction, so we do it by -- cgit v1.1 From bd9a05594d227cde79a67dc715bd9d82e9c464e9 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Sat, 10 Sep 2022 23:47:19 +0100 Subject: amdgcn: vector testsuite tweaks The testsuite needs a few tweaks following my patches to add multiple vector sizes for amdgcn. gcc/testsuite/ChangeLog: * gcc.dg/pr104464.c: Xfail on amdgcn. * gcc.dg/signbit-2.c: Likewise. * gcc.dg/signbit-5.c: Likewise. * gcc.dg/vect/bb-slp-68.c: Likewise. * gcc.dg/vect/bb-slp-cond-1.c: Change expectations on amdgcn. * gcc.dg/vect/bb-slp-subgroups-3.c: Likewise. * gcc.dg/vect/no-vfa-vect-depend-2.c: Change expectations for multiple vector sizes. * gcc.dg/vect/pr33953.c: Likewise. * gcc.dg/vect/pr65947-12.c: Likewise. * gcc.dg/vect/pr65947-13.c: Likewise. * gcc.dg/vect/pr80631-2.c: Likewise. 
* gcc.dg/vect/slp-reduc-4.c: Likewise. * gcc.dg/vect/trapv-vect-reduc-4.c: Likewise. * lib/target-supports.exp (available_vector_sizes): Add more sizes for amdgcn. --- gcc/testsuite/gcc.dg/pr104464.c | 2 ++ gcc/testsuite/gcc.dg/signbit-2.c | 5 +++-- gcc/testsuite/gcc.dg/signbit-5.c | 1 + gcc/testsuite/gcc.dg/vect/bb-slp-68.c | 5 +++-- gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c | 3 ++- gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-3.c | 5 ++++- gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c | 3 ++- gcc/testsuite/gcc.dg/vect/pr33953.c | 3 ++- gcc/testsuite/gcc.dg/vect/pr65947-12.c | 3 ++- gcc/testsuite/gcc.dg/vect/pr65947-13.c | 3 ++- gcc/testsuite/gcc.dg/vect/pr80631-2.c | 3 ++- gcc/testsuite/gcc.dg/vect/slp-reduc-4.c | 3 ++- gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c | 3 ++- gcc/testsuite/lib/target-supports.exp | 3 ++- 14 files changed, 31 insertions(+), 14 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/pr104464.c b/gcc/testsuite/gcc.dg/pr104464.c index ed6a22c..d36a286 100644 --- a/gcc/testsuite/gcc.dg/pr104464.c +++ b/gcc/testsuite/gcc.dg/pr104464.c @@ -9,3 +9,5 @@ foo(void) { f += (F)(f != (F){}[0]); } + +/* { dg-xfail-if "-fnon-call-exceptions unsupported" { amdgcn-*-* } } */ diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c index 2f2dc44..99a455b 100644 --- a/gcc/testsuite/gcc.dg/signbit-2.c +++ b/gcc/testsuite/gcc.dg/signbit-2.c @@ -20,6 +20,7 @@ void fun2(int32_t *x, int n) x[i] = (-x[i]) >> 30; } -/* { dg-final { scan-tree-dump {\s+>\s+\{ 0(, 0)+ \}} optimized { target vect_int } } } */ +/* Xfail amdgcn where vector truth type is not integer type. */ +/* { dg-final { scan-tree-dump {\s+>\s+\{ 0(, 0)+ \}} optimized { target vect_int xfail amdgcn-*-* } } } */ /* { dg-final { scan-tree-dump {\s+>\s+0} optimized { target { ! 
vect_int } } } } */ -/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */ +/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized { xfail amdgcn-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/signbit-5.c b/gcc/testsuite/gcc.dg/signbit-5.c index 2b119cd..0fad56c 100644 --- a/gcc/testsuite/gcc.dg/signbit-5.c +++ b/gcc/testsuite/gcc.dg/signbit-5.c @@ -4,6 +4,7 @@ /* This test does not work when the truth type does not match vector type. */ /* { dg-additional-options "-mno-avx512f" { target { i?86-*-* x86_64-*-* } } } */ /* { dg-additional-options "-march=armv8-a" { target aarch64_sve } } */ +/* { dg-xfail-run-if "truth type does not match vector type" { amdgcn-*-* } } */ #include diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-68.c b/gcc/testsuite/gcc.dg/vect/bb-slp-68.c index 8718031..e7573a1 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-68.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-68.c @@ -18,5 +18,6 @@ void foo () x[9] = z[3] + 1.; } -/* We want to have the store group split into 4, 2, 4 when using 32byte vectors. */ -/* { dg-final { scan-tree-dump-not "from scalars" "slp2" } } */ +/* We want to have the store group split into 4, 2, 4 when using 32byte vectors. + Unfortunately it does not work when 64-byte vectors are available. 
*/ +/* { dg-final { scan-tree-dump-not "from scalars" "slp2" { xfail amdgcn-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c index 4bd286b..1f5c621 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c @@ -46,5 +46,6 @@ int main () } /* { dg-final { scan-tree-dump {(no need for alias check [^\n]* when VF is 1|no alias between [^\n]* when [^\n]* is outside \(-16, 16\))} "vect" { target vect_element_align } } } */ -/* { dg-final { scan-tree-dump-times "loop vectorized" 1 "vect" { target vect_element_align } } } */ +/* { dg-final { scan-tree-dump-times "loop vectorized" 1 "vect" { target { vect_element_align && !amdgcn-*-* } } } } */ +/* { dg-final { scan-tree-dump-times "loop vectorized" 2 "vect" { target amdgcn-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-3.c b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-3.c index 03c062a..fb71991 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-3.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-subgroups-3.c @@ -42,4 +42,7 @@ main (int argc, char **argv) /* Because we disable the cost model, targets with variable-length vectors can end up vectorizing the store to a[0..7] on its own. With the cost model we do something sensible. */ -/* { dg-final { scan-tree-dump-times "optimized: basic block" 2 "slp2" { xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "optimized: basic block" 2 "slp2" { target { ! amdgcn-*-* } xfail vect_variable_length } } } */ + +/* amdgcn can do this in one vector. 
*/ +/* { dg-final { scan-tree-dump-times "optimized: basic block" 1 "slp2" { target amdgcn-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c index 1880d1e..8995837 100644 --- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c +++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-depend-2.c @@ -51,4 +51,5 @@ int main (void) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "dependence distance negative" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "dependence distance negative" 1 "vect" { target { ! vect_multiple_sizes } } } } */ +/* { dg-final { scan-tree-dump "dependence distance negative" "vect" { target vect_multiple_sizes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr33953.c b/gcc/testsuite/gcc.dg/vect/pr33953.c index 4dd54cd..d376cf9 100644 --- a/gcc/testsuite/gcc.dg/vect/pr33953.c +++ b/gcc/testsuite/gcc.dg/vect/pr33953.c @@ -29,6 +29,7 @@ void blockmove_NtoN_blend_noremap32 (const UINT32 *srcdata, int srcwidth, } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! vect_multiple_sizes } xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target vect_multiple_sizes xfail { vect_no_align && { ! 
vect_hw_misalign } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c index a47f414..9788eea 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-12.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c @@ -42,5 +42,6 @@ main (void) } /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target { vect_fold_extract_last && { ! vect_multiple_sizes } } } } } */ +/* { dg-final { scan-tree-dump "optimizing condition reduction with FOLD_EXTRACT_LAST" "vect" { target { vect_fold_extract_last && vect_multiple_sizes } } } } */ /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c index a703923..079b5f9 100644 --- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c +++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c @@ -44,4 +44,5 @@ main (void) /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { xfail vect_fold_extract_last } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target { vect_fold_extract_last && { ! 
vect_multiple_sizes } } } } } */ +/* { dg-final { scan-tree-dump "optimizing condition reduction with FOLD_EXTRACT_LAST" "vect" { target { vect_fold_extract_last && vect_multiple_sizes } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-2.c b/gcc/testsuite/gcc.dg/vect/pr80631-2.c index 61e1131..4e58627 100644 --- a/gcc/testsuite/gcc.dg/vect/pr80631-2.c +++ b/gcc/testsuite/gcc.dg/vect/pr80631-2.c @@ -75,4 +75,5 @@ main () /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */ /* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target vect_condition xfail vect_fold_extract_last } } } */ -/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */ +/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target { { ! vect_multiple_sizes } && vect_fold_extract_last } } } } */ +/* { dg-final { scan-tree-dump "optimizing condition reduction with FOLD_EXTRACT_LAST" "vect" { target { vect_multiple_sizes && vect_fold_extract_last } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c index cffb011..15f5c25 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c +++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-4.c @@ -59,6 +59,7 @@ int main (void) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_min_max } } } */ /* For variable-length SVE, the number of scalar statements in the reduction exceeds the number of elements in a 128-bit granule. */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { xfail { vect_no_int_min_max || { aarch64_sve && vect_variable_length } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { ! 
vect_multiple_sizes } xfail { vect_no_int_min_max || { aarch64_sve && vect_variable_length } } } } } */ +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_multiple_sizes } } } } */ /* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" { xfail { aarch64_sve && vect_variable_length } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c index f09c964..24cf1f7 100644 --- a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c +++ b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c @@ -50,6 +50,7 @@ int main (void) /* We can't handle the first loop with variable-length vectors and so fall back to the fixed-length mininum instead. */ -/* { dg-final { scan-tree-dump-times "Detected reduction\\." 3 "vect" { xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump-times "Detected reduction\\." 3 "vect" { target { ! vect_multiple_sizes } xfail vect_variable_length } } } */ +/* { dg-final { scan-tree-dump "Detected reduction\\." "vect" { target vect_multiple_sizes } } } */ /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { ! vect_no_int_min_max } } } } */ /* { dg-final { scan-tree-dump-times {using an in-order \(fold-left\) reduction} 1 "vect" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 7c9dd45..fdd88e6 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8400,7 +8400,8 @@ proc available_vector_sizes { } { } elseif { [istarget sparc*-*-*] } { lappend result 64 } elseif { [istarget amdgcn*-*-*] } { - lappend result 4096 + # 6 different lane counts, and 4 element sizes + lappend result 4096 2048 1024 512 256 128 64 32 16 8 4 2 } else { # The traditional default asumption. 
lappend result 128 -- cgit v1.1 From ee467644c53ee2f7d633a8e1f53603feafab4351 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 11 Oct 2022 11:34:55 +0200 Subject: tree-optimization/107212 - SLP reduction of reduction paths The following fixes an issue with how we handle epilogue generation for SLP reductions of reduction paths where the actual live lanes are not "canonical". We need to make sure to identify all live lanes as reductions and thus have to iterate over all participating SLP lanes when walking the reduction SSA use-def chain. Also the previous attempt likely to mitigate such issue in vectorizable_live_operation is misguided and has to be removed. PR tree-optimization/107212 * tree-vect-loop.cc (vectorizable_reduction): Make sure to set STMT_VINFO_REDUC_DEF for all live lanes in a SLP reduction. (vectorizable_live_operation): Do not pun to the SLP node representative for reduction epilogue generation. * gcc.dg/vect/pr107212-1.c: New testcase. * gcc.dg/vect/pr107212-2.c: Likewise. 
--- gcc/testsuite/gcc.dg/vect/pr107212-1.c | 27 +++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/vect/pr107212-2.c | 23 +++++++++++++++++++++++ gcc/tree-vect-loop.cc | 20 +++++++++++++------- 3 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr107212-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr107212-2.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/pr107212-1.c b/gcc/testsuite/gcc.dg/vect/pr107212-1.c new file mode 100644 index 0000000..5343f9b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr107212-1.c @@ -0,0 +1,27 @@ +/* { dg-do run } */ + +#include "tree-vect.h" + +int main() +{ + check_vect (); + + unsigned int tab[6][2] = { {69, 73}, {36, 40}, {24, 16}, + {16, 11}, {4, 5}, {3, 1} }; + + int sum_0 = 0; + int sum_1 = 0; + + for(int t=0; t<6; t++) { + sum_0 += tab[t][0]; + sum_1 += tab[t][1]; + } + + int x1 = (sum_0 < 100); + int x2 = (sum_0 > 200); + + if (x1 || x2 || sum_1 != 146) + __builtin_abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr107212-2.c b/gcc/testsuite/gcc.dg/vect/pr107212-2.c new file mode 100644 index 0000000..109c2b9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr107212-2.c @@ -0,0 +1,23 @@ +/* { dg-do run } */ + +#include "tree-vect.h" + +int sum_1 = 0; + +int main() +{ + check_vect (); + + unsigned int tab[6][2] = {{150, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}, {0, 0}}; + + int sum_0 = 0; + + for (int t = 0; t < 6; t++) { + sum_0 += tab[t][0]; + sum_1 += tab[t][0]; + } + + if (sum_0 < 100 || sum_0 > 200) + __builtin_abort(); + return 0; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 2536cc3..1996ecf 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -6822,10 +6822,20 @@ vectorizable_reduction (loop_vec_info loop_vinfo, } if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) only_slp_reduc_chain = false; - /* ??? 
For epilogue generation live members of the chain need + /* For epilogue generation live members of the chain need to point back to the PHI via their original stmt for - info_for_reduction to work. */ - if (STMT_VINFO_LIVE_P (vdef)) + info_for_reduction to work. For SLP we need to look at + all lanes here - even though we only will vectorize from + the SLP node with live lane zero the other live lanes also + need to be identified as part of a reduction to be able + to skip code generation for them. */ + if (slp_for_stmt_info) + { + for (auto s : SLP_TREE_SCALAR_STMTS (slp_for_stmt_info)) + if (STMT_VINFO_LIVE_P (s)) + STMT_VINFO_REDUC_DEF (vect_orig_stmt (s)) = phi_info; + } + else if (STMT_VINFO_LIVE_P (vdef)) STMT_VINFO_REDUC_DEF (def) = phi_info; gimple_match_op op; if (!gimple_extract_op (vdef->stmt, &op)) @@ -9601,10 +9611,6 @@ vectorizable_live_operation (vec_info *vinfo, all involved stmts together. */ else if (slp_index != 0) return true; - else - /* For SLP reductions the meta-info is attached to - the representative. */ - stmt_info = SLP_TREE_REPRESENTATIVE (slp_node); } stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); gcc_assert (reduc_info->is_reduc_info); -- cgit v1.1 From ad7cff63de618ed723e5df68f0cc9e950d5769e3 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Tue, 11 Oct 2022 11:20:25 +0200 Subject: Move TRUE case first in range-op.cc. It's incredibly annoying that some of the BRS_TRUE cases come after BRS_FALSE, if only because we're not consistent. Having random ordering increases the chances of thinkos when adapting the irange code to floats. gcc/ChangeLog: * range-op.cc (operator_equal::op1_range): Move BRS_TRUE case up. (operator_lt::op2_range): Same. (operator_le::op2_range): Same. (operator_gt::op2_range): Same. (operator_ge::op2_range): Same. 
--- gcc/range-op.cc | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'gcc') diff --git a/gcc/range-op.cc b/gcc/range-op.cc index df0735c..4d5a033 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -531,6 +531,11 @@ operator_equal::op1_range (irange &r, tree type, { switch (get_bool_state (r, lhs, type)) { + case BRS_TRUE: + // If it's true, the result is the same as OP2. + r = op2; + break; + case BRS_FALSE: // If the result is false, the only time we know anything is // if OP2 is a constant. @@ -543,11 +548,6 @@ operator_equal::op1_range (irange &r, tree type, r.set_varying (type); break; - case BRS_TRUE: - // If it's true, the result is the same as OP2. - r = op2; - break; - default: break; } @@ -841,14 +841,14 @@ operator_lt::op2_range (irange &r, tree type, { switch (get_bool_state (r, lhs, type)) { - case BRS_FALSE: - build_le (r, type, op1.upper_bound ()); - break; - case BRS_TRUE: build_gt (r, type, op1.lower_bound ()); break; + case BRS_FALSE: + build_le (r, type, op1.upper_bound ()); + break; + default: break; } @@ -952,14 +952,14 @@ operator_le::op2_range (irange &r, tree type, { switch (get_bool_state (r, lhs, type)) { - case BRS_FALSE: - build_lt (r, type, op1.upper_bound ()); - break; - case BRS_TRUE: build_ge (r, type, op1.lower_bound ()); break; + case BRS_FALSE: + build_lt (r, type, op1.upper_bound ()); + break; + default: break; } @@ -1062,14 +1062,14 @@ operator_gt::op2_range (irange &r, tree type, { switch (get_bool_state (r, lhs, type)) { - case BRS_FALSE: - build_ge (r, type, op1.lower_bound ()); - break; - case BRS_TRUE: build_lt (r, type, op1.upper_bound ()); break; + case BRS_FALSE: + build_ge (r, type, op1.lower_bound ()); + break; + default: break; } @@ -1173,14 +1173,14 @@ operator_ge::op2_range (irange &r, tree type, { switch (get_bool_state (r, lhs, type)) { - case BRS_FALSE: - build_gt (r, type, op1.lower_bound ()); - break; - case BRS_TRUE: build_le (r, type, 
op1.upper_bound ()); break; + case BRS_FALSE: + build_gt (r, type, op1.lower_bound ()); + break; + default: break; } -- cgit v1.1 From 4cbc312a31424ca4ff7cdd358fc47b9c9511a646 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 10 Oct 2022 14:56:01 +0200 Subject: Share common ordered comparison code with UN*_EXPR. Most unordered comparisons can use the result from the ordered version, if the operands are known not to be NAN or if the result is true. gcc/ChangeLog: * range-op-float.cc (class foperator_unordered_lt): New. (class foperator_relop_unknown): Remove (class foperator_unordered_le): New. (class foperator_unordered_gt): New. (class foperator_unordered_ge): New. (class foperator_unordered_equal): New. (floating_op_table::floating_op_table): Replace all UN_EXPR entries with their appropriate fop_unordered_* counterpart. --- gcc/range-op-float.cc | 140 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 128 insertions(+), 12 deletions(-) (limited to 'gcc') diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index 3cf117d..8dd4bcc 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -1132,24 +1132,140 @@ foperator_ordered::op1_range (frange &r, tree type, return true; } -// Placeholder for unimplemented relational operators. +class foperator_unordered_lt : public range_operator_float +{ + using range_operator_float::fold_range; +public: + bool fold_range (irange &r, tree type, + const frange &op1, const frange &op2, + relation_kind rel) const final override + { + if (op1.known_isnan () || op2.known_isnan ()) + { + r = range_true (type); + return true; + } + if (!fop_lt.fold_range (r, type, op1, op2, rel)) + return false; + // The result is the same as the ordered version when the + // comparison is true or when the operands cannot be NANs. 
+ if (finite_operands_p (op1, op2) || r == range_true (type)) + return true; + else + { + r = range_true_and_false (type); + return true; + } + } +} fop_unordered_lt; -class foperator_relop_unknown : public range_operator_float +class foperator_unordered_le : public range_operator_float { using range_operator_float::fold_range; +public: + bool fold_range (irange &r, tree type, + const frange &op1, const frange &op2, + relation_kind rel) const final override + { + if (op1.known_isnan () || op2.known_isnan ()) + { + r = range_true (type); + return true; + } + if (!fop_le.fold_range (r, type, op1, op2, rel)) + return false; + // The result is the same as the ordered version when the + // comparison is true or when the operands cannot be NANs. + if (finite_operands_p (op1, op2) || r == range_true (type)) + return true; + else + { + r = range_true_and_false (type); + return true; + } + } +} fop_unordered_le; +class foperator_unordered_gt : public range_operator_float +{ + using range_operator_float::fold_range; public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind) const final override + relation_kind rel) const final override { if (op1.known_isnan () || op2.known_isnan ()) - r = range_true (type); + { + r = range_true (type); + return true; + } + if (!fop_gt.fold_range (r, type, op1, op2, rel)) + return false; + // The result is the same as the ordered version when the + // comparison is true or when the operands cannot be NANs. 
+ if (finite_operands_p (op1, op2) || r == range_true (type)) + return true; else - r.set_varying (type); - return true; + { + r = range_true_and_false (type); + return true; + } + } +} fop_unordered_gt; + +class foperator_unordered_ge : public range_operator_float +{ + using range_operator_float::fold_range; +public: + bool fold_range (irange &r, tree type, + const frange &op1, const frange &op2, + relation_kind rel) const final override + { + if (op1.known_isnan () || op2.known_isnan ()) + { + r = range_true (type); + return true; + } + if (!fop_ge.fold_range (r, type, op1, op2, rel)) + return false; + // The result is the same as the ordered version when the + // comparison is true or when the operands cannot be NANs. + if (finite_operands_p (op1, op2) || r == range_true (type)) + return true; + else + { + r = range_true_and_false (type); + return true; + } + } +} fop_unordered_ge; + +class foperator_unordered_equal : public range_operator_float +{ + using range_operator_float::fold_range; +public: + bool fold_range (irange &r, tree type, + const frange &op1, const frange &op2, + relation_kind rel) const final override + { + if (op1.known_isnan () || op2.known_isnan ()) + { + r = range_true (type); + return true; + } + if (!fop_equal.fold_range (r, type, op1, op2, rel)) + return false; + // The result is the same as the ordered version when the + // comparison is true or when the operands cannot be NANs. + if (finite_operands_p (op1, op2) || r == range_true (type)) + return true; + else + { + r = range_true_and_false (type); + return true; + } } -} fop_unordered_relop_unknown; +} fop_unordered_equal; // Instantiate a range_op_table for floating point operations. 
@@ -1174,11 +1290,11 @@ floating_op_table::floating_op_table () set (LE_EXPR, fop_le); set (GT_EXPR, fop_gt); set (GE_EXPR, fop_ge); - set (UNLE_EXPR, fop_unordered_relop_unknown); - set (UNLT_EXPR, fop_unordered_relop_unknown); - set (UNGE_EXPR, fop_unordered_relop_unknown); - set (UNGT_EXPR, fop_unordered_relop_unknown); - set (UNEQ_EXPR, fop_unordered_relop_unknown); + set (UNLE_EXPR, fop_unordered_le); + set (UNLT_EXPR, fop_unordered_lt); + set (UNGE_EXPR, fop_unordered_ge); + set (UNGT_EXPR, fop_unordered_gt); + set (UNEQ_EXPR, fop_unordered_equal); set (ORDERED_EXPR, fop_ordered); set (UNORDERED_EXPR, fop_unordered); } -- cgit v1.1 From fe7371e7f93c247b5d0e257ca2a68064123cd018 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 10 Oct 2022 16:58:29 +0200 Subject: Implement op1_range operators for unordered comparisons. gcc/ChangeLog: * range-op-float.cc (foperator_unordered_le::op1_range): New. (foperator_unordered_le::op2_range): New. (foperator_unordered_gt::op1_range): New. (foperator_unordered_gt::op2_range): New. (foperator_unordered_ge::op1_range): New. (foperator_unordered_ge::op2_range): New. (foperator_unordered_equal::op1_range): New. 
--- gcc/range-op-float.cc | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) (limited to 'gcc') diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index 8dd4bcc..ef51b75 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -1162,6 +1162,8 @@ public: class foperator_unordered_le : public range_operator_float { using range_operator_float::fold_range; + using range_operator_float::op1_range; + using range_operator_float::op2_range; public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, @@ -1184,11 +1186,65 @@ public: return true; } } + bool op1_range (frange &r, tree type, + const irange &lhs, const frange &op2, + relation_kind rel) const final override; + bool op2_range (frange &r, tree type, + const irange &lhs, const frange &op1, + relation_kind rel) const final override; } fop_unordered_le; +bool +foperator_unordered_le::op1_range (frange &r, tree type, + const irange &lhs, const frange &op2, + relation_kind) const +{ + switch (get_bool_state (r, lhs, type)) + { + case BRS_TRUE: + build_le (r, type, op2); + break; + + case BRS_FALSE: + build_gt (r, type, op2); + r.clear_nan (); + break; + + default: + break; + } + return true; +} + +bool +foperator_unordered_le::op2_range (frange &r, + tree type, + const irange &lhs, + const frange &op1, + relation_kind) const +{ + switch (get_bool_state (r, lhs, type)) + { + case BRS_TRUE: + build_ge (r, type, op1); + break; + + case BRS_FALSE: + build_lt (r, type, op1); + r.clear_nan (); + break; + + default: + break; + } + return true; +} + class foperator_unordered_gt : public range_operator_float { using range_operator_float::fold_range; + using range_operator_float::op1_range; + using range_operator_float::op2_range; public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, @@ -1211,11 +1267,67 @@ public: return true; } } + bool op1_range (frange &r, tree type, + const irange &lhs, const frange &op2, + 
relation_kind rel) const final override; + bool op2_range (frange &r, tree type, + const irange &lhs, const frange &op1, + relation_kind rel) const final override; } fop_unordered_gt; +bool +foperator_unordered_gt::op1_range (frange &r, + tree type, + const irange &lhs, + const frange &op2, + relation_kind) const +{ + switch (get_bool_state (r, lhs, type)) + { + case BRS_TRUE: + build_gt (r, type, op2); + break; + + case BRS_FALSE: + build_le (r, type, op2); + r.clear_nan (); + break; + + default: + break; + } + return true; +} + +bool +foperator_unordered_gt::op2_range (frange &r, + tree type, + const irange &lhs, + const frange &op1, + relation_kind) const +{ + switch (get_bool_state (r, lhs, type)) + { + case BRS_TRUE: + build_lt (r, type, op1); + break; + + case BRS_FALSE: + build_ge (r, type, op1); + r.clear_nan (); + break; + + default: + break; + } + return true; +} + class foperator_unordered_ge : public range_operator_float { using range_operator_float::fold_range; + using range_operator_float::op1_range; + using range_operator_float::op2_range; public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, @@ -1238,11 +1350,66 @@ public: return true; } } + bool op1_range (frange &r, tree type, + const irange &lhs, const frange &op2, + relation_kind rel) const final override; + bool op2_range (frange &r, tree type, + const irange &lhs, const frange &op1, + relation_kind rel) const final override; } fop_unordered_ge; +bool +foperator_unordered_ge::op1_range (frange &r, + tree type, + const irange &lhs, + const frange &op2, + relation_kind) const +{ + switch (get_bool_state (r, lhs, type)) + { + case BRS_TRUE: + build_ge (r, type, op2); + break; + + case BRS_FALSE: + build_lt (r, type, op2); + r.clear_nan (); + break; + + default: + break; + } + return true; +} + +bool +foperator_unordered_ge::op2_range (frange &r, tree type, + const irange &lhs, + const frange &op1, + relation_kind) const +{ + switch (get_bool_state (r, lhs, type)) + { 
+ case BRS_TRUE: + build_le (r, type, op1); + break; + + case BRS_FALSE: + build_gt (r, type, op1); + r.clear_nan (); + break; + + default: + break; + } + return true; +} + class foperator_unordered_equal : public range_operator_float { using range_operator_float::fold_range; + using range_operator_float::op1_range; + using range_operator_float::op2_range; public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, @@ -1265,8 +1432,46 @@ public: return true; } } + bool op1_range (frange &r, tree type, + const irange &lhs, const frange &op2, + relation_kind rel) const final override; + bool op2_range (frange &r, tree type, + const irange &lhs, const frange &op1, + relation_kind rel) const final override + { + return op1_range (r, type, lhs, op1, rel); + } } fop_unordered_equal; +bool +foperator_unordered_equal::op1_range (frange &r, tree type, + const irange &lhs, + const frange &op2, + relation_kind) const +{ + switch (get_bool_state (r, lhs, type)) + { + case BRS_TRUE: + // If it's true, the result is the same as OP2 plus a NAN. + r = op2; + // Add both zeros if there's the possibility of zero equality. + frange_add_zeros (r, type); + // Add the possibility of a NAN. + r.update_nan (); + break; + + case BRS_FALSE: + // The false side indicates !NAN and not equal. We can at least + // represent !NAN. + r.set_varying (type); + r.clear_nan (); + break; + + default: + break; + } + return true; +} // Instantiate a range_op_table for floating point operations. static floating_op_table global_floating_table; -- cgit v1.1 From 706d8583706475fb103d1123e507f604dccb8fd3 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 10 Oct 2022 14:56:42 +0200 Subject: Implement ABS_EXPR operator for frange. Implementing ABS_EXPR allows us to fold certain __builtin_inf calls since they are expanded into calls involving ABS_EXPR. This is an adaptation of the integer version. gcc/ChangeLog: * range-op-float.cc (class foperator_abs): New. 
(floating_op_table::floating_op_table): Add ABS_EXPR entry. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/vrp-float-abs-1.c: New test. --- gcc/range-op-float.cc | 91 +++++++++++++++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/vrp-float-abs-1.c | 17 +++++ 2 files changed, 108 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/vrp-float-abs-1.c (limited to 'gcc') diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index ef51b75..283eb13 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -1132,6 +1132,95 @@ foperator_ordered::op1_range (frange &r, tree type, return true; } +class foperator_abs : public range_operator_float +{ + using range_operator_float::fold_range; + using range_operator_float::op1_range; +public: + bool fold_range (frange &r, tree type, + const frange &op1, const frange &, + relation_kind) const final override; + bool op1_range (frange &r, tree type, + const frange &lhs, const frange &op2, + relation_kind rel) const final override; +} fop_abs; + +bool +foperator_abs::fold_range (frange &r, tree type, + const frange &op1, const frange &op2, + relation_kind) const +{ + if (empty_range_varying (r, type, op1, op2)) + return true; + if (op1.known_isnan ()) + { + r.set_nan (type, /*sign=*/false); + return true; + } + + const REAL_VALUE_TYPE lh_lb = op1.lower_bound (); + const REAL_VALUE_TYPE lh_ub = op1.upper_bound (); + // Handle the easy case where everything is positive. + if (real_compare (GE_EXPR, &lh_lb, &dconst0) + && !real_iszero (&lh_lb, /*sign=*/true) + && !op1.maybe_isnan (/*sign=*/true)) + { + r = op1; + return true; + } + + REAL_VALUE_TYPE min = real_value_abs (&lh_lb); + REAL_VALUE_TYPE max = real_value_abs (&lh_ub); + // If the range contains zero then we know that the minimum value in the + // range will be zero. 
+ if (real_compare (LE_EXPR, &lh_lb, &dconst0) + && real_compare (GE_EXPR, &lh_ub, &dconst0)) + { + if (real_compare (GT_EXPR, &min, &max)) + max = min; + min = dconst0; + } + else + { + // If the range was reversed, swap MIN and MAX. + if (real_compare (GT_EXPR, &min, &max)) + std::swap (min, max); + } + + r.set (type, min, max); + if (op1.maybe_isnan ()) + r.update_nan (/*sign=*/false); + else + r.clear_nan (); + return true; +} + +bool +foperator_abs::op1_range (frange &r, tree type, + const frange &lhs, const frange &op2, + relation_kind) const +{ + if (empty_range_varying (r, type, lhs, op2)) + return true; + if (lhs.known_isnan ()) + { + r.set_nan (type); + return true; + } + + // Start with the positives because negatives are an impossible result. + frange positives (type, dconst0, frange_val_max (type)); + positives.update_nan (/*sign=*/false); + positives.intersect (lhs); + r = positives; + // Then add the negative of each pair: + // ABS(op1) = [5,20] would yield op1 => [-20,-5][5,20]. + r.union_ (frange (type, + real_value_negate (&positives.upper_bound ()), + real_value_negate (&positives.lower_bound ()))); + return true; +} + class foperator_unordered_lt : public range_operator_float { using range_operator_float::fold_range; @@ -1502,6 +1591,8 @@ floating_op_table::floating_op_table () set (UNEQ_EXPR, fop_unordered_equal); set (ORDERED_EXPR, fop_ordered); set (UNORDERED_EXPR, fop_unordered); + + set (ABS_EXPR, fop_abs); } // Return a pointer to the range_operator_float instance, if there is diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp-float-abs-1.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp-float-abs-1.c new file mode 100644 index 0000000..4b7b758 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp-float-abs-1.c @@ -0,0 +1,17 @@ +// { dg-do compile } +// { dg-options "-O2 -fno-thread-jumps -fdump-tree-evrp" } + +void link_error (); + +void +foo (double x, double y) +{ + if (x > y && __builtin_signbit (y) == 0) + { + // y == +INF is impossible. 
+ if (__builtin_isinf (y)) + link_error (); + } +} + +// { dg-final { scan-tree-dump-not "link_error" "evrp" } } -- cgit v1.1 From b65945e7fb8d0f6cf911d371926d25e7ea82db62 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 11 Oct 2022 15:57:09 +0200 Subject: i386: Fix up RTL checking ICE [PR107185] On Tue, Oct 11, 2022 at 04:03:16PM +0800, liuhongt via Gcc-patches wrote: > gcc/ChangeLog: > > * config/i386/i386.md (*notxor_1): New post_reload > define_insn_and_split. > (*notxorqi_1): Ditto. > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -10826,6 +10826,39 @@ (define_insn "*_1" > (set_attr "type" "alu, alu, msklog") > (set_attr "mode" "")]) > > +(define_insn_and_split "*notxor_1" > + [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm,r,?k") > + (not:SWI248 > + (xor:SWI248 > + (match_operand:SWI248 1 "nonimmediate_operand" "%0,0,k") > + (match_operand:SWI248 2 "" "r,,k")))) > + (clobber (reg:CC FLAGS_REG))] > + "ix86_binary_operator_ok (XOR, mode, operands)" > + "#" > + "&& reload_completed" > + [(parallel > + [(set (match_dup 0) > + (xor:SWI248 (match_dup 1) (match_dup 2))) > + (clobber (reg:CC FLAGS_REG))]) > + (set (match_dup 0) > + (not:SWI248 (match_dup 1)))] > +{ > + if (MASK_REGNO_P (REGNO (operands[0]))) This causes --enable-checking=yes,rtl,extra regression on gcc.dg/store_merging_13.c test on x86_64-linux: .../gcc/testsuite/gcc.dg/store_merging_13.c: In function 'f13': .../gcc/testsuite/gcc.dg/store_merging_13.c:189:1: internal compiler error: RTL check: expected code 'reg', have 'mem' in rhs_regno, at rtl.h:1932 0x7b0c8f rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, int, char const*) ../../gcc/rtl.cc:916 0x8e74be rhs_regno ../../gcc/rtl.h:1932 0x9785fd rhs_regno ./genrtl.h:120 0x9785fd gen_split_260(rtx_insn*, rtx_def**) ../../gcc/config/i386/i386.md:10846 0x23596dc split_insns(rtx_def*, rtx_insn*) ../../gcc/config/i386/i386.md:16392 0xfccd5a try_split(rtx_def*, rtx_insn*, int) ../../gcc/emit-rtl.cc:3799 
0x132e9d8 split_insn ../../gcc/recog.cc:3384 0x13359d5 split_all_insns() ../../gcc/recog.cc:3488 0x1335ae8 execute ../../gcc/recog.cc:4412 Please submit a full bug report, with preprocessed source (by using -freport-bug). Please include the complete backtrace with any bug report. See for instructions. Fixed thusly. 2022-10-11 Jakub Jelinek PR target/107185 * config/i386/i386.md (*notxor_1): Use MASK_REG_P (x) instead of MASK_REGNO_P (REGNO (x)). --- gcc/config/i386/i386.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9390dd5..8e84752 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -10843,7 +10843,7 @@ (set (match_dup 0) (not:SWI248 (match_dup 0)))] { - if (MASK_REGNO_P (REGNO (operands[0]))) + if (MASK_REG_P (operands[0])) { emit_insn (gen_kxnor (operands[0], operands[1], operands[2])); DONE; -- cgit v1.1 From 576d524559776ee6744f478da600939ca5c2d502 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Tue, 11 Oct 2022 16:00:33 +0200 Subject: Avoid calling tracer.trailer() twice. gcc/ChangeLog: * gimple-range-gori.cc (gori_compute::logical_combine): Avoid calling tracer.trailer(). 
--- gcc/gimple-range-gori.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc index b37d03c..5ff067c 100644 --- a/gcc/gimple-range-gori.cc +++ b/gcc/gimple-range-gori.cc @@ -810,8 +810,12 @@ gori_compute::logical_combine (vrange &r, enum tree_code code, } else res = false; - if (idx) - tracer.trailer (idx, "logical_combine", res, NULL_TREE, r); + if (idx && res) + { + tracer.print (idx, "logical_combine produced "); + r.dump (dump_file); + fputc ('\n', dump_file); + } } switch (code) -- cgit v1.1 From 2ceb4d531a303f3e70d8bb218c8759e6c0688f62 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Tue, 11 Oct 2022 15:02:01 -0400 Subject: c++ modules: lazy loading from within template [PR99377] Here when lazily loading the binding for f due to its first use from the template g, processing_template_decl is set which causes the call to note_vague_linkage_fn from module_state::read_cluster to have no effect, and thus we never push f onto deferred_fns and end up never emitting its definition despite needing it. The behavior of the lazy loading machinery shouldn't be sensitive to whether we're inside a template, so to that end this patch makes us clear processing_template_decl in the entrypoints lazy_load_binding and lazy_load_pendings. PR c++/99377 gcc/cp/ChangeLog: * module.cc (lazy_load_binding): Clear processing_template_decl. (lazy_load_pendings): Likewise. gcc/testsuite/ChangeLog: * g++.dg/modules/pr99377-2_a.C: New test. * g++.dg/modules/pr99377-2_b.C: New test. 
--- gcc/cp/module.cc | 8 ++++++++ gcc/testsuite/g++.dg/modules/pr99377-2_a.C | 6 ++++++ gcc/testsuite/g++.dg/modules/pr99377-2_b.C | 8 ++++++++ 3 files changed, 22 insertions(+) create mode 100644 gcc/testsuite/g++.dg/modules/pr99377-2_a.C create mode 100644 gcc/testsuite/g++.dg/modules/pr99377-2_b.C (limited to 'gcc') diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index 4d27ceb..7ffeefa 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -19083,6 +19083,10 @@ lazy_load_binding (unsigned mod, tree ns, tree id, binding_slot *mslot) timevar_start (TV_MODULE_IMPORT); + /* Make sure lazy loading from a template context behaves as if + from a non-template context. */ + processing_template_decl_sentinel ptds; + /* Stop GC happening, even in outermost loads (because our caller could well be building up a lookup set). */ function_depth++; @@ -19131,6 +19135,10 @@ lazy_load_binding (unsigned mod, tree ns, tree id, binding_slot *mslot) void lazy_load_pendings (tree decl) { + /* Make sure lazy loading from a template context behaves as if + from a non-template context. 
*/ + processing_template_decl_sentinel ptds; + tree key_decl; pending_key key; key.ns = find_pending_key (decl, &key_decl); diff --git a/gcc/testsuite/g++.dg/modules/pr99377-2_a.C b/gcc/testsuite/g++.dg/modules/pr99377-2_a.C new file mode 100644 index 0000000..98d1854 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/pr99377-2_a.C @@ -0,0 +1,6 @@ +// PR c++/99377 +// { dg-additional-options -fmodules-ts } +// { dg-module-cmi pr99377_2 } +export module pr99377_2; + +export inline void f() { } diff --git a/gcc/testsuite/g++.dg/modules/pr99377-2_b.C b/gcc/testsuite/g++.dg/modules/pr99377-2_b.C new file mode 100644 index 0000000..1d5d79c --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/pr99377-2_b.C @@ -0,0 +1,8 @@ +// PR c++/99377 +// { dg-additional-options -fmodules-ts } +// { dg-do link } +import pr99377_2; + +template void g() { f(); } + +int main() { f(); } -- cgit v1.1 From 9736a42e1fb8df30d72cf28594d9046bf50200c1 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Tue, 11 Oct 2022 15:02:23 -0400 Subject: c++ modules: ICE with templated friend and std namespace [PR100134] The function depset::hash::add_binding_entity has an assert verifying that if a namespace contains an exported entity, then the namespace must have been opened in the module purview: if (data->hash->add_namespace_entities (decl, data->partitions)) { /* It contains an exported thing, so it is exported. */ gcc_checking_assert (DECL_MODULE_PURVIEW_P (decl)); DECL_MODULE_EXPORT_P (decl) = true; } We're tripping over this assert in the below testcase because by instantiating and exporting std::A, we in turn define and export the hidden friend std::f(A) without ever having opened the enclosing namespace std within the module purview, and thus DECL_MODULE_PURVIEW_P for std is false. It's important that the enclosing namespace is std here: if we use a different namespace then the ICE disappears. 
This probably has something to do with us predefining std via push_namespace from cxx_init_decl_processing (which makes it look like we've opened it within the TU), whereas with another namespace we would instead lazily create its NAMESPACE_DECL from add_imported_namespace. Since templated friend functions are special in that they give us a way to introduce a namespace-scope function without having to explicitly open the namespace, this patch proposes to fix this ICE by propagating DECL_MODULE_PURVIEW_P from the introduced function to the enclosing namespace during tsubst_friend_function. PR c++/100134 gcc/cp/ChangeLog: * pt.cc (tsubst_friend_function): Propagate DECL_MODULE_PURVIEW_P from the introduced namespace-scope function to the namespace. gcc/testsuite/ChangeLog: * g++.dg/modules/tpl-friend-8_a.H: New test. * g++.dg/modules/tpl-friend-8_b.C: New test. --- gcc/cp/pt.cc | 8 ++++++++ gcc/testsuite/g++.dg/modules/tpl-friend-8_a.H | 9 +++++++++ gcc/testsuite/g++.dg/modules/tpl-friend-8_b.C | 8 ++++++++ 3 files changed, 25 insertions(+) create mode 100644 gcc/testsuite/g++.dg/modules/tpl-friend-8_a.H create mode 100644 gcc/testsuite/g++.dg/modules/tpl-friend-8_b.C (limited to 'gcc') diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 5b9fc58..b80e7ff 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -11448,6 +11448,14 @@ tsubst_friend_function (tree decl, tree args) by duplicate_decls. */ new_friend = old_decl; } + + /* We've just introduced a namespace-scope function in the purview + without necessarily having opened the enclosing namespace, so + make sure the namespace is in the purview now too. 
*/ + if (modules_p () + && DECL_MODULE_PURVIEW_P (STRIP_TEMPLATE (new_friend)) + && TREE_CODE (DECL_CONTEXT (new_friend)) == NAMESPACE_DECL) + DECL_MODULE_PURVIEW_P (DECL_CONTEXT (new_friend)) = true; } else { diff --git a/gcc/testsuite/g++.dg/modules/tpl-friend-8_a.H b/gcc/testsuite/g++.dg/modules/tpl-friend-8_a.H new file mode 100644 index 0000000..bd22904 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/tpl-friend-8_a.H @@ -0,0 +1,9 @@ +// PR c++/100134 +// { dg-additional-options -fmodule-header } +// { dg-module-cmi {} } + +namespace std { + template struct A { + friend void f(A) { } + }; +} diff --git a/gcc/testsuite/g++.dg/modules/tpl-friend-8_b.C b/gcc/testsuite/g++.dg/modules/tpl-friend-8_b.C new file mode 100644 index 0000000..76d7447 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/tpl-friend-8_b.C @@ -0,0 +1,8 @@ +// PR c++/100134 +// { dg-additional-options -fmodules-ts } +// { dg-module-cmi pr100134 } +export module pr100134; + +import "tpl-friend-8_a.H"; + +export std::A a; -- cgit v1.1 From 53955284c031a17e6e49e730ef8947fe557ff35e Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Tue, 11 Oct 2022 20:37:42 +0200 Subject: Fortran: check types of source expressions before conversion [PR107215] gcc/fortran/ChangeLog: PR fortran/107215 * arith.cc (gfc_int2int): Check validity of type of source expr. (gfc_int2real): Likewise. (gfc_int2complex): Likewise. (gfc_real2int): Likewise. (gfc_real2real): Likewise. (gfc_complex2int): Likewise. (gfc_complex2real): Likewise. (gfc_complex2complex): Likewise. (gfc_log2log): Likewise. (gfc_log2int): Likewise. (gfc_int2log): Likewise. gcc/testsuite/ChangeLog: PR fortran/107215 * gfortran.dg/pr107215.f90: New test. 
--- gcc/fortran/arith.cc | 33 +++++++++++++++++++++++++++++++++ gcc/testsuite/gfortran.dg/pr107215.f90 | 17 +++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 gcc/testsuite/gfortran.dg/pr107215.f90 (limited to 'gcc') diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc index 086b1f8..9e079e4 100644 --- a/gcc/fortran/arith.cc +++ b/gcc/fortran/arith.cc @@ -2040,6 +2040,9 @@ gfc_int2int (gfc_expr *src, int kind) gfc_expr *result; arith rc; + if (src->ts.type != BT_INTEGER) + return NULL; + result = gfc_get_constant_expr (BT_INTEGER, kind, &src->where); mpz_set (result->value.integer, src->value.integer); @@ -2085,6 +2088,9 @@ gfc_int2real (gfc_expr *src, int kind) gfc_expr *result; arith rc; + if (src->ts.type != BT_INTEGER) + return NULL; + result = gfc_get_constant_expr (BT_REAL, kind, &src->where); mpfr_set_z (result->value.real, src->value.integer, GFC_RND_MODE); @@ -2116,6 +2122,9 @@ gfc_int2complex (gfc_expr *src, int kind) gfc_expr *result; arith rc; + if (src->ts.type != BT_INTEGER) + return NULL; + result = gfc_get_constant_expr (BT_COMPLEX, kind, &src->where); mpc_set_z (result->value.complex, src->value.integer, GFC_MPC_RND_MODE); @@ -2150,6 +2159,9 @@ gfc_real2int (gfc_expr *src, int kind) arith rc; bool did_warn = false; + if (src->ts.type != BT_REAL) + return NULL; + result = gfc_get_constant_expr (BT_INTEGER, kind, &src->where); gfc_mpfr_to_mpz (result->value.integer, src->value.real, &src->where); @@ -2196,6 +2208,9 @@ gfc_real2real (gfc_expr *src, int kind) arith rc; bool did_warn = false; + if (src->ts.type != BT_REAL) + return NULL; + result = gfc_get_constant_expr (BT_REAL, kind, &src->where); mpfr_set (result->value.real, src->value.real, GFC_RND_MODE); @@ -2310,6 +2325,9 @@ gfc_complex2int (gfc_expr *src, int kind) arith rc; bool did_warn = false; + if (src->ts.type != BT_COMPLEX) + return NULL; + result = gfc_get_constant_expr (BT_INTEGER, kind, &src->where); gfc_mpfr_to_mpz (result->value.integer, mpc_realref 
(src->value.complex), @@ -2372,6 +2390,9 @@ gfc_complex2real (gfc_expr *src, int kind) arith rc; bool did_warn = false; + if (src->ts.type != BT_COMPLEX) + return NULL; + result = gfc_get_constant_expr (BT_REAL, kind, &src->where); mpc_real (result->value.real, src->value.complex, GFC_RND_MODE); @@ -2439,6 +2460,9 @@ gfc_complex2complex (gfc_expr *src, int kind) arith rc; bool did_warn = false; + if (src->ts.type != BT_COMPLEX) + return NULL; + result = gfc_get_constant_expr (BT_COMPLEX, kind, &src->where); mpc_set (result->value.complex, src->value.complex, GFC_MPC_RND_MODE); @@ -2504,6 +2528,9 @@ gfc_log2log (gfc_expr *src, int kind) { gfc_expr *result; + if (src->ts.type != BT_LOGICAL) + return NULL; + result = gfc_get_constant_expr (BT_LOGICAL, kind, &src->where); result->value.logical = src->value.logical; @@ -2518,6 +2545,9 @@ gfc_log2int (gfc_expr *src, int kind) { gfc_expr *result; + if (src->ts.type != BT_LOGICAL) + return NULL; + result = gfc_get_constant_expr (BT_INTEGER, kind, &src->where); mpz_set_si (result->value.integer, src->value.logical); @@ -2532,6 +2562,9 @@ gfc_int2log (gfc_expr *src, int kind) { gfc_expr *result; + if (src->ts.type != BT_INTEGER) + return NULL; + result = gfc_get_constant_expr (BT_LOGICAL, kind, &src->where); result->value.logical = (mpz_cmp_si (src->value.integer, 0) != 0); diff --git a/gcc/testsuite/gfortran.dg/pr107215.f90 b/gcc/testsuite/gfortran.dg/pr107215.f90 new file mode 100644 index 0000000..2c2a0ca --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr107215.f90 @@ -0,0 +1,17 @@ +! { dg-do compile } +! PR fortran/107215 - ICE in gfc_real2real and gfc_complex2complex +! Contributed by G.Steinmetz + +program p + double precision, parameter :: z = 1.0d0 + complex :: x(1) + real :: y(1) + x = [real :: -'1'] * z ! { dg-error "Operand of unary numeric operator" } + y = z * [real :: -'1'] ! { dg-error "Operand of unary numeric operator" } + x = [real :: -(.true.)] * z ! 
{ dg-error "Operand of unary numeric operator" } + y = z * [real :: -(.true.)] ! { dg-error "Operand of unary numeric operator" } + x = [complex :: -'1'] * z ! { dg-error "Operand of unary numeric operator" } + y = z * [complex :: -'1'] ! { dg-error "Operand of unary numeric operator" } + x = [complex :: -(.true.)] * z ! { dg-error "Operand of unary numeric operator" } + y = z * [complex :: -(.true.)] ! { dg-error "Operand of unary numeric operator" } +end -- cgit v1.1 From 2c328e58c5d7cca10dd404ad8f81ec7664db1fbf Mon Sep 17 00:00:00 2001 From: Eric Botcazou Date: Wed, 12 Oct 2022 00:38:04 +0200 Subject: Enable support for atomic primitives on SPARC/Linux The SPARC/Linux port is very similar to the SPARC/Solaris port nowadays so it makes sense to copy the setting of the support for atomic primitives. This fixes the single regression in the gnat.dg testsuite: FAIL: gnat.dg/prot7.adb (test for excess errors) gcc/ada/ * libgnat/system-linux-sparc.ads (Support_Atomic_Primitives): New constant set to True. --- gcc/ada/libgnat/system-linux-sparc.ads | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/ada/libgnat/system-linux-sparc.ads b/gcc/ada/libgnat/system-linux-sparc.ads index cc502da..6d4ee38 100644 --- a/gcc/ada/libgnat/system-linux-sparc.ads +++ b/gcc/ada/libgnat/system-linux-sparc.ads @@ -133,6 +133,7 @@ private Stack_Check_Probes : constant Boolean := True; Stack_Check_Limits : constant Boolean := False; Support_Aggregates : constant Boolean := True; + Support_Atomic_Primitives : constant Boolean := True; Support_Composite_Assign : constant Boolean := True; Support_Composite_Compare : constant Boolean := True; Support_Long_Shifts : constant Boolean := True; -- cgit v1.1 From ab332cd78d083edb2fddaa3c02578cafade12725 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 12 Oct 2022 00:17:24 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 245 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/ada/ChangeLog | 5 + gcc/cp/ChangeLog | 12 +++ gcc/fortran/ChangeLog | 15 +++ gcc/testsuite/ChangeLog | 75 +++++++++++++++ 6 files changed, 353 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bc10014..a44f661 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,248 @@ +2022-10-11 Aldy Hernandez + + * gimple-range-gori.cc (gori_compute::logical_combine): Avoid + calling tracer.trailer(). + +2022-10-11 Jakub Jelinek + + PR target/107185 + * config/i386/i386.md (*notxor_1): Use MASK_REG_P (x) instead of + MASK_REGNO_P (REGNO (x)). + +2022-10-11 Aldy Hernandez + + * range-op-float.cc (class foperator_abs): New. + (floating_op_table::floating_op_table): Add ABS_EXPR entry. + +2022-10-11 Aldy Hernandez + + * range-op-float.cc (foperator_unordered_le::op1_range): New. + (foperator_unordered_le::op2_range): New. + (foperator_unordered_gt::op1_range): New. + (foperator_unordered_gt::op2_range): New. + (foperator_unordered_ge::op1_range): New. + (foperator_unordered_ge::op2_range): New. + (foperator_unordered_equal::op1_range): New. + +2022-10-11 Aldy Hernandez + + * range-op-float.cc (class foperator_unordered_lt): New. + (class foperator_relop_unknown): Remove + (class foperator_unordered_le): New. + (class foperator_unordered_gt): New. + (class foperator_unordered_ge): New. + (class foperator_unordered_equal): New. + (floating_op_table::floating_op_table): Replace all UN_EXPR + entries with their appropriate fop_unordered_* counterpart. + +2022-10-11 Aldy Hernandez + + * range-op.cc (operator_equal::op1_range): Move BRS_TRUE case up. + (operator_lt::op2_range): Same. + (operator_le::op2_range): Same. + (operator_gt::op2_range): Same. + (operator_ge::op2_range): Same. 
+ +2022-10-11 Richard Biener + + PR tree-optimization/107212 + * tree-vect-loop.cc (vectorizable_reduction): Make sure to + set STMT_VINFO_REDUC_DEF for all live lanes in a SLP + reduction. + (vectorizable_live_operation): Do not pun to the SLP + node representative for reduction epilogue generation. + +2022-10-11 Andrew Stubbs + + * config/gcn/gcn-valu.md (neg2): New define_expand. + +2022-10-11 Andrew Stubbs + + * config/gcn/gcn-valu.md (vec_init): New. + * config/gcn/gcn.cc (GEN_VN): Add andvNsi3, subvNsi3. + (GEN_VNM): Add gathervNm_expr. + (GEN_VN_NOEXEC): Add vec_seriesvNsi. + (gcn_expand_vector_init): Add initialization of vectors from smaller + vectors. + +2022-10-11 Andrew Stubbs + + * config/gcn/gcn-protos.h (get_exec): Add prototypes for two variants. + * config/gcn/gcn-valu.md + (vec_extract): New define_expand. + * config/gcn/gcn.cc (get_exec): Export the existing function. Add a + new overload variant. + +2022-10-11 Andrew Stubbs + + * config/gcn/gcn-valu.md + (2): Use MODE_VF. + (2): Likewise. + * config/gcn/gcn.h (MODE_VF): New macro. + +2022-10-11 Andrew Stubbs + + * config/gcn/gcn-modes.def (VECTOR_MODE): Add new modes + V32QI, V32HI, V32SI, V32DI, V32TI, V32HF, V32SF, V32DF, + V16QI, V16HI, V16SI, V16DI, V16TI, V16HF, V16SF, V16DF, + V8QI, V8HI, V8SI, V8DI, V8TI, V8HF, V8SF, V8DF, + V4QI, V4HI, V4SI, V4DI, V4TI, V4HF, V4SF, V4DF, + V2QI, V2HI, V2SI, V2DI, V2TI, V2HF, V2SF, V2DF. + (ADJUST_ALIGNMENT): Likewise. + * config/gcn/gcn-protos.h (gcn_full_exec): Delete. + (gcn_full_exec_reg): Delete. + (gcn_scalar_exec): Delete. + (gcn_scalar_exec_reg): Delete. + (vgpr_1reg_mode_p): Use inner mode to identify vector registers. + (vgpr_2reg_mode_p): Likewise. + (vgpr_vector_mode_p): Use VECTOR_MODE_P. + * config/gcn/gcn-valu.md (V_QI, V_HI, V_HF, V_SI, V_SF, V_DI, V_DF, + V_QIHI, V_1REG, V_INT_1REG, V_INT_1REG_ALT, V_FP_1REG, V_2REG, V_noQI, + V_noHI, V_INT_noQI, V_INT_noHI, V_ALL, V_ALL_ALT, V_INT, V_FP): + Add additional vector modes. 
+ (V64_SI, V64_DI, V64_ALL, V64_FP): New iterators. + (scalar_mode, SCALAR_MODE, vnsi, VnSI, vndi, VnDI, sdwa): + Add additional vector mode mappings. + (mov): Implement vector length conversions. + (ldexp3): Use VnSI. + (frexp_exp2): Likewise. + (VCVT_MODE, VCVT_FMODE, VCVT_IMODE): Add additional vector modes. + (reduc__scal_): Use V64_ALL. + (fold_left_plus_): Use V64_FP. + (*_dpp_shr_): Use V64_1REG. + (*_dpp_shr_): Use V64_DI. + (*plus_carry_dpp_shr_): Use V64_INT_1REG. + (*plus_carry_in_dpp_shr_): Use V64_SI. + (*plus_carry_dpp_shr_): Use V64_DI. + (mov_from_lane63_): Use V64_2REG. + * config/gcn/gcn.cc (VnMODE): New function. + (gcn_can_change_mode_class): Support multiple vector sizes. + (gcn_modes_tieable_p): Likewise. + (gcn_operand_part): Likewise. + (gcn_scalar_exec): Delete function. + (gcn_scalar_exec_reg): Delete function. + (gcn_full_exec): Delete function. + (gcn_full_exec_reg): Delete function. + (gcn_inline_fp_constant_p): Support multiple vector sizes. + (gcn_fp_constant_p): Likewise. + (A): New macro. + (GEN_VN_NOEXEC): New macro. + (GEN_VNM_NOEXEC): New macro. + (GEN_VN): New macro. + (GEN_VNM): New macro. + (GET_VN_FN): New macro. + (CODE_FOR): New macro. + (CODE_FOR_OP): New macro. + (gen_mov_with_exec): Delete function. + (gen_duplicate_load): Delete function. + (gcn_expand_vector_init): Support multiple vector sizes. + (strided_constant): Likewise. + (gcn_addr_space_legitimize_address): Likewise. + (gcn_expand_scalar_to_vector_address): Likewise. + (gcn_expand_scaled_offsets): Likewise. + (gcn_secondary_reload): Likewise. + (gcn_valid_cvt_p): Likewise. + (gcn_expand_builtin_1): Likewise. + (gcn_make_vec_perm_address): Likewise. + (gcn_vectorize_vec_perm_const): Likewise. + (gcn_vector_mode_supported_p): Likewise. + (gcn_autovectorize_vector_modes): New hook. + (gcn_related_vector_mode): Support multiple vector sizes. + (gcn_expand_dpp_shr_insn): Add FIXME comment. + (gcn_md_reorg): Support multiple vector sizes. + (print_reg): Likewise. 
+ (print_operand): Likewise. + (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES): New hook. + +2022-10-11 Andre Vieira + + * tree-if-conv.cc (if_convertible_loop_p_1): Move ordering of loop bb's from + here... + (tree_if_conversion): ... to here. Also call bitfield lowering when + appropriate. + (version_loop_for_if_conversion): Adapt to enable loop versioning when we only + need to lower bitfields. + (ifcvt_split_critical_edges): Relax condition of expected loop form as this is + checked earlier. + (get_bitfield_rep): New function. + (lower_bitfield): Likewise. + (bitfields_to_lower_p): Likewise. + (need_to_lower_bitfields): New global boolean. + (need_to_ifcvt): Likewise. + * tree-vect-data-refs.cc (vect_find_stmt_data_reference): Improve diagnostic + message. + * tree-vect-patterns.cc (vect_recog_temp_ssa_var): Add default value for last + parameter. + (vect_recog_bitfield_ref_pattern): New. + (vect_recog_bit_insert_pattern): New. + +2022-10-11 liuhongt + + PR target/107093 + * config/i386/i386.md (*notxor_1): New post_reload + define_insn_and_split. + (*notxorqi_1): Ditto. + +2022-10-11 Aldy Hernandez + + PR tree-optimization/107195 + * value-range.cc (irange::set_range_from_nonzero_bits): Set range + to [0,0] when nonzero mask is 0. + +2022-10-11 Olivier Hainque + Olivier Hainque + + * configure: Regenerate. + +2022-10-11 Olivier Hainque + + * config.gcc (*vxworks*): Add t-slibgcc fragment + if enable_shared. + +2022-10-11 Olivier Hainque + + * config/vxworks.h (VX_LGCC_EH_SO0, VX_LGCC_EH_SO1): New + internal macros. + (VXWORKS_LIBGCC_SPEC): Use them and document. + +2022-10-11 Martin Liska + + * gimple-range-op.cc: Add override keyword. + +2022-10-11 Eugene Rozenfeld + + PR debug/107193 + * tree-cfg.cc (assign_discriminators): Move declaration of cur_locus_e + out of the loop. + +2022-10-11 Liwei Xu + liuhongt + + PR tree-optimization/54346 + * match.pd: Merge the index of VCST then generates the new vec_perm. 
+ +2022-10-11 Jeff Law + + PR rtl-optimization/107182 + * cfgrtl.cc (fixup_reorder_chain): When optimizing a jump to a + return, clear EDGE_CROSSING on the appropriate edge. + +2022-10-11 Ju-Zhe Zhong + + * config/riscv/riscv-vector-builtins.cc (struct vector_type_info): Move + from config/riscv/riscv-vector-builtins.h. + (DEF_RVV_TYPE): Change USER_NAME to NAME. + (register_vector_type): Change user_name to name. + * config/riscv/riscv-vector-builtins.def (DEF_RVV_TYPE): Change + USER_NAME to NAME. + * config/riscv/riscv-vector-builtins.h (struct vector_type_info): Move + to riscv-vector-builtins.cc. + (DEF_RVV_TYPE): Change USER_NAME to NAME. + +2022-10-11 Ju-Zhe Zhong + + * config/riscv/riscv.md: Add vsetvl instruction type. + 2022-10-10 Nathan Sidwell * common.opt (-fabi-version=): Document 18. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index ee52440..6f50c16 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20221011 +20221012 diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index fc3bc97..e13486a 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,8 @@ +2022-10-11 Eric Botcazou + + * libgnat/system-linux-sparc.ads (Support_Atomic_Primitives): New + constant set to True. + 2022-10-06 Eric Botcazou * gcc-interface/ada-tree.def (LOAD_EXPR): New expression code. diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 50e5f3e..5e985b6 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,15 @@ +2022-10-11 Patrick Palka + + PR c++/100134 + * pt.cc (tsubst_friend_function): Propagate DECL_MODULE_PURVIEW_P + from the introduced namespace-scope function to the namespace. + +2022-10-11 Patrick Palka + + PR c++/99377 + * module.cc (lazy_load_binding): Clear processing_template_decl. + (lazy_load_pendings): Likewise. 
+ 2022-10-10 Nathan Sidwell * mangle.cc (write_prefix): Add VAR_DECL & FIELD_DECL to diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 094a11d..7c86770 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,18 @@ +2022-10-11 Harald Anlauf + + PR fortran/107215 + * arith.cc (gfc_int2int): Check validity of type of source expr. + (gfc_int2real): Likewise. + (gfc_int2complex): Likewise. + (gfc_real2int): Likewise. + (gfc_real2real): Likewise. + (gfc_complex2int): Likewise. + (gfc_complex2real): Likewise. + (gfc_complex2complex): Likewise. + (gfc_log2log): Likewise. + (gfc_log2int): Likewise. + (gfc_int2log): Likewise. + 2022-10-10 Jakub Jelinek * trans-openmp.cc (gfc_trans_omp_assume): Use create_tmp_var_raw diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7230773..350c852 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,78 @@ +2022-10-11 Harald Anlauf + + PR fortran/107215 + * gfortran.dg/pr107215.f90: New test. + +2022-10-11 Patrick Palka + + PR c++/100134 + * g++.dg/modules/tpl-friend-8_a.H: New test. + * g++.dg/modules/tpl-friend-8_b.C: New test. + +2022-10-11 Patrick Palka + + PR c++/99377 + * g++.dg/modules/pr99377-2_a.C: New test. + * g++.dg/modules/pr99377-2_b.C: New test. + +2022-10-11 Aldy Hernandez + + * gcc.dg/tree-ssa/vrp-float-abs-1.c: New test. + +2022-10-11 Richard Biener + + PR tree-optimization/107212 + * gcc.dg/vect/pr107212-1.c: New testcase. + * gcc.dg/vect/pr107212-2.c: Likewise. + +2022-10-11 Andrew Stubbs + + * gcc.dg/pr104464.c: Xfail on amdgcn. + * gcc.dg/signbit-2.c: Likewise. + * gcc.dg/signbit-5.c: Likewise. + * gcc.dg/vect/bb-slp-68.c: Likewise. + * gcc.dg/vect/bb-slp-cond-1.c: Change expectations on amdgcn. + * gcc.dg/vect/bb-slp-subgroups-3.c: Likewise. + * gcc.dg/vect/no-vfa-vect-depend-2.c: Change expectations for multiple + vector sizes. + * gcc.dg/vect/pr33953.c: Likewise. + * gcc.dg/vect/pr65947-12.c: Likewise. + * gcc.dg/vect/pr65947-13.c: Likewise. 
+ * gcc.dg/vect/pr80631-2.c: Likewise. + * gcc.dg/vect/slp-reduc-4.c: Likewise. + * gcc.dg/vect/trapv-vect-reduc-4.c: Likewise. + * lib/target-supports.exp (available_vector_sizes): Add more sizes + for amdgcn. + +2022-10-11 Andre Vieira + + * gcc.dg/vect/vect-bitfield-read-1.c: New test. + * gcc.dg/vect/vect-bitfield-read-2.c: New test. + * gcc.dg/vect/vect-bitfield-read-3.c: New test. + * gcc.dg/vect/vect-bitfield-read-4.c: New test. + * gcc.dg/vect/vect-bitfield-read-5.c: New test. + * gcc.dg/vect/vect-bitfield-read-6.c: New test. + * gcc.dg/vect/vect-bitfield-write-1.c: New test. + * gcc.dg/vect/vect-bitfield-write-2.c: New test. + * gcc.dg/vect/vect-bitfield-write-3.c: New test. + * gcc.dg/vect/vect-bitfield-write-4.c: New test. + * gcc.dg/vect/vect-bitfield-write-5.c: New test. + +2022-10-11 liuhongt + + * gcc.target/i386/pr107093.c: New test. + +2022-10-11 Aldy Hernandez + + PR tree-optimization/107195 + * gcc.dg/tree-ssa/pr107195-1.c: New test. + * gcc.dg/tree-ssa/pr107195-2.c: New test. + +2022-10-11 Liwei Xu + liuhongt + + * gcc.dg/pr54346.c: New test. + 2022-10-10 Nathan Sidwell * g++.dg/abi/lambda-ctx1-17.C: New. -- cgit v1.1 From bb00d0974ebe66ca9dc6621e68498396cd542b28 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Tue, 11 Oct 2022 17:22:23 +0200 Subject: Add default relation_kind to floating point range-op entries. The methods from which these derive all have a default relation_kind. This patch just adds the default, to make it easier to write unit tests later. gcc/ChangeLog: * range-op-float.cc: Add relation_kind = VREL_VARYING to all methods. 
--- gcc/range-op-float.cc | 80 +++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 40 deletions(-) (limited to 'gcc') diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index 283eb13..71cdd11 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -318,14 +318,14 @@ class foperator_identity : public range_operator_float public: bool fold_range (frange &r, tree type ATTRIBUTE_UNUSED, const frange &op1, const frange &op2 ATTRIBUTE_UNUSED, - relation_kind) const final override + relation_kind = VREL_VARYING) const final override { r = op1; return true; } bool op1_range (frange &r, tree type ATTRIBUTE_UNUSED, const frange &lhs, const frange &op2 ATTRIBUTE_UNUSED, - relation_kind) const final override + relation_kind = VREL_VARYING) const final override { r = lhs; return true; @@ -341,17 +341,17 @@ class foperator_equal : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; relation_kind op1_op2_relation (const irange &lhs) const final override { return equal_op1_op2_relation (lhs); } bool op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; bool op2_range (frange &r, tree type, const irange &lhs, const frange &op1, - relation_kind rel) const final override + relation_kind rel = VREL_VARYING) const final override { return op1_range (r, type, lhs, op1, rel); } @@ -447,14 +447,14 @@ class foperator_not_equal : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override; + relation_kind rel = VREL_VARYING) const final override; relation_kind op1_op2_relation (const irange &lhs) const final override { return not_equal_op1_op2_relation (lhs); } bool op1_range 
(frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; } fop_not_equal; bool @@ -548,17 +548,17 @@ class foperator_lt : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; relation_kind op1_op2_relation (const irange &lhs) const final override { return lt_op1_op2_relation (lhs); } bool op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; bool op2_range (frange &r, tree type, const irange &lhs, const frange &op1, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; } fop_lt; bool @@ -663,17 +663,17 @@ class foperator_le : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override; + relation_kind rel = VREL_VARYING) const final override; relation_kind op1_op2_relation (const irange &lhs) const final override { return le_op1_op2_relation (lhs); } bool op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind rel = VREL_VARYING) const final override; bool op2_range (frange &r, tree type, const irange &lhs, const frange &op1, - relation_kind rel) const final override; + relation_kind rel = VREL_VARYING) const final override; } fop_le; bool @@ -770,17 +770,17 @@ class foperator_gt : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; relation_kind op1_op2_relation (const irange &lhs) const final override { return gt_op1_op2_relation 
(lhs); } bool op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; bool op2_range (frange &r, tree type, const irange &lhs, const frange &op1, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; } fop_gt; bool @@ -885,17 +885,17 @@ class foperator_ge : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; relation_kind op1_op2_relation (const irange &lhs) const final override { return ge_op1_op2_relation (lhs); } bool op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; bool op2_range (frange &r, tree type, const irange &lhs, const frange &op1, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; } fop_ge; bool @@ -996,13 +996,13 @@ class foperator_unordered : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; bool op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; bool op2_range (frange &r, tree type, const irange &lhs, const frange &op1, - relation_kind rel) const final override + relation_kind rel = VREL_VARYING) const final override { return op1_range (r, type, lhs, op1, rel); } @@ -1073,13 +1073,13 @@ class foperator_ordered : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final 
override; bool op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; bool op2_range (frange &r, tree type, const irange &lhs, const frange &op1, - relation_kind rel) const final override + relation_kind rel = VREL_VARYING) const final override { return op1_range (r, type, lhs, op1, rel); } @@ -1139,10 +1139,10 @@ class foperator_abs : public range_operator_float public: bool fold_range (frange &r, tree type, const frange &op1, const frange &, - relation_kind) const final override; + relation_kind = VREL_VARYING) const final override; bool op1_range (frange &r, tree type, const frange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind rel = VREL_VARYING) const final override; } fop_abs; bool @@ -1227,7 +1227,7 @@ class foperator_unordered_lt : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override + relation_kind rel = VREL_VARYING) const final override { if (op1.known_isnan () || op2.known_isnan ()) { @@ -1256,7 +1256,7 @@ class foperator_unordered_le : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override + relation_kind rel = VREL_VARYING) const final override { if (op1.known_isnan () || op2.known_isnan ()) { @@ -1277,10 +1277,10 @@ public: } bool op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; bool op2_range (frange &r, tree type, const irange &lhs, const frange &op1, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; } fop_unordered_le; bool @@ -1337,7 +1337,7 @@ class foperator_unordered_gt : public range_operator_float public: bool fold_range (irange &r, 
tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override + relation_kind rel = VREL_VARYING) const final override { if (op1.known_isnan () || op2.known_isnan ()) { @@ -1358,10 +1358,10 @@ public: } bool op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; bool op2_range (frange &r, tree type, const irange &lhs, const frange &op1, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; } fop_unordered_gt; bool @@ -1420,7 +1420,7 @@ class foperator_unordered_ge : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override + relation_kind rel = VREL_VARYING) const final override { if (op1.known_isnan () || op2.known_isnan ()) { @@ -1441,10 +1441,10 @@ public: } bool op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; bool op2_range (frange &r, tree type, const irange &lhs, const frange &op1, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; } fop_unordered_ge; bool @@ -1502,7 +1502,7 @@ class foperator_unordered_equal : public range_operator_float public: bool fold_range (irange &r, tree type, const frange &op1, const frange &op2, - relation_kind rel) const final override + relation_kind rel = VREL_VARYING) const final override { if (op1.known_isnan () || op2.known_isnan ()) { @@ -1523,10 +1523,10 @@ public: } bool op1_range (frange &r, tree type, const irange &lhs, const frange &op2, - relation_kind rel) const final override; + relation_kind = VREL_VARYING) const final override; bool op2_range (frange &r, tree type, const irange &lhs, const frange &op1, - relation_kind rel) const final override + relation_kind rel = 
VREL_VARYING) const final override { return op1_range (r, type, lhs, op1, rel); } -- cgit v1.1 From a901343aa314eb08b362fc6878456d12f96e49b7 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Tue, 11 Oct 2022 17:22:40 +0200 Subject: Add an frange(type) constructor analogous to the irange version. gcc/ChangeLog: * value-range.h (frange::frange): Add constructor taking type. --- gcc/value-range.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'gcc') diff --git a/gcc/value-range.h b/gcc/value-range.h index 07a2067..9d630e4 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -281,6 +281,7 @@ public: frange (); frange (const frange &); frange (tree, tree, value_range_kind = VR_RANGE); + frange (tree type); frange (tree type, const REAL_VALUE_TYPE &min, const REAL_VALUE_TYPE &max, value_range_kind = VR_RANGE); static bool supports_p (const_tree type) @@ -1059,6 +1060,13 @@ frange::frange (const frange &src) *this = src; } +inline +frange::frange (tree type) +{ + m_discriminator = VR_FRANGE; + set_varying (type); +} + // frange constructor from REAL_VALUE_TYPE endpoints. inline -- cgit v1.1 From 6ce0823721d476cabb2007fecc12c07202325e17 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Tue, 11 Oct 2022 19:20:47 +0200 Subject: Disable tree to bool conversion in frange::update_nan. We have a set_nan(type) method which can be confused with update_nan(bool) because of the silent conversion of pointers to bool. Currently, if you call update_nan(tree), you'll set the possibility of NAN with a sign of true if tree is non-null. This is prone to error and this patch disallows this behavior. gcc/ChangeLog: * value-range.cc (frange::set_nonnegative): Pass bool to update_nan. * value-range.h: Disallow conversion to bool in update_nan(). 
--- gcc/value-range.cc | 2 +- gcc/value-range.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/value-range.cc b/gcc/value-range.cc index e07d2aa..26a2b78 100644 --- a/gcc/value-range.cc +++ b/gcc/value-range.cc @@ -779,7 +779,7 @@ frange::set_nonnegative (tree type) // Set +NAN as the only possibility. if (HONOR_NANS (type)) - update_nan (/*sign=*/0); + update_nan (/*sign=*/false); } // Here we copy between any two irange's. The ranges can be legacy or diff --git a/gcc/value-range.h b/gcc/value-range.h index 9d630e4..cb5e9d0 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -317,6 +317,7 @@ public: const REAL_VALUE_TYPE &upper_bound () const; void update_nan (); void update_nan (bool sign); + void update_nan (tree) = delete; // Disallow silent conversion to bool. void clear_nan (); // fpclassify like API -- cgit v1.1 From 658788f3480e95f1dda0a143b60be89099e5d3c7 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Tue, 11 Oct 2022 19:21:14 +0200 Subject: Add method to query the sign of a NAN. In writing some range-op entries I noticed we don't have a way to query the sign of the NAN in a range, unless the range only contains NAN, in which case you can just use frange::signbit_p. This patch adds a method that returns TRUE if there exists the possibility of a NAN and we know its sign. gcc/ChangeLog: * value-range.h (frange::nan_signbit_p): New.
--- gcc/value-range.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'gcc') diff --git a/gcc/value-range.h b/gcc/value-range.h index cb5e9d0..60b989b 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -328,6 +328,7 @@ public: bool maybe_isnan (bool sign) const; bool maybe_isinf () const; bool signbit_p (bool &signbit) const; + bool nan_signbit_p (bool &signbit) const; private: void verify_range (); bool normalize_kind (); @@ -1358,4 +1359,20 @@ frange::signbit_p (bool &signbit) const return false; } +// If range has a NAN with a known sign, set it in SIGNBIT and return +// TRUE. + +inline bool +frange::nan_signbit_p (bool &signbit) const +{ + if (undefined_p ()) + return false; + + if (m_pos_nan == m_neg_nan) + return false; + + signbit = m_neg_nan; + return true; +} + #endif // GCC_VALUE_RANGE_H -- cgit v1.1 From 1c0670c62fc7c7b610876e61bf971ea1db19680e Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Tue, 11 Oct 2022 17:23:33 +0200 Subject: Add stubs for floating point range-op tests. gcc/ChangeLog: * range-op-float.cc (frange_float): New. (range_op_float_tests): New. * range-op.cc (range_op_tests): Call range_op_float_tests. --- gcc/range-op-float.cc | 26 ++++++++++++++++++++++++++ gcc/range-op.cc | 3 +++ 2 files changed, 29 insertions(+) (limited to 'gcc') diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index 71cdd11..22b7418 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -1612,3 +1612,29 @@ floating_op_table::set (enum tree_code code, range_operator_float &op) gcc_checking_assert (m_range_tree[code] == NULL); m_range_tree[code] = &op; } + +#if CHECKING_P +#include "selftest.h" + +namespace selftest +{ + +// Build an frange from string endpoints. 
+ +inline frange +frange_float (const char *lb, const char *ub, tree type = float_type_node) +{ + REAL_VALUE_TYPE min, max; + gcc_assert (real_from_string (&min, lb) == 0); + gcc_assert (real_from_string (&max, ub) == 0); + return frange (type, min, max); +} + +void +range_op_float_tests () +{ +} + +} // namespace selftest + +#endif // CHECKING_P diff --git a/gcc/range-op.cc b/gcc/range-op.cc index 4d5a033..16fa1f4 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -4829,6 +4829,9 @@ range_op_tests () range_op_bitwise_and_tests (); range_op_cast_tests (); range_relational_tests (); + + extern void range_op_float_tests (); + range_op_float_tests (); } } // namespace selftest -- cgit v1.1 From 6d2294a83e1417720e1f8ddf5aa4dc1a35ef89d5 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Wed, 12 Oct 2022 09:31:32 +0200 Subject: regenerate configure files Needed after a recent change. gcc/ChangeLog: * configure: Regenerate. libatomic/ChangeLog: * configure: Regenerate. libbacktrace/ChangeLog: * configure: Regenerate. libcc1/ChangeLog: * configure: Regenerate. libffi/ChangeLog: * configure: Regenerate. libgfortran/ChangeLog: * configure: Regenerate. libgomp/ChangeLog: * configure: Regenerate. libitm/ChangeLog: * configure: Regenerate. libobjc/ChangeLog: * configure: Regenerate. liboffloadmic/ChangeLog: * configure: Regenerate. * plugin/configure: Regenerate. libphobos/ChangeLog: * configure: Regenerate. libquadmath/ChangeLog: * configure: Regenerate. libsanitizer/ChangeLog: * configure: Regenerate. libssp/ChangeLog: * configure: Regenerate. libstdc++-v3/ChangeLog: * configure: Regenerate. libvtv/ChangeLog: * configure: Regenerate. lto-plugin/ChangeLog: * configure: Regenerate. zlib/ChangeLog: * configure: Regenerate. 
--- gcc/configure | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/configure b/gcc/configure index db36681..99ba76522 100755 --- a/gcc/configure +++ b/gcc/configure @@ -19713,7 +19713,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19692 "configure" +#line 19716 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -19819,7 +19819,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19798 "configure" +#line 19822 "configure" #include "confdefs.h" #if HAVE_DLFCN_H -- cgit v1.1 From a9c83fb79530ec58017d669e7a951e1b582ab0b4 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Tue, 11 Oct 2022 20:45:21 +0200 Subject: gcov: rename gcov_write_summary gcc/ChangeLog: * gcov-io.cc (gcov_write_summary): Rename to ... (gcov_write_object_summary): ... this. * gcov-io.h (GCOV_TAG_OBJECT_SUMMARY_LENGTH): Rename from ... (GCOV_TAG_SUMMARY_LENGTH): ... this. libgcc/ChangeLog: * libgcov-driver.c: Use new function. * libgcov.h (gcov_write_summary): Rename to ... (gcov_write_object_summary): ... this. --- gcc/gcov-io.cc | 8 ++++---- gcc/gcov-io.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/gcov-io.cc b/gcc/gcov-io.cc index 62032cc..af5b13c 100644 --- a/gcc/gcov-io.cc +++ b/gcc/gcov-io.cc @@ -372,13 +372,13 @@ gcov_write_length (gcov_position_t position) #else /* IN_LIBGCOV */ -/* Write a summary structure to the gcov file. */ +/* Write an object summary structure to the gcov file. 
*/ GCOV_LINKAGE void -gcov_write_summary (gcov_unsigned_t tag, const struct gcov_summary *summary) +gcov_write_object_summary (const struct gcov_summary *summary) { - gcov_write_unsigned (tag); - gcov_write_unsigned (GCOV_TAG_SUMMARY_LENGTH); + gcov_write_unsigned (GCOV_TAG_OBJECT_SUMMARY); + gcov_write_unsigned (GCOV_TAG_OBJECT_SUMMARY_LENGTH); gcov_write_unsigned (summary->runs); gcov_write_unsigned (summary->sum_max); } diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h index 3094763..e91cd73 100644 --- a/gcc/gcov-io.h +++ b/gcc/gcov-io.h @@ -266,8 +266,8 @@ typedef uint64_t gcov_type_unsigned; #define GCOV_TAG_COUNTER_LENGTH(NUM) ((NUM) * 2 * GCOV_WORD_SIZE) #define GCOV_TAG_COUNTER_NUM(LENGTH) ((LENGTH / GCOV_WORD_SIZE) / 2) #define GCOV_TAG_OBJECT_SUMMARY ((gcov_unsigned_t)0xa1000000) +#define GCOV_TAG_OBJECT_SUMMARY_LENGTH (2 * GCOV_WORD_SIZE) #define GCOV_TAG_PROGRAM_SUMMARY ((gcov_unsigned_t)0xa3000000) /* Obsolete */ -#define GCOV_TAG_SUMMARY_LENGTH (2 * GCOV_WORD_SIZE) #define GCOV_TAG_AFDO_FILE_NAMES ((gcov_unsigned_t)0xaa000000) #define GCOV_TAG_AFDO_FUNCTION ((gcov_unsigned_t)0xac000000) #define GCOV_TAG_AFDO_WORKING_SET ((gcov_unsigned_t)0xaf000000) -- cgit v1.1 From 11c72f20d4d7ba1862a257cef05dc3a5e84a276d Mon Sep 17 00:00:00 2001 From: "Cui,Lili" Date: Thu, 29 Sep 2022 14:28:06 +0800 Subject: Remove AVX512_VP2INTERSECT from PTA_SAPPHIRERAPIDS gcc/ChangeLog: * config/i386/driver-i386.cc (host_detect_local_cpu): Move sapphirerapids out of AVX512_VP2INTERSECT. 
* config/i386/i386.h: Remove AVX512_VP2INTERSECT from PTA_SAPPHIRERAPIDS * doc/invoke.texi: Remove AVX512_VP2INTERSECT from SAPPHIRERAPIDS --- gcc/config/i386/driver-i386.cc | 13 +++++-------- gcc/config/i386/i386.h | 7 +++---- gcc/doc/invoke.texi | 8 ++++---- 3 files changed, 12 insertions(+), 16 deletions(-) (limited to 'gcc') diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc index 3c702fd..ef56704 100644 --- a/gcc/config/i386/driver-i386.cc +++ b/gcc/config/i386/driver-i386.cc @@ -589,15 +589,12 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* This is unknown family 0x6 CPU. */ if (has_feature (FEATURE_AVX)) { + /* Assume Tiger Lake */ if (has_feature (FEATURE_AVX512VP2INTERSECT)) - { - if (has_feature (FEATURE_TSXLDTRK)) - /* Assume Sapphire Rapids. */ - cpu = "sapphirerapids"; - else - /* Assume Tiger Lake */ - cpu = "tigerlake"; - } + cpu = "tigerlake"; + /* Assume Sapphire Rapids. */ + else if (has_feature (FEATURE_TSXLDTRK)) + cpu = "sapphirerapids"; /* Assume Cooper Lake */ else if (has_feature (FEATURE_AVX512BF16)) cpu = "cooperlake"; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 900a3bc..372a2cf 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2326,10 +2326,9 @@ constexpr wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT constexpr wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT | PTA_KL | PTA_WIDEKL; constexpr wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_ICELAKE_SERVER | PTA_MOVDIRI - | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE - | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE - | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI | PTA_AVX512FP16 - | PTA_AVX512BF16; + | PTA_MOVDIR64B | PTA_ENQCMD | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG + | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 + | PTA_UINTR | 
PTA_AVXVNNI | PTA_AVX512FP16 | PTA_AVX512BF16; constexpr wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1; constexpr wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE; diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 271c8bb8..a9ecc44 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -32057,11 +32057,11 @@ Intel sapphirerapids CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, -AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2 +AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, -MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, -SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16 -and AVX512BF16 instruction set support. +MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK, +UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16 and AVX512BF16 +instruction set support. @item alderlake Intel Alderlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -- cgit v1.1 From fcb94f551f29587f04dac0e9b99c98094479835d Mon Sep 17 00:00:00 2001 From: Ju-Zhe Zhong Date: Tue, 11 Oct 2022 12:48:20 +0800 Subject: RISC-V: Move function place to make it looks better. [NFC] gcc/ChangeLog: * config/riscv/riscv-vector-builtins.h (class rvv_switcher): Move to this to .... * config/riscv/riscv-vector-builtins.cc (class rvv_switcher): here. 
--- gcc/config/riscv/riscv-vector-builtins.cc | 19 +++++++++++++++++++ gcc/config/riscv/riscv-vector-builtins.h | 19 ------------------- 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index 7033b1f..d523192 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -86,6 +86,25 @@ static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1]; extern GTY(()) tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; +/* RAII class for enabling enough RVV features to define the built-in + types and implement the riscv_vector.h pragma. + + Note: According to 'TYPE_MODE' macro implementation, we need set + have_regs_of_mode[mode] to be true if we want to get the exact mode + from 'TYPE_MODE'. However, have_regs_of_mode has not been set yet in + targetm.init_builtins (). We need rvv_switcher to set have_regs_of_mode + before targetm.init_builtins () and recover back have_regs_of_mode + after targetm.init_builtins (). */ +class rvv_switcher +{ +public: + rvv_switcher (); + ~rvv_switcher (); + +private: + bool m_old_have_regs_of_mode[MAX_MACHINE_MODE]; +}; + rvv_switcher::rvv_switcher () { /* Set have_regs_of_mode before targetm.init_builtins (). */ diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h index ec85e0b..5c01a76 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -36,25 +36,6 @@ enum vector_type_index NUM_VECTOR_TYPES }; -/* RAII class for enabling enough RVV features to define the built-in - types and implement the riscv_vector.h pragma. - - Note: According to 'TYPE_MODE' macro implementation, we need set - have_regs_of_mode[mode] to be true if we want to get the exact mode - from 'TYPE_MODE'. 
However, have_regs_of_mode has not been set yet in - targetm.init_builtins (). We need rvv_switcher to set have_regs_of_mode - before targetm.init_builtins () and recover back have_regs_of_mode - after targetm.init_builtins (). */ -class rvv_switcher -{ -public: - rvv_switcher (); - ~rvv_switcher (); - -private: - bool m_old_have_regs_of_mode[MAX_MACHINE_MODE]; -}; - } // end namespace riscv_vector #endif -- cgit v1.1 From ef0d7978914b89021d66df3baf03ccc103cf7467 Mon Sep 17 00:00:00 2001 From: Ju-Zhe Zhong Date: Tue, 11 Oct 2022 14:15:21 +0800 Subject: RISC-V: Refine register_builtin_types function. [NFC] gcc/ChangeLog: * config/riscv/riscv-vector-builtins.cc (builtin_types): Redefine vector types. (build_const_pointer): New function. (register_builtin_type): Ditto. (DEF_RVV_TYPE): Simplify macro. (register_vector_type): Refine implementation. * config/riscv/riscv-vector-builtins.h (rvv_builtin_types_t): New. --- gcc/config/riscv/riscv-vector-builtins.cc | 80 +++++++++++++++---------------- gcc/config/riscv/riscv-vector-builtins.h | 10 ++++ 2 files changed, 50 insertions(+), 40 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index d523192..b7cd1c5 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -73,18 +73,14 @@ static CONSTEXPR const vector_type_info vector_types[] = { #include "riscv-vector-builtins.def" }; -/* The scalar type associated with each vector type. */ -static GTY (()) tree scalar_types[NUM_VECTOR_TYPES]; -/* The machine mode associated with each vector type. */ -static GTY (()) machine_mode vector_modes[NUM_VECTOR_TYPES]; /* The RVV types, with their built-in "__rvv..._t" name. Allow an index of NUM_VECTOR_TYPES, which always yields a null tree. */ static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1]; /* Same, but with the riscv_vector.h "v..._t" name. 
*/ -extern GTY(()) tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; -tree builtin_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; +extern GTY (()) rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 1]; +rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 1]; /* RAII class for enabling enough RVV features to define the built-in types and implement the riscv_vector.h pragma. @@ -172,6 +168,39 @@ mangle_builtin_type (const_tree type) return NULL; } +/* Return a representation of "const T *". */ +static tree +build_const_pointer (tree t) +{ + return build_pointer_type (build_qualified_type (t, TYPE_QUAL_CONST)); +} + +/* Helper function for register a single built-in RVV ABI type. */ +static void +register_builtin_type (vector_type_index type, tree eltype, machine_mode mode) +{ + builtin_types[type].scalar = eltype; + builtin_types[type].scalar_ptr = build_pointer_type (eltype); + builtin_types[type].scalar_const_ptr = build_const_pointer (eltype); + if (!riscv_v_ext_enabled_vector_mode_p (mode)) + return; + + tree vectype = build_vector_type_for_mode (eltype, mode); + gcc_assert (VECTOR_MODE_P (TYPE_MODE (vectype)) && TYPE_MODE (vectype) == mode + && TYPE_MODE_RAW (vectype) == mode && TYPE_ALIGN (vectype) <= 128 + && known_eq (tree_to_poly_uint64 (TYPE_SIZE (vectype)), + GET_MODE_BITSIZE (mode))); + vectype = build_distinct_type_copy (vectype); + gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype)); + SET_TYPE_STRUCTURAL_EQUALITY (vectype); + TYPE_ARTIFICIAL (vectype) = 1; + TYPE_INDIVISIBLE_P (vectype) = 1; + add_vector_type_attribute (vectype, vector_types[type].mangled_name); + make_type_sizeless (vectype); + abi_vector_types[type] = vectype; + lang_hooks.types.register_builtin_type (vectype, vector_types[type].abi_name); +} + /* Register the built-in RVV ABI types, such as __rvv_int32m1_t. */ static void register_builtin_types () @@ -186,42 +215,12 @@ register_builtin_types () = TARGET_64BIT ? 
unsigned_intSI_type_node : long_unsigned_type_node; machine_mode mode; -#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ - VECTOR_MODE_MIN_VLEN_32) \ +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ + VECTOR_MODE_MIN_VLEN_32, ARGS...) \ mode = TARGET_MIN_VLEN > 32 ? VECTOR_MODE##mode \ : VECTOR_MODE_MIN_VLEN_32##mode; \ - scalar_types[VECTOR_TYPE_##NAME] \ - = riscv_v_ext_enabled_vector_mode_p (mode) ? SCALAR_TYPE##_type_node \ - : NULL_TREE; \ - vector_modes[VECTOR_TYPE_##NAME] \ - = riscv_v_ext_enabled_vector_mode_p (mode) ? mode : VOIDmode; + register_builtin_type (VECTOR_TYPE_##NAME, SCALAR_TYPE##_type_node, mode); #include "riscv-vector-builtins.def" - - for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i) - { - tree eltype = scalar_types[i]; - mode = vector_modes[i]; - /* We disabled the datatypes according '-march'. */ - if (!eltype) - continue; - - tree vectype = build_vector_type_for_mode (eltype, mode); - gcc_assert ( - VECTOR_MODE_P (TYPE_MODE (vectype)) && TYPE_MODE (vectype) == mode - && TYPE_MODE_RAW (vectype) == mode && TYPE_ALIGN (vectype) <= 128 - && known_eq (tree_to_poly_uint64 (TYPE_SIZE (vectype)), - GET_MODE_BITSIZE (mode))); - vectype = build_distinct_type_copy (vectype); - gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype)); - SET_TYPE_STRUCTURAL_EQUALITY (vectype); - TYPE_ARTIFICIAL (vectype) = 1; - TYPE_INDIVISIBLE_P (vectype) = 1; - add_vector_type_attribute (vectype, vector_types[i].mangled_name); - make_type_sizeless (vectype); - abi_vector_types[i] = vectype; - lang_hooks.types.register_builtin_type (vectype, - vector_types[i].abi_name); - } } /* Register vector type TYPE under its risv_vector.h name. 
*/ @@ -247,7 +246,8 @@ register_vector_type (vector_type_index type) && TYPE_MAIN_VARIANT (TREE_TYPE (decl)) == vectype) vectype = TREE_TYPE (decl); - builtin_vector_types[0][type] = vectype; + builtin_types[type].vector = vectype; + builtin_types[type].vector_ptr = build_pointer_type (vectype); } /* Initialize all compiler built-ins related to RVV that should be diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h index 5c01a76..7d4b807 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -36,6 +36,16 @@ enum vector_type_index NUM_VECTOR_TYPES }; +/* Builtin types that are used to register RVV intrinsics. */ +struct GTY (()) rvv_builtin_types_t +{ + tree vector; + tree scalar; + tree vector_ptr; + tree scalar_ptr; + tree scalar_const_ptr; +}; + } // end namespace riscv_vector #endif -- cgit v1.1 From 0af2b2f241cbac81a6af503cc4d575a9a0aaf7c0 Mon Sep 17 00:00:00 2001 From: Ju-Zhe Zhong Date: Tue, 11 Oct 2022 14:21:59 +0800 Subject: RISC-V: Apply clang-format for riscv-vector-builtins.* [NFC] gcc/ChangeLog: * config/riscv/riscv-vector-builtins.cc (DEF_RVV_TYPE): Apply clang-format. (add_vector_type_attribute): Ditto. * config/riscv/riscv-vector-builtins.def (DEF_RVV_TYPE): Apply clang-format. * config/riscv/riscv-vector-builtins.h (DEF_RVV_TYPE): Apply clang-format. --- gcc/config/riscv/riscv-vector-builtins.cc | 8 ++++---- gcc/config/riscv/riscv-vector-builtins.def | 2 +- gcc/config/riscv/riscv-vector-builtins.h | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index b7cd1c5..99c4825 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -68,7 +68,7 @@ struct vector_type_info /* Information about each RVV type. 
*/ static CONSTEXPR const vector_type_info vector_types[] = { -#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, ARGS...) \ +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, ARGS...) \ {#NAME, #ABI_NAME, "u" #NCHARS #ABI_NAME}, #include "riscv-vector-builtins.def" }; @@ -76,7 +76,7 @@ static CONSTEXPR const vector_type_info vector_types[] = { /* The RVV types, with their built-in "__rvv..._t" name. Allow an index of NUM_VECTOR_TYPES, which always yields a null tree. */ -static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1]; +static GTY (()) tree abi_vector_types[NUM_VECTOR_TYPES + 1]; /* Same, but with the riscv_vector.h "v..._t" name. */ extern GTY (()) rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 1]; @@ -124,8 +124,8 @@ add_vector_type_attribute (tree type, const char *mangled_name) { tree mangled_name_tree = get_identifier (mangled_name); tree value = tree_cons (NULL_TREE, mangled_name_tree, NULL_TREE); - TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("RVV type"), value, - TYPE_ATTRIBUTES (type)); + TYPE_ATTRIBUTES (type) + = tree_cons (get_identifier ("RVV type"), value, TYPE_ATTRIBUTES (type)); } /* Force TYPE to be a sizeless type. */ diff --git a/gcc/config/riscv/riscv-vector-builtins.def b/gcc/config/riscv/riscv-vector-builtins.def index 664734b..83603fe 100644 --- a/gcc/config/riscv/riscv-vector-builtins.def +++ b/gcc/config/riscv/riscv-vector-builtins.def @@ -32,7 +32,7 @@ along with GCC; see the file COPYING3. If not see TARGET_MIN_VLEN > 32. Otherwise the machine mode is VNx1SImode. 
*/ #ifndef DEF_RVV_TYPE -#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ VECTOR_MODE_MIN_VLEN_32) #endif diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h index 7d4b807..7f4e89c 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -30,8 +30,7 @@ const unsigned int MAX_TUPLE_SIZE = 8; "vector types" for brevity. */ enum vector_type_index { -#define DEF_RVV_TYPE(NAME, ABI_NAME, NCHARS, ARGS...) \ - VECTOR_TYPE_##NAME, +#define DEF_RVV_TYPE(NAME, ABI_NAME, NCHARS, ARGS...) VECTOR_TYPE_##NAME, #include "riscv-vector-builtins.def" NUM_VECTOR_TYPES }; -- cgit v1.1 From fbf423309e103b54f7c9d39b2f7870b9bedfe9d2 Mon Sep 17 00:00:00 2001 From: Ju-Zhe Zhong Date: Tue, 11 Oct 2022 14:23:33 +0800 Subject: RISC-V: Remove TUPLE size macro define. [NFC] gcc/ChangeLog: * config/riscv/riscv-vector-builtins.h: Remove unused macro. --- gcc/config/riscv/riscv-vector-builtins.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h index 7f4e89c..ea67da9 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -23,9 +23,6 @@ namespace riscv_vector { -/* This is for segment instructions. */ -const unsigned int MAX_TUPLE_SIZE = 8; - /* Enumerates the RVV types, together called "vector types" for brevity. 
*/ enum vector_type_index -- cgit v1.1 From 9bf74082bc93226e1ceb66430706e957e460c841 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Fri, 7 Oct 2022 20:34:53 -0400 Subject: c++: defer all consteval in default args [DR2631] The proposed resolution of CWG2631 extends our current handling of source_location::current to all consteval functions: default arguments are not evaluated until they're used in a call, the same should apply to evaluation of immediate invocations. And similarly for default member initializers. Previously we folded source_location::current in cp_fold_r; now we fold all consteval calls in default arguments/member initializers in bot_replace. DR 2631 gcc/cp/ChangeLog: * cp-tree.h (source_location_current_p): Remove. * name-lookup.h (struct cp_binding_level): Remove immediate_fn_ctx_p. * call.cc (in_immediate_context): All default args and DMI are potentially immediate context. (immediate_invocation_p): Don't treat source_location specially. (struct in_consteval_if_p_temp_override): Move to cp-tree.h. * constexpr.cc (get_nth_callarg): Move to cp-tree.h. * cp-gimplify.cc (cp_fold_r): Don't fold consteval. * name-lookup.cc (begin_scope): Don't set immediate_fn_ctx_p. * parser.cc (cp_parser_lambda_declarator_opt): Likewise. (cp_parser_direct_declarator): Likewise. * pt.cc (tsubst_default_argument): Open sk_function_parms level. * tree.cc (source_location_current_p): Remove. (bot_replace): Fold consteval here. (break_out_target_exprs): Handle errors. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/consteval-defarg3.C: New test. 
--- gcc/cp/call.cc | 38 +++++++------------ gcc/cp/constexpr.cc | 20 ---------- gcc/cp/cp-gimplify.cc | 7 ---- gcc/cp/cp-tree.h | 32 +++++++++++++++- gcc/cp/name-lookup.cc | 2 - gcc/cp/name-lookup.h | 5 +-- gcc/cp/parser.cc | 24 ------------ gcc/cp/pt.cc | 3 ++ gcc/cp/tree.cc | 52 ++++++++++++-------------- gcc/testsuite/g++.dg/cpp2a/consteval-defarg3.C | 23 ++++++++++++ 10 files changed, 95 insertions(+), 111 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/consteval-defarg3.C (limited to 'gcc') diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 70ec964..2fa33c5 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -9301,7 +9301,8 @@ build_trivial_dtor_call (tree instance, bool no_ptr_deref) } /* Return true if in an immediate function context, or an unevaluated operand, - or a subexpression of an immediate invocation. */ + or a default argument/member initializer, or a subexpression of an immediate + invocation. */ bool in_immediate_context () @@ -9309,8 +9310,11 @@ in_immediate_context () return (cp_unevaluated_operand != 0 || (current_function_decl != NULL_TREE && DECL_IMMEDIATE_FUNCTION_P (current_function_decl)) - || (current_binding_level->kind == sk_function_parms - && current_binding_level->immediate_fn_ctx_p) + /* DR 2631: default args and DMI aren't immediately evaluated. + Return true here so immediate_invocation_p returns false. */ + || current_binding_level->kind == sk_function_parms + || current_binding_level->kind == sk_template_parms + || parsing_nsdmi () || in_consteval_if_p); } @@ -9318,28 +9322,12 @@ in_immediate_context () is an immediate invocation. */ static bool -immediate_invocation_p (tree fn, int nargs) +immediate_invocation_p (tree fn) { return (TREE_CODE (fn) == FUNCTION_DECL && DECL_IMMEDIATE_FUNCTION_P (fn) - && !in_immediate_context () - /* As an exception, we defer std::source_location::current () - invocations until genericization because LWG3396 mandates - special behavior for it. 
*/ - && (nargs > 1 || !source_location_current_p (fn))); -} - -/* temp_override for in_consteval_if_p, which can't use make_temp_override - because it is a bitfield. */ - -struct in_consteval_if_p_temp_override { - bool save_in_consteval_if_p; - in_consteval_if_p_temp_override () - : save_in_consteval_if_p (in_consteval_if_p) {} - void reset () { in_consteval_if_p = save_in_consteval_if_p; } - ~in_consteval_if_p_temp_override () - { reset (); } -}; + && !in_immediate_context ()); +} /* Subroutine of the various build_*_call functions. Overload resolution has chosen a winning candidate CAND; build up a CALL_EXPR accordingly. @@ -9398,7 +9386,7 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) SET_EXPR_LOCATION (expr, input_location); if (TREE_THIS_VOLATILE (fn) && cfun) current_function_returns_abnormally = 1; - if (immediate_invocation_p (fn, vec_safe_length (args))) + if (immediate_invocation_p (fn)) { tree obj_arg = NULL_TREE, exprimm = expr; if (DECL_CONSTRUCTOR_P (fn)) @@ -9543,7 +9531,7 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) in_consteval_if_p_temp_override icip; /* If the call is immediate function invocation, make sure taking address of immediate functions is allowed in its arguments. */ - if (immediate_invocation_p (STRIP_TEMPLATE (fn), nargs)) + if (immediate_invocation_p (STRIP_TEMPLATE (fn))) in_consteval_if_p = true; /* The implicit parameters to a constructor are not considered by overload @@ -10072,7 +10060,7 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) if (TREE_CODE (fn) == ADDR_EXPR) { tree fndecl = STRIP_TEMPLATE (TREE_OPERAND (fn, 0)); - if (immediate_invocation_p (fndecl, nargs)) + if (immediate_invocation_p (fndecl)) { tree obj_arg = NULL_TREE; /* Undo convert_from_reference called by build_cxx_call. 
*/ diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index 06dcd71..2038f43 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -1324,26 +1324,6 @@ save_fundef_copy (tree fun, tree copy) *slot = copy; } -/* We have an expression tree T that represents a call, either CALL_EXPR - or AGGR_INIT_EXPR. Return the Nth argument. */ - -static inline tree -get_nth_callarg (tree t, int n) -{ - switch (TREE_CODE (t)) - { - case CALL_EXPR: - return CALL_EXPR_ARG (t, n); - - case AGGR_INIT_EXPR: - return AGGR_INIT_EXPR_ARG (t, n); - - default: - gcc_unreachable (); - return NULL; - } -} - /* Whether our evaluation wants a prvalue (e.g. CONSTRUCTOR or _CST), a glvalue (e.g. VAR_DECL or _REF), or nothing. */ diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc index d0e12c9..a937060 100644 --- a/gcc/cp/cp-gimplify.cc +++ b/gcc/cp/cp-gimplify.cc @@ -1010,13 +1010,6 @@ cp_fold_r (tree *stmt_p, int *walk_subtrees, void *data_) } break; - case CALL_EXPR: - if (tree fndecl = cp_get_callee_fndecl_nofold (stmt)) - if (DECL_IMMEDIATE_FUNCTION_P (fndecl) - && source_location_current_p (fndecl)) - *stmt_p = stmt = cxx_constant_value (stmt); - break; - default: break; } diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index ab6f85a..80037fa 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -2030,6 +2030,18 @@ make_temp_override (T& var, type_identity_t overrider) return { var, overrider }; } +/* temp_override for in_consteval_if_p, which can't use make_temp_override + because it is a bitfield. */ + +struct in_consteval_if_p_temp_override { + bool save_in_consteval_if_p; + in_consteval_if_p_temp_override () + : save_in_consteval_if_p (in_consteval_if_p) {} + void reset () { in_consteval_if_p = save_in_consteval_if_p; } + ~in_consteval_if_p_temp_override () + { reset (); } +}; + /* The cached class binding level, from the most recently exited class, or NULL if none. 
*/ @@ -4201,6 +4213,25 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter) for ((arg) = first_aggr_init_expr_arg ((call), &(iter)); (arg); \ (arg) = next_aggr_init_expr_arg (&(iter))) +/* We have an expression tree T that represents a call, either CALL_EXPR + or AGGR_INIT_EXPR. Return a reference to the Nth argument. */ + +static inline tree& +get_nth_callarg (tree t, int n) +{ + switch (TREE_CODE (t)) + { + case CALL_EXPR: + return CALL_EXPR_ARG (t, n); + + case AGGR_INIT_EXPR: + return AGGR_INIT_EXPR_ARG (t, n); + + default: + gcc_unreachable (); + } +} + /* VEC_INIT_EXPR accessors. */ #define VEC_INIT_EXPR_SLOT(NODE) TREE_OPERAND (VEC_INIT_EXPR_CHECK (NODE), 0) #define VEC_INIT_EXPR_INIT(NODE) TREE_OPERAND (VEC_INIT_EXPR_CHECK (NODE), 1) @@ -7880,7 +7911,6 @@ extern tree bind_template_template_parm (tree, tree); extern tree array_type_nelts_total (tree); extern tree array_type_nelts_top (tree); extern bool array_of_unknown_bound_p (const_tree); -extern bool source_location_current_p (tree); extern tree break_out_target_exprs (tree, bool = false); extern tree build_ctor_subob_ref (tree, tree, tree); extern tree replace_placeholders (tree, tree, bool * = NULL); diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc index 25657cf..14e937d 100644 --- a/gcc/cp/name-lookup.cc +++ b/gcc/cp/name-lookup.cc @@ -4302,8 +4302,6 @@ begin_scope (scope_kind kind, tree entity) case sk_function_parms: scope->keep = keep_next_level_flag; - if (entity) - scope->immediate_fn_ctx_p = DECL_IMMEDIATE_FUNCTION_P (entity); break; case sk_namespace: diff --git a/gcc/cp/name-lookup.h b/gcc/cp/name-lookup.h index 7201ae8..9e3b698 100644 --- a/gcc/cp/name-lookup.h +++ b/gcc/cp/name-lookup.h @@ -307,13 +307,10 @@ struct GTY(()) cp_binding_level { 'this_entity'. */ unsigned defining_class_p : 1; - /* true for SK_FUNCTION_PARMS of immediate functions. */ - unsigned immediate_fn_ctx_p : 1; - /* True for SK_FUNCTION_PARMS of a requires-expression. 
*/ unsigned requires_expression: 1; - /* 21 bits left to fill a 32-bit word. */ + /* 22 bits left to fill a 32-bit word. */ }; /* The binding level currently in effect. */ diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index dc3d17c..4e3ed66 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -11519,31 +11519,11 @@ cp_parser_lambda_declarator_opt (cp_parser* parser, tree lambda_expr) opening parenthesis if present. */ if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN)) { - bool is_consteval = false; - /* For C++20, before parsing the parameter list check if there is - a consteval specifier in the corresponding decl-specifier-seq. */ - if (cxx_dialect >= cxx20) - { - for (size_t n = cp_parser_skip_balanced_tokens (parser, 1); - cp_lexer_nth_token_is (parser->lexer, n, CPP_KEYWORD); n++) - { - if (cp_lexer_peek_nth_token (parser->lexer, n)->keyword - == RID_CONSTEVAL) - { - is_consteval = true; - break; - } - } - } - matching_parens parens; parens.consume_open (parser); begin_scope (sk_function_parms, /*entity=*/NULL_TREE); - if (is_consteval) - current_binding_level->immediate_fn_ctx_p = true; - /* Parse parameters. */ param_list = cp_parser_parameter_declaration_clause @@ -23186,10 +23166,6 @@ cp_parser_direct_declarator (cp_parser* parser, begin_scope (sk_function_parms, NULL_TREE); - /* Signal we are in the immediate function context. */ - if (flags & CP_PARSER_FLAGS_CONSTEVAL) - current_binding_level->immediate_fn_ctx_p = true; - /* Parse the parameter-declaration-clause. */ params = cp_parser_parameter_declaration_clause (parser, flags); diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index b80e7ff..ec337e2 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -13933,6 +13933,8 @@ tsubst_default_argument (tree fn, int parmnum, tree type, tree arg, push_to_top_level (); push_access_scope (fn); push_deferring_access_checks (dk_no_deferred); + /* So in_immediate_context knows this is a default argument. 
*/ + begin_scope (sk_function_parms, fn); start_lambda_scope (parm); /* The default argument expression may cause implicitly defined @@ -13956,6 +13958,7 @@ tsubst_default_argument (tree fn, int parmnum, tree type, tree arg, inform (input_location, " when instantiating default argument for call to %qD", fn); + leave_scope (); pop_deferring_access_checks (); pop_access_scope (fn); pop_from_top_level (); diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc index 3532e44..45348c5 100644 --- a/gcc/cp/tree.cc +++ b/gcc/cp/tree.cc @@ -3125,32 +3125,6 @@ array_type_nelts_total (tree type) return sz; } -/* Return true if FNDECL is std::source_location::current () method. */ - -bool -source_location_current_p (tree fndecl) -{ - gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL - && DECL_IMMEDIATE_FUNCTION_P (fndecl)); - if (DECL_NAME (fndecl) == NULL_TREE - || TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE - || TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != RECORD_TYPE - || DECL_CONTEXT (fndecl) != TREE_TYPE (TREE_TYPE (fndecl)) - || !id_equal (DECL_NAME (fndecl), "current")) - return false; - - tree source_location = DECL_CONTEXT (fndecl); - if (TYPE_NAME (source_location) == NULL_TREE - || TREE_CODE (TYPE_NAME (source_location)) != TYPE_DECL - || TYPE_IDENTIFIER (source_location) == NULL_TREE - || !id_equal (TYPE_IDENTIFIER (source_location), - "source_location") - || !decl_in_std_namespace_p (TYPE_NAME (source_location))) - return false; - - return true; -} - struct bot_data { splay_tree target_remap; @@ -3298,7 +3272,7 @@ bot_manip (tree* tp, int* walk_subtrees, void* data_) variables. 
*/ static tree -bot_replace (tree* t, int* /*walk_subtrees*/, void* data_) +bot_replace (tree* t, int* walk_subtrees, void* data_) { bot_data &data = *(bot_data*)data_; splay_tree target_remap = data.target_remap; @@ -3328,6 +3302,27 @@ bot_replace (tree* t, int* /*walk_subtrees*/, void* data_) /*check_access=*/false, /*nonnull=*/true, tf_warning_or_error); } + else if (cxx_dialect >= cxx20 + && (TREE_CODE (*t) == CALL_EXPR + || TREE_CODE (*t) == AGGR_INIT_EXPR) + && !in_immediate_context ()) + { + /* Expand immediate invocations. */ + if (tree fndecl = cp_get_callee_fndecl_nofold (*t)) + if (DECL_IMMEDIATE_FUNCTION_P (fndecl)) + { + /* Make in_immediate_context true within the args. */ + in_consteval_if_p_temp_override ito; + in_consteval_if_p = true; + int nargs = call_expr_nargs (*t); + for (int i = 0; i < nargs; ++i) + cp_walk_tree (&get_nth_callarg (*t, i), bot_replace, data_, NULL); + *t = cxx_constant_value (*t); + if (*t == error_mark_node) + return error_mark_node; + *walk_subtrees = 0; + } + } return NULL_TREE; } @@ -3353,7 +3348,8 @@ break_out_target_exprs (tree t, bool clear_location /* = false */) bot_data data = { target_remap, clear_location }; if (cp_walk_tree (&t, bot_manip, &data, NULL) == error_mark_node) t = error_mark_node; - cp_walk_tree (&t, bot_replace, &data, NULL); + if (cp_walk_tree (&t, bot_replace, &data, NULL) == error_mark_node) + t = error_mark_node; if (!--target_remap_count) { diff --git a/gcc/testsuite/g++.dg/cpp2a/consteval-defarg3.C b/gcc/testsuite/g++.dg/cpp2a/consteval-defarg3.C new file mode 100644 index 0000000..316219c --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/consteval-defarg3.C @@ -0,0 +1,23 @@ +// DR 2631: default args and DMI aren't immediately evaluated +// { dg-do compile { target c++20 } } +// { dg-final { scan-assembler-not "foober" } } + +consteval int foober(); + +int g(int = foober()); +struct A { int i = foober(); }; +template struct B { }; +struct C +{ + consteval C(int = foober()) { } +}; +int h(C = C()); 
+ +consteval int foober() { return 42; } + +int main() { + A a; + B<> b; + g(); + h(); +} -- cgit v1.1 From 7858368c3f3875f6bf634119e5731dc3c808a7c3 Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Tue, 11 Oct 2022 22:08:48 +0200 Subject: Fortran: check types of operands of arithmetic binary operations [PR107217] gcc/fortran/ChangeLog: PR fortran/107217 * arith.cc (gfc_arith_plus): Compare consistency of types of operands. (gfc_arith_minus): Likewise. (gfc_arith_times): Likewise. (gfc_arith_divide): Likewise. (arith_power): Check that both operands are of numeric type. gcc/testsuite/ChangeLog: PR fortran/107217 * gfortran.dg/pr107217.f90: New test. --- gcc/fortran/arith.cc | 15 +++++++++++++++ gcc/testsuite/gfortran.dg/pr107217.f90 | 18 ++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 gcc/testsuite/gfortran.dg/pr107217.f90 (limited to 'gcc') diff --git a/gcc/fortran/arith.cc b/gcc/fortran/arith.cc index 9e079e4..14ba931 100644 --- a/gcc/fortran/arith.cc +++ b/gcc/fortran/arith.cc @@ -624,6 +624,9 @@ gfc_arith_plus (gfc_expr *op1, gfc_expr *op2, gfc_expr **resultp) gfc_expr *result; arith rc; + if (op1->ts.type != op2->ts.type) + return ARITH_INVALID_TYPE; + result = gfc_get_constant_expr (op1->ts.type, op1->ts.kind, &op1->where); switch (op1->ts.type) @@ -658,6 +661,9 @@ gfc_arith_minus (gfc_expr *op1, gfc_expr *op2, gfc_expr **resultp) gfc_expr *result; arith rc; + if (op1->ts.type != op2->ts.type) + return ARITH_INVALID_TYPE; + result = gfc_get_constant_expr (op1->ts.type, op1->ts.kind, &op1->where); switch (op1->ts.type) @@ -692,6 +698,9 @@ gfc_arith_times (gfc_expr *op1, gfc_expr *op2, gfc_expr **resultp) gfc_expr *result; arith rc; + if (op1->ts.type != op2->ts.type) + return ARITH_INVALID_TYPE; + result = gfc_get_constant_expr (op1->ts.type, op1->ts.kind, &op1->where); switch (op1->ts.type) @@ -727,6 +736,9 @@ gfc_arith_divide (gfc_expr *op1, gfc_expr *op2, gfc_expr **resultp) gfc_expr *result; arith rc; + if (op1->ts.type != 
op2->ts.type) + return ARITH_INVALID_TYPE; + rc = ARITH_OK; result = gfc_get_constant_expr (op1->ts.type, op1->ts.kind, &op1->where); @@ -815,6 +827,9 @@ arith_power (gfc_expr *op1, gfc_expr *op2, gfc_expr **resultp) gfc_expr *result; arith rc; + if (!gfc_numeric_ts (&op1->ts) || !gfc_numeric_ts (&op2->ts)) + return ARITH_INVALID_TYPE; + rc = ARITH_OK; result = gfc_get_constant_expr (op1->ts.type, op1->ts.kind, &op1->where); diff --git a/gcc/testsuite/gfortran.dg/pr107217.f90 b/gcc/testsuite/gfortran.dg/pr107217.f90 new file mode 100644 index 0000000..9c8492e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr107217.f90 @@ -0,0 +1,18 @@ +! { dg-do compile } +! PR fortran/107217 - ICE in gfc_arith_times +! Contributed by G.Steinmetz + +program p + print *, [real :: (['1'])] * 2 ! { dg-error "Cannot convert" } + print *, 2 * [real :: (['1'])] ! { dg-error "Cannot convert" } + print *, [real :: (['1'])] + 2 ! { dg-error "Cannot convert" } + print *, [real :: (['1'])] - 2 ! { dg-error "Cannot convert" } + print *, [real :: (['1'])] / 2 ! { dg-error "Cannot convert" } + print *, 1 / [real :: (['1'])] ! { dg-error "Cannot convert" } + print *, [real :: (['1'])] ** 2 ! { dg-error "Cannot convert" } + print *, 2 ** [real :: (['1'])] ! { dg-error "Cannot convert" } + print *, 2.0 ** [real :: (.true.)] ! { dg-error "Cannot convert" } + print *, [real :: (.true.)] ** 2.0 ! { dg-error "Cannot convert" } + print *, [complex :: (['1'])] ** (1.0,2.0) ! { dg-error "Cannot convert" } + print *, (1.0,2.0) ** [complex :: (['1'])] ! { dg-error "Cannot convert" } +end -- cgit v1.1 From a0c1a059101a3067d96211cbc4fae5905796d1db Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Tue, 11 Oct 2022 17:23:42 +0200 Subject: Add range-op entry for floating point NEGATE_EXPR. gcc/ChangeLog: * range-op-float.cc (class foperator_negate): New. (floating_op_table::floating_op_table): Add NEGATE_EXPR (range_op_float_tests): Add negate tests. 
--- gcc/range-op-float.cc | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'gcc') diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index 22b7418..229b9d2 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -1132,6 +1132,52 @@ foperator_ordered::op1_range (frange &r, tree type, return true; } +class foperator_negate : public range_operator_float +{ + using range_operator_float::fold_range; + using range_operator_float::op1_range; +public: + bool fold_range (frange &r, tree type, + const frange &op1, const frange &op2, + relation_kind = VREL_VARYING) const final override + { + if (empty_range_varying (r, type, op1, op2)) + return true; + if (op1.known_isnan ()) + { + bool sign; + if (op1.nan_signbit_p (sign)) + r.set_nan (type, !sign); + else + r.set_nan (type); + return true; + } + + REAL_VALUE_TYPE lh_lb = op1.lower_bound (); + REAL_VALUE_TYPE lh_ub = op1.upper_bound (); + lh_lb = real_value_negate (&lh_lb); + lh_ub = real_value_negate (&lh_ub); + r.set (type, lh_ub, lh_lb); + if (op1.maybe_isnan ()) + { + bool sign; + if (op1.nan_signbit_p (sign)) + r.update_nan (!sign); + else + r.update_nan (); + } + else + r.clear_nan (); + return true; + } + bool op1_range (frange &r, tree type, + const frange &lhs, const frange &op2, + relation_kind rel = VREL_VARYING) const final override + { + return fold_range (r, type, lhs, op2, rel); + } +} fop_negate; + class foperator_abs : public range_operator_float { using range_operator_float::fold_range; @@ -1593,6 +1639,7 @@ floating_op_table::floating_op_table () set (UNORDERED_EXPR, fop_unordered); set (ABS_EXPR, fop_abs); + set (NEGATE_EXPR, fop_negate); } // Return a pointer to the range_operator_float instance, if there is @@ -1633,6 +1680,21 @@ frange_float (const char *lb, const char *ub, tree type = float_type_node) void range_op_float_tests () { + frange r, r0, r1; + frange trange (float_type_node); + + // negate([-5, +10]) => [-10, 5] + r0 = 
frange_float ("-5", "10"); + fop_negate.fold_range (r, float_type_node, r0, trange); + ASSERT_EQ (r, frange_float ("-10", "5")); + + // negate([0, 1] -NAN) => [-1, -0] +NAN + r0 = frange_float ("0", "1"); + r0.update_nan (true); + fop_negate.fold_range (r, float_type_node, r0, trange); + r1 = frange_float ("-1", "-0"); + r1.update_nan (false); + ASSERT_EQ (r, r1); } } // namespace selftest -- cgit v1.1 From bc0d700b574a22f12db7f7587ce885f64ebcbfb0 Mon Sep 17 00:00:00 2001 From: Marek Polacek Date: Wed, 28 Sep 2022 10:37:34 -0400 Subject: c++: Remove maybe-rvalue OR in implicit move This patch removes the two-stage overload resolution when performing implicit move, whereby the compiler does two separate overload resolutions: one treating the operand as an rvalue, and then (if that resolution fails) another one treating the operand as an lvalue. In the standard this was introduced via CWG 1579 and implemented in gcc in r251035. In r11-2412, we disabled the fallback OR in C++20 (but not in C++17). Then C++23 P2266 removed the fallback overload resolution, and changed the implicit move rules once again. So we wound up with three different behaviors. The two overload resolutions approach was complicated and quirky, so users should transition to the newer model. Removing the maybe-rvalue OR also allows us to simplify our code, for instance, now we can get rid of LOOKUP_PREFER_RVALUE altogether. This change means that code that previously didn't compile in C++17 will now compile, for example: struct S1 { S1(S1 &&); }; struct S2 : S1 {}; S1 f (S2 s) { return s; // OK, derived-to-base, use S1::S1(S1&&) } And conversely, code that used to work in C++17 may not compile anymore: struct W { W(); }; struct F { F(W&); F(W&&) = delete; }; F fn () { W w; return w; // use w as rvalue -> use of deleted function F::F(W&&) } I plan to add a note to porting_to.html. gcc/cp/ChangeLog: * call.cc (standard_conversion): Remove LOOKUP_PREFER_RVALUE code. 
(reference_binding): Honor clk_implicit_rval even pre-C++20. (implicit_conversion_1): Remove LOOKUP_PREFER_RVALUE code. (build_user_type_conversion_1): Likewise. (convert_like_internal): Likewise. (build_over_call): Likewise. * cp-tree.h (LOOKUP_PREFER_RVALUE): Remove. (LOOKUP_NO_NARROWING): Adjust definition. * except.cc (build_throw): Don't perform two overload resolutions. * typeck.cc (maybe_warn_pessimizing_move): Don't use LOOKUP_PREFER_RVALUE. (check_return_expr): Don't perform two overload resolutions. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/Wredundant-move10.C: Adjust dg-warning. * g++.dg/cpp0x/Wredundant-move7.C: Likewise. * g++.dg/cpp0x/move-return2.C: Remove dg-error. * g++.dg/cpp0x/move-return4.C: Likewise. * g++.dg/cpp0x/ref-qual20.C: Adjust expected return value. * g++.dg/cpp0x/move-return5.C: New test. --- gcc/cp/call.cc | 41 ++------------------------ gcc/cp/cp-tree.h | 6 +--- gcc/cp/except.cc | 23 +++------------ gcc/cp/typeck.cc | 34 ++++----------------- gcc/testsuite/g++.dg/cpp0x/Wredundant-move10.C | 2 +- gcc/testsuite/g++.dg/cpp0x/Wredundant-move7.C | 6 ++-- gcc/testsuite/g++.dg/cpp0x/move-return2.C | 2 +- gcc/testsuite/g++.dg/cpp0x/move-return4.C | 2 +- gcc/testsuite/g++.dg/cpp0x/move-return5.C | 20 +++++++++++++ gcc/testsuite/g++.dg/cpp0x/ref-qual20.C | 2 +- 10 files changed, 41 insertions(+), 97 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/move-return5.C (limited to 'gcc') diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 2fa33c5..b9c08d6 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -1272,9 +1272,6 @@ standard_conversion (tree to, tree from, tree expr, bool c_cast_p, } } conv = build_conv (ck_rvalue, from, conv); - if (flags & LOOKUP_PREFER_RVALUE) - /* Tell convert_like to set LOOKUP_PREFER_RVALUE. */ - conv->rvaluedness_matches_p = true; /* If we're performing copy-initialization, remember to skip explicit constructors. 
*/ if (flags & LOOKUP_ONLYCONVERTING) @@ -1572,9 +1569,6 @@ standard_conversion (tree to, tree from, tree expr, bool c_cast_p, type. A temporary object is created to hold the result of the conversion unless we're binding directly to a reference. */ conv->need_temporary_p = !(flags & LOOKUP_NO_TEMP_BIND); - if (flags & LOOKUP_PREFER_RVALUE) - /* Tell convert_like to set LOOKUP_PREFER_RVALUE. */ - conv->rvaluedness_matches_p = true; /* If we're performing copy-initialization, remember to skip explicit constructors. */ if (flags & LOOKUP_ONLYCONVERTING) @@ -1883,7 +1877,7 @@ reference_binding (tree rto, tree rfrom, tree expr, bool c_cast_p, int flags, /* Unless it's really a C++20 lvalue being treated as an xvalue. But in C++23, such an expression is just an xvalue, not a special lvalue, so the binding is once again ill-formed. */ - && !(cxx_dialect == cxx20 + && !(cxx_dialect <= cxx20 && (gl_kind & clk_implicit_rval)) && (!CP_TYPE_CONST_NON_VOLATILE_P (to) || (flags & LOOKUP_NO_RVAL_BIND)) @@ -2044,9 +2038,8 @@ implicit_conversion_1 (tree to, tree from, tree expr, bool c_cast_p, /* Other flags only apply to the primary function in overload resolution, or after we've chosen one. */ flags &= (LOOKUP_ONLYCONVERTING|LOOKUP_NO_CONVERSION|LOOKUP_COPY_PARM - |LOOKUP_NO_TEMP_BIND|LOOKUP_NO_RVAL_BIND|LOOKUP_PREFER_RVALUE - |LOOKUP_NO_NARROWING|LOOKUP_PROTECT|LOOKUP_NO_NON_INTEGRAL - |LOOKUP_SHORTCUT_BAD_CONVS); + |LOOKUP_NO_TEMP_BIND|LOOKUP_NO_RVAL_BIND|LOOKUP_NO_NARROWING + |LOOKUP_PROTECT|LOOKUP_NO_NON_INTEGRAL|LOOKUP_SHORTCUT_BAD_CONVS); /* FIXME: actually we don't want warnings either, but we can't just have 'complain &= ~(tf_warning|tf_error)' because it would cause @@ -4451,14 +4444,6 @@ build_user_type_conversion_1 (tree totype, tree expr, int flags, if (cand->viable == -1) conv->bad_p = true; - /* We're performing the maybe-rvalue overload resolution and - a conversion function is in play. 
Reject converting the return - value of the conversion function to a base class. */ - if ((flags & LOOKUP_PREFER_RVALUE) && !DECL_CONSTRUCTOR_P (cand->fn)) - for (conversion *t = cand->second_conv; t; t = next_conversion (t)) - if (t->kind == ck_base) - return NULL; - /* Remember that this was a list-initialization. */ if (flags & LOOKUP_NO_NARROWING) conv->check_narrowing = true; @@ -8292,9 +8277,6 @@ convert_like_internal (conversion *convs, tree expr, tree fn, int argnum, explicit constructors. */ if (convs->copy_init_p) flags |= LOOKUP_ONLYCONVERTING; - if (convs->rvaluedness_matches_p) - /* standard_conversion got LOOKUP_PREFER_RVALUE. */ - flags |= LOOKUP_PREFER_RVALUE; expr = build_temp (expr, totype, flags, &diag_kind, complain); if (diag_kind && complain) { @@ -9560,23 +9542,6 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) ++arg_index; parm = TREE_CHAIN (parm); } - - if (cxx_dialect < cxx20 - && (cand->flags & LOOKUP_PREFER_RVALUE)) - { - /* The implicit move specified in 15.8.3/3 fails "...if the type of - the first parameter of the selected constructor is not an rvalue - reference to the object's type (possibly cv-qualified)...." */ - gcc_assert (!(complain & tf_error)); - tree ptype = convs[0]->type; - /* Allow calling a by-value converting constructor even though it - isn't permitted by the above, because we've allowed it since GCC 5 - (PR58051) and it's allowed in C++20. But don't call a copy - constructor. */ - if ((TYPE_REF_P (ptype) && !TYPE_REF_IS_RVALUE (ptype)) - || CONVERSION_RANK (convs[0]) > cr_exact) - return error_mark_node; - } } /* Bypass access control for 'this' parameter. 
*/ else if (TREE_CODE (TREE_TYPE (fn)) == METHOD_TYPE) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 80037fa..3b67be6 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -5874,12 +5874,8 @@ enum overload_flags { NO_SPECIAL = 0, DTOR_FLAG, TYPENAME_FLAG }; #define LOOKUP_DESTRUCTOR (1 << 5) /* Do not permit references to bind to temporaries. */ #define LOOKUP_NO_TEMP_BIND (1 << 6) -/* We're trying to treat an lvalue as an rvalue. */ -/* FIXME remove when we extend the P1825 semantics to all standard modes, the - C++20 approach uses IMPLICIT_RVALUE_P instead. */ -#define LOOKUP_PREFER_RVALUE (LOOKUP_NO_TEMP_BIND << 1) /* We're inside an init-list, so narrowing conversions are ill-formed. */ -#define LOOKUP_NO_NARROWING (LOOKUP_PREFER_RVALUE << 1) +#define LOOKUP_NO_NARROWING (LOOKUP_NO_TEMP_BIND << 1) /* We're looking up a constructor for list-initialization. */ #define LOOKUP_LIST_INIT_CTOR (LOOKUP_NO_NARROWING << 1) /* This is the first parameter of a copy constructor. */ diff --git a/gcc/cp/except.cc b/gcc/cp/except.cc index b8a85ed..703d1d5 100644 --- a/gcc/cp/except.cc +++ b/gcc/cp/except.cc @@ -715,25 +715,10 @@ build_throw (location_t loc, tree exp) treated as an rvalue for the purposes of overload resolution to favor move constructors over copy constructors. */ if (tree moved = treat_lvalue_as_rvalue_p (exp, /*return*/false)) - { - if (cxx_dialect < cxx20) - { - releasing_vec exp_vec (make_tree_vector_single (moved)); - moved = (build_special_member_call - (object, complete_ctor_identifier, &exp_vec, - TREE_TYPE (object), flags|LOOKUP_PREFER_RVALUE, - tf_none)); - if (moved != error_mark_node) - { - exp = moved; - converted = true; - } - } - else - /* In C++20 we just treat the return value as an rvalue that - can bind to lvalue refs. */ - exp = moved; - } + /* In C++20 we treat the return value as an rvalue that + can bind to lvalue refs. In C++23, such an expression is just + an xvalue. */ + exp = moved; /* Call the copy constructor. 
*/ if (!converted) diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc index b4a8e3c..69c378d 100644 --- a/gcc/cp/typeck.cc +++ b/gcc/cp/typeck.cc @@ -10697,21 +10697,12 @@ maybe_warn_pessimizing_move (tree expr, tree type, bool return_p) tree t = convert_for_initialization (NULL_TREE, type, moved, (LOOKUP_NORMAL - | LOOKUP_ONLYCONVERTING - | LOOKUP_PREFER_RVALUE), + | LOOKUP_ONLYCONVERTING), ICR_RETURN, NULL_TREE, 0, tf_none); /* If this worked, implicit rvalue would work, so the call to std::move is redundant. */ - if (t != error_mark_node - /* Trying to move something const will never succeed unless - there's T(const T&&), which it almost never is, and if - so, T wouldn't be error_mark_node now: the above convert_ - call with LOOKUP_PREFER_RVALUE returns an error if a const T& - overload is selected. */ - || (CP_TYPE_CONST_P (TREE_TYPE (arg)) - && same_type_ignoring_top_level_qualifiers_p - (TREE_TYPE (arg), type))) + if (t != error_mark_node) { auto_diagnostic_group d; if (warning_at (loc, OPT_Wredundant_move, @@ -11054,23 +11045,10 @@ check_return_expr (tree retval, bool *no_warning) ? CLASS_TYPE_P (functype) : !SCALAR_TYPE_P (functype) || !SCALAR_TYPE_P (TREE_TYPE (retval))) && (moved = treat_lvalue_as_rvalue_p (retval, /*return*/true))) - { - if (cxx_dialect < cxx20) - { - moved = convert_for_initialization - (NULL_TREE, functype, moved, flags|LOOKUP_PREFER_RVALUE, - ICR_RETURN, NULL_TREE, 0, tf_none); - if (moved != error_mark_node) - { - retval = moved; - converted = true; - } - } - else - /* In C++20 we just treat the return value as an rvalue that - can bind to lvalue refs. */ - retval = moved; - } + /* In C++20 and earlier we treat the return value as an rvalue + that can bind to lvalue refs. In C++23, such an expression is just + an xvalue (see reference_binding). */ + retval = moved; /* The call in a (lambda) thunk needs no conversions. 
*/ if (TREE_CODE (retval) == CALL_EXPR diff --git a/gcc/testsuite/g++.dg/cpp0x/Wredundant-move10.C b/gcc/testsuite/g++.dg/cpp0x/Wredundant-move10.C index a215a47..17dd807 100644 --- a/gcc/testsuite/g++.dg/cpp0x/Wredundant-move10.C +++ b/gcc/testsuite/g++.dg/cpp0x/Wredundant-move10.C @@ -57,5 +57,5 @@ struct S2: S1 {}; S1 f3(const S2 s) { - return std::move(s); // { dg-warning "redundant move" "" { target c++20 } } + return std::move(s); // { dg-warning "redundant move" } } diff --git a/gcc/testsuite/g++.dg/cpp0x/Wredundant-move7.C b/gcc/testsuite/g++.dg/cpp0x/Wredundant-move7.C index 3fec525..6547777 100644 --- a/gcc/testsuite/g++.dg/cpp0x/Wredundant-move7.C +++ b/gcc/testsuite/g++.dg/cpp0x/Wredundant-move7.C @@ -28,7 +28,7 @@ struct S2 : S1 {}; S1 f (S2 s) { - return std::move(s); // { dg-warning "redundant move in return statement" "" { target c++20 } } + return std::move(s); // { dg-warning "redundant move in return statement" } } struct R1 { @@ -40,7 +40,7 @@ struct R2 : R1 {}; R1 f2 (const R2 s) { - return std::move(s); // { dg-warning "redundant move in return statement" "" { target c++20 } } + return std::move(s); // { dg-warning "redundant move in return statement" } } struct T1 { @@ -55,5 +55,5 @@ f3 (const T2 s) { // Without std::move: const T1 & // With std::move: const T1 && - return std::move(s); // { dg-warning "redundant move in return statement" "" { target c++20 } } + return std::move(s); // { dg-warning "redundant move in return statement" } } diff --git a/gcc/testsuite/g++.dg/cpp0x/move-return2.C b/gcc/testsuite/g++.dg/cpp0x/move-return2.C index 999f2c95..8e750ef 100644 --- a/gcc/testsuite/g++.dg/cpp0x/move-return2.C +++ b/gcc/testsuite/g++.dg/cpp0x/move-return2.C @@ -7,5 +7,5 @@ struct S2 : S1 {}; S1 f (S2 s) { - return s; // { dg-error "use of deleted function" "" { target c++17_down } } + return s; } diff --git a/gcc/testsuite/g++.dg/cpp0x/move-return4.C b/gcc/testsuite/g++.dg/cpp0x/move-return4.C index 3fc5808..0f0ca1f 100644 --- 
a/gcc/testsuite/g++.dg/cpp0x/move-return4.C +++ b/gcc/testsuite/g++.dg/cpp0x/move-return4.C @@ -13,5 +13,5 @@ struct A : Base A foo() { A v; - return v; // { dg-error "cannot bind rvalue reference" "" { target c++17_down } } + return v; } diff --git a/gcc/testsuite/g++.dg/cpp0x/move-return5.C b/gcc/testsuite/g++.dg/cpp0x/move-return5.C new file mode 100644 index 0000000..695000b --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/move-return5.C @@ -0,0 +1,20 @@ +// { dg-do compile { target c++11 } } +// This used to compile in C++11...17 because we performed two +// separate overload resolutions: one treating the operand as +// an rvalue, and then (if that resolution fails) another one +// treating the operand as an lvalue. + +struct W { + W(); +}; + +struct F { + F(W&); + F(W&&) = delete; +}; + +F fn () +{ + W w; + return w; // { dg-error "use of deleted function" } +} diff --git a/gcc/testsuite/g++.dg/cpp0x/ref-qual20.C b/gcc/testsuite/g++.dg/cpp0x/ref-qual20.C index cfbef30..314f19b 100644 --- a/gcc/testsuite/g++.dg/cpp0x/ref-qual20.C +++ b/gcc/testsuite/g++.dg/cpp0x/ref-qual20.C @@ -52,7 +52,7 @@ f5 () int main () { - int return_lval = __cplusplus > 201703L ? -1 : 2; + int return_lval = -1; Y y1 = f (A()); if (y1.y != return_lval) __builtin_abort (); -- cgit v1.1 From ddb7f0a0cac48762ba6408d69538f8115c4a2739 Mon Sep 17 00:00:00 2001 From: Lewis Hyatt Date: Thu, 6 Oct 2022 18:05:02 -0400 Subject: preprocessor: Fix tracking of system header state [PR60014,PR60723] The token_streamer class (which implements gcc mode -E and -save-temps/-no-integrated-cpp) needs to keep track whether the last tokens output were in a system header, so that it can generate line marker annotations as necessary for a downstream consumer to reconstruct the state. The logic for tracking it, which was added by r5-1863 to resolve PR60723, has some edge case issues as revealed by the three new test cases. 
The first, coming from the original PR60014, was incidentally fixed by r9-1926 for unrelated reasons. The other two were still failing on master prior to this commit. Such code paths were not realizable prior to r13-1544, which made it possible for the token streamer to see CPP_PRAGMA tokens in more contexts. The two main issues being corrected here are: 1) print.prev_was_system_token needs to indicate whether the previous token output was in a system location. However, it was not being set on every token, only on those that triggered the main code path; specifically it was not triggered on a CPP_PRAGMA token. Testcase 2 covers this case. 2) The token_streamer uses a variable "line_marker_emitted" to remember whether a line marker has been emitted while processing a given token, so that it wouldn't be done more than once in case multiple conditions requiring a line marker are true. There was no reason for this to be a member variable that retains its value from token to token, since it is just needed for tracking the state locally while processing a single given token. The fact that it could retain its value for a subsequent token is rather difficult to observe, but testcase 3 demonstrates incorrect behavior resulting from that. Moving this to a local variable also simplifies understanding the control flow going forward. gcc/c-family/ChangeLog: PR preprocessor/60014 PR preprocessor/60723 * c-ppoutput.cc (class token_streamer): Move member line_marker_emitted to... (token_streamer::stream): ...a local variable here. Set print.prev_was_system_token on all code paths. gcc/testsuite/ChangeLog: PR preprocessor/60014 PR preprocessor/60723 * gcc.dg/cpp/pr60014-1.c: New test. * gcc.dg/cpp/pr60014-1.h: New test. * gcc.dg/cpp/pr60014-2.c: New test. * gcc.dg/cpp/pr60014-2.h: New test. * gcc.dg/cpp/pr60014-3.c: New test. * gcc.dg/cpp/pr60014-3.h: New test. 
--- gcc/c-family/c-ppoutput.cc | 17 ++++++++++------- gcc/testsuite/gcc.dg/cpp/pr60014-1.c | 9 +++++++++ gcc/testsuite/gcc.dg/cpp/pr60014-1.h | 5 +++++ gcc/testsuite/gcc.dg/cpp/pr60014-2.c | 5 +++++ gcc/testsuite/gcc.dg/cpp/pr60014-2.h | 5 +++++ gcc/testsuite/gcc.dg/cpp/pr60014-3.c | 16 ++++++++++++++++ gcc/testsuite/gcc.dg/cpp/pr60014-3.h | 2 ++ 7 files changed, 52 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/cpp/pr60014-1.c create mode 100644 gcc/testsuite/gcc.dg/cpp/pr60014-1.h create mode 100644 gcc/testsuite/gcc.dg/cpp/pr60014-2.c create mode 100644 gcc/testsuite/gcc.dg/cpp/pr60014-2.h create mode 100644 gcc/testsuite/gcc.dg/cpp/pr60014-3.c create mode 100644 gcc/testsuite/gcc.dg/cpp/pr60014-3.h (limited to 'gcc') diff --git a/gcc/c-family/c-ppoutput.cc b/gcc/c-family/c-ppoutput.cc index 98081cc..a99d9e9 100644 --- a/gcc/c-family/c-ppoutput.cc +++ b/gcc/c-family/c-ppoutput.cc @@ -184,15 +184,13 @@ class token_streamer bool avoid_paste; bool do_line_adjustments; bool in_pragma; - bool line_marker_emitted; public: token_streamer (cpp_reader *pfile) :avoid_paste (false), do_line_adjustments (cpp_get_options (pfile)->lang != CLK_ASM && !flag_no_line_commands), - in_pragma (false), - line_marker_emitted (false) + in_pragma (false) { gcc_assert (!print.streamer); print.streamer = this; @@ -227,7 +225,14 @@ token_streamer::stream (cpp_reader *pfile, const cpp_token *token, if (token->type == CPP_EOF) return; + /* Keep track when we move into and out of system locations. */ + const bool is_system_token = in_system_header_at (loc); + const bool system_state_changed + = (is_system_token != print.prev_was_system_token); + print.prev_was_system_token = is_system_token; + /* Subtle logic to output a space if and only if necessary. 
*/ + bool line_marker_emitted = false; if (avoid_paste) { unsigned src_line = LOCATION_LINE (loc); @@ -301,19 +306,17 @@ token_streamer::stream (cpp_reader *pfile, const cpp_token *token, if (do_line_adjustments && !in_pragma && !line_marker_emitted - && print.prev_was_system_token != !!in_system_header_at (loc) + && system_state_changed && !is_location_from_builtin_token (loc)) /* The system-ness of this token is different from the one of the previous token. Let's emit a line change to mark the new system-ness before we emit the token. */ { - do_line_change (pfile, token, loc, false); - print.prev_was_system_token = !!in_system_header_at (loc); + line_marker_emitted = do_line_change (pfile, token, loc, false); } if (!in_pragma || should_output_pragmas ()) { cpp_output_token (token, print.outf); - line_marker_emitted = false; print.printed = true; } } diff --git a/gcc/testsuite/gcc.dg/cpp/pr60014-1.c b/gcc/testsuite/gcc.dg/cpp/pr60014-1.c new file mode 100644 index 0000000..de52b30 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/pr60014-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-save-temps -Wint-conversion" } */ +#include "pr60014-1.h" +int main () +{ + X(a, + b); + char *should_warn = 1; /* { dg-warning {-Wint-conversion} } */ +} diff --git a/gcc/testsuite/gcc.dg/cpp/pr60014-1.h b/gcc/testsuite/gcc.dg/cpp/pr60014-1.h new file mode 100644 index 0000000..50c159c --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/pr60014-1.h @@ -0,0 +1,5 @@ +#pragma GCC system_header + +/* N.B. the semicolon in the macro definition is important, since it produces a + second token from this system header on the same line as the __LINE__ token. 
*/ +#define X(a, b) __LINE__; diff --git a/gcc/testsuite/gcc.dg/cpp/pr60014-2.c b/gcc/testsuite/gcc.dg/cpp/pr60014-2.c new file mode 100644 index 0000000..115c985 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/pr60014-2.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-save-temps -Wint-conversion" } */ +#include "pr60014-2.h" +X +char *should_warn = 1; /* { dg-warning {-Wint-conversion} } */ diff --git a/gcc/testsuite/gcc.dg/cpp/pr60014-2.h b/gcc/testsuite/gcc.dg/cpp/pr60014-2.h new file mode 100644 index 0000000..455f1ed --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/pr60014-2.h @@ -0,0 +1,5 @@ +#pragma GCC system_header + +/* N.B. the semicolon in the macro definition is important, since it produces a + second token from this system header on the same line as the _Pragma. */ +#define X _Pragma("GCC diagnostic push"); diff --git a/gcc/testsuite/gcc.dg/cpp/pr60014-3.c b/gcc/testsuite/gcc.dg/cpp/pr60014-3.c new file mode 100644 index 0000000..c430603 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/pr60014-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-save-temps -Wint-conversion" } */ +#include "pr60014-3.h" + +/* The line continuation on the next line is what triggers the problem here, + because it synchronizes the output line between the input source and the + preprocessed output (whereas without the line continuation, the + preprocessed output would be off by one line from having output a #pragma + on a line by itself). Therefore, the token streamer doesn't have a reason + to generate a line marker purely based on the line number. That gives it + the chance to consider whether instead it needs to generate a line marker + based on a change of the "in-system-header" state, allowing us to test that + it comes to the right conclusion, which it did not, prior to this commit to + resolve PR60014. 
*/ +P(GCC diagnostic) \ +const char *should_warn = 1; /* { dg-warning {-Wint-conversion} } */ diff --git a/gcc/testsuite/gcc.dg/cpp/pr60014-3.h b/gcc/testsuite/gcc.dg/cpp/pr60014-3.h new file mode 100644 index 0000000..aedf038 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/pr60014-3.h @@ -0,0 +1,2 @@ +#pragma GCC system_header +#define P(x) _Pragma(#x) -- cgit v1.1 From 781f477a13ae14ca661018518f4d4a9e6881b0dd Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 13 Oct 2022 00:17:37 +0000 Subject: Daily bump. --- gcc/ChangeLog | 80 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/c-family/ChangeLog | 9 ++++++ gcc/cp/ChangeLog | 35 ++++++++++++++++++++++ gcc/fortran/ChangeLog | 9 ++++++ gcc/testsuite/ChangeLog | 30 +++++++++++++++++++ 6 files changed, 164 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a44f661..7d9d899 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,83 @@ +2022-10-12 Aldy Hernandez + + * range-op-float.cc (class foperator_negate): New. + (floating_op_table::floating_op_table): Add NEGATE_EXPR + (range_op_float_tests): Add negate tests. + +2022-10-12 Ju-Zhe Zhong + + * config/riscv/riscv-vector-builtins.h: Remove unused macro. + +2022-10-12 Ju-Zhe Zhong + + * config/riscv/riscv-vector-builtins.cc (DEF_RVV_TYPE): Apply + clang-format. + (add_vector_type_attribute): Ditto. + * config/riscv/riscv-vector-builtins.def (DEF_RVV_TYPE): Apply + clang-format. + * config/riscv/riscv-vector-builtins.h (DEF_RVV_TYPE): Apply + clang-format. + +2022-10-12 Ju-Zhe Zhong + + * config/riscv/riscv-vector-builtins.cc (builtin_types): Redefine + vector types. + (build_const_pointer): New function. + (register_builtin_type): Ditto. + (DEF_RVV_TYPE): Simplify macro. + (register_vector_type): Refine implementation. + * config/riscv/riscv-vector-builtins.h (rvv_builtin_types_t): New. 
+ +2022-10-12 Ju-Zhe Zhong + + * config/riscv/riscv-vector-builtins.h (class rvv_switcher): Move to + this to .... + * config/riscv/riscv-vector-builtins.cc (class rvv_switcher): + here. + +2022-10-12 Cui,Lili + + * config/i386/driver-i386.cc (host_detect_local_cpu): + Move sapphirerapids out of AVX512_VP2INTERSECT. + * config/i386/i386.h: Remove AVX512_VP2INTERSECT from PTA_SAPPHIRERAPIDS + * doc/invoke.texi: Remove AVX512_VP2INTERSECT from SAPPHIRERAPIDS + +2022-10-12 Martin Liska + + * gcov-io.cc (gcov_write_summary): Rename to ... + (gcov_write_object_summary): ... this. + * gcov-io.h (GCOV_TAG_OBJECT_SUMMARY_LENGTH): Rename from ... + (GCOV_TAG_SUMMARY_LENGTH): ... this. + +2022-10-12 Martin Liska + + * configure: Regenerate. + +2022-10-12 Aldy Hernandez + + * range-op-float.cc (frange_float): New. + (range_op_float_tests): New. + * range-op.cc (range_op_tests): Call range_op_float_tests. + +2022-10-12 Aldy Hernandez + + * value-range.h (frange::nan_signbit_p): New. + +2022-10-12 Aldy Hernandez + + * value-range.cc (frange::set_nonnegative): Pass bool to + update_nan. + * value-range.h: Disallow conversion to bool in update_nan(). + +2022-10-12 Aldy Hernandez + + * value-range.h (frange::frange): Add constructor taking type. + +2022-10-12 Aldy Hernandez + + * range-op-float.cc: Add relation_kind = VREL_VARYING to all + methods. + 2022-10-11 Aldy Hernandez * gimple-range-gori.cc (gori_compute::logical_combine): Avoid diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 6f50c16..0c497e6 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20221012 +20221013 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index f6176a7..1fade0a 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,12 @@ +2022-10-12 Lewis Hyatt + + PR preprocessor/60014 + PR preprocessor/60723 + * c-ppoutput.cc (class token_streamer): Remove member + line_marker_emitted to... + (token_streamer::stream): ...a local variable here. 
Set + print.prev_was_system_token on all code paths. + 2022-10-10 Nathan Sidwell * c-opts.cc (c_common_post_options): Bump abi to 18. diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 5e985b6..320b465 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,38 @@ +2022-10-12 Marek Polacek + + * call.cc (standard_conversion): Remove LOOKUP_PREFER_RVALUE code. + (reference_binding): Honor clk_implicit_rval even pre-C++20. + (implicit_conversion_1): Remove LOOKUP_PREFER_RVALUE code. + (build_user_type_conversion_1): Likewise. + (convert_like_internal): Likewise. + (build_over_call): Likewise. + * cp-tree.h (LOOKUP_PREFER_RVALUE): Remove. + (LOOKUP_NO_NARROWING): Adjust definition. + * except.cc (build_throw): Don't perform two overload resolutions. + * typeck.cc (maybe_warn_pessimizing_move): Don't use + LOOKUP_PREFER_RVALUE. + (check_return_expr): Don't perform two overload resolutions. + +2022-10-12 Jason Merrill + + DR 2631 + * cp-tree.h (source_location_current_p): Remove. + * name-lookup.h (struct cp_binding_level): Remove + immediate_fn_ctx_p. + * call.cc (in_immediate_context): All default args + and DMI are potentially immediate context. + (immediate_invocation_p): Don't treat source_location specially. + (struct in_consteval_if_p_temp_override): Move to cp-tree.h. + * constexpr.cc (get_nth_callarg): Move to cp-tree.h. + * cp-gimplify.cc (cp_fold_r): Don't fold consteval. + * name-lookup.cc (begin_scope): Don't set immediate_fn_ctx_p. + * parser.cc (cp_parser_lambda_declarator_opt): Likewise. + (cp_parser_direct_declarator): Likewise. + * pt.cc (tsubst_default_argument): Open sk_function_parms level. + * tree.cc (source_location_current_p): Remove. + (bot_replace): Fold consteval here. + (break_out_target_exprs): Handle errors. 
+ 2022-10-11 Patrick Palka PR c++/100134 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 7c86770..fa03b33 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,12 @@ +2022-10-12 Harald Anlauf + + PR fortran/107217 + * arith.cc (gfc_arith_plus): Compare consistency of types of operands. + (gfc_arith_minus): Likewise. + (gfc_arith_times): Likewise. + (gfc_arith_divide): Likewise. + (arith_power): Check that both operands are of numeric type. + 2022-10-11 Harald Anlauf PR fortran/107215 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 350c852..737ee21 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,33 @@ +2022-10-12 Lewis Hyatt + + PR preprocessor/60014 + PR preprocessor/60723 + * gcc.dg/cpp/pr60014-1.c: New test. + * gcc.dg/cpp/pr60014-1.h: New test. + * gcc.dg/cpp/pr60014-2.c: New test. + * gcc.dg/cpp/pr60014-2.h: New test. + * gcc.dg/cpp/pr60014-3.c: New test. + * gcc.dg/cpp/pr60014-3.h: New test. + +2022-10-12 Marek Polacek + + * g++.dg/cpp0x/Wredundant-move10.C: Adjust dg-warning. + * g++.dg/cpp0x/Wredundant-move7.C: Likewise. + * g++.dg/cpp0x/move-return2.C: Remove dg-error. + * g++.dg/cpp0x/move-return4.C: Likewise. + * g++.dg/cpp0x/ref-qual20.C: Adjust expected return value. + * g++.dg/cpp0x/move-return5.C: New test. + +2022-10-12 Harald Anlauf + + PR fortran/107217 + * gfortran.dg/pr107217.f90: New test. + +2022-10-12 Jason Merrill + + DR 2631 + * g++.dg/cpp2a/consteval-defarg3.C: New test. + 2022-10-11 Harald Anlauf PR fortran/107215 -- cgit v1.1 From e867f6b7e636391cf70a8d9b9bac79248b3fad67 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Thu, 13 Oct 2022 01:13:36 +0000 Subject: c: Do not use *_IS_IEC_60559 == 2 A late change for C2x (addressing comments from the second round of editorial review before the CD ballot, postdating the most recent public working draft) removed the value 2 for *_IS_IEC_60559 (a new macro added in C2x). 
Adjust the implementation accordingly not to use this value. Bootstrapped with no regressions for x86_64-pc-linux-gnu. gcc/ * ginclude/float.h (FLT_IS_IEC_60559, DBL_IS_IEC_60559) (LDBL_IS_IEC_60559): Update comment. gcc/c-family/ * c-cppbuiltin.cc (builtin_define_float_constants): Do not use value 2 for *_IS_IEC_60559. gcc/testsuite/ * gcc.dg/c2x-float-10.c: Do not expect value 2 for *_IS_IEC_60559. --- gcc/c-family/c-cppbuiltin.cc | 10 +++------- gcc/ginclude/float.h | 3 +-- gcc/testsuite/gcc.dg/c2x-float-10.c | 6 +++--- 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'gcc') diff --git a/gcc/c-family/c-cppbuiltin.cc b/gcc/c-family/c-cppbuiltin.cc index 4b8486c..2e39acb 100644 --- a/gcc/c-family/c-cppbuiltin.cc +++ b/gcc/c-family/c-cppbuiltin.cc @@ -319,14 +319,10 @@ builtin_define_float_constants (const char *name_prefix, } /* For C2x *_IS_IEC_60559. 0 means the type does not match an IEC - 60559 format, 1 that it matches a format but not operations and 2 - that it matches a format and operations (but may not conform to - Annex F; we take this as meaning exceptions and rounding modes - need not be supported). */ + 60559 format, 1 that it matches a format but not necessarily + operations. */ sprintf (name, "__%s_IS_IEC_60559__", name_prefix); - builtin_define_with_int_value (name, - (fmt->ieee_bits == 0 - ? 0 : (fmt->round_towards_zero ? 1 : 2))); + builtin_define_with_int_value (name, fmt->ieee_bits != 0); } /* Define __DECx__ constants for TYPE using NAME_PREFIX and SUFFIX. */ diff --git a/gcc/ginclude/float.h b/gcc/ginclude/float.h index afe4a71..bc5439d 100644 --- a/gcc/ginclude/float.h +++ b/gcc/ginclude/float.h @@ -248,8 +248,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define DBL_NORM_MAX __DBL_NORM_MAX__ #define LDBL_NORM_MAX __LDBL_NORM_MAX__ -/* Whether each type matches an IEC 60559 format (1 for format, 2 for - format and operations). */ +/* Whether each type matches an IEC 60559 format. 
*/ #undef FLT_IS_IEC_60559 #undef DBL_IS_IEC_60559 #undef LDBL_IS_IEC_60559 diff --git a/gcc/testsuite/gcc.dg/c2x-float-10.c b/gcc/testsuite/gcc.dg/c2x-float-10.c index 7b53a6a..7206921 100644 --- a/gcc/testsuite/gcc.dg/c2x-float-10.c +++ b/gcc/testsuite/gcc.dg/c2x-float-10.c @@ -21,13 +21,13 @@ _Static_assert (FLT_IS_IEC_60559 == 0); _Static_assert (DBL_IS_IEC_60559 == 0); _Static_assert (LDBL_IS_IEC_60559 == 0); #else -_Static_assert (FLT_IS_IEC_60559 == 2); -_Static_assert (DBL_IS_IEC_60559 == 2); +_Static_assert (FLT_IS_IEC_60559 == 1); +_Static_assert (DBL_IS_IEC_60559 == 1); #if LDBL_MANT_DIG == 106 || LDBL_MIN_EXP == -16382 /* IBM long double and m68k extended format do not meet the definition of an IEC 60559 interchange or extended format. */ _Static_assert (LDBL_IS_IEC_60559 == 0); #else -_Static_assert (LDBL_IS_IEC_60559 == 2); +_Static_assert (LDBL_IS_IEC_60559 == 1); #endif #endif -- cgit v1.1 From 7f9a7465c863e482708d2a00f5f7ff91ae3a7e0b Mon Sep 17 00:00:00 2001 From: Andre Vieira Date: Thu, 13 Oct 2022 10:34:27 +0100 Subject: vect: Don't pattern match BITFIELD_REF's of non-integrals [PR107226] The original patch supported matching the vect_recog_bitfield_ref_pattern for BITFIELD_REF's where the first operand didn't have an INTEGRAL_TYPE_P type. That means it would also match vectors, leading to regressions in targets that supported vectorization of those. gcc/ChangeLog: PR tree-optimization/107226 * tree-vect-patterns.cc (vect_recog_bitfield_ref_pattern): Reject BITFIELD_REF's with non integral typed first operands.
--- gcc/tree-vect-patterns.cc | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 0cc315d..6afd57a 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -1913,6 +1913,7 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info, return NULL; if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref)) + || !INTEGRAL_TYPE_P (TREE_TYPE (container)) || TYPE_MODE (TREE_TYPE (container)) == E_BLKmode) return NULL; @@ -1921,25 +1922,7 @@ vect_recog_bitfield_ref_pattern (vec_info *vinfo, stmt_vec_info stmt_info, tree ret = gimple_assign_lhs (first_stmt); tree ret_type = TREE_TYPE (ret); bool shift_first = true; - tree vectype; - - /* If the first operand of the BIT_FIELD_REF is not an INTEGER type, convert - it to one of the same width so we can perform the necessary masking and - shifting. */ - if (!INTEGRAL_TYPE_P (TREE_TYPE (container))) - { - unsigned HOST_WIDE_INT container_size = - tree_to_uhwi (TYPE_SIZE (TREE_TYPE (container))); - tree int_type = build_nonstandard_integer_type (container_size, true); - pattern_stmt - = gimple_build_assign (vect_recog_temp_ssa_var (int_type), - VIEW_CONVERT_EXPR, container); - vectype = get_vectype_for_scalar_type (vinfo, int_type); - container = gimple_assign_lhs (pattern_stmt); - append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vectype); - } - else - vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (container)); + tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (container)); /* We move the conversion earlier if the loaded type is smaller than the return type to enable the use of widening loads. 
*/ -- cgit v1.1 From 9f0d4adabe2035886a1aa8d2ca990a90de000613 Mon Sep 17 00:00:00 2001 From: Andre Vieira Date: Thu, 13 Oct 2022 12:09:38 +0100 Subject: ifcvt: Fix bitpos calculation in bitfield lowering [PR107229] The bitposition calculation for the bitfield lowering in loop if conversion was not taking DECL_FIELD_OFFSET into account, which meant that it would result in wrong bitpositions for bitfields that did not end up having representations starting at the beginning of the struct. gcc/ChangeLog: PR tree-optimization/107229 * tree-if-conv.cc (get_bitfield_rep): Fix bitposition calculation. gcc/testsuite/ChangeLog: * gcc.dg/vect/pr107229-1.c: New test. * gcc.dg/vect/pr107229-2.c: New test. * gcc.dg/vect/pr107229-3.c: New test. --- gcc/testsuite/gcc.dg/vect/pr107229-1.c | 16 ++++++++++++++++ gcc/testsuite/gcc.dg/vect/pr107229-2.c | 18 ++++++++++++++++++ gcc/testsuite/gcc.dg/vect/pr107229-3.c | 19 +++++++++++++++++++ gcc/tree-if-conv.cc | 32 ++++++++++++++++++++++++++++---- 4 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr107229-1.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr107229-2.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr107229-3.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/pr107229-1.c b/gcc/testsuite/gcc.dg/vect/pr107229-1.c new file mode 100644 index 0000000..67b4323 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr107229-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* PR tree-optimization/107229. */ + +int a, c; +struct { + long d; + int : 8; + int : 27; + int e : 21; +} f; +void g(int b) { a = a & 1; } +int main() { + while (c) + g(f.e); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr107229-2.c b/gcc/testsuite/gcc.dg/vect/pr107229-2.c new file mode 100644 index 0000000..88bffb6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr107229-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* PR tree-optimization/107229. 
*/ + +int a, c; +struct { + long f; + long g; + long d; + int : 8; + int : 27; + int e : 21; +} f; +void g(int b) { a = a & 1; } +int main() { + while (c) + g(f.e); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr107229-3.c b/gcc/testsuite/gcc.dg/vect/pr107229-3.c new file mode 100644 index 0000000..4abd8c1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr107229-3.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* PR tree-optimization/107229. */ + +int a, c; +struct { + long f; + long g; + long d; + int : 8; + int : 32; + int : 2; + int e : 21; +} f; +void g(int b) { a = a & 1; } +int main() { + while (c) + g(f.e); + return 0; +} diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index e468a46..01637c5 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -3298,10 +3298,34 @@ get_bitfield_rep (gassign *stmt, bool write, tree *bitpos, *struct_expr = TREE_OPERAND (comp_ref, 0); if (bitpos) - *bitpos - = fold_build2 (MINUS_EXPR, bitsizetype, - DECL_FIELD_BIT_OFFSET (field_decl), - DECL_FIELD_BIT_OFFSET (rep_decl)); + { + /* To calculate the bitposition of the BITFIELD_REF we have to determine + where our bitfield starts in relation to the container REP_DECL. The + DECL_FIELD_OFFSET of the original bitfield's member FIELD_DECL tells + us how many bytes from the start of the structure there are until the + start of the group of bitfield members the FIELD_DECL belongs to, + whereas DECL_FIELD_BIT_OFFSET will tell us how many bits from that + position our actual bitfield member starts. For the container + REP_DECL adding DECL_FIELD_OFFSET and DECL_FIELD_BIT_OFFSET will tell + us the distance between the start of the structure and the start of + the container, though the first is in bytes and the later other in + bits. 
With this in mind we calculate the bit position of our new + BITFIELD_REF by subtracting the number of bits between the start of + the structure and the container from the number of bits from the start + of the structure and the actual bitfield member. */ + tree bf_pos = fold_build2 (MULT_EXPR, bitsizetype, + DECL_FIELD_OFFSET (field_decl), + build_int_cst (bitsizetype, BITS_PER_UNIT)); + bf_pos = fold_build2 (PLUS_EXPR, bitsizetype, bf_pos, + DECL_FIELD_BIT_OFFSET (field_decl)); + tree rep_pos = fold_build2 (MULT_EXPR, bitsizetype, + DECL_FIELD_OFFSET (rep_decl), + build_int_cst (bitsizetype, BITS_PER_UNIT)); + rep_pos = fold_build2 (PLUS_EXPR, bitsizetype, rep_pos, + DECL_FIELD_BIT_OFFSET (rep_decl)); + + *bitpos = fold_build2 (MINUS_EXPR, bitsizetype, bf_pos, rep_pos); + } return rep_decl; -- cgit v1.1 From 786e4c024f941671a233f5779d73a5d22f4e9588 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 13 Oct 2022 12:59:09 +0200 Subject: diagnose return statement in match.pd (with { ... } expressions The expression in (with { ... } is used like a statement expression which means control flow that leaves it is not allowed. The following explicitly diagnoses 'return' and fixes up the few cases that crept into match.pd (oops). Any such return will prematurely end matching the current expression. * genmatch.cc (parser::parse_c_expr): Diagnose 'return'. * match.pd: Replace 'return' statements in with expressions with appropriate variants. --- gcc/genmatch.cc | 7 +- gcc/match.pd | 293 ++++++++++++++++++++++++++++---------------------------- 2 files changed, 150 insertions(+), 150 deletions(-) (limited to 'gcc') diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc index a0b22c5..4a88024 100644 --- a/gcc/genmatch.cc +++ b/gcc/genmatch.cc @@ -4447,8 +4447,11 @@ parser::parse_c_expr (cpp_ttype start) /* If this is possibly a user-defined identifier mark it used.
*/ if (token->type == CPP_NAME) { - id_base *idb = get_operator ((const char *)CPP_HASHNODE - (token->val.node.node)->ident.str); + const char *str + = (const char *)CPP_HASHNODE (token->val.node.node)->ident.str; + if (strcmp (str, "return") == 0) + fatal_at (token, "return statement not allowed in C expression"); + id_base *idb = get_operator (str); user_id *p; if (idb && (p = dyn_cast (idb)) && p->is_oper_list) record_operlist (token->src_loc, p); diff --git a/gcc/match.pd b/gcc/match.pd index 3550c16..fd64ad7 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -7930,131 +7930,131 @@ and, /* Build a vector of integers from the tree mask. */ vec_perm_builder builder; - if (!tree_to_vec_perm_builder (&builder, op2)) - return NULL_TREE; - - /* Create a vec_perm_indices for the integer vector. */ - poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); - bool single_arg = (op0 == op1); - vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); } - (if (sel.series_p (0, 1, 0, 1)) - { op0; } - (if (sel.series_p (0, 1, nelts, 1)) - { op1; } - (with - { - if (!single_arg) - { - if (sel.all_from_input_p (0)) - op1 = op0; - else if (sel.all_from_input_p (1)) + (if (tree_to_vec_perm_builder (&builder, op2)) + (with + { + /* Create a vec_perm_indices for the integer vector. */ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); + bool single_arg = (op0 == op1); + vec_perm_indices sel (builder, single_arg ? 
1 : 2, nelts); + } + (if (sel.series_p (0, 1, 0, 1)) + { op0; } + (if (sel.series_p (0, 1, nelts, 1)) + { op1; } + (with + { + if (!single_arg) + { + if (sel.all_from_input_p (0)) + op1 = op0; + else if (sel.all_from_input_p (1)) + { + op0 = op1; + sel.rotate_inputs (1); + } + else if (known_ge (poly_uint64 (sel[0]), nelts)) + { + std::swap (op0, op1); + sel.rotate_inputs (1); + } + } + gassign *def; + tree cop0 = op0, cop1 = op1; + if (TREE_CODE (op0) == SSA_NAME + && (def = dyn_cast (SSA_NAME_DEF_STMT (op0))) + && gimple_assign_rhs_code (def) == CONSTRUCTOR) + cop0 = gimple_assign_rhs1 (def); + if (TREE_CODE (op1) == SSA_NAME + && (def = dyn_cast (SSA_NAME_DEF_STMT (op1))) + && gimple_assign_rhs_code (def) == CONSTRUCTOR) + cop1 = gimple_assign_rhs1 (def); + tree t; + } + (if ((TREE_CODE (cop0) == VECTOR_CST + || TREE_CODE (cop0) == CONSTRUCTOR) + && (TREE_CODE (cop1) == VECTOR_CST + || TREE_CODE (cop1) == CONSTRUCTOR) + && (t = fold_vec_perm (type, cop0, cop1, sel))) + { t; } + (with + { + bool changed = (op0 == op1 && !single_arg); + tree ins = NULL_TREE; + unsigned at = 0; + + /* See if the permutation is performing a single element + insert from a CONSTRUCTOR or constant and use a BIT_INSERT_EXPR + in that case. But only if the vector mode is supported, + otherwise this is invalid GIMPLE. */ + if (op_mode != BLKmode + && (TREE_CODE (cop0) == VECTOR_CST + || TREE_CODE (cop0) == CONSTRUCTOR + || TREE_CODE (cop1) == VECTOR_CST + || TREE_CODE (cop1) == CONSTRUCTOR)) { - op0 = op1; - sel.rotate_inputs (1); + bool insert_first_p = sel.series_p (1, 1, nelts + 1, 1); + if (insert_first_p) + { + /* After canonicalizing the first elt to come from the + first vector we only can insert the first elt from + the first vector. */ + at = 0; + if ((ins = fold_read_from_vector (cop0, sel[0]))) + op0 = op1; + } + /* The above can fail for two-element vectors which always + appear to insert the first element, so try inserting + into the second lane as well. 
For more than two + elements that's wasted time. */ + if (!insert_first_p || (!ins && maybe_eq (nelts, 2u))) + { + unsigned int encoded_nelts = sel.encoding ().encoded_nelts (); + for (at = 0; at < encoded_nelts; ++at) + if (maybe_ne (sel[at], at)) + break; + if (at < encoded_nelts + && (known_eq (at + 1, nelts) + || sel.series_p (at + 1, 1, at + 1, 1))) + { + if (known_lt (poly_uint64 (sel[at]), nelts)) + ins = fold_read_from_vector (cop0, sel[at]); + else + ins = fold_read_from_vector (cop1, sel[at] - nelts); + } + } } - else if (known_ge (poly_uint64 (sel[0]), nelts)) + + /* Generate a canonical form of the selector. */ + if (!ins && sel.encoding () != builder) { - std::swap (op0, op1); - sel.rotate_inputs (1); + /* Some targets are deficient and fail to expand a single + argument permutation while still allowing an equivalent + 2-argument version. */ + tree oldop2 = op2; + if (sel.ninputs () == 2 + || can_vec_perm_const_p (result_mode, op_mode, sel, false)) + op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); + else + { + vec_perm_indices sel2 (builder, 2, nelts); + if (can_vec_perm_const_p (result_mode, op_mode, sel2, false)) + op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2); + else + /* Not directly supported with either encoding, + so use the preferred form. 
*/ + op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); + } + if (!operand_equal_p (op2, oldop2, 0)) + changed = true; } - } - gassign *def; - tree cop0 = op0, cop1 = op1; - if (TREE_CODE (op0) == SSA_NAME - && (def = dyn_cast (SSA_NAME_DEF_STMT (op0))) - && gimple_assign_rhs_code (def) == CONSTRUCTOR) - cop0 = gimple_assign_rhs1 (def); - if (TREE_CODE (op1) == SSA_NAME - && (def = dyn_cast (SSA_NAME_DEF_STMT (op1))) - && gimple_assign_rhs_code (def) == CONSTRUCTOR) - cop1 = gimple_assign_rhs1 (def); - - tree t; - } - (if ((TREE_CODE (cop0) == VECTOR_CST - || TREE_CODE (cop0) == CONSTRUCTOR) - && (TREE_CODE (cop1) == VECTOR_CST - || TREE_CODE (cop1) == CONSTRUCTOR) - && (t = fold_vec_perm (type, cop0, cop1, sel))) - { t; } - (with - { - bool changed = (op0 == op1 && !single_arg); - tree ins = NULL_TREE; - unsigned at = 0; - - /* See if the permutation is performing a single element - insert from a CONSTRUCTOR or constant and use a BIT_INSERT_EXPR - in that case. But only if the vector mode is supported, - otherwise this is invalid GIMPLE. */ - if (op_mode != BLKmode - && (TREE_CODE (cop0) == VECTOR_CST - || TREE_CODE (cop0) == CONSTRUCTOR - || TREE_CODE (cop1) == VECTOR_CST - || TREE_CODE (cop1) == CONSTRUCTOR)) - { - bool insert_first_p = sel.series_p (1, 1, nelts + 1, 1); - if (insert_first_p) - { - /* After canonicalizing the first elt to come from the - first vector we only can insert the first elt from - the first vector. */ - at = 0; - if ((ins = fold_read_from_vector (cop0, sel[0]))) - op0 = op1; - } - /* The above can fail for two-element vectors which always - appear to insert the first element, so try inserting - into the second lane as well. For more than two - elements that's wasted time. 
*/ - if (!insert_first_p || (!ins && maybe_eq (nelts, 2u))) - { - unsigned int encoded_nelts = sel.encoding ().encoded_nelts (); - for (at = 0; at < encoded_nelts; ++at) - if (maybe_ne (sel[at], at)) - break; - if (at < encoded_nelts - && (known_eq (at + 1, nelts) - || sel.series_p (at + 1, 1, at + 1, 1))) - { - if (known_lt (poly_uint64 (sel[at]), nelts)) - ins = fold_read_from_vector (cop0, sel[at]); - else - ins = fold_read_from_vector (cop1, sel[at] - nelts); - } - } - } - - /* Generate a canonical form of the selector. */ - if (!ins && sel.encoding () != builder) - { - /* Some targets are deficient and fail to expand a single - argument permutation while still allowing an equivalent - 2-argument version. */ - tree oldop2 = op2; - if (sel.ninputs () == 2 - || can_vec_perm_const_p (result_mode, op_mode, sel, false)) - op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); - else - { - vec_perm_indices sel2 (builder, 2, nelts); - if (can_vec_perm_const_p (result_mode, op_mode, sel2, false)) - op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2); - else - /* Not directly supported with either encoding, - so use the preferred form. */ - op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); - } - if (!operand_equal_p (op2, oldop2, 0)) - changed = true; - } - } - (if (ins) - (bit_insert { op0; } { ins; } - { bitsize_int (at * vector_element_bits (type)); }) - (if (changed) - (vec_perm { op0; } { op1; } { op2; })))))))))) + } + (if (ins) + (bit_insert { op0; } { ins; } + { bitsize_int (at * vector_element_bits (type)); }) + (if (changed) + (vec_perm { op0; } { op1; } { op2; })))))))))))) /* VEC_PERM_EXPR (v, v, mask) -> v where v contains same element. 
*/ @@ -8094,37 +8094,34 @@ and, (simplify (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4) - (with - { - if (!TYPE_VECTOR_SUBPARTS (type).is_constant ()) - return NULL_TREE; - - tree op0; - machine_mode result_mode = TYPE_MODE (type); - machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); - int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); - vec_perm_builder builder0; - vec_perm_builder builder1; - vec_perm_builder builder2 (nelts, nelts, 1); - - if (!tree_to_vec_perm_builder (&builder0, @3) - || !tree_to_vec_perm_builder (&builder1, @4)) - return NULL_TREE; - - vec_perm_indices sel0 (builder0, 2, nelts); - vec_perm_indices sel1 (builder1, 1, nelts); - - for (int i = 0; i < nelts; i++) - builder2.quick_push (sel0[sel1[i].to_constant ()]); + (if (TYPE_VECTOR_SUBPARTS (type).is_constant ()) + (with + { + machine_mode result_mode = TYPE_MODE (type); + machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1)); + int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); + vec_perm_builder builder0; + vec_perm_builder builder1; + vec_perm_builder builder2 (nelts, nelts, 1); + } + (if (tree_to_vec_perm_builder (&builder0, @3) + && tree_to_vec_perm_builder (&builder1, @4)) + (with + { + vec_perm_indices sel0 (builder0, 2, nelts); + vec_perm_indices sel1 (builder1, 1, nelts); - vec_perm_indices sel2 (builder2, 2, nelts); + for (int i = 0; i < nelts; i++) + builder2.quick_push (sel0[sel1[i].to_constant ()]); - if (!can_vec_perm_const_p (result_mode, op_mode, sel2, false)) - return NULL_TREE; + vec_perm_indices sel2 (builder2, 2, nelts); - op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); - } - (vec_perm @1 @2 { op0; }))) + tree op0 = NULL_TREE; + if (can_vec_perm_const_p (result_mode, op_mode, sel2, false)) + op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2); + } + (if (op0) + (vec_perm @1 @2 { op0; }))))))) /* Match count trailing zeroes for simplify_count_trailing_zeroes in fwprop. -- cgit v1.1