diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64-builtins.cc | 83 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.cc | 4 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 3 | ||||
-rw-r--r-- | gcc/internal-fn.cc | 20 | ||||
-rw-r--r-- | gcc/internal-fn.def | 23 | ||||
-rw-r--r-- | gcc/optabs.cc | 3 | ||||
-rw-r--r-- | gcc/predict.cc | 11 | ||||
-rw-r--r-- | gcc/predict.h | 1 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/vect_unary_1.c | 65 | ||||
-rw-r--r-- | gcc/testsuite/gfortran.dg/vect/pr106253.f | 35 |
12 files changed, 157 insertions, 93 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index a486321e..adfddb8 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -2555,89 +2555,6 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, gcc_unreachable (); } -tree -aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, - tree type_in) -{ - machine_mode in_mode, out_mode; - - if (TREE_CODE (type_out) != VECTOR_TYPE - || TREE_CODE (type_in) != VECTOR_TYPE) - return NULL_TREE; - - out_mode = TYPE_MODE (type_out); - in_mode = TYPE_MODE (type_in); - -#undef AARCH64_CHECK_BUILTIN_MODE -#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 -#define AARCH64_FIND_FRINT_VARIANT(N) \ - (AARCH64_CHECK_BUILTIN_MODE (2, D) \ - ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \ - : (AARCH64_CHECK_BUILTIN_MODE (4, S) \ - ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \ - : (AARCH64_CHECK_BUILTIN_MODE (2, S) \ - ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \ - : NULL_TREE))) - switch (fn) - { -#undef AARCH64_CHECK_BUILTIN_MODE -#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ - (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode) - CASE_CFN_IFLOOR: - CASE_CFN_LFLOOR: - CASE_CFN_LLFLOOR: - { - enum aarch64_builtins builtin; - if (AARCH64_CHECK_BUILTIN_MODE (2, D)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di; - else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si; - else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si; - else - return NULL_TREE; - - return aarch64_builtin_decls[builtin]; - } - CASE_CFN_ICEIL: - CASE_CFN_LCEIL: - CASE_CFN_LLCEIL: - { - enum aarch64_builtins builtin; - if (AARCH64_CHECK_BUILTIN_MODE (2, D)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di; - else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si; - else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si; - else - return NULL_TREE; - - return aarch64_builtin_decls[builtin]; - } - CASE_CFN_IROUND: - CASE_CFN_LROUND: - CASE_CFN_LLROUND: - { - enum aarch64_builtins builtin; - if (AARCH64_CHECK_BUILTIN_MODE (2, D)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di; - else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si; - else if (AARCH64_CHECK_BUILTIN_MODE (2, S)) - builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si; - else - return NULL_TREE; - - return aarch64_builtin_decls[builtin]; - } - default: - return NULL_TREE; - } - - return NULL_TREE; -} - /* Return builtin for reciprocal square root. */ tree diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index dabd047..19c9d3c 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -986,7 +986,6 @@ gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *, rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); tree aarch64_general_builtin_decl (unsigned, bool); tree aarch64_general_builtin_rsqrt (unsigned int); -tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); void handle_arm_acle_h (void); void handle_arm_neon_h (void); diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index d049f9a..25f4cbb 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -27584,10 +27584,6 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_VECTORIZE_BUILTINS #define TARGET_VECTORIZE_BUILTINS -#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION -#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ - aarch64_builtin_vectorized_function - #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ aarch64_autovectorize_vector_modes diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 95cb1e2..3a3c729 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -24004,6 +24004,7 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, case ldexp_optab: case scalb_optab: case round_optab: + case lround_optab: return opt_type == OPTIMIZE_FOR_SPEED; case rint_optab: diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3b02d0c..bf29f44 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -19926,9 +19926,6 @@ && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT) && !flag_trapping_math && !flag_rounding_math)" { - if (optimize_insn_for_size_p ()) - FAIL; - if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH && <SWI248x:MODE>mode != HImode && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index d666ccc..28973d9 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -120,6 +120,7 @@ init_internal_fns () #define len_store_direct { 3, 3, false } #define vec_set_direct { 3, 3, false } #define unary_direct { 0, 0, true } +#define unary_convert_direct { -1, 0, true } #define binary_direct { 0, 0, true } #define ternary_direct { 0, 0, true } #define cond_unary_direct { 1, 1, true } @@ -3679,6 +3680,19 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab) emit_move_insn (lhs_rtx, ops[0].value); } +/* Expand a call to a convert-like optab using the operands in STMT. + FN has a single output operand and NARGS input operands. */ + +static void +expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab, + unsigned int nargs) +{ + tree_pair types = direct_internal_fn_types (fn, stmt); + insn_code icode = convert_optab_handler (optab, TYPE_MODE (types.first), + TYPE_MODE (types.second)); + expand_fn_using_insn (stmt, icode, 1, nargs); +} + /* Expanders for optabs that can use expand_direct_optab_fn. */ #define expand_unary_optab_fn(FN, STMT, OPTAB) \ @@ -3711,6 +3725,11 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab) #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \ expand_direct_optab_fn (FN, STMT, OPTAB, 4) +/* Expanders for optabs that can use expand_convert_optab_fn. */ + +#define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \ + expand_convert_optab_fn (FN, STMT, OPTAB, 1) + /* RETURN_TYPE and ARGS are a return type and argument list that are in principle compatible with FN (which satisfies direct_internal_fn_p). Return the types that should be used to determine whether the @@ -3783,6 +3802,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, } #define direct_unary_optab_supported_p direct_optab_supported_p +#define direct_unary_convert_optab_supported_p convert_optab_supported_p #define direct_binary_optab_supported_p direct_optab_supported_p #define direct_ternary_optab_supported_p direct_optab_supported_p #define direct_cond_unary_optab_supported_p direct_optab_supported_p diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index d2d550d..7c398ba 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -61,6 +61,9 @@ along with GCC; see the file COPYING3. If not see - binary: a normal binary optab, such as vec_interleave_lo_<mode> - ternary: a normal ternary optab, such as fma<mode>4 + - unary_convert: a single-input conversion optab, such as + lround<srcmode><dstmode>2. + - cond_binary: a conditional binary optab, such as cond_add<mode> - cond_ternary: a conditional ternary optab, such as cond_fma_rev<mode> @@ -267,6 +270,26 @@ DEF_INTERNAL_FLT_FLOATN_FN (SQRT, ECF_CONST, sqrt, unary) DEF_INTERNAL_FLT_FN (TAN, ECF_CONST, tan, unary) DEF_INTERNAL_FLT_FN (TANH, ECF_CONST, tanh, unary) +/* Floating-point to integer conversions. + + ??? Here we preserve the I/L/LL prefix convention from the + corresponding built-in functions, rather than make the internal + functions polymorphic in both the argument and the return types. + Perhaps an alternative would be to pass a zero of the required + return type as a second parameter. */ +DEF_INTERNAL_FLT_FN (ICEIL, ECF_CONST, lceil, unary_convert) +DEF_INTERNAL_FLT_FN (IFLOOR, ECF_CONST, lfloor, unary_convert) +DEF_INTERNAL_FLT_FN (IRINT, ECF_CONST, lrint, unary_convert) +DEF_INTERNAL_FLT_FN (IROUND, ECF_CONST, lround, unary_convert) +DEF_INTERNAL_FLT_FN (LCEIL, ECF_CONST, lceil, unary_convert) +DEF_INTERNAL_FLT_FN (LFLOOR, ECF_CONST, lfloor, unary_convert) +DEF_INTERNAL_FLT_FN (LRINT, ECF_CONST, lrint, unary_convert) +DEF_INTERNAL_FLT_FN (LROUND, ECF_CONST, lround, unary_convert) +DEF_INTERNAL_FLT_FN (LLCEIL, ECF_CONST, lceil, unary_convert) +DEF_INTERNAL_FLT_FN (LLFLOOR, ECF_CONST, lfloor, unary_convert) +DEF_INTERNAL_FLT_FN (LLRINT, ECF_CONST, lrint, unary_convert) +DEF_INTERNAL_FLT_FN (LLROUND, ECF_CONST, lround, unary_convert) + /* FP rounding. */ DEF_INTERNAL_FLT_FLOATN_FN (CEIL, ECF_CONST, ceil, unary) DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floor, unary) diff --git a/gcc/optabs.cc b/gcc/optabs.cc index a50dd79..165f8d1 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -5828,7 +5828,8 @@ expand_sfix_optab (rtx to, rtx from, convert_optab tab) FOR_EACH_MODE_FROM (fmode, GET_MODE (from)) FOR_EACH_MODE_FROM (imode, GET_MODE (to)) { - icode = convert_optab_handler (tab, imode, fmode); + icode = convert_optab_handler (tab, imode, fmode, + insn_optimization_type ()); if (icode != CODE_FOR_nothing) { rtx_insn *last = get_last_insn (); diff --git a/gcc/predict.cc b/gcc/predict.cc index b36caa3..1bc7ab9 100644 --- a/gcc/predict.cc +++ b/gcc/predict.cc @@ -362,6 +362,17 @@ optimize_insn_for_speed_p (void) return !optimize_insn_for_size_p (); } +/* Return the optimization type that should be used for the current + instruction. */ + +optimization_type +insn_optimization_type () +{ + return (optimize_insn_for_speed_p () + ? OPTIMIZE_FOR_SPEED + : OPTIMIZE_FOR_SIZE); +} + /* Return TRUE if LOOP should be optimized for size. */ optimize_size_level diff --git a/gcc/predict.h b/gcc/predict.h index 8649974..2548437 100644 --- a/gcc/predict.h +++ b/gcc/predict.h @@ -68,6 +68,7 @@ extern enum optimize_size_level optimize_edge_for_size_p (edge); extern bool optimize_edge_for_speed_p (edge); extern enum optimize_size_level optimize_insn_for_size_p (void); extern bool optimize_insn_for_speed_p (void); +extern optimization_type insn_optimization_type (); extern enum optimize_size_level optimize_loop_for_size_p (class loop *); extern bool optimize_loop_for_speed_p (class loop *); extern bool optimize_loop_nest_for_speed_p (class loop *); diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c index 8516808..94d9af1 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c +++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 --save-temps" } */ +/* { dg-options "-O3 -fno-math-errno --save-temps" } */ /* { dg-final { check-function-bodies "**" "" "" } } */ #include <stdint.h> @@ -184,3 +184,66 @@ TEST2 (int, ctz, int) ** ret */ TEST4 (int, ctz, int) + +/* +** test2_int_iroundf_float: +** fcvtas v0.2s, v1.2s +** ret +*/ +TEST2 (int, iroundf, float) + +/* +** test2_int64_t_llround_double: +** fcvtas v0.2d, v1.2d +** ret +*/ +TEST2 (int64_t, llround, double) + +/* +** test4_int_iroundf_float: +** fcvtas v0.4s, v1.4s +** ret +*/ +TEST4 (int, iroundf, float) + +/* +** test2_int_ifloorf_float: +** fcvtms v0.2s, v1.2s +** ret +*/ +TEST2 (int, ifloorf, float) + +/* +** test2_int64_t_llfloor_double: +** fcvtms v0.2d, v1.2d +** ret +*/ +TEST2 (int64_t, llfloor, double) + +/* +** test4_int_ifloorf_float: +** fcvtms v0.4s, v1.4s +** ret +*/ +TEST4 (int, ifloorf, float) + +/* +** test2_int_iceilf_float: +** fcvtps v0.2s, v1.2s +** ret +*/ +TEST2 (int, iceilf, float) + +/* +** test2_int64_t_llceil_double: +** fcvtps v0.2d, v1.2d +** ret +*/ +TEST2 (int64_t, llceil, double) + +/* +** test4_int_iceilf_float: +** fcvtps v0.4s, v1.4s +** ret +*/ +TEST4 (int, iceilf, float) diff --git a/gcc/testsuite/gfortran.dg/vect/pr106253.f b/gcc/testsuite/gfortran.dg/vect/pr106253.f new file mode 100644 index 0000000..1b6b7e8 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/vect/pr106253.f @@ -0,0 +1,35 @@ +! { dg-do compile } + + SUBROUTINE DGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX, & + & BETA, Y, INCY ) + LOGICAL LSAME + IF ( .NOT.LSAME( TRANS, 'N' ).AND. & + & .NOT.LSAME( TRANS, 'C' ) )THEN + END IF + END + subroutine evlrnf (ptrs0t, nclsm, prnf0t) + real, dimension (1:nclsm,1:nclsm), intent (in) :: ptrs0t + real, dimension (1:nclsm,1:nclsm), intent (out):: prnf0t + real, allocatable, dimension (:,:) :: utrsft ! probas up + real, allocatable, dimension (:,:) :: dtrsft ! probas down + real, allocatable, dimension (:,:) :: xwrkt ! matrice + do icls = 1, nclsm + do ival = ipic - 1, 1, -1 + xwrkt = trs2a2 (ival, ipic, utrsft, dtrsft, ncls) + enddo + enddo + contains + function trs2a2 (j, k, u, d, m) + real, dimension (1:m,1:m) :: trs2a2 ! resultat + real, dimension (1:m,1:m) :: u, d ! matrices utrsft, dtrsft + end function trs2a2 + end + program rnflow + integer, parameter :: ncls = 256 ! nombre de classes + integer, dimension (1:ncls,1:ncls) :: mrnftt ! matrice theorique + real, dimension (1:ncls,1:ncls) :: ptrst ! matrice Markov + real, dimension (1:ncls,1:ncls) :: prnft ! matrice Rainflow + call evlrnf (ptrst, ncls, prnft) + mrnftt = nint (real (nsim) * real (npic) * prnft) + call cmpmat (mrnftt, mrnfst) + end program rnflow |