12 files changed, 157 insertions, 93 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index a486321e..adfddb8 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2555,89 +2555,6 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
   gcc_unreachable ();
 }
 
-tree
-aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
-				     tree type_in)
-{
-  machine_mode in_mode, out_mode;
-
-  if (TREE_CODE (type_out) != VECTOR_TYPE
-      || TREE_CODE (type_in) != VECTOR_TYPE)
-    return NULL_TREE;
-
-  out_mode = TYPE_MODE (type_out);
-  in_mode = TYPE_MODE (type_in);
-
-#undef AARCH64_CHECK_BUILTIN_MODE
-#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
-#define AARCH64_FIND_FRINT_VARIANT(N) \
-  (AARCH64_CHECK_BUILTIN_MODE (2, D) \
-    ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \
-    : (AARCH64_CHECK_BUILTIN_MODE (4, S) \
-	? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \
-	: (AARCH64_CHECK_BUILTIN_MODE (2, S) \
-	   ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \
-	   : NULL_TREE)))
-  switch (fn)
-    {
-#undef AARCH64_CHECK_BUILTIN_MODE
-#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
-  (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode)
-    CASE_CFN_IFLOOR:
-    CASE_CFN_LFLOOR:
-    CASE_CFN_LLFLOOR:
-      {
-	enum aarch64_builtins builtin;
-	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
-	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di;
-	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
-	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si;
-	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
-	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si;
-	else
-	  return NULL_TREE;
-
-	return aarch64_builtin_decls[builtin];
-      }
-    CASE_CFN_ICEIL:
-    CASE_CFN_LCEIL:
-    CASE_CFN_LLCEIL:
-      {
-	enum aarch64_builtins builtin;
-	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
-	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di;
-	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
-	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si;
-	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
-	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si;
-	else
-	  return NULL_TREE;
-
-	return aarch64_builtin_decls[builtin];
-      }
-    CASE_CFN_IROUND:
-    CASE_CFN_LROUND:
-    CASE_CFN_LLROUND:
-      {
-	enum aarch64_builtins builtin;
-	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
-	  builtin =	AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di;
-	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
-	  builtin =	AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si;
-	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
-	  builtin =	AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si;
-	else
-	  return NULL_TREE;
-
-	return aarch64_builtin_decls[builtin];
-      }
-    default:
-      return NULL_TREE;
-    }
-
-  return NULL_TREE;
-}
-
 /* Return builtin for reciprocal square root.  */
 
 tree
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index dabd047..19c9d3c 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -986,7 +986,6 @@ gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *,
 rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int);
 tree aarch64_general_builtin_decl (unsigned, bool);
 tree aarch64_general_builtin_rsqrt (unsigned int);
-tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
 void handle_arm_acle_h (void);
 void handle_arm_neon_h (void);
 
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d049f9a..25f4cbb 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -27584,10 +27584,6 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_VECTORIZE_BUILTINS
 #define TARGET_VECTORIZE_BUILTINS
 
-#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
-#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
-  aarch64_builtin_vectorized_function
-
 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
   aarch64_autovectorize_vector_modes
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 95cb1e2..3a3c729 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -24004,6 +24004,7 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
     case ldexp_optab:
     case scalb_optab:
     case round_optab:
+    case lround_optab:
       return opt_type == OPTIMIZE_FOR_SPEED;
 
     case rint_optab:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3b02d0c..bf29f44 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -19926,9 +19926,6 @@
        && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
        && !flag_trapping_math && !flag_rounding_math)"
 {
-  if (optimize_insn_for_size_p ())
-    FAIL;
-
   if (SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
       && <SWI248x:MODE>mode != HImode
       && ((<SWI248x:MODE>mode != DImode) || TARGET_64BIT)
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index d666ccc..28973d9 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -120,6 +120,7 @@ init_internal_fns ()
 #define len_store_direct { 3, 3, false }
 #define vec_set_direct { 3, 3, false }
 #define unary_direct { 0, 0, true }
+#define unary_convert_direct { -1, 0, true }
 #define binary_direct { 0, 0, true }
 #define ternary_direct { 0, 0, true }
 #define cond_unary_direct { 1, 1, true }
@@ -3679,6 +3680,19 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
     emit_move_insn (lhs_rtx, ops[0].value);
 }
 
+/* Expand STMT, a call to FN, with the convert OPTAB insn for the modes of its
+   direct_internal_fn_types.  FN has one output and NARGS input operands.  */
+
+static void
+expand_convert_optab_fn (internal_fn fn, gcall *stmt, convert_optab optab,
+			 unsigned int nargs)
+{
+  tree_pair types = direct_internal_fn_types (fn, stmt);
+  insn_code icode = convert_optab_handler (optab, TYPE_MODE (types.first),
+					  TYPE_MODE (types.second));
+  expand_fn_using_insn (stmt, icode, 1, nargs);
+}
+
 /* Expanders for optabs that can use expand_direct_optab_fn.  */
 
 #define expand_unary_optab_fn(FN, STMT, OPTAB) \
@@ -3711,6 +3725,11 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \
   expand_direct_optab_fn (FN, STMT, OPTAB, 4)
 
+/* Expanders for optabs that can use expand_convert_optab_fn.  */
+
+#define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \
+  expand_convert_optab_fn (FN, STMT, OPTAB, 1)
+
 /* RETURN_TYPE and ARGS are a return type and argument list that are
    in principle compatible with FN (which satisfies direct_internal_fn_p).
    Return the types that should be used to determine whether the
@@ -3783,6 +3802,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 }
 
 #define direct_unary_optab_supported_p direct_optab_supported_p
+#define direct_unary_convert_optab_supported_p convert_optab_supported_p
 #define direct_binary_optab_supported_p direct_optab_supported_p
 #define direct_ternary_optab_supported_p direct_optab_supported_p
 #define direct_cond_unary_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index d2d550d..7c398ba 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -61,6 +61,9 @@ along with GCC; see the file COPYING3.  If not see
    - binary: a normal binary optab, such as vec_interleave_lo_<mode>
    - ternary: a normal ternary optab, such as fma<mode>4
 
+   - unary_convert: a single-input conversion optab, such as
+     lround<srcmode><dstmode>2.
+
    - cond_binary: a conditional binary optab, such as cond_add<mode>
    - cond_ternary: a conditional ternary optab, such as cond_fma_rev<mode>
 
@@ -267,6 +270,26 @@ DEF_INTERNAL_FLT_FLOATN_FN (SQRT, ECF_CONST, sqrt, unary)
 DEF_INTERNAL_FLT_FN (TAN, ECF_CONST, tan, unary)
 DEF_INTERNAL_FLT_FN (TANH, ECF_CONST, tanh, unary)
 
+/* Floating-point to integer conversions.
+
+   ??? Here we preserve the I/L/LL prefix convention from the
+   corresponding built-in functions, rather than make the internal
+   functions polymorphic in both the argument and the return types.
+   Perhaps an alternative would be to pass a zero of the required
+   return type as a second parameter.  */
+DEF_INTERNAL_FLT_FN (ICEIL, ECF_CONST, lceil, unary_convert)
+DEF_INTERNAL_FLT_FN (IFLOOR, ECF_CONST, lfloor, unary_convert)
+DEF_INTERNAL_FLT_FN (IRINT, ECF_CONST, lrint, unary_convert)
+DEF_INTERNAL_FLT_FN (IROUND, ECF_CONST, lround, unary_convert)
+DEF_INTERNAL_FLT_FN (LCEIL, ECF_CONST, lceil, unary_convert)
+DEF_INTERNAL_FLT_FN (LFLOOR, ECF_CONST, lfloor, unary_convert)
+DEF_INTERNAL_FLT_FN (LRINT, ECF_CONST, lrint, unary_convert)
+DEF_INTERNAL_FLT_FN (LROUND, ECF_CONST, lround, unary_convert)
+DEF_INTERNAL_FLT_FN (LLCEIL, ECF_CONST, lceil, unary_convert)
+DEF_INTERNAL_FLT_FN (LLFLOOR, ECF_CONST, lfloor, unary_convert)
+DEF_INTERNAL_FLT_FN (LLRINT, ECF_CONST, lrint, unary_convert)
+DEF_INTERNAL_FLT_FN (LLROUND, ECF_CONST, lround, unary_convert)
+
 /* FP rounding.  */
 DEF_INTERNAL_FLT_FLOATN_FN (CEIL, ECF_CONST, ceil, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floor, unary)
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index a50dd79..165f8d1 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -5828,7 +5828,8 @@ expand_sfix_optab (rtx to, rtx from, convert_optab tab)
   FOR_EACH_MODE_FROM (fmode, GET_MODE (from))
     FOR_EACH_MODE_FROM (imode, GET_MODE (to))
       {
-	icode = convert_optab_handler (tab, imode, fmode);
+	icode = convert_optab_handler (tab, imode, fmode,
+				       insn_optimization_type ());
 	if (icode != CODE_FOR_nothing)
 	  {
 	    rtx_insn *last = get_last_insn ();
diff --git a/gcc/predict.cc b/gcc/predict.cc
index b36caa3..1bc7ab9 100644
--- a/gcc/predict.cc
+++ b/gcc/predict.cc
@@ -362,6 +362,17 @@ optimize_insn_for_speed_p (void)
   return !optimize_insn_for_size_p ();
 }
 
+/* Return the optimization type for the current instruction: OPTIMIZE_FOR_SPEED
+   if optimize_insn_for_speed_p () holds, otherwise OPTIMIZE_FOR_SIZE.  */
+
+optimization_type
+insn_optimization_type ()
+{
+  return (optimize_insn_for_speed_p ()
+	  ? OPTIMIZE_FOR_SPEED
+	  : OPTIMIZE_FOR_SIZE);
+}
+
 /* Return TRUE if LOOP should be optimized for size.  */
 
 optimize_size_level
diff --git a/gcc/predict.h b/gcc/predict.h
index 8649974..2548437 100644
--- a/gcc/predict.h
+++ b/gcc/predict.h
@@ -68,6 +68,7 @@ extern enum optimize_size_level optimize_edge_for_size_p (edge);
 extern bool optimize_edge_for_speed_p (edge);
 extern enum optimize_size_level optimize_insn_for_size_p (void);
 extern bool optimize_insn_for_speed_p (void);
+extern optimization_type insn_optimization_type ();
 extern enum optimize_size_level optimize_loop_for_size_p (class loop *);
 extern bool optimize_loop_for_speed_p (class loop *);
 extern bool optimize_loop_nest_for_speed_p (class loop *);
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c
index 8516808..94d9af1 100644
--- a/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_1.c
@@ -1,4 +1,4 @@
-/* { dg-options "-O3 --save-temps" } */
+/* { dg-options "-O3 -fno-math-errno --save-temps" } */
 /* { dg-final { check-function-bodies "**" "" "" } } */
 
 #include <stdint.h>
@@ -184,3 +184,66 @@ TEST2 (int, ctz, int)
 **	ret
 */
 TEST4 (int, ctz, int)
+
+/*
+** test2_int_iroundf_float:
+**	fcvtas	v0.2s, v1.2s
+**	ret
+*/
+TEST2 (int, iroundf, float)
+
+/*
+** test2_int64_t_llround_double:
+**	fcvtas	v0.2d, v1.2d
+**	ret
+*/
+TEST2 (int64_t, llround, double)
+
+/*
+** test4_int_iroundf_float:
+**	fcvtas	v0.4s, v1.4s
+**	ret
+*/
+TEST4 (int, iroundf, float)
+
+/*
+** test2_int_ifloorf_float:
+**	fcvtms	v0.2s, v1.2s
+**	ret
+*/
+TEST2 (int, ifloorf, float)
+
+/*
+** test2_int64_t_llfloor_double:
+**	fcvtms	v0.2d, v1.2d
+**	ret
+*/
+TEST2 (int64_t, llfloor, double)
+
+/*
+** test4_int_ifloorf_float:
+**	fcvtms	v0.4s, v1.4s
+**	ret
+*/
+TEST4 (int, ifloorf, float)
+
+/*
+** test2_int_iceilf_float:
+**	fcvtps	v0.2s, v1.2s
+**	ret
+*/
+TEST2 (int, iceilf, float)
+
+/*
+** test2_int64_t_llceil_double:
+**	fcvtps	v0.2d, v1.2d
+**	ret
+*/
+TEST2 (int64_t, llceil, double)
+
+/*
+** test4_int_iceilf_float:
+**	fcvtps	v0.4s, v1.4s
+**	ret
+*/
+TEST4 (int, iceilf, float)
diff --git a/gcc/testsuite/gfortran.dg/vect/pr106253.f b/gcc/testsuite/gfortran.dg/vect/pr106253.f
new file mode 100644
index 0000000..1b6b7e8
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/pr106253.f
@@ -0,0 +1,35 @@
+! { dg-do compile }
+
+      SUBROUTINE DGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX,           &
+     &                   BETA, Y, INCY )
+      LOGICAL            LSAME
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.                            &
+     &         .NOT.LSAME( TRANS, 'C' )      )THEN
+      END IF
+      END
+      subroutine evlrnf (ptrs0t, nclsm, prnf0t) 
+      real, dimension (1:nclsm,1:nclsm), intent (in) :: ptrs0t
+      real, dimension (1:nclsm,1:nclsm), intent (out):: prnf0t
+      real, allocatable, dimension (:,:) :: utrsft ! up probabilities
+      real, allocatable, dimension (:,:) :: dtrsft ! down probabilities
+      real, allocatable, dimension (:,:) :: xwrkt ! matrix
+      do icls = 1, nclsm
+         do ival = ipic - 1, 1, -1
+            xwrkt = trs2a2 (ival, ipic, utrsft, dtrsft, ncls)
+         enddo
+      enddo
+      contains
+      function trs2a2 (j, k, u, d, m)
+      real, dimension (1:m,1:m) :: trs2a2  ! result
+      real, dimension (1:m,1:m) :: u, d    ! matrices utrsft, dtrsft
+      end function trs2a2
+      end
+      program rnflow
+      integer, parameter :: ncls  =     256 ! number of classes
+      integer, dimension (1:ncls,1:ncls) :: mrnftt ! theoretical matrix
+      real, dimension (1:ncls,1:ncls)    :: ptrst  ! Markov matrix
+      real, dimension (1:ncls,1:ncls)    :: prnft  ! Rainflow matrix
+      call evlrnf (ptrst, ncls, prnft)
+      mrnftt = nint (real (nsim) * real (npic) * prnft)
+      call cmpmat (mrnftt, mrnfst)
+      end program rnflow