diff options
author | Uros Bizjak <uros@gcc.gnu.org> | 2008-03-28 19:17:00 +0100 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2008-03-28 19:17:00 +0100 |
commit | 9aba5d2249ed7556d7a73e43959853e07647d90b (patch) | |
tree | fb54ee8d61b1b8a6ce1cae37c98eb583355c1cfe | |
parent | d60a2d4d2fdc2f8218c0ef7fcee4aea63953e2e8 (diff) | |
download | gcc-9aba5d2249ed7556d7a73e43959853e07647d90b.zip gcc-9aba5d2249ed7556d7a73e43959853e07647d90b.tar.gz gcc-9aba5d2249ed7556d7a73e43959853e07647d90b.tar.bz2 |
i386.c (override_options): Initialize ix86_veclib_handler to ix86_veclibabi_svml when -mveclibabi=svml is used.
* config/i386/i386.c (override_options): Initialize
ix86_veclib_handler to ix86_veclibabi_svml when
-mveclibabi=svml is used.
(ix86_veclibabi_svml): New function for SVML ABI style
vectorization support.
* doc/invoke.texi (-mveclibabi) [svml]: Document new target option.
testsuite/ChangeLog:
* gcc.target/i386/vectorize6.c: New test.
From-SVN: r133692
-rw-r--r-- | gcc/ChangeLog | 91 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 121 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 29 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vectorize6.c | 16 |
5 files changed, 207 insertions, 56 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 16590e4..7f0fe26 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2008-03-28 Uros Bizjak <ubizjak@gmail.com> + + * config/i386/i386.c (override_options): Initialize + ix86_veclib_handler to ix86_veclibabi_svml when + -mveclibabi=svml is used. + (ix86_veclibabi_svml): New function for SVML ABI style + vectorization support. + * doc/invoke.texi (-mveclibabi) [svml]: Document new target option. + 2008-03-28 Rafael Espindola <espindola@google.com> * fold-const.c (tree_unary_nonnegative_warnv_p): Make it public. @@ -34,8 +43,7 @@ ASSERT_EXPR <name, expr OP limit>. (register_edge_assert_for_1): Adjust callers. (find_assert_locations): Likewise. - (process_assert_insertions_for): Build condition from - expression. + (process_assert_insertions_for): Build condition from expression. (extract_range_from_assert): Handle ASSERT_EXPRs of the form ASSERT_EXPR <name, expr OP limit>. (register_edge_assert_for_2): New helper registering @@ -54,7 +62,7 @@ 2008-03-28 Nick Clifton <nickc@redhat.com> * config/mn10300/mn10300.c (mn10300_secondary_reload_class): - Return GENERAL_REGS for stack adjustment reloads. + Return GENERAL_REGS for stack adjustment reloads. 2008-03-28 Andrew Pinski <andrew_pinski@playstation.sony.com> @@ -123,44 +131,42 @@ tree_to_aff_combination_expand. (get_inner_reference_aff): New function. * tree-parloops.c (loop_parallel_p): Free vectorizer info. - * tree-ssa-loop-im.c: Include tree-affine.h and pointer-set.h. - (struct lim_aux_data): sm_done field removed. - (mem_ref_loc_p, mem_ref_locs_p): New types. - (struct mem_ref): Added id, stored, accesses_in_loop, - indep_loop, dep_loop, indep_ref, dep_ref fields. - Removed is_stored, locs and next fields. - (memory_accesses): New variable. - (movement_possibility): Do not allow moving statements - that store to memory. - (outermost_indep_loop, simple_mem_ref_in_stmt, mem_ref_in_stmt): - New functions. - (determine_max_movement): For statements with memory references, - find the outermost loop in that the reference is independent. - (move_computations_stmt): Mark the virtual operands for - renaming. - (memref_free, mem_ref_alloc, mem_ref_locs_alloc, mark_ref_stored, - gather_mem_refs_stmt, gather_mem_refs_in_loops, vtoe_hash, vtoe_eq, - vtoe_free, record_vop_access, get_vop_accesses, get_vop_stores, - add_vop_ref_mapping, create_vop_ref_mapping_loop, - create_vop_ref_mapping, analyze_memory_references, - cannot_overlap_p, mem_refs_may_alias_p, rewrite_mem_ref_loc, - get_all_locs_in_loop, ref_always_accessed_p, - refs_independent_p, record_indep_loop, ref_indep_loop_p_1, - ref_indep_loop_p, can_sm_ref_p, find_refs_for_sm, - store_motion_loop, store_motion): New functions. - (struct vop_to_refs_elt): New type. - (record_mem_ref_loc, free_mem_ref_locs, rewrite_mem_refs, - memref_hash, memref_eq, hoist_memory_references): Rewritten. - (schedule_sm): Replaced by... - (execute_sm): ... this. - (determine_lsm_ref, hoist_memory_references, - loop_suitable_for_sm, gather_mem_refs_stmt, gather_mem_refs, - find_more_ref_vops, free_mem_ref, free_mem_refs, - determine_lsm_loop, determine_lsm): Removed. - (tree_ssa_lim_finalize): Free data structures used by store - motion. - (tree_ssa_lim): Call analyze_memory_references. Use - store_motion instead of determine_lsm. + * tree-ssa-loop-im.c: Include tree-affine.h and pointer-set.h. + (struct lim_aux_data): sm_done field removed. + (mem_ref_loc_p, mem_ref_locs_p): New types. + (struct mem_ref): Added id, stored, accesses_in_loop, + indep_loop, dep_loop, indep_ref, dep_ref fields. + Removed is_stored, locs and next fields. + (memory_accesses): New variable. + (movement_possibility): Do not allow moving statements + that store to memory. + (outermost_indep_loop, simple_mem_ref_in_stmt, mem_ref_in_stmt): + New functions. + (determine_max_movement): For statements with memory references, + find the outermost loop in that the reference is independent. + (move_computations_stmt): Mark the virtual operands for renaming. + (memref_free, mem_ref_alloc, mem_ref_locs_alloc, mark_ref_stored, + gather_mem_refs_stmt, gather_mem_refs_in_loops, vtoe_hash, vtoe_eq, + vtoe_free, record_vop_access, get_vop_accesses, get_vop_stores, + add_vop_ref_mapping, create_vop_ref_mapping_loop, + create_vop_ref_mapping, analyze_memory_references, + cannot_overlap_p, mem_refs_may_alias_p, rewrite_mem_ref_loc, + get_all_locs_in_loop, ref_always_accessed_p, + refs_independent_p, record_indep_loop, ref_indep_loop_p_1, + ref_indep_loop_p, can_sm_ref_p, find_refs_for_sm, + store_motion_loop, store_motion): New functions. + (struct vop_to_refs_elt): New type. + (record_mem_ref_loc, free_mem_ref_locs, rewrite_mem_refs, + memref_hash, memref_eq, hoist_memory_references): Rewritten. + (schedule_sm): Replaced by... + (execute_sm): ... this. + (determine_lsm_ref, hoist_memory_references, + loop_suitable_for_sm, gather_mem_refs_stmt, gather_mem_refs, + find_more_ref_vops, free_mem_ref, free_mem_refs, + determine_lsm_loop, determine_lsm): Removed. + (tree_ssa_lim_finalize): Free data structures used by store motion. + (tree_ssa_lim): Call analyze_memory_references. Use + store_motion instead of determine_lsm. 2008-03-27 Paolo Bonzini <bonzini@gnu.org> @@ -523,8 +529,7 @@ 2008-03-23 H.J. Lu <hongjiu.lu@intel.com> - * config/i386/i386.h (STATIC_CHAIN_REGNUM): Use R10_REG and - CX_REG. + * config/i386/i386.h (STATIC_CHAIN_REGNUM): Use R10_REG and CX_REG. 2008-03-23 Zuxy Meng <zuxy.meng@gmail.com> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 84a54d7..4fc8fcd 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1830,6 +1830,7 @@ static int ix86_isa_flags_explicit; /* Vectorization library interface and handlers. */ tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL; +static tree ix86_veclibabi_svml (enum built_in_function, tree, tree); static tree ix86_veclibabi_acml (enum built_in_function, tree, tree); /* Implement TARGET_HANDLE_OPTION. */ @@ -2673,7 +2674,9 @@ override_options (void) /* Use external vectorized library in vectorizing intrinsics. */ if (ix86_veclibabi_string) { - if (strcmp (ix86_veclibabi_string, "acml") == 0) + if (strcmp (ix86_veclibabi_string, "svml") == 0) + ix86_veclib_handler = ix86_veclibabi_svml; + else if (strcmp (ix86_veclibabi_string, "acml") == 0) ix86_veclib_handler = ix86_veclibabi_acml; else error ("unknown vectorization library ABI type (%s) for " @@ -21415,8 +21418,120 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, return NULL_TREE; } -/* Handler for an ACML-style interface to a library with vectorized - intrinsics. */ +/* Handler for an SVML-style interface to + a library with vectorized intrinsics. */ + +static tree +ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in) +{ + char name[20]; + tree fntype, new_fndecl, args; + unsigned arity; + const char *bname; + enum machine_mode el_mode, in_mode; + int n, in_n; + + /* The SVML is suitable for unsafe math only. */ + if (!flag_unsafe_math_optimizations) + return NULL_TREE; + + el_mode = TYPE_MODE (TREE_TYPE (type_out)); + n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + if (el_mode != in_mode + || n != in_n) + return NULL_TREE; + + switch (fn) + { + case BUILT_IN_EXP: + case BUILT_IN_LOG: + case BUILT_IN_LOG10: + case BUILT_IN_POW: + case BUILT_IN_TANH: + case BUILT_IN_TAN: + case BUILT_IN_ATAN: + case BUILT_IN_ATAN2: + case BUILT_IN_ATANH: + case BUILT_IN_CBRT: + case BUILT_IN_SINH: + case BUILT_IN_SIN: + case BUILT_IN_ASINH: + case BUILT_IN_ASIN: + case BUILT_IN_COSH: + case BUILT_IN_COS: + case BUILT_IN_ACOSH: + case BUILT_IN_ACOS: + if (el_mode != DFmode || n != 2) + return NULL_TREE; + break; + + case BUILT_IN_EXPF: + case BUILT_IN_LOGF: + case BUILT_IN_LOG10F: + case BUILT_IN_POWF: + case BUILT_IN_TANHF: + case BUILT_IN_TANF: + case BUILT_IN_ATANF: + case BUILT_IN_ATAN2F: + case BUILT_IN_ATANHF: + case BUILT_IN_CBRTF: + case BUILT_IN_SINHF: + case BUILT_IN_SINF: + case BUILT_IN_ASINHF: + case BUILT_IN_ASINF: + case BUILT_IN_COSHF: + case BUILT_IN_COSF: + case BUILT_IN_ACOSHF: + case BUILT_IN_ACOSF: + if (el_mode != SFmode || n != 4) + return NULL_TREE; + break; + + default: + return NULL_TREE; + } + + bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn])); + + if (fn == BUILT_IN_LOGF) + strcpy (name, "vmlsLn4"); + else if (fn == BUILT_IN_LOG) + strcpy (name, "vmldLn2"); + else if (n == 4) + { + sprintf (name, "vmls%s", bname+10); + name[strlen (name)-1] = '4'; + } + else + sprintf (name, "vmld%s2", bname+10); + + /* Convert to uppercase. */ + name[4] &= ~0x20; + + arity = 0; + for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args; + args = TREE_CHAIN (args)) + arity++; + + if (arity == 1) + fntype = build_function_type_list (type_out, type_in, NULL); + else + fntype = build_function_type_list (type_out, type_in, type_in, NULL); + + /* Build a function declaration for the vectorized function. */ + new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype); + TREE_PUBLIC (new_fndecl) = 1; + DECL_EXTERNAL (new_fndecl) = 1; + DECL_IS_NOVOPS (new_fndecl) = 1; + TREE_READONLY (new_fndecl) = 1; + + return new_fndecl; +} + +/* Handler for an ACML-style interface to + a library with vectorized intrinsics. */ static tree ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 649a64b..8718810 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -10765,15 +10765,26 @@ decreased by up to 2 ulp (i.e. the inverse of 1.0 equals 0.99999994). @item -mveclibabi=@var{type} @opindex mveclibabi Specifies the ABI type to use for vectorizing intrinsics using an -external library. Supported types are @code{acml} for the AMD -math core library style of interfacing. GCC will currently emit -calls to @code{__vrd2_sin}, @code{__vrd2_cos}, @code{__vrd2_exp}, -@code{__vrd2_log}, @code{__vrd2_log2}, @code{__vrd2_log10}, -@code{__vrs4_sinf}, @code{__vrs4_cosf}, @code{__vrs4_expf}, -@code{__vrs4_logf}, @code{__vrs4_log2f}, @code{__vrs4_log10f} -and @code{__vrs4_powf} when using this type and @option{-ftree-vectorize} -is enabled. A ACML ABI compatible library will have to be specified -at link time. +external library. Supported types are @code{svml} for the Intel short +vector math library and @code{acml} for the AMD math core library style +of interfacing. GCC will currently emit calls to @code{vmldExp2}, +@code{vmldLn2}, @code{vmldLog102}, @code{vmldLog102}, @code{vmldPow2}, +@code{vmldTanh2}, @code{vmldTan2}, @code{vmldAtan2}, @code{vmldAtanh2}, +@code{vmldCbrt2}, @code{vmldSinh2}, @code{vmldSin2}, @code{vmldAsinh2}, +@code{vmldAsin2}, @code{vmldCosh2}, @code{vmldCos2}, @code{vmldAcosh2}, +@code{vmldAcos2}, @code{vmlsExp4}, @code{vmlsLn4}, @code{vmlsLog104}, +@code{vmlsLog104}, @code{vmlsPow4}, @code{vmlsTanh4}, @code{vmlsTan4}, +@code{vmlsAtan4}, @code{vmlsAtanh4}, @code{vmlsCbrt4}, @code{vmlsSinh4}, +@code{vmlsSin4}, @code{vmlsAsinh4}, @code{vmlsAsin4}, @code{vmlsCosh4}, +@code{vmlsCos4}, @code{vmlsAcosh4} and @code{vmlsAcos4} for corresponding +function type when @option{-mveclibabi=svml} is used and @code{__vrd2_sin}, +@code{__vrd2_cos}, @code{__vrd2_exp}, @code{__vrd2_log}, @code{__vrd2_log2}, +@code{__vrd2_log10}, @code{__vrs4_sinf}, @code{__vrs4_cosf}, +@code{__vrs4_expf}, @code{__vrs4_logf}, @code{__vrs4_log2f}, +@code{__vrs4_log10f} and @code{__vrs4_powf} for corresponding function type +when @option{-mveclibabi=acml} is used. Both @option{-ftree-vectorize} and +@option{-funsafe-math-optimizations} have to be enabled. A SVML or ACML ABI +compatible library will have to be specified at link time. @item -mpush-args @itemx -mno-push-args diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2ac6eb0..3bd9eab 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2008-03-28 Uros Bizjak <ubizjak@gmail.com> + + * gcc.target/i386/vectorize6.c: New test. + 2008-03-28 Tobias Burnus <burnus@net-b.de> PR fortran/35721 @@ -20,7 +24,7 @@ 2008-03-28 Andrew Pinski <andrew_pinski@playstation.sony.com> PR target/31334 - * gcc.target/powerpc/altivec-25.c: Nnew testcase. + * gcc.target/powerpc/altivec-25.c: New testcase. 2008-03-27 Jerry DeLisle <jvdelisle@gcc.gnu.org> diff --git a/gcc/testsuite/gcc.target/i386/vectorize6.c b/gcc/testsuite/gcc.target/i386/vectorize6.c new file mode 100644 index 0000000..78ec53d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vectorize6.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -ftree-vectorize -mveclibabi=svml -ffast-math" } */ + +double x[256]; + +extern double sin(double); + +void foo(void) +{ + int i; + + for (i=0; i<256; ++i) + x[i] = sin(x[i]); +} + +/* { dg-final { scan-assembler "vmldSin2" } } */ |