diff options
author | Will Schmidt <will_schmidt@vnet.ibm.com> | 2017-09-19 13:42:48 +0000
committer | Will Schmidt <willschm@gcc.gnu.org> | 2017-09-19 13:42:48 +0000
commit | d14c60ad26f57ed6d32180a96cc27c7bf1d7f5b2 (patch)
tree | 2231680c5bbeace8f32ac41319b46a171a02bb17 /gcc
parent | 81b29ad80a7fb41b5534c1ae45565e1c353dbf0e (diff)
download | gcc-d14c60ad26f57ed6d32180a96cc27c7bf1d7f5b2.zip gcc-d14c60ad26f57ed6d32180a96cc27c7bf1d7f5b2.tar.gz gcc-d14c60ad26f57ed6d32180a96cc27c7bf1d7f5b2.tar.bz2
rs6000.c (rs6000_gimple_fold_builtin): Add handling for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
[gcc]
2017-09-19 Will Schmidt <will_schmidt@vnet.ibm.com>
* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD.
From-SVN: r252975
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 7
-rw-r--r-- | gcc/config/rs6000/rs6000-c.c | 74
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 42
3 files changed, 51 insertions(+), 72 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 98806a3..d2a7c14 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2017-09-19 Will Schmidt <will_schmidt@vnet.ibm.com> + + * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling + for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*). + * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin): + Remove obsoleted code for handling ALTIVEC_BUILTIN_VEC_LD. + 2017-09-19 Richard Biener <rguenther@suse.de> PR tree-optimization/82244 diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index d27f563..a49db97 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -6472,85 +6472,15 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, return stmt; } - /* Expand vec_ld into an expression that masks the address and - performs the load. We need to expand this early to allow + /* Expand vec_st into an expression that masks the address and + performs the store. We need to expand this early to allow the best aliasing, as by the time we get into RTL we no longer are able to honor __restrict__, for example. We may want to consider this for all memory access built-ins. When -maltivec=be is specified, or the wrong number of arguments is provided, simply punt to existing built-in processing. */ - if (fcode == ALTIVEC_BUILTIN_VEC_LD - && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG) - && nargs == 2) - { - tree arg0 = (*arglist)[0]; - tree arg1 = (*arglist)[1]; - - /* Strip qualifiers like "const" from the pointer arg. */ - tree arg1_type = TREE_TYPE (arg1); - if (TREE_CODE (arg1_type) == ARRAY_TYPE && c_dialect_cxx ()) - { - /* Force array-to-pointer decay for C++. */ - arg1 = default_conversion (arg1); - arg1_type = TREE_TYPE (arg1); - } - if (!POINTER_TYPE_P (arg1_type)) - goto bad; - - tree inner_type = TREE_TYPE (arg1_type); - if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0) - { - arg1_type = build_pointer_type (build_qualified_type (inner_type, - 0)); - arg1 = fold_convert (arg1_type, arg1); - } - - /* Construct the masked address. Let existing error handling take - over if we don't have a constant offset. */ - arg0 = fold (arg0); - - if (TREE_CODE (arg0) == INTEGER_CST) - { - if (!ptrofftype_p (TREE_TYPE (arg0))) - arg0 = build1 (NOP_EXPR, sizetype, arg0); - - tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type, - arg1, arg0); - tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr, - build_int_cst (arg1_type, -16)); - - /* Find the built-in to get the return type so we can convert - the result properly (or fall back to default handling if the - arguments aren't compatible). */ - for (desc = altivec_overloaded_builtins; - desc->code && desc->code != fcode; desc++) - continue; - - for (; desc->code == fcode; desc++) - if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1) - && (rs6000_builtin_type_compatible (TREE_TYPE (arg1), - desc->op2))) - { - tree ret_type = rs6000_builtin_type (desc->ret_type); - if (TYPE_MODE (ret_type) == V2DImode) - /* Type-based aliasing analysis thinks vector long - and vector long long are different and will put them - in distinct alias classes. Force our return type - to be a may-alias type to avoid this. */ - ret_type - = build_pointer_type_for_mode (ret_type, Pmode, - true/*can_alias_all*/); - else - ret_type = build_pointer_type (ret_type); - aligned = build1 (NOP_EXPR, ret_type, aligned); - tree ret_val = build_indirect_ref (loc, aligned, RO_NULL); - return ret_val; - } - } - } - /* Similarly for stvx. */ if (fcode == ALTIVEC_BUILTIN_VEC_ST && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG) && nargs == 3) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index bc1c4db..1978634 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -16546,6 +16546,48 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) update_call_from_tree (gsi, res); return true; } + /* Vector loads. */ + case ALTIVEC_BUILTIN_LVX_V16QI: + case ALTIVEC_BUILTIN_LVX_V8HI: + case ALTIVEC_BUILTIN_LVX_V4SI: + case ALTIVEC_BUILTIN_LVX_V4SF: + case ALTIVEC_BUILTIN_LVX_V2DI: + case ALTIVEC_BUILTIN_LVX_V2DF: + { + arg0 = gimple_call_arg (stmt, 0); // offset + arg1 = gimple_call_arg (stmt, 1); // address + /* Do not fold for -maltivec=be on LE targets. */ + if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN) + return false; + lhs = gimple_call_lhs (stmt); + location_t loc = gimple_location (stmt); + /* Since arg1 may be cast to a different type, just use ptr_type_node + here instead of trying to enforce TBAA on pointer types. */ + tree arg1_type = ptr_type_node; + tree lhs_type = TREE_TYPE (lhs); + /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create + the tree using the value from arg0. The resulting type will match + the type of arg1. */ + gimple_seq stmts = NULL; + tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0); + tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, + arg1_type, arg1, temp_offset); + /* Mask off any lower bits from the address. */ + tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR, + arg1_type, temp_addr, + build_int_cst (arg1_type, -16)); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + /* Use the build2 helper to set up the mem_ref. The MEM_REF could also + take an offset, but since we've already incorporated the offset + above, here we just pass in a zero. */ + gimple *g; + g = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr, + build_int_cst (arg1_type, 0))); + gimple_set_location (g, loc); + gsi_replace (gsi, g, true); + return true; + } + default: if (TARGET_DEBUG_BUILTIN) fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",