-rw-r--r--  gcc/ChangeLog                 |  39
-rw-r--r--  gcc/config/rs6000/altivec.md  | 104
-rw-r--r--  gcc/config/rs6000/rs6000-c.c  | 124
-rw-r--r--  gcc/config/rs6000/rs6000.c    | 465
-rw-r--r--  gcc/config/rs6000/vector.md   |  18
5 files changed, 671 insertions(+), 79 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4f35001..029a402 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,42 @@
+2016-04-27  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+
+	* config/rs6000/altivec.md (altivec_lvx_<mode>): Remove.
+	(altivec_lvx_<mode>_internal): Document.
+	(altivec_lvx_<mode>_2op): New define_insn.
+	(altivec_lvx_<mode>_1op): Likewise.
+	(altivec_lvx_<mode>_2op_si): Likewise.
+	(altivec_lvx_<mode>_1op_si): Likewise.
+	(altivec_stvx_<mode>): Remove.
+	(altivec_stvx_<mode>_internal): Document.
+	(altivec_stvx_<mode>_2op): New define_insn.
+	(altivec_stvx_<mode>_1op): Likewise.
+	(altivec_stvx_<mode>_2op_si): Likewise.
+	(altivec_stvx_<mode>_1op_si): Likewise.
+	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
+	Expand vec_ld and vec_st during parsing.
+	* config/rs6000/rs6000.c (altivec_expand_lvx_be): Commentary
+	changes.
+	(altivec_expand_stvx_be): Likewise.
+	(altivec_expand_lv_builtin): Expand lvx built-ins to expose the
+	address-masking behavior in RTL.
+	(altivec_expand_stv_builtin): Expand stvx built-ins to expose the
+	address-masking behavior in RTL.
+	(altivec_expand_builtin): Change builtin code arguments for calls
+	to altivec_expand_stv_builtin and altivec_expand_lv_builtin.
+	(insn_is_swappable_p): Avoid incorrect swap optimization in the
+	presence of lvx/stvx patterns.
+	(alignment_with_canonical_addr): New function.
+	(alignment_mask): Likewise.
+	(find_alignment_op): Likewise.
+	(recombine_lvx_pattern): Likewise.
+	(recombine_stvx_pattern): Likewise.
+	(recombine_lvx_stvx_patterns): Likewise.
+	(rs6000_analyze_swaps): Perform a pre-pass to recognize lvx and
+	stvx patterns from expand.
+	* config/rs6000/vector.md (vector_altivec_load_<mode>): Use new
+	expansions.
+	(vector_altivec_store_<mode>): Likewise.
+
 2016-04-26  Evandro Menezes  <e.menezes@samsung.com>
 
 	* config/aarch64/aarch64.md
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 9c3084d..7a8c8eb 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2514,20 +2514,9 @@
   "lvxl %0,%y1"
   [(set_attr "type" "vecload")])
 
-(define_expand "altivec_lvx_<mode>"
-  [(parallel
-    [(set (match_operand:VM2 0 "register_operand" "=v")
-          (match_operand:VM2 1 "memory_operand" "Z"))
-     (unspec [(const_int 0)] UNSPEC_LVX)])]
-  "TARGET_ALTIVEC"
-{
-  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
-    {
-      altivec_expand_lvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_LVX);
-      DONE;
-    }
-})
-
+; This version of lvx is used only in cases where we need to force an lvx
+; over any other load, and we don't care about losing CSE opportunities.
+; Its primary use is for prologue register saves.
 (define_insn "altivec_lvx_<mode>_internal"
   [(parallel
     [(set (match_operand:VM2 0 "register_operand" "=v")
@@ -2537,20 +2526,45 @@
   "lvx %0,%y1"
   [(set_attr "type" "vecload")])
 
-(define_expand "altivec_stvx_<mode>"
-  [(parallel
-    [(set (match_operand:VM2 0 "memory_operand" "=Z")
-          (match_operand:VM2 1 "register_operand" "v"))
-     (unspec [(const_int 0)] UNSPEC_STVX)])]
-  "TARGET_ALTIVEC"
-{
-  if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
-    {
-      altivec_expand_stvx_be (operands[0], operands[1], <MODE>mode, UNSPEC_STVX);
-      DONE;
-    }
-})
+; The next two patterns embody what lvx should usually look like.
+(define_insn "altivec_lvx_<mode>_2op" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:DI (plus:DI (match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "register_operand" "r")) + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_64BIT" + "lvx %0,%1,%2" + [(set_attr "type" "vecload")]) +(define_insn "altivec_lvx_<mode>_1op" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:DI (match_operand:DI 1 "register_operand" "r") + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_64BIT" + "lvx %0,0,%1" + [(set_attr "type" "vecload")]) + +; 32-bit versions of the above. +(define_insn "altivec_lvx_<mode>_2op_si" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:SI (plus:SI (match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "register_operand" "r")) + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_32BIT" + "lvx %0,%1,%2" + [(set_attr "type" "vecload")]) + +(define_insn "altivec_lvx_<mode>_1op_si" + [(set (match_operand:VM2 0 "register_operand" "=v") + (mem:VM2 (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -16))))] + "TARGET_ALTIVEC && TARGET_32BIT" + "lvx %0,0,%1" + [(set_attr "type" "vecload")]) + +; This version of stvx is used only in cases where we need to force an stvx +; over any other store, and we don't care about losing CSE opportunities. +; Its primary use is for epilogue register restores. (define_insn "altivec_stvx_<mode>_internal" [(parallel [(set (match_operand:VM2 0 "memory_operand" "=Z") @@ -2560,6 +2574,42 @@ "stvx %1,%y0" [(set_attr "type" "vecstore")]) +; The next two patterns embody what stvx should usually look like. +(define_insn "altivec_stvx_<mode>_2op" + [(set (mem:VM2 (and:DI (plus:DI (match_operand:DI 1 "register_operand" "b") + (match_operand:DI 2 "register_operand" "r")) + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_64BIT" + "stvx %0,%1,%2" + [(set_attr "type" "vecstore")]) + +(define_insn "altivec_stvx_<mode>_1op" + [(set (mem:VM2 (and:DI (match_operand:DI 1 "register_operand" "r") + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_64BIT" + "stvx %0,0,%1" + [(set_attr "type" "vecstore")]) + +; 32-bit versions of the above. +(define_insn "altivec_stvx_<mode>_2op_si" + [(set (mem:VM2 (and:SI (plus:SI (match_operand:SI 1 "register_operand" "b") + (match_operand:SI 2 "register_operand" "r")) + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_32BIT" + "stvx %0,%1,%2" + [(set_attr "type" "vecstore")]) + +(define_insn "altivec_stvx_<mode>_1op_si" + [(set (mem:VM2 (and:SI (match_operand:SI 1 "register_operand" "r") + (const_int -16))) + (match_operand:VM2 0 "register_operand" "v"))] + "TARGET_ALTIVEC && TARGET_32BIT" + "stvx %0,0,%1" + [(set_attr "type" "vecstore")]) + (define_expand "altivec_stvxl_<mode>" [(parallel [(set (match_operand:VM2 0 "memory_operand" "=Z") diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index ceb80b2..55751a6 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -4800,6 +4800,130 @@ assignment for unaligned loads and stores"); return stmt; } + /* Expand vec_ld into an expression that masks the address and + performs the load. We need to expand this early to allow + the best aliasing, as by the time we get into RTL we no longer + are able to honor __restrict__, for example. We may want to + consider this for all memory access built-ins. 
+ + When -maltivec=be is specified, simply punt to existing + built-in processing. */ + if (fcode == ALTIVEC_BUILTIN_VEC_LD + && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)) + { + tree arg0 = (*arglist)[0]; + tree arg1 = (*arglist)[1]; + + /* Strip qualifiers like "const" from the pointer arg. */ + tree arg1_type = TREE_TYPE (arg1); + tree inner_type = TREE_TYPE (arg1_type); + if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0) + { + arg1_type = build_pointer_type (build_qualified_type (inner_type, + 0)); + arg1 = fold_convert (arg1_type, arg1); + } + + /* Construct the masked address. Let existing error handling take + over if we don't have a constant offset. */ + arg0 = fold (arg0); + + if (TREE_CODE (arg0) == INTEGER_CST) + { + if (!ptrofftype_p (TREE_TYPE (arg0))) + arg0 = build1 (NOP_EXPR, sizetype, arg0); + + tree arg1_type = TREE_TYPE (arg1); + tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type, + arg1, arg0); + tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr, + build_int_cst (arg1_type, -16)); + + /* Find the built-in to get the return type so we can convert + the result properly (or fall back to default handling if the + arguments aren't compatible). */ + for (desc = altivec_overloaded_builtins; + desc->code && desc->code != fcode; desc++) + continue; + + for (; desc->code == fcode; desc++) + if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1) + && (rs6000_builtin_type_compatible (TREE_TYPE (arg1), + desc->op2))) + { + tree ret_type = rs6000_builtin_type (desc->ret_type); + if (TYPE_MODE (ret_type) == V2DImode) + /* Type-based aliasing analysis thinks vector long + and vector long long are different and will put them + in distinct alias classes. Force our return type + to be a may-alias type to avoid this. */ + ret_type + = build_pointer_type_for_mode (ret_type, Pmode, + true/*can_alias_all*/); + else + ret_type = build_pointer_type (ret_type); + aligned = build1 (NOP_EXPR, ret_type, aligned); + tree ret_val = build_indirect_ref (loc, aligned, RO_NULL); + return ret_val; + } + } + } + + /* Similarly for stvx. */ + if (fcode == ALTIVEC_BUILTIN_VEC_ST + && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)) + { + tree arg0 = (*arglist)[0]; + tree arg1 = (*arglist)[1]; + tree arg2 = (*arglist)[2]; + + /* Construct the masked address. Let existing error handling take + over if we don't have a constant offset. */ + arg1 = fold (arg1); + + if (TREE_CODE (arg1) == INTEGER_CST) + { + if (!ptrofftype_p (TREE_TYPE (arg1))) + arg1 = build1 (NOP_EXPR, sizetype, arg1); + + tree arg2_type = TREE_TYPE (arg2); + tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg2_type, + arg2, arg1); + tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg2_type, addr, + build_int_cst (arg2_type, -16)); + + /* Find the built-in to make sure a compatible one exists; if not + we fall back to default handling to get the error message. */ + for (desc = altivec_overloaded_builtins; + desc->code && desc->code != fcode; desc++) + continue; + + for (; desc->code == fcode; desc++) + if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1) + && rs6000_builtin_type_compatible (TREE_TYPE (arg1), desc->op2) + && rs6000_builtin_type_compatible (TREE_TYPE (arg2), + desc->op3)) + { + tree arg0_type = TREE_TYPE (arg0); + if (TYPE_MODE (arg0_type) == V2DImode) + /* Type-based aliasing analysis thinks vector long + and vector long long are different and will put them + in distinct alias classes. Force our address type + to be a may-alias type to avoid this. 
*/ + arg0_type + = build_pointer_type_for_mode (arg0_type, Pmode, + true/*can_alias_all*/); + else + arg0_type = build_pointer_type (arg0_type); + aligned = build1 (NOP_EXPR, arg0_type, aligned); + tree stg = build_indirect_ref (loc, aligned, RO_NULL); + tree retval = build2 (MODIFY_EXPR, TREE_TYPE (stg), stg, + convert (TREE_TYPE (stg), arg0)); + return retval; + } + } + } + for (n = 0; !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs; fnargs = TREE_CHAIN (fnargs), n++) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 1d0076c..fba4f9e 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -13025,9 +13025,9 @@ swap_selector_for_mode (machine_mode mode) return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm))); } -/* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target - with -maltivec=be specified. Issue the load followed by an element-reversing - permute. */ +/* Generate code for an "lvxl", or "lve*x" built-in for a little endian target + with -maltivec=be specified. Issue the load followed by an element- + reversing permute. */ void altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec) { @@ -13043,8 +13043,8 @@ altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec) emit_insn (gen_rtx_SET (op0, vperm)); } -/* Generate code for a "stvx" or "stvxl" built-in for a little endian target - with -maltivec=be specified. Issue the store preceded by an element-reversing +/* Generate code for a "stvxl" built-in for a little endian target with + -maltivec=be specified. Issue the store preceded by an element-reversing permute. */ void altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec) @@ -13106,22 +13106,65 @@ altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk) op1 = copy_to_mode_reg (mode1, op1); - if (op0 == const0_rtx) - { - addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1); - } - else - { - op0 = copy_to_mode_reg (mode0, op0); - addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1)); - } + /* For LVX, express the RTL accurately by ANDing the address with -16. + LVXL and LVE*X expand to use UNSPECs to hide their special behavior, + so the raw address is fine. */ + switch (icode) + { + case CODE_FOR_altivec_lvx_v2df_2op: + case CODE_FOR_altivec_lvx_v2di_2op: + case CODE_FOR_altivec_lvx_v4sf_2op: + case CODE_FOR_altivec_lvx_v4si_2op: + case CODE_FOR_altivec_lvx_v8hi_2op: + case CODE_FOR_altivec_lvx_v16qi_2op: + { + rtx rawaddr; + if (op0 == const0_rtx) + rawaddr = op1; + else + { + op0 = copy_to_mode_reg (mode0, op0); + rawaddr = gen_rtx_PLUS (Pmode, op1, op0); + } + addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16)); + addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr); - pat = GEN_FCN (icode) (target, addr); + /* For -maltivec=be, emit the load and follow it up with a + permute to swap the elements. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + rtx temp = gen_reg_rtx (tmode); + emit_insn (gen_rtx_SET (temp, addr)); - if (! pat) - return 0; - emit_insn (pat); + rtx sel = swap_selector_for_mode (tmode); + rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (target, vperm)); + } + else + emit_insn (gen_rtx_SET (target, addr)); + break; + } + + default: + if (op0 == const0_rtx) + addr = gen_rtx_MEM (blk ? 
BLKmode : tmode, op1); + else + { + op0 = copy_to_mode_reg (mode0, op0); + addr = gen_rtx_MEM (blk ? BLKmode : tmode, + gen_rtx_PLUS (Pmode, op1, op0)); + } + + pat = GEN_FCN (icode) (target, addr); + if (! pat) + return 0; + emit_insn (pat); + + break; + } + return target; } @@ -13208,7 +13251,7 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp) rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); rtx op2 = expand_normal (arg2); - rtx pat, addr; + rtx pat, addr, rawaddr; machine_mode tmode = insn_data[icode].operand[0].mode; machine_mode smode = insn_data[icode].operand[1].mode; machine_mode mode1 = Pmode; @@ -13220,24 +13263,69 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp) || arg2 == error_mark_node) return const0_rtx; - if (! (*insn_data[icode].operand[1].predicate) (op0, smode)) - op0 = copy_to_mode_reg (smode, op0); - op2 = copy_to_mode_reg (mode2, op2); - if (op1 == const0_rtx) - { - addr = gen_rtx_MEM (tmode, op2); - } - else - { - op1 = copy_to_mode_reg (mode1, op1); - addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2)); - } + /* For STVX, express the RTL accurately by ANDing the address with -16. + STVXL and STVE*X expand to use UNSPECs to hide their special behavior, + so the raw address is fine. */ + switch (icode) + { + case CODE_FOR_altivec_stvx_v2df_2op: + case CODE_FOR_altivec_stvx_v2di_2op: + case CODE_FOR_altivec_stvx_v4sf_2op: + case CODE_FOR_altivec_stvx_v4si_2op: + case CODE_FOR_altivec_stvx_v8hi_2op: + case CODE_FOR_altivec_stvx_v16qi_2op: + { + if (op1 == const0_rtx) + rawaddr = op2; + else + { + op1 = copy_to_mode_reg (mode1, op1); + rawaddr = gen_rtx_PLUS (Pmode, op2, op1); + } + + addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16)); + addr = gen_rtx_MEM (tmode, addr); + + op0 = copy_to_mode_reg (tmode, op0); + + /* For -maltivec=be, emit a permute to swap the elements, followed + by the store. */ + if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG) + { + rtx temp = gen_reg_rtx (tmode); + rtx sel = swap_selector_for_mode (tmode); + rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel), + UNSPEC_VPERM); + emit_insn (gen_rtx_SET (temp, vperm)); + emit_insn (gen_rtx_SET (addr, temp)); + } + else + emit_insn (gen_rtx_SET (addr, op0)); + + break; + } + + default: + { + if (! 
(*insn_data[icode].operand[1].predicate) (op0, smode)) + op0 = copy_to_mode_reg (smode, op0); + + if (op1 == const0_rtx) + addr = gen_rtx_MEM (tmode, op2); + else + { + op1 = copy_to_mode_reg (mode1, op1); + addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1)); + } + + pat = GEN_FCN (icode) (addr, op0); + if (pat) + emit_insn (pat); + } + } - pat = GEN_FCN (icode) (addr, op0); - if (pat) - emit_insn (pat); return NULL_RTX; } @@ -14073,18 +14161,18 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) switch (fcode) { case ALTIVEC_BUILTIN_STVX_V2DF: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp); case ALTIVEC_BUILTIN_STVX_V2DI: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp); case ALTIVEC_BUILTIN_STVX_V4SF: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp); case ALTIVEC_BUILTIN_STVX: case ALTIVEC_BUILTIN_STVX_V4SI: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp); case ALTIVEC_BUILTIN_STVX_V8HI: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp); case ALTIVEC_BUILTIN_STVX_V16QI: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp); case ALTIVEC_BUILTIN_STVEBX: return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp); case ALTIVEC_BUILTIN_STVEHX: @@ -14272,23 +14360,23 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi, exp, target, false); case ALTIVEC_BUILTIN_LVX_V2DF: - return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df, + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op, exp, target, false); case ALTIVEC_BUILTIN_LVX_V2DI: - return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di, + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op, exp, target, false); case ALTIVEC_BUILTIN_LVX_V4SF: - return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf, + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op, exp, target, false); case ALTIVEC_BUILTIN_LVX: case ALTIVEC_BUILTIN_LVX_V4SI: - return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si, + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op, exp, target, false); case ALTIVEC_BUILTIN_LVX_V8HI: - return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi, + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op, exp, target, false); case ALTIVEC_BUILTIN_LVX_V16QI: - return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi, + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op, exp, target, false); case ALTIVEC_BUILTIN_LVLX: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx, @@ -37139,7 +37227,9 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, fix them up by converting them to permuting ones. Exceptions: UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL body instead of a SET; and UNSPEC_STVE, which has an UNSPEC - for the SET source. */ + for the SET source. 
Also we must now make an exception for lvx + and stvx when they are not in the UNSPEC_LVX/STVX form (with the + explicit "& -16") since this leads to unrecognizable insns. */ rtx body = PATTERN (insn); int i = INSN_UID (insn); @@ -37147,6 +37237,11 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, { if (GET_CODE (body) == SET) { + rtx rhs = SET_SRC (body); + gcc_assert (GET_CODE (rhs) == MEM); + if (GET_CODE (XEXP (rhs, 0)) == AND) + return 0; + *special = SH_NOSWAP_LD; return 1; } @@ -37156,8 +37251,14 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, if (insn_entry[i].is_store) { - if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC) + if (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) != UNSPEC) { + rtx lhs = SET_DEST (body); + gcc_assert (GET_CODE (lhs) == MEM); + if (GET_CODE (XEXP (lhs, 0)) == AND) + return 0; + *special = SH_NOSWAP_ST; return 1; } @@ -37827,13 +37928,274 @@ dump_swap_insn_table (swap_web_entry *insn_entry) fputs ("\n", dump_file); } +/* Return RTX with its address canonicalized to (reg) or (+ reg reg). + Here RTX is an (& addr (const_int -16)). Always return a new copy + to avoid problems with combine. */ +static rtx +alignment_with_canonical_addr (rtx align) +{ + rtx canon; + rtx addr = XEXP (align, 0); + + if (REG_P (addr)) + canon = addr; + + else if (GET_CODE (addr) == PLUS) + { + rtx addrop0 = XEXP (addr, 0); + rtx addrop1 = XEXP (addr, 1); + + if (!REG_P (addrop0)) + addrop0 = force_reg (GET_MODE (addrop0), addrop0); + + if (!REG_P (addrop1)) + addrop1 = force_reg (GET_MODE (addrop1), addrop1); + + canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1); + } + + else + canon = force_reg (GET_MODE (addr), addr); + + return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16)); +} + +/* Check whether an rtx is an alignment mask, and if so, return + a fully-expanded rtx for the masking operation. */ +static rtx +alignment_mask (rtx_insn *insn) +{ + rtx body = PATTERN (insn); + + if (GET_CODE (body) != SET + || GET_CODE (SET_SRC (body)) != AND + || !REG_P (XEXP (SET_SRC (body), 0))) + return 0; + + rtx mask = XEXP (SET_SRC (body), 1); + + if (GET_CODE (mask) == CONST_INT) + { + if (INTVAL (mask) == -16) + return alignment_with_canonical_addr (SET_SRC (body)); + else + return 0; + } + + if (!REG_P (mask)) + return 0; + + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + df_ref use; + rtx real_mask = 0; + + FOR_EACH_INSN_INFO_USE (use, insn_info) + { + if (!rtx_equal_p (DF_REF_REG (use), mask)) + continue; + + struct df_link *def_link = DF_REF_CHAIN (use); + if (!def_link || def_link->next) + return 0; + + rtx_insn *const_insn = DF_REF_INSN (def_link->ref); + rtx const_body = PATTERN (const_insn); + if (GET_CODE (const_body) != SET) + return 0; + + real_mask = SET_SRC (const_body); + + if (GET_CODE (real_mask) != CONST_INT + || INTVAL (real_mask) != -16) + return 0; + } + + if (real_mask == 0) + return 0; + + return alignment_with_canonical_addr (SET_SRC (body)); +} + +/* Given INSN that's a load or store based at BASE_REG, look for a + feeding computation that aligns its address on a 16-byte boundary. 
*/ +static rtx +find_alignment_op (rtx_insn *insn, rtx base_reg) +{ + df_ref base_use; + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + rtx and_operation = 0; + + FOR_EACH_INSN_INFO_USE (base_use, insn_info) + { + if (!rtx_equal_p (DF_REF_REG (base_use), base_reg)) + continue; + + struct df_link *base_def_link = DF_REF_CHAIN (base_use); + if (!base_def_link || base_def_link->next) + break; + + rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref); + and_operation = alignment_mask (and_insn); + if (and_operation != 0) + break; + } + + return and_operation; +} + +struct del_info { bool replace; rtx_insn *replace_insn; }; + +/* If INSN is the load for an lvx pattern, put it in canonical form. */ +static void +recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete) +{ + rtx body = PATTERN (insn); + gcc_assert (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) == VEC_SELECT + && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM); + + rtx mem = XEXP (SET_SRC (body), 0); + rtx base_reg = XEXP (mem, 0); + + rtx and_operation = find_alignment_op (insn, base_reg); + + if (and_operation != 0) + { + df_ref def; + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + FOR_EACH_INSN_INFO_DEF (def, insn_info) + { + struct df_link *link = DF_REF_CHAIN (def); + if (!link || link->next) + break; + + rtx_insn *swap_insn = DF_REF_INSN (link->ref); + if (!insn_is_swap_p (swap_insn) + || insn_is_load_p (swap_insn) + || insn_is_store_p (swap_insn)) + break; + + /* Expected lvx pattern found. Change the swap to + a copy, and propagate the AND operation into the + load. */ + to_delete[INSN_UID (swap_insn)].replace = true; + to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn; + + XEXP (mem, 0) = and_operation; + SET_SRC (body) = mem; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "lvx opportunity found at %d\n", + INSN_UID (insn)); + } + } +} + +/* If INSN is the store for an stvx pattern, put it in canonical form. */ +static void +recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete) +{ + rtx body = PATTERN (insn); + gcc_assert (GET_CODE (body) == SET + && GET_CODE (SET_DEST (body)) == MEM + && GET_CODE (SET_SRC (body)) == VEC_SELECT); + rtx mem = SET_DEST (body); + rtx base_reg = XEXP (mem, 0); + + rtx and_operation = find_alignment_op (insn, base_reg); + + if (and_operation != 0) + { + rtx src_reg = XEXP (SET_SRC (body), 0); + df_ref src_use; + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + FOR_EACH_INSN_INFO_USE (src_use, insn_info) + { + if (!rtx_equal_p (DF_REF_REG (src_use), src_reg)) + continue; + + struct df_link *link = DF_REF_CHAIN (src_use); + if (!link || link->next) + break; + + rtx_insn *swap_insn = DF_REF_INSN (link->ref); + if (!insn_is_swap_p (swap_insn) + || insn_is_load_p (swap_insn) + || insn_is_store_p (swap_insn)) + break; + + /* Expected stvx pattern found. Change the swap to + a copy, and propagate the AND operation into the + store. */ + to_delete[INSN_UID (swap_insn)].replace = true; + to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn; + + XEXP (mem, 0) = and_operation; + SET_SRC (body) = src_reg; + INSN_CODE (insn) = -1; /* Force re-recognition. */ + df_insn_rescan (insn); + + if (dump_file) + fprintf (dump_file, "stvx opportunity found at %d\n", + INSN_UID (insn)); + } + } +} + +/* Look for patterns created from builtin lvx and stvx calls, and + canonicalize them to be properly recognized as such. 
*/ +static void +recombine_lvx_stvx_patterns (function *fun) +{ + int i; + basic_block bb; + rtx_insn *insn; + + int num_insns = get_max_uid (); + del_info *to_delete = XCNEWVEC (del_info, num_insns); + + FOR_ALL_BB_FN (bb, fun) + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + if (insn_is_load_p (insn) && insn_is_swap_p (insn)) + recombine_lvx_pattern (insn, to_delete); + else if (insn_is_store_p (insn) && insn_is_swap_p (insn)) + recombine_stvx_pattern (insn, to_delete); + } + + /* Turning swaps into copies is delayed until now, to avoid problems + with deleting instructions during the insn walk. */ + for (i = 0; i < num_insns; i++) + if (to_delete[i].replace) + { + rtx swap_body = PATTERN (to_delete[i].replace_insn); + rtx src_reg = XEXP (SET_SRC (swap_body), 0); + rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg); + rtx_insn *new_insn = emit_insn_before (copy, + to_delete[i].replace_insn); + set_block_for_insn (new_insn, + BLOCK_FOR_INSN (to_delete[i].replace_insn)); + df_insn_rescan (new_insn); + df_insn_delete (to_delete[i].replace_insn); + remove_insn (to_delete[i].replace_insn); + to_delete[i].replace_insn->set_deleted (); + } + + free (to_delete); +} + /* Main entry point for this pass. */ unsigned int rs6000_analyze_swaps (function *fun) { swap_web_entry *insn_entry; basic_block bb; - rtx_insn *insn; + rtx_insn *insn, *curr_insn = 0; /* Dataflow analysis for use-def chains. */ df_set_flags (DF_RD_PRUNE_DEAD_DEFS); @@ -37841,12 +38203,15 @@ rs6000_analyze_swaps (function *fun) df_analyze (); df_set_flags (DF_DEFER_INSN_RESCAN); + /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */ + recombine_lvx_stvx_patterns (fun); + /* Allocate structure to represent webs of insns. */ insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ()); /* Walk the insns to gather basic data. */ FOR_ALL_BB_FN (bb, fun) - FOR_BB_INSNS (bb, insn) + FOR_BB_INSNS_SAFE (bb, insn, curr_insn) { unsigned int uid = INSN_UID (insn); if (NONDEBUG_INSN_P (insn)) diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 02fb3e3..5c66fe4 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -167,7 +167,14 @@ if (VECTOR_MEM_VSX_P (<MODE>mode)) { operands[1] = rs6000_address_for_altivec (operands[1]); - emit_insn (gen_altivec_lvx_<mode> (operands[0], operands[1])); + rtx and_op = XEXP (operands[1], 0); + gcc_assert (GET_CODE (and_op) == AND); + rtx addr = XEXP (and_op, 0); + if (GET_CODE (addr) == PLUS) + emit_insn (gen_altivec_lvx_<mode>_2op (operands[0], XEXP (addr, 0), + XEXP (addr, 1))); + else + emit_insn (gen_altivec_lvx_<mode>_1op (operands[0], operands[1])); DONE; } }") @@ -183,7 +190,14 @@ if (VECTOR_MEM_VSX_P (<MODE>mode)) { operands[0] = rs6000_address_for_altivec (operands[0]); - emit_insn (gen_altivec_stvx_<mode> (operands[0], operands[1])); + rtx and_op = XEXP (operands[0], 0); + gcc_assert (GET_CODE (and_op) == AND); + rtx addr = XEXP (and_op, 0); + if (GET_CODE (addr) == PLUS) + emit_insn (gen_altivec_stvx_<mode>_2op (operands[1], XEXP (addr, 0), + XEXP (addr, 1))); + else + emit_insn (gen_altivec_stvx_<mode>_1op (operands[1], operands[0])); DONE; } }") |
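
For readers of the patch, here is an illustrative user-level sketch (not part of the change itself) of the equivalence that the rs6000-c.c expansion of vec_ld and vec_st relies on: the effective address (base + offset) is masked with -16 before the access, which is exactly the addressing behavior of the hardware lvx/stvx instructions that the new altivec_lvx_<mode>_2op/_1op and altivec_stvx_<mode>_2op/_1op patterns now express directly in RTL. The program below is hypothetical test code, assuming a PowerPC target compiled with -maltivec and without -maltivec=be (the case the early expansion deliberately punts on):

/* Illustrative sketch only -- not part of the patch.  Demonstrates that
   vec_ld/vec_st access the 16-byte-aligned block containing the effective
   address, i.e. the address ANDed with -16, which is what the builtins
   now expand to during parsing.  Build with -maltivec on a PowerPC
   target.  */
#include <altivec.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* 16-byte-aligned buffer so the masked accesses stay inside it.  */
  static int buf[8] __attribute__ ((aligned (16)))
    = { 1, 2, 3, 4, 5, 6, 7, 8 };

  int *p = buf + 1;	/* deliberately not 16-byte aligned */

  /* vec_ld ignores the low four bits of (p + 0)...  */
  vector signed int v1 = vec_ld (0, p);

  /* ...so it loads the same 16 bytes as dereferencing the masked address,
     which is the form the front end now generates.  */
  uintptr_t ea = (uintptr_t) p + 0;
  vector signed int v2 = *(vector signed int *) (ea & (uintptr_t) -16);

  printf ("vec_ld matches masked load: %d\n", vec_all_eq (v1, v2));

  /* vec_st likewise stores through the masked address: this writes
     buf[0..3], not p[0..3].  */
  vector signed int zero = (vector signed int) { 0, 0, 0, 0 };
  vec_st (zero, 0, p);
  printf ("buf[0] after vec_st: %d (was 1)\n", buf[0]);

  return 0;
}

Because the masked access is now visible as ordinary RTL (a MEM of an AND with -16) rather than hidden behind UNSPEC_LVX/UNSPEC_STVX, the alias machinery can honor qualifiers such as __restrict__, and the swap-optimization pre-pass added in rs6000_analyze_swaps can recognize and recombine the lvx/stvx patterns.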