diff options
author | Michael Meissner <meissner@linux.vnet.ibm.com> | 2018-01-22 19:36:18 +0000 |
---|---|---|
committer | Michael Meissner <meissner@gcc.gnu.org> | 2018-01-22 19:36:18 +0000 |
commit | de0ecff83d8639cfa0075fab7a5f9a42657dd94e (patch) | |
tree | 95c285569c28af1b5a5b74b11206c8095f905878 | |
parent | bc8b0d04284de4288cae4e4ab3bc2d6c36d36245 (diff) | |
download | gcc-de0ecff83d8639cfa0075fab7a5f9a42657dd94e.zip gcc-de0ecff83d8639cfa0075fab7a5f9a42657dd94e.tar.gz gcc-de0ecff83d8639cfa0075fab7a5f9a42657dd94e.tar.bz2 |
re PR target/83862 (powerpc: ICE in signbit testcase)
[gcc]
2018-01-22 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/83862
* config/rs6000/rs6000-protos.h (rs6000_split_signbit): Delete,
no longer used.
* config/rs6000/rs6000.c (rs6000_split_signbit): Likewise.
* config/rs6000/rs6000.md (signbit<mode>2): Change code for IEEE
128-bit to produce an UNSPEC move to get the double word with the
signbit and then a shift directly to do signbit.
(signbit<mode>2_dm): Replace old IEEE 128-bit signbit
implementation with a new version that just does either a direct
move or a regular move. Move memory interface to separate insns.
Move insns so they are next to the expander.
(signbit<mode>2_dm_mem_be): New combiner insns to combine load
with signbit move. Split big and little endian case.
(signbit<mode>2_dm_mem_le): Likewise.
(signbit<mode>2_dm_<su>ext): Delete, no longer used.
(signbit<mode>2_dm2): Likewise.
[gcc/testsuite]
2018-01-22 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/83862
* gcc.target/powerpc/pr83862.c: New test.
From-SVN: r256959
-rw-r--r-- | gcc/ChangeLog | 19 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 43 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.md | 119 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/pr83862.c | 34 |
6 files changed, 127 insertions, 94 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 86a19bc..bfcf4da 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2018-01-22 Michael Meissner <meissner@linux.vnet.ibm.com> + + PR target/83862 + * config/rs6000/rs6000-protos.h (rs6000_split_signbit): Delete, + no longer used. + * config/rs6000/rs6000.c (rs6000_split_signbit): Likewise. + * config/rs6000/rs6000.md (signbit<mode>2): Change code for IEEE + 128-bit to produce an UNSPEC move to get the double word with the + signbit and then a shift directly to do signbit. + (signbit<mode>2_dm): Replace old IEEE 128-bit signbit + implementation with a new version that just does either a direct + move or a regular move. Move memory interface to separate insns. + Move insns so they are next to the expander. + (signbit<mode>2_dm_mem_be): New combiner insns to combine load + with signbit move. Split big and little endian case. + (signbit<mode>2_dm_mem_le): Likewise. + (signbit<mode>2_dm_<su>ext): Delete, no longer used. + (signbit<mode>2_dm2): Likewise. + 2018-01-22 Sebastian Perta <sebastian.perta@renesas.com> * config/rl78/rl78.md: New define_expand "anddi3". 
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 9c6c9a6..3cb5ee8 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -132,7 +132,6 @@ extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx); extern int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx); extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx); extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx); -extern void rs6000_split_signbit (rtx, rtx); extern void rs6000_expand_atomic_compare_and_swap (rtx op[]); extern rtx swap_endian_selector_for_mode (machine_mode mode); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 47e07cf..b457b2a 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -23424,49 +23424,6 @@ rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1) emit_move_insn (dest, target); } -/* Split a signbit operation on 64-bit machines with direct move. Also allow - for the value to come from memory or if it is already loaded into a GPR. */ - -void -rs6000_split_signbit (rtx dest, rtx src) -{ - machine_mode d_mode = GET_MODE (dest); - machine_mode s_mode = GET_MODE (src); - rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest); - rtx shift_reg = dest_di; - - gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64); - - if (MEM_P (src)) - { - rtx mem = (WORDS_BIG_ENDIAN - ? adjust_address (src, DImode, 0) - : adjust_address (src, DImode, 8)); - emit_insn (gen_rtx_SET (dest_di, mem)); - } - - else - { - unsigned int r = reg_or_subregno (src); - - if (INT_REGNO_P (r)) - shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0)); - - else - { - /* Generate the special mfvsrd instruction to get it in a GPR. 
*/ - gcc_assert (VSX_REGNO_P (r)); - if (s_mode == KFmode) - emit_insn (gen_signbitkf2_dm2 (dest_di, src)); - else - emit_insn (gen_signbittf2_dm2 (dest_di, src)); - } - } - - emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63))); - return; -} - /* A subroutine of the atomic operation splitters. Jump to LABEL if COND is true. Mark the jump as unlikely to be taken. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 757ec38..3707566 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -4779,12 +4779,19 @@ { if (FLOAT128_IEEE_P (<MODE>mode)) { + rtx dest = operands[0]; + rtx src = operands[1]; + rtx tmp = gen_reg_rtx (DImode); + rtx dest_di = gen_lowpart (DImode, dest); + if (<MODE>mode == KFmode) - emit_insn (gen_signbitkf2_dm (operands[0], operands[1])); + emit_insn (gen_signbitkf2_dm (tmp, src)); else if (<MODE>mode == TFmode) - emit_insn (gen_signbittf2_dm (operands[0], operands[1])); + emit_insn (gen_signbittf2_dm (tmp, src)); else gcc_unreachable (); + + emit_insn (gen_lshrdi3 (dest_di, tmp, GEN_INT (63))); DONE; } operands[2] = gen_reg_rtx (DFmode); @@ -4805,6 +4812,66 @@ } }) +;; Optimize IEEE 128-bit signbit on 64-bit systems with direct move to avoid +;; multiple direct moves. If we used a SUBREG:DI of the _Float128 type, the +;; register allocator would typically move the entire _Float128 item to GPRs (2 +;; instructions on ISA 3.0, 3-4 instructions on ISA 2.07). +;; +;; After register allocation, if the _Float128 had originally been in GPRs, the +;; split allows the post reload phases to eliminate the move, and do the shift +;; directly with the register that contains the signbit. 
+(define_insn_and_split "signbit<mode>2_dm" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r") + (unspec:DI [(match_operand:SIGNBIT 1 "gpc_reg_operand" "wa,r")] + UNSPEC_SIGNBIT))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "@ + mfvsrd %0,%x1 + #" + "&& reload_completed && int_reg_operand (operands[1], <MODE>mode)" + [(set (match_dup 0) + (match_dup 2))] +{ + operands[2] = gen_highpart (DImode, operands[1]); +} + [(set_attr "type" "mftgpr,*")]) + +;; Optimize IEEE 128-bit signbit to avoid loading the value into a vector +;; register and then doing a direct move if the value comes from memory. On +;; little endian, we have to load the 2nd double-word to get the sign bit. +(define_insn_and_split "*signbit<mode>2_dm_mem" + [(set (match_operand:DI 0 "gpc_reg_operand" "=b") + (unspec:DI [(match_operand:SIGNBIT 1 "memory_operand" "m")] + UNSPEC_SIGNBIT))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& 1" + [(set (match_dup 0) + (match_dup 2))] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + rtx addr = XEXP (src, 0); + + if (WORDS_BIG_ENDIAN) + operands[2] = adjust_address (src, DImode, 0); + + else if (REG_P (addr) || SUBREG_P (addr)) + operands[2] = adjust_address (src, DImode, 8); + + else if (GET_CODE (addr) == PLUS && REG_P (XEXP (addr, 0)) + && CONST_INT_P (XEXP (addr, 1)) && mem_operand_gpr (src, DImode)) + operands[2] = adjust_address (src, DImode, 8); + + else + { + rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (DImode) : dest; + emit_insn (gen_rtx_SET (tmp, addr)); + operands[2] = change_address (src, DImode, + gen_rtx_PLUS (DImode, tmp, GEN_INT (8))); + } +}) + (define_expand "copysign<mode>3" [(set (match_dup 3) (abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" ""))) @@ -4834,54 +4901,6 @@ operands[5] = CONST0_RTX (<MODE>mode); }) -;; Optimize signbit on 64-bit systems with direct move to avoid doing the store -;; and load. 
-(define_insn_and_split "signbit<mode>2_dm" - [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r") - (unspec:SI - [(match_operand:SIGNBIT 1 "input_operand" "wa,m,r")] - UNSPEC_SIGNBIT))] - "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" - "#" - "&& reload_completed" - [(const_int 0)] -{ - rs6000_split_signbit (operands[0], operands[1]); - DONE; -} - [(set_attr "length" "8,8,4") - (set_attr "type" "mftgpr,load,integer")]) - -(define_insn_and_split "*signbit<mode>2_dm_<su>ext" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r") - (any_extend:DI - (unspec:SI - [(match_operand:SIGNBIT 1 "input_operand" "wa,m,r")] - UNSPEC_SIGNBIT)))] - "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" - "#" - "&& reload_completed" - [(const_int 0)] -{ - rs6000_split_signbit (operands[0], operands[1]); - DONE; -} - [(set_attr "length" "8,8,4") - (set_attr "type" "mftgpr,load,integer")]) - -;; TARGET_MODES_TIEABLE_P doesn't allow DImode to be tied with the various -;; floating point types, which makes normal SUBREG's problematical. Instead -;; use a special pattern to avoid using a normal movdi. -(define_insn "signbit<mode>2_dm2" - [(set (match_operand:DI 0 "gpc_reg_operand" "=r") - (unspec:DI [(match_operand:SIGNBIT 1 "gpc_reg_operand" "wa") - (const_int 0)] - UNSPEC_SIGNBIT))] - "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" - "mfvsrd %0,%x1" - [(set_attr "type" "mftgpr")]) - - ;; Use an unspec rather providing an if-then-else in RTL, to prevent the ;; compiler from optimizing -0.0 (define_insn "copysign<mode>3_fcpsgn" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 03b9c1a..485079a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-01-22 Michael Meissner <meissner@linux.vnet.ibm.com> + + PR target/83862 + * gcc.target/powerpc/pr83862.c: New test. 
+ 2018-01-22 Carl Love <cel@us.ibm.com> * gcc.target/powerpc/powerpc.exp: Add torture tests for builtins-4-runnable.c, builtins-6-runnable.c, diff --git a/gcc/testsuite/gcc.target/powerpc/pr83862.c b/gcc/testsuite/gcc.target/powerpc/pr83862.c new file mode 100644 index 0000000..3cadb57 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr83862.c @@ -0,0 +1,34 @@ +/* PR target/83862.c */ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-require-effective-target ppc_float128_sw } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mpower8-vector -O2 -mabi=ieeelongdouble -Wno-psabi" } */ + +/* On little endian systems, optimizing signbit of IEEE 128-bit values from + memory could abort if the memory address was indexed (reg+reg). The + optimization is only on 64-bit machines with direct move. + + Compile with -g -O2 -mabi=ieeelongdouble -Wno-psabi. */ + +#ifndef TYPE +#define TYPE long double +#endif + +int sbr (TYPE a) { return __builtin_signbit (a); } +int sbm (TYPE *a) { return __builtin_signbit (*a); } +int sbo (TYPE *a) { return __builtin_signbit (a[4]); } +int sbi (TYPE *a, unsigned long n) { return __builtin_signbit (a[n]); } +void sbs (int *p, TYPE a) { *p = __builtin_signbit (a); } + +/* On big endian systems, this will generate 2 LDs and 1 LDX, while on + little endian systems, this will generate 3 LDs and an ADD. */ + +/* { dg-final { scan-assembler-times {\mldx?\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mmfvsrd\M} 2 } } */ +/* { dg-final { scan-assembler-times {\msrdi\M} 5 } } */ +/* { dg-final { scan-assembler-not {\mmfvsrld\M} } } */ +/* { dg-final { scan-assembler-not {\mstxvx?\M} } } */ +/* { dg-final { scan-assembler-not {\mstxvw4x\M} } } */ +/* { dg-final { scan-assembler-not {\mstxvd2x\M} } } */ +/* { dg-final { scan-assembler-not {\mstvx\M} } } */ + |