diff options
author | Oleg Endo <olegendo@gcc.gnu.org> | 2012-11-06 11:55:43 +0000 |
---|---|---|
committer | Oleg Endo <olegendo@gcc.gnu.org> | 2012-11-06 11:55:43 +0000 |
commit | 0f9a3fd393aeb363bafb9114a6df2fe5a139d3f5 (patch) | |
tree | 01a8b85e730cf6de97aa2c8b321737ee681c8337 /gcc | |
parent | ee2ba85639333f03774668c5a4b25d7ae08b1036 (diff) | |
download | gcc-0f9a3fd393aeb363bafb9114a6df2fe5a139d3f5.zip gcc-0f9a3fd393aeb363bafb9114a6df2fe5a139d3f5.tar.gz gcc-0f9a3fd393aeb363bafb9114a6df2fe5a139d3f5.tar.bz2 |
re PR target/54089 ([SH] Refactor shift patterns)
PR target/54089
* config/sh/sh.c (and_xor_ior_costs, addsubcosts): Double the costs for
ops larger than SImode.
* config/sh/sh.md (rotcl, *rotcl): New insns and splits.
(ashldi3_k): Convert to insn_and_split and use new rotcl insn.
PR target/54089
* gcc.target/sh/pr54089-8.c: New.
* gcc.target/sh/pr54089-9.c: New.
From-SVN: r193236
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/config/sh/sh.c | 30 | ||||
-rw-r--r-- | gcc/config/sh/sh.md | 217 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/sh/pr54089-8.c | 203 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/sh/pr54089-9.c | 63 |
6 files changed, 509 insertions, 18 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 255ce39..b22409ec 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2012-11-06 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/54089 + * config/sh/sh.c (and_xor_ior_costs, addsubcosts): Double the costs for + ops larger than SImode. + * config/sh/sh.md (rotcl, *rotcl): New insns and splits. + (ashldi3_k): Convert to insn_and_split and use new rotcl insn. + 2012-11-06 Vladimir Yakovlev <vladimir.b.yakovlev@intel.com> * config/i386/i386-protos.h (emit_i387_cw_initialization): Deleted. diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c index 1826a854..ae671a2 100644 --- a/gcc/config/sh/sh.c +++ b/gcc/config/sh/sh.c @@ -3224,14 +3224,18 @@ shiftcosts (rtx x) static inline int and_xor_ior_costs (rtx x, int code) { - int i; + /* On SH1-4 we have only max. SImode operations. + Double the cost for modes > SImode. */ + const int cost_scale = !TARGET_SHMEDIA + && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD + ? 2 : 1; /* A logical operation with two registers is a single cycle instruction. */ if (!CONST_INT_P (XEXP (x, 1))) - return 1; + return 1 * cost_scale; - i = INTVAL (XEXP (x, 1)); + int i = INTVAL (XEXP (x, 1)); if (TARGET_SHMEDIA) { @@ -3244,19 +3248,19 @@ and_xor_ior_costs (rtx x, int code) /* These constants are single cycle extu.[bw] instructions. */ if ((i == 0xff || i == 0xffff) && code == AND) - return 1; + return 1 * cost_scale; /* Constants that can be used in an instruction as an immediate are a single cycle, but this requires r0, so make it a little more expensive. */ if (CONST_OK_FOR_K08 (i)) - return 2; + return 2 * cost_scale; /* Constants that can be loaded with a mov immediate need one more cycle. This case is probably unnecessary. */ if (CONST_OK_FOR_I08 (i)) - return 2; + return 2 * cost_scale; /* Any other constant requires an additional 2 cycle pc-relative load. This case is probably unnecessary. 
*/ - return 3; + return 3 * cost_scale; } /* Return the cost of an addition or a subtraction. */ @@ -3264,15 +3268,21 @@ and_xor_ior_costs (rtx x, int code) static inline int addsubcosts (rtx x) { + /* On SH1-4 we have only max. SImode operations. + Double the cost for modes > SImode. */ + const int cost_scale = !TARGET_SHMEDIA + && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD + ? 2 : 1; + /* Adding a register is a single cycle insn. */ if (REG_P (XEXP (x, 1)) || GET_CODE (XEXP (x, 1)) == SUBREG) - return 1; + return 1 * cost_scale; /* Likewise for small constants. */ if (CONST_INT_P (XEXP (x, 1)) && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1)))) - return 1; + return 1 * cost_scale; if (TARGET_SHMEDIA) switch (GET_CODE (XEXP (x, 1))) @@ -3297,7 +3307,7 @@ addsubcosts (rtx x) /* Any other constant requires a 2 cycle pc-relative load plus an addition. */ - return 3; + return 3 * cost_scale; } /* Return the cost of a multiply. */ diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index a002304..3c92265 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -4006,10 +4006,11 @@ label: FAIL; }) -;; The rotcr insn is used primarily in DImode right shifts (arithmetic -;; and logical). It can also be used to implement things like +;; The rotcr and rotcl insns are used primarily in DImode shifts by one. 
+;; They can also be used to implement things like ;; bool t = a == b; -;; int x = (y >> 1) | (t << 31); +;; int x0 = (y >> 1) | (t << 31); // rotcr +;; int x1 = (y << 1) | t; // rotcl (define_insn "rotcr" [(set (match_operand:SI 0 "arith_reg_dest" "=r") (ior:SI (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") @@ -4022,6 +4023,17 @@ label: "rotcr %0" [(set_attr "type" "arith")]) +(define_insn "rotcl" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") + (const_int 1)) + (match_operand:SI 2 "t_reg_operand"))) + (set (reg:SI T_REG) + (lshiftrt:SI (match_dup 1) (const_int 31)))] + "TARGET_SH1" + "rotcl %0" + [(set_attr "type" "arith")]) + ;; Simplified rotcr version for combine, which allows arbitrary shift ;; amounts for the reg. If the shift amount is '1' rotcr can be used ;; directly. Otherwise we have to insert a shift in between. @@ -4121,6 +4133,160 @@ label: (ashift:SI (match_dup 1) (const_int 31)))) (clobber (reg:SI T_REG))])]) +;; Basically the same as the rotcr pattern above, but for rotcl. +;; FIXME: Fold copy pasted split code for rotcr and rotcl. +(define_insn_and_split "*rotcl" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")) + (and:SI (match_operand:SI 3 "arith_reg_or_t_reg_operand") + (const_int 1)))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + gcc_assert (INTVAL (operands[2]) > 0); + + if (INTVAL (operands[2]) > 1) + { + const rtx shift_count = GEN_INT (INTVAL (operands[2]) - 1); + rtx prev_set_t_insn = NULL_RTX; + rtx tmp_t_reg = NULL_RTX; + + /* If we're going to emit a shift sequence that clobbers the T_REG, + try to find the previous insn that sets the T_REG and emit the + shift insn before that insn, to remove the T_REG dependency. 
+ If the insn that sets the T_REG cannot be found, store the T_REG + in a temporary reg and restore it after the shift. */ + if (sh_ashlsi_clobbers_t_reg_p (shift_count) + && ! sh_dynamicalize_shift_p (shift_count)) + { + prev_set_t_insn = prev_nonnote_insn_bb (curr_insn); + + /* Skip the nott insn, which was probably inserted by the splitter + of *rotcl_neg_t. Don't use one of the recog functions + here during insn splitting, since that causes problems in later + passes. */ + if (prev_set_t_insn != NULL_RTX) + { + rtx pat = PATTERN (prev_set_t_insn); + if (GET_CODE (pat) == SET + && t_reg_operand (XEXP (pat, 0), SImode) + && negt_reg_operand (XEXP (pat, 1), SImode)) + prev_set_t_insn = prev_nonnote_insn_bb (prev_set_t_insn); + } + + if (! (prev_set_t_insn != NULL_RTX + && reg_set_p (get_t_reg_rtx (), prev_set_t_insn) + && ! reg_referenced_p (get_t_reg_rtx (), + PATTERN (prev_set_t_insn)))) + { + prev_set_t_insn = NULL_RTX; + tmp_t_reg = gen_reg_rtx (SImode); + emit_insn (gen_move_insn (tmp_t_reg, get_t_reg_rtx ())); + } + } + + rtx shift_result = gen_reg_rtx (SImode); + rtx shift_insn = gen_ashlsi3 (shift_result, operands[1], shift_count); + operands[1] = shift_result; + + /* Emit the shift insn before the insn that sets T_REG, if possible. */ + if (prev_set_t_insn != NULL_RTX) + emit_insn_before (shift_insn, prev_set_t_insn); + else + emit_insn (shift_insn); + + /* Restore T_REG if it has been saved before. */ + if (tmp_t_reg != NULL_RTX) + emit_insn (gen_cmpgtsi_t (tmp_t_reg, const0_rtx)); + } + + /* For the rotcl insn to work, operands[3] must be in T_REG. + If it is not we can get it there by shifting it right one bit. + In this case T_REG is not an input for this insn, thus we don't have to + pay attention as of where to insert the shlr insn. */ + if (! t_reg_operand (operands[3], SImode)) + { + /* We don't care about the shifted result here, only the T_REG. 
*/ + emit_insn (gen_shlr (gen_reg_rtx (SImode), operands[3])); + operands[3] = get_t_reg_rtx (); + } + + emit_insn (gen_rotcl (operands[0], operands[1], operands[3])); + DONE; +}) + +;; rotcl combine pattern variations +(define_insn_and_split "*rotcl" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")) + (match_operand:SI 3 "t_reg_operand"))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 1) (match_dup 2)) + (and:SI (match_dup 3) (const_int 1)))) + (clobber (reg:SI T_REG))])]) + +(define_insn_and_split "*rotcl" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (and:SI (match_operand:SI 1 "arith_reg_or_t_reg_operand") + (const_int 1)) + (ashift:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "const_int_operand")))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 2) (match_dup 3)) + (and:SI (match_dup 1) (const_int 1)))) + (clobber (reg:SI T_REG))])]) + +(define_insn_and_split "*rotcl" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (ashift:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")) + (lshiftrt:SI (match_operand:SI 3 "arith_reg_operand") + (const_int 31)))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 1) (match_dup 2)) + (and:SI (reg:SI T_REG) (const_int 1)))) + (clobber (reg:SI T_REG))])] +{ + /* We don't care about the result of the left shift, only the T_REG. 
*/ + emit_insn (gen_shll (gen_reg_rtx (SImode), operands[3])); +}) + +(define_insn_and_split "*rotcl" + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (lshiftrt:SI (match_operand:SI 3 "arith_reg_operand") + (const_int 31)) + (ashift:SI (match_operand:SI 1 "arith_reg_operand") + (match_operand:SI 2 "const_int_operand")))) + (clobber (reg:SI T_REG))] + "TARGET_SH1" + "#" + "&& can_create_pseudo_p ()" + [(parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 1) (match_dup 2)) + (and:SI (reg:SI T_REG) (const_int 1)))) + (clobber (reg:SI T_REG))])] +{ + /* We don't care about the result of the left shift, only the T_REG. */ + emit_insn (gen_shll (gen_reg_rtx (SImode), operands[3])); +}) + ;; rotcr combine bridge pattern which will make combine try out more ;; complex patterns. (define_insn_and_split "*rotcr" @@ -4189,6 +4355,35 @@ label: emit_insn (gen_nott (get_t_reg_rtx ())); }) +;; rotcl combine patterns for rotating in the negated T_REG value. +;; For some strange reason these have to be specified as splits which combine +;; will pick up. If they are specified as insn_and_split like the +;; *rotcr_neg_t patterns above, combine would recognize them successfully +;; but not emit them on non-SH2A targets. 
+(define_split + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (match_operand:SI 1 "negt_reg_operand") + (ashift:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "const_int_operand"))))] + "TARGET_SH1" + [(set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1))) + (parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 2) (match_dup 3)) + (and:SI (reg:SI T_REG) (const_int 1)))) + (clobber (reg:SI T_REG))])]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_dest") + (ior:SI (ashift:SI (match_operand:SI 2 "arith_reg_operand") + (match_operand:SI 3 "const_int_operand")) + (match_operand:SI 1 "negt_reg_operand")))] + "TARGET_SH1" + [(set (reg:SI T_REG) (xor:SI (reg:SI T_REG) (const_int 1))) + (parallel [(set (match_dup 0) + (ior:SI (ashift:SI (match_dup 2) (match_dup 3)) + (and:SI (reg:SI T_REG) (const_int 1)))) + (clobber (reg:SI T_REG))])]) + ;; . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ;; SImode shift left @@ -4480,16 +4675,22 @@ label: DONE; }) -;; This should be an define_insn_and_split. -(define_insn "ashldi3_k" +(define_insn_and_split "ashldi3_k" [(set (match_operand:DI 0 "arith_reg_dest" "=r") (ashift:DI (match_operand:DI 1 "arith_reg_operand" "0") (const_int 1))) (clobber (reg:SI T_REG))] "TARGET_SH1" - "shll %R0\;rotcl %S0" - [(set_attr "length" "4") - (set_attr "type" "arith")]) + "#" + "&& reload_completed" + [(const_int 0)] +{ + rtx high = gen_highpart (SImode, operands[0]); + rtx low = gen_lowpart (SImode, operands[0]); + emit_insn (gen_shll (low, low)); + emit_insn (gen_rotcl (high, high, get_t_reg_rtx ())); + DONE; +}) (define_insn "ashldi3_media" [(set (match_operand:DI 0 "arith_reg_dest" "=r,r") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2108e77..9ff8500 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2012-11-06 Oleg Endo <olegendo@gcc.gnu.org> + + PR target/54089 + * gcc.target/sh/pr54089-8.c: New. 
+ * gcc.target/sh/pr54089-9.c: New. + 2012-11-06 Vladimir Yakovlev <vladimir.b.yakovlev@intel.com> * gcc.target/i386/avx-vzeroupper-5.c: Changed scan-assembler-times. diff --git a/gcc/testsuite/gcc.target/sh/pr54089-8.c b/gcc/testsuite/gcc.target/sh/pr54089-8.c new file mode 100644 index 0000000..fa9e8f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/sh/pr54089-8.c @@ -0,0 +1,203 @@ +/* Check that the rotcl instruction is generated. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */ +/* { dg-final { scan-assembler-times "rotcl" 28 } } */ + +typedef char bool; + +long long +test_00 (long long a) +{ + return a << 1; +} + +unsigned int +test_01 (unsigned int a, int b, int c) +{ + bool r = b == c; + return ((a << 1) | r); +} + +unsigned int +test_02 (unsigned int a, int b, int c) +{ + bool r = b == c; + return ((a << 2) | r); +} + +unsigned int +test_03 (unsigned int a, int b, int c) +{ + bool r = b == c; + return ((a << 3) | r); +} + +unsigned int +test_04 (unsigned int a, int b, int c) +{ + bool r = b == c; + return ((a << 4) | r); +} + +unsigned int +test_05 (unsigned int a, int b, int c) +{ + bool r = b == c; + return ((a << 5) | r); +} + +unsigned int +test_06 (unsigned int a, int b, int c) +{ + bool r = b == c; + return ((a << 6) | r); +} + +unsigned int +test_07 (unsigned int a, int b, int c) +{ + bool r = b == c; + return ((a << 7) | r); +} + +unsigned int +test_08 (unsigned int a, int b, int c) +{ + bool r = b == c; + return ((a << 8) | r); +} + +unsigned int +test_09 (unsigned int a, int b, int c) +{ + bool r = b == c; + return ((a << 31) | r); +} + +unsigned int +test_10 (unsigned int a, int b) +{ + /* 1x shlr, 1x rotcl */ + return (a << 1) | (b & 1); +} + +unsigned int +test_11 (unsigned int a, int b) +{ + /* 1x shlr, 1x rotcl (+1x add as shll) */ + return (a << 2) | (b & 1); +} + +unsigned int +test_12 (unsigned int a, int b) +{ + /* 1x shlr, 1x shll2, 1x rotcl */ + return (a 
<< 3) | (b & 1); +} + +unsigned int +test_13 (unsigned int a, int b) +{ + /* 1x shll, 1x rotcl */ + bool r = b < 0; + return (a << 1) | r; +} + +unsigned int +test_14 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a << 1) | r); +} + +unsigned int +test_15 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a << 11) | r); +} + +unsigned int +test_16 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a << 3) | r); +} + +unsigned int +test_17 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a << 4) | r); +} + +unsigned int +test_18 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a << 5) | r); +} + +unsigned int +test_19 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a << 6) | r); +} + +unsigned int +test_20 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a << 7) | r); +} + +unsigned int +test_21 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a << 8) | r); +} + +unsigned int +test_22 (unsigned int a, int b, int c) +{ + bool r = b != c; + return ((a << 31) | r); +} + +unsigned int +test_23 (unsigned int a, int b, int c) +{ + /* 1x shll, 1x rotcl */ + return (a >> 31) | (b << 13); +} + +unsigned int +test_24 (unsigned int a, unsigned int b) +{ + /* 1x shll, 1x rotcl */ + return (a >> 31) | (b << 1); +} + +unsigned int +test_25 (unsigned int a, unsigned int b) +{ + /* 1x shll, 1x rotcl */ + return (a >> 31) | (b << 3); +} + +unsigned int +test_26 (unsigned int a, unsigned int b) +{ + /* 1x shll, 1x rotcl */ + return (b << 3) | (a >> 31); +} + +unsigned int +test_27 (unsigned int a, unsigned int b) +{ + /* 1x shlr, 1x rotcl */ + return (a << 1) | ((b >> 4) & 1); +} diff --git a/gcc/testsuite/gcc.target/sh/pr54089-9.c b/gcc/testsuite/gcc.target/sh/pr54089-9.c new file mode 100644 index 0000000..bd889dc --- /dev/null +++ b/gcc/testsuite/gcc.target/sh/pr54089-9.c @@ -0,0 +1,63 @@ +/* Check that the rotcl instruction is generated. 
*/ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */ +/* { dg-final { scan-assembler-times "rotcl" 4 } } */ +/* { dg-final { scan-assembler-not "movt" } } */ +/* { dg-final { scan-assembler-not "or\t" } } */ +/* { dg-final { scan-assembler-not "rotl" } } */ +/* { dg-final { scan-assembler-not "and" } } */ + +typedef char bool; + +int +test_00 (int* a, int* b) +{ + int i; + int r = 0; + for (i = 0; i < 16; ++i) + { + bool t = a[i] == b[i]; + r = (r << 1) | t; + } + return r; +} + +int +test_01 (int* a, int* b) +{ + int i; + int r = 0; + for (i = 0; i < 16; ++i) + { + bool t = a[i] == b[i]; + r = (r << 2) | t; + } + return r; +} + +int +test_02 (int* a, int* b) +{ + int i; + int r = 0; + for (i = 0; i < 16; ++i) + { + bool t = a[i] == b[i]; + r = (r << 3) | t; + } + return r; +} + +int +test_03 (const bool* a) +{ + int i; + int r = 0; + for (i = 0; i < 16; ++i) + { + bool t = a[i]; + r = (r << 1) | (t & 1); + } + return r; +} |