diff options
-rw-r--r-- | gcc/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/config/sh/sh-mem.cc | 203 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/sh/cmpstrn.c | 13 |
4 files changed, 137 insertions, 88 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5c1769b..dda0eb3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2014-01-13 Christian Bruel <christian.bruel@st.com> + + * config/sh/sh-mem.cc (sh_expand_cmpnstr): Unroll small sizes and + optimized non constant lengths. + 2014-01-13 Jakub Jelinek <jakub@redhat.com> PR libgomp/59194 diff --git a/gcc/config/sh/sh-mem.cc b/gcc/config/sh/sh-mem.cc index 3dca5f0..e29ff77 100644 --- a/gcc/config/sh/sh-mem.cc +++ b/gcc/config/sh/sh-mem.cc @@ -324,7 +324,6 @@ sh_expand_cmpnstr (rtx *operands) rtx addr2 = operands[2]; rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0)); rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0)); - rtx tmp0 = gen_reg_rtx (SImode); rtx tmp1 = gen_reg_rtx (SImode); rtx tmp2 = gen_reg_rtx (SImode); @@ -334,98 +333,132 @@ sh_expand_cmpnstr (rtx *operands) rtx L_end_loop_byte = gen_label_rtx (); rtx len = force_reg (SImode, operands[3]); - int constp = (CONST_INT_P (operands[3])); - int bytes = (constp ? INTVAL (operands[3]) : 0); - int witers = bytes / 4; + int constp = CONST_INT_P (operands[3]); - /* We could still loop on a register count. Not found very - convincing to optimize yet. */ - if (! constp) - return false; - - if (witers > 1) + /* Loop on a register count. */ + if (constp) { - rtx L_loop_long = gen_label_rtx (); - rtx L_end_loop_long = gen_label_rtx (); + rtx tmp0 = gen_reg_rtx (SImode); rtx tmp3 = gen_reg_rtx (SImode); rtx lenw = gen_reg_rtx (SImode); - int align = INTVAL (operands[4]); - - emit_move_insn (tmp0, const0_rtx); - - if (align < 4) - { - emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr)); - emit_insn (gen_tstsi_t (GEN_INT (3), tmp1)); - jump = emit_jump_insn (gen_branch_false (L_loop_byte)); - add_int_reg_note (jump, REG_BR_PROB, prob_likely); - } - - addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0); - addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0); - - /* word count. Do we have iterations ? */ - emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2))); - - /*start long loop. */ - emit_label (L_loop_long); - - /* tmp2 is aligned, OK to load. */ - emit_move_insn (tmp2, addr2); - emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4)); - - /* tmp1 is aligned, OK to load. */ - emit_move_insn (tmp1, addr1); - emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4)); - - /* Is there a 0 byte ? */ - emit_insn (gen_andsi3 (tmp3, tmp2, tmp1)); - - emit_insn (gen_cmpstr_t (tmp0, tmp3)); - jump = emit_jump_insn (gen_branch_true (L_end_loop_long)); - add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); - emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); - jump = emit_jump_insn (gen_branch_false (L_end_loop_long)); - add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); - - if (TARGET_SH2) - emit_insn (gen_dect (lenw, lenw)); - else - { - emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1))); - emit_insn (gen_tstsi_t (lenw, lenw)); - } - jump = emit_jump_insn (gen_branch_false (L_loop_long)); - add_int_reg_note (jump, REG_BR_PROB, prob_likely); - - /* end loop. Reached max iterations. */ - if (bytes % 4 == 0) - { - /* Done. */ - jump = emit_jump_insn (gen_jump_compact (L_return)); - emit_barrier_after (jump); - } - else - { - /* Remaining bytes to read. */ - emit_move_insn (len, GEN_INT (bytes % 4)); - jump = emit_jump_insn (gen_jump_compact (L_loop_byte)); - emit_barrier_after (jump); - } - - emit_label (L_end_loop_long); - - /* Remaining bytes to read. */ - emit_move_insn (len, GEN_INT (4)); + rtx L_loop_long = gen_label_rtx (); + rtx L_end_loop_long = gen_label_rtx (); + rtx L_small = gen_label_rtx (); - /* Found last word. Restart it byte per byte. */ - emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4)); - emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4)); + int align = INTVAL (operands[4]); + int bytes = INTVAL (operands[3]); + int witers = bytes / 4; + + if (witers > 1) + { + addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0); + addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0); + + emit_move_insn (tmp0, const0_rtx); + + if (align < 4) + { + emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr)); + emit_insn (gen_tstsi_t (GEN_INT (3), tmp1)); + jump = emit_jump_insn (gen_branch_false (L_loop_byte)); + add_int_reg_note (jump, REG_BR_PROB, prob_likely); + } + + /* word count. Do we have iterations ? */ + emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2))); + + /*start long loop. */ + emit_label (L_loop_long); + + /* tmp2 is aligned, OK to load. */ + emit_move_insn (tmp2, addr2); + emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, + GET_MODE_SIZE (SImode))); + + /* tmp1 is aligned, OK to load. */ + emit_move_insn (tmp1, addr1); + emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, + GET_MODE_SIZE (SImode))); + + /* Is there a 0 byte ? */ + emit_insn (gen_andsi3 (tmp3, tmp2, tmp1)); + + emit_insn (gen_cmpstr_t (tmp0, tmp3)); + jump = emit_jump_insn (gen_branch_true (L_end_loop_long)); + add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); + + emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); + jump = emit_jump_insn (gen_branch_false (L_end_loop_long)); + add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); + + if (TARGET_SH2) + emit_insn (gen_dect (lenw, lenw)); + else + { + emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1))); + emit_insn (gen_tstsi_t (lenw, lenw)); + } + + jump = emit_jump_insn (gen_branch_false (L_loop_long)); + add_int_reg_note (jump, REG_BR_PROB, prob_likely); + + /* end loop. Reached max iterations. */ + if (bytes % 4 == 0) + { + /* Done. */ + jump = emit_jump_insn (gen_jump_compact (L_return)); + emit_barrier_after (jump); + } + else + { + /* Remaining bytes to read. */ + jump = emit_jump_insn (gen_jump_compact (L_small)); + emit_barrier_after (jump); + } + + emit_label (L_end_loop_long); + + /* Found last word. Restart it byte per byte. */ + bytes = 4; + emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, + -GET_MODE_SIZE (SImode))); + emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, + -GET_MODE_SIZE (SImode))); + } + + emit_label (L_small); + + gcc_assert (bytes <= 7); + + addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0); + addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0); + + while (bytes--) + { + emit_insn (gen_extendqisi2 (tmp1, addr1)); + emit_insn (gen_extendqisi2 (tmp2, addr2)); + + emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx)); + jump = emit_jump_insn (gen_branch_true (L_end_loop_byte)); + add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); + + emit_insn (gen_cmpeqsi_t (tmp1, tmp2)); + if (flag_delayed_branch) + emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2))); + jump = emit_jump_insn (gen_branch_false (L_end_loop_byte)); + add_int_reg_note (jump, REG_BR_PROB, prob_unlikely); + + addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode)); + addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode)); + } + + jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte)); + emit_barrier_after (jump); } - addr1 = adjust_address (addr1, QImode, 0); - addr2 = adjust_address (addr2, QImode, 0); + addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0); + addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0); emit_label (L_loop_byte); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c6238e0..b22f872 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2014-01-13 Christian Bruel <christian.bruel@st.com> + + * gcc.target/sh/cmpstrn.c: New case. + 2014-01-13 Jakub Jelinek <jakub@redhat.com> * gcc.dg/vect/vect-simd-clone-10.c: Add dg-do run. diff --git a/gcc/testsuite/gcc.target/sh/cmpstrn.c b/gcc/testsuite/gcc.target/sh/cmpstrn.c index bc72b2c..3a1d0d1 100644 --- a/gcc/testsuite/gcc.target/sh/cmpstrn.c +++ b/gcc/testsuite/gcc.target/sh/cmpstrn.c @@ -6,16 +6,23 @@ /* { dg-final { scan-assembler-not "jmp" } } */ /* { dg-final { scan-assembler-times "cmp/str" 1 } } */ -/* Test that the cmp/str loop is optimized out. */ -test01(const char *s1, const char *s2, int n) +/* Test that cmp/str is not used for small lengths. */ +test01(const char *s1) { return __builtin_strncmp (s1, "abcde", 3); } /* Test that the cmp/str loop is used. */ -test02(const char *s1, const char *s2, int n) +test02(const char *s1) { return __builtin_strncmp (s1, "abcdefghi", 8); } +/* Test that no call is generated */ +test03(const char *s1, int n) +{ + return __builtin_strncmp (s1, "abcde", n); +} + + |