about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- gcc/ChangeLog 5
-rw-r--r-- gcc/config/sh/sh-mem.cc 203
-rw-r--r-- gcc/testsuite/ChangeLog 4
-rw-r--r-- gcc/testsuite/gcc.target/sh/cmpstrn.c 13
4 files changed, 137 insertions, 88 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5c1769b..dda0eb3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2014-01-13 Christian Bruel <christian.bruel@st.com>
+
+ * config/sh/sh-mem.cc (sh_expand_cmpnstr): Unroll small sizes and
+ optimize non-constant lengths.
+
2014-01-13 Jakub Jelinek <jakub@redhat.com>
PR libgomp/59194
diff --git a/gcc/config/sh/sh-mem.cc b/gcc/config/sh/sh-mem.cc
index 3dca5f0..e29ff77 100644
--- a/gcc/config/sh/sh-mem.cc
+++ b/gcc/config/sh/sh-mem.cc
@@ -324,7 +324,6 @@ sh_expand_cmpnstr (rtx *operands)
rtx addr2 = operands[2];
rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
- rtx tmp0 = gen_reg_rtx (SImode);
rtx tmp1 = gen_reg_rtx (SImode);
rtx tmp2 = gen_reg_rtx (SImode);
@@ -334,98 +333,132 @@ sh_expand_cmpnstr (rtx *operands)
rtx L_end_loop_byte = gen_label_rtx ();
rtx len = force_reg (SImode, operands[3]);
- int constp = (CONST_INT_P (operands[3]));
- int bytes = (constp ? INTVAL (operands[3]) : 0);
- int witers = bytes / 4;
+ int constp = CONST_INT_P (operands[3]);
- /* We could still loop on a register count. Not found very
- convincing to optimize yet. */
- if (! constp)
- return false;
-
- if (witers > 1)
+ /* Constant length: loop word-wise on a register count when long enough, then unroll the remaining bytes. */
+ if (constp)
{
- rtx L_loop_long = gen_label_rtx ();
- rtx L_end_loop_long = gen_label_rtx ();
+ rtx tmp0 = gen_reg_rtx (SImode);
rtx tmp3 = gen_reg_rtx (SImode);
rtx lenw = gen_reg_rtx (SImode);
- int align = INTVAL (operands[4]);
-
- emit_move_insn (tmp0, const0_rtx);
-
- if (align < 4)
- {
- emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
- emit_insn (gen_tstsi_t (GEN_INT (3), tmp1));
- jump = emit_jump_insn (gen_branch_false (L_loop_byte));
- add_int_reg_note (jump, REG_BR_PROB, prob_likely);
- }
-
- addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
- addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
-
- /* word count. Do we have iterations ? */
- emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
-
- /*start long loop. */
- emit_label (L_loop_long);
-
- /* tmp2 is aligned, OK to load. */
- emit_move_insn (tmp2, addr2);
- emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4));
-
- /* tmp1 is aligned, OK to load. */
- emit_move_insn (tmp1, addr1);
- emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4));
-
- /* Is there a 0 byte ? */
- emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));
-
- emit_insn (gen_cmpstr_t (tmp0, tmp3));
- jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
- add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
- emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
- jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
- add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
-
- if (TARGET_SH2)
- emit_insn (gen_dect (lenw, lenw));
- else
- {
- emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
- emit_insn (gen_tstsi_t (lenw, lenw));
- }
- jump = emit_jump_insn (gen_branch_false (L_loop_long));
- add_int_reg_note (jump, REG_BR_PROB, prob_likely);
-
- /* end loop. Reached max iterations. */
- if (bytes % 4 == 0)
- {
- /* Done. */
- jump = emit_jump_insn (gen_jump_compact (L_return));
- emit_barrier_after (jump);
- }
- else
- {
- /* Remaining bytes to read. */
- emit_move_insn (len, GEN_INT (bytes % 4));
- jump = emit_jump_insn (gen_jump_compact (L_loop_byte));
- emit_barrier_after (jump);
- }
-
- emit_label (L_end_loop_long);
-
- /* Remaining bytes to read. */
- emit_move_insn (len, GEN_INT (4));
+ rtx L_loop_long = gen_label_rtx ();
+ rtx L_end_loop_long = gen_label_rtx ();
+ rtx L_small = gen_label_rtx ();
- /* Found last word. Restart it byte per byte. */
- emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4));
- emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4));
+ int align = INTVAL (operands[4]);
+ int bytes = INTVAL (operands[3]);
+ int witers = bytes / 4;
+
+ if (witers > 1)
+ {
+ addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
+ addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
+
+ emit_move_insn (tmp0, const0_rtx);
+
+ if (align < 4)
+ {
+ emit_insn (gen_iorsi3 (tmp1, s1_addr, s2_addr));
+ emit_insn (gen_tstsi_t (GEN_INT (3), tmp1));
+ jump = emit_jump_insn (gen_branch_false (L_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+ }
+
+ /* Word count. Do we have iterations? */
+ emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
+
+ /* Start long loop. */
+ emit_label (L_loop_long);
+
+ /* tmp2 is aligned, OK to load. */
+ emit_move_insn (tmp2, addr2);
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
+ GET_MODE_SIZE (SImode)));
+
+ /* tmp1 is aligned, OK to load. */
+ emit_move_insn (tmp1, addr1);
+ emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
+ GET_MODE_SIZE (SImode)));
+
+ /* Is there a 0 byte ? */
+ emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));
+
+ emit_insn (gen_cmpstr_t (tmp0, tmp3));
+ jump = emit_jump_insn (gen_branch_true (L_end_loop_long));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
+ jump = emit_jump_insn (gen_branch_false (L_end_loop_long));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ if (TARGET_SH2)
+ emit_insn (gen_dect (lenw, lenw));
+ else
+ {
+ emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
+ emit_insn (gen_tstsi_t (lenw, lenw));
+ }
+
+ jump = emit_jump_insn (gen_branch_false (L_loop_long));
+ add_int_reg_note (jump, REG_BR_PROB, prob_likely);
+
+ /* end loop. Reached max iterations. */
+ if (bytes % 4 == 0)
+ {
+ /* Done. */
+ jump = emit_jump_insn (gen_jump_compact (L_return));
+ emit_barrier_after (jump);
+ }
+ else
+ {
+ /* Remaining bytes to read. */
+ jump = emit_jump_insn (gen_jump_compact (L_small));
+ emit_barrier_after (jump);
+ }
+
+ emit_label (L_end_loop_long);
+
+ /* Found last word. Restart it byte per byte. */
+ bytes = 4;
+ emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
+ -GET_MODE_SIZE (SImode)));
+ emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
+ -GET_MODE_SIZE (SImode)));
+ }
+
+ emit_label (L_small);
+
+ gcc_assert (bytes <= 7);
+
+ addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
+ addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
+
+ while (bytes--)
+ {
+ emit_insn (gen_extendqisi2 (tmp1, addr1));
+ emit_insn (gen_extendqisi2 (tmp2, addr2));
+
+ emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
+ jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
+ if (flag_delayed_branch)
+ emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
+ jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
+ add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
+
+ addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
+ addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
+ }
+
+ jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
+ emit_barrier_after (jump);
}
- addr1 = adjust_address (addr1, QImode, 0);
- addr2 = adjust_address (addr2, QImode, 0);
+ addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
+ addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
emit_label (L_loop_byte);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c6238e0..b22f872 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2014-01-13 Christian Bruel <christian.bruel@st.com>
+
+ * gcc.target/sh/cmpstrn.c: New case.
+
2014-01-13 Jakub Jelinek <jakub@redhat.com>
* gcc.dg/vect/vect-simd-clone-10.c: Add dg-do run.
diff --git a/gcc/testsuite/gcc.target/sh/cmpstrn.c b/gcc/testsuite/gcc.target/sh/cmpstrn.c
index bc72b2c..3a1d0d1 100644
--- a/gcc/testsuite/gcc.target/sh/cmpstrn.c
+++ b/gcc/testsuite/gcc.target/sh/cmpstrn.c
@@ -6,16 +6,23 @@
/* { dg-final { scan-assembler-not "jmp" } } */
/* { dg-final { scan-assembler-times "cmp/str" 1 } } */
-/* Test that the cmp/str loop is optimized out. */
-test01(const char *s1, const char *s2, int n)
+/* Test that cmp/str is not used for small lengths. */
+test01(const char *s1)
{
return __builtin_strncmp (s1, "abcde", 3);
}
/* Test that the cmp/str loop is used. */
-test02(const char *s1, const char *s2, int n)
+test02(const char *s1)
{
return __builtin_strncmp (s1, "abcdefghi", 8);
}
+/* Test that no call is generated. */
+test03(const char *s1, int n)
+{
+ return __builtin_strncmp (s1, "abcde", n);
+}
+
+