aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorStan Cox <coxs@gnu.org>1995-12-22 20:24:34 +0000
committerStan Cox <coxs@gnu.org>1995-12-22 20:24:34 +0000
commit628448b35cdad27bf9064a252265a8c7f634091f (patch)
treec6e0c47cd102c808d0b3d457260c9e9549160ea4 /gcc
parent3f803cd94cdc0aa589dc959da7d438437f992e04 (diff)
downloadgcc-628448b35cdad27bf9064a252265a8c7f634091f.zip
gcc-628448b35cdad27bf9064a252265a8c7f634091f.tar.gz
gcc-628448b35cdad27bf9064a252265a8c7f634091f.tar.bz2
(strlensi): New pattern.
From-SVN: r10831
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/i386.md76
1 files changed, 69 insertions, 7 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 70f6953..4363315 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4183,7 +4183,7 @@
(const_int 1)
(match_operand:SI 2 "general_operand" "r"))
(match_operand:SI 3 "const_int_operand" "n"))]
- "TARGET_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
+ "TARGET_USE_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
"*
{
CC_STATUS_INIT;
@@ -4201,7 +4201,7 @@
(xor:SI (ashift:SI (const_int 1)
(match_operand:SI 1 "general_operand" "r"))
(match_operand:SI 2 "general_operand" "0")))]
- "TARGET_BIT_TEST && GET_CODE (operands[1]) != CONST_INT"
+ "TARGET_USE_BIT_TEST && GET_CODE (operands[1]) != CONST_INT"
"*
{
CC_STATUS_INIT;
@@ -4214,7 +4214,7 @@
(xor:SI (match_operand:SI 1 "general_operand" "0")
(ashift:SI (const_int 1)
(match_operand:SI 2 "general_operand" "r"))))]
- "TARGET_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
+ "TARGET_USE_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
"*
{
CC_STATUS_INIT;
@@ -5742,17 +5742,58 @@
(define_expand "strlensi"
[(parallel [(set (match_dup 4)
(unspec:SI [(mem:BLK (match_operand:BLK 1 "general_operand" ""))
- (match_operand:QI 2 "register_operand" "")
+ (match_operand:QI 2 "immediate_operand" "")
(match_operand:SI 3 "immediate_operand" "")] 0))
(clobber (match_dup 1))])
(set (match_dup 5)
(not:SI (match_dup 4)))
(set (match_operand:SI 0 "register_operand" "")
- (minus:SI (match_dup 5)
- (const_int 1)))]
+ (plus:SI (match_dup 5)
+ (const_int -1)))]
""
"
{
+ if (TARGET_UNROLL_STRLEN && operands[2] == const0_rtx && optimize > 1)
+ {
+ rtx address;
+ rtx scratch;
+
+ /* well it seems that some optimizer does not combine a call like
+ foo(strlen(bar), strlen(bar));
+ when the move and the subtraction is done here. It does calculate
+ the length just once when these instructions are done inside of
+ output_strlen_unroll(). But I think since &bar[strlen(bar)] is
+ often used and I use one fewer register for the lifetime of
+ output_strlen_unroll() this is better. */
+ scratch = gen_reg_rtx (SImode);
+ address = force_reg (SImode, XEXP (operands[1], 0));
+
+ /* move address to scratch-register
+ this is done here because the i586 can do the following and
+ in the same cycle with the following move. */
+ if (GET_CODE (operands[3]) != CONST_INT || INTVAL (operands[3]) < 4)
+ emit_insn (gen_movsi (scratch, address));
+
+ emit_insn (gen_movsi (operands[0], address));
+
+ if(TARGET_USE_Q_REG)
+ emit_insn (gen_strlensi_unroll5 (operands[0],
+ operands[3],
+ scratch,
+ operands[0]));
+ else
+ emit_insn (gen_strlensi_unroll4 (operands[0],
+ operands[3],
+ scratch,
+ operands[0]));
+
+ /* gen_strlensi_unroll[45] returns the address of the zero
+ at the end of the string, like memchr(), so compute the
+ length by subtracting the startaddress. */
+ emit_insn (gen_subsi3 (operands[0], operands[0], address));
+ DONE;
+ }
+
operands[1] = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
operands[4] = gen_reg_rtx (SImode);
operands[5] = gen_reg_rtx (SImode);
@@ -5765,7 +5806,7 @@
(define_insn ""
[(set (match_operand:SI 0 "register_operand" "=&c")
(unspec:SI [(mem:BLK (match_operand:SI 1 "address_operand" "D"))
- (match_operand:QI 2 "register_operand" "a")
+ (match_operand:QI 2 "immediate_operand" "a")
(match_operand:SI 3 "immediate_operand" "i")] 0))
(clobber (match_dup 1))]
""
@@ -5779,3 +5820,24 @@
output_asm_insn (AS2 (mov%L0,%1,%0), xops);
return \"repnz\;scas%B2\";
}")
+
+;; the only difference between the following patterns is the register preference
+;; on a pentium using a q-register saves one clock cycle per 4 characters
+
+(define_insn "strlensi_unroll4"
+ [(set (match_operand:SI 0 "register_operand" "=r,r")
+ (unspec:SI [(mem:BLK (match_operand:SI 3 "register_operand" "0,0"))
+ (match_operand:SI 1 "immediate_operand" "i,i")
+ (match_operand:SI 2 "register_operand" "=&q,&!r")] 0))
+ (clobber (match_dup 2))]
+ "(TARGET_USE_ANY_REG && optimize > 1)"
+ "* return output_strlen_unroll (operands);")
+
+(define_insn "strlensi_unroll5"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec:SI [(mem:BLK (match_operand:SI 3 "register_operand" "0"))
+ (match_operand:SI 1 "immediate_operand" "i")
+ (match_operand:SI 2 "register_operand" "=&q")] 0))
+ (clobber (match_dup 2))]
+ "(TARGET_USE_Q_REG && optimize > 1)"
+ "* return output_strlen_unroll (operands);")