aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJan Hubicka <jh@suse.cz>2000-01-11 16:44:34 +0100
committerJan Hubicka <hubicka@gcc.gnu.org>2000-01-11 15:44:34 +0000
commit7c7ef435b14e947507abab8801e6dc2eb531dc85 (patch)
tree0c27101c36aedb9f44f8d169515ece811b136363 /gcc
parenta500c31b2e3b80e6553a5362a1bbf9ebb4d6b40c (diff)
downloadgcc-7c7ef435b14e947507abab8801e6dc2eb531dc85.zip
gcc-7c7ef435b14e947507abab8801e6dc2eb531dc85.tar.gz
gcc-7c7ef435b14e947507abab8801e6dc2eb531dc85.tar.bz2
i386.c (ix86_attr_length_default): Handle TYPE_STR and TYPE_CLD.
* i386.c (ix86_attr_length_default): Handle TYPE_STR and TYPE_CLD. * i386.h (FIRST_PSEUDO_REGISTER): Set to 20. (FIXED_REGISTERS): Set dirflag as fixed. (CALL_USED_REGISTERS): Set dirflag as used. (REG_ALLOC_ORDER): Set dirflag as last one. (DIRFLAG_REG): New macro. (MD_ASM_CLOBBERS): Asm clobber dirflag for backward compatibility. (HI_REGISTER_NAMES): Add dirflag. (DEBUG_PRINT_REG): Handle dirflag. * i386.md (type attribute): New cld and str types. (length_opcode attribute): Set cld and str to 1. (memory attribute): Set str to unknown - it is not clear from the patterns. (pent_np function unit): Prefixed string operations take at least 12 cycles; cld takes 2 cycles. (ppro_uops attribute): Str is "many" and cld is "few". (ppro_p0 unit): Handle cld here. (k6_alux unit): Handle cld and str types. (k6_load unit): It is occupied by str opcodes. (k6_store unit): It is occupied by str opcodes. (athlon_decode): Str is vector decoded. (athlon_ieu): Handle str and cld. (cld pattern): New. (movstrsi, clrstr, cmpstr, strlen expander): Emit cld instruction. (movstrsi_1, clrstrsi_1, cmpstrsi_1, strlensi_1, cmpstrsi_nz_1 insn): Do not output cld instruction. From-SVN: r31326
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog29
-rw-r--r--gcc/config/i386/i386.c3
-rw-r--r--gcc/config/i386/i386.h20
-rw-r--r--gcc/config/i386/i386.md76
4 files changed, 105 insertions, 23 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7231266..feb6f07 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,32 @@
+Tue Jan 11 16:26:47 MET 2000 Jan Hubicka <jh@suse.cz>
+
+ * i386.c (ix86_attr_length_default): Handle TYPE_STR and TYPE_CLD.
+ * i386.h (FIRST_PSEUDO_REGISTER): Set to 20.
+ (FIXED_REGISTERS): Set dirflag as fixed.
+ (CALL_USED_REGISTERS): Set dirflag as used.
+ (REG_ALLOC_ORDER): Set dirflag as last one.
+ (DIRFLAG_REG): New macro.
+ (MD_ASM_CLOBBERS): Asm clobber dirflag for backward compatibility.
+ (HI_REGISTER_NAMES): Add dirflag.
+ (DEBUG_PRINT_REG): Handle dirflag.
+ * i386.md (type attribute): New cld and str types.
+ (length_opcode attribute): Set cld and str to 1.
+ (memory attribute): Set str to unknown - it is not clear from the
+ patterns.
+ (pent_np function unit): Prefixed string operations take at least
+ 12 cycles; cld takes 2 cycles.
+ (ppro_uops attribute): Str is "many" and cld is "few".
+ (ppro_p0 unit): Handle cld here.
+ (k6_alux unit): Handle cld and str types.
+ (k6_load unit): It is occupied by str opcodes.
+ (k6_store unit): It is occupied by str opcodes.
+ (athlon_decode): Str is vector decoded.
+ (athlon_ieu): Handle str and cld.
+ (cld pattern): New.
+ (movstrsi, clrstr, cmpstr, strlen expander): Emit cld instruction
+ (movstrsi_1, clrstrsi_1, cmpstrsi_1, strlensi_1,
+ cmpstrsi_nz_1 insn): Do not output cld instruction
+
Tue Jan 11 06:14:39 2000 David Starner <dstarner98@aasaa.ofe.org>
* gcc.texi (G++ and GCC): Add Java and Chill.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 895cc8c..d35bafb 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5452,6 +5452,9 @@ ix86_attr_length_default (insn)
case TYPE_FCMOV:
case TYPE_IBR:
break;
+ case TYPE_STR:
+ case TYPE_CLD:
+ len = 0;
case TYPE_ALU1:
case TYPE_NEGNOT:
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2bde60e..ed7f849 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -610,7 +610,7 @@ extern int ix86_arch;
eliminated during reloading in favor of either the stack or frame
pointer. */
-#define FIRST_PSEUDO_REGISTER 19
+#define FIRST_PSEUDO_REGISTER 20
/* Number of hardware registers that go into the DWARF-2 unwind info.
If not defined, equals FIRST_PSEUDO_REGISTER. */
@@ -621,8 +621,8 @@ extern int ix86_arch;
and are not available for the register allocator.
On the 80386, the stack pointer is such, as is the arg pointer. */
#define FIXED_REGISTERS \
-/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr*/ \
-{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr, dir*/ \
+{ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 }
/* 1 for registers not available across function calls.
These must include the FIXED_REGISTERS and also any
@@ -632,8 +632,8 @@ extern int ix86_arch;
Aside from that, you can include as many other registers as you like. */
#define CALL_USED_REGISTERS \
-/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr*/ \
-{ 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,flags,fpsr, dir*/ \
+{ 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
/* Order in which to allocate registers. Each register must be
listed once, even those in FIXED_REGISTERS. List frame pointer
@@ -655,8 +655,8 @@ extern int ix86_arch;
generated by allocating edx first, so restore the 'natural' order of things. */
#define REG_ALLOC_ORDER \
-/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,cc,fpsr*/ \
-{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,17, 18 }
+/*ax,dx,cx,bx,si,di,bp,sp,st,st1,st2,st3,st4,st5,st6,st7,arg,cc,fpsr, dir*/ \
+{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,17, 18, 19 }
/* A C statement (sans semicolon) to choose the order in which to
allocate hard registers for pseudo-registers local to a basic
@@ -764,6 +764,7 @@ extern int ix86_arch;
#define FLAGS_REG 17
#define FPSR_REG 18
+#define DIRFLAG_REG 19
/* Value should be nonzero if functions must have frame pointers.
Zero means the frame pointer need not be set up (and parms
@@ -1067,6 +1068,7 @@ enum reg_class
do { \
(CLOBBERS) = tree_cons (NULL_TREE, build_string (5, "flags"), (CLOBBERS));\
(CLOBBERS) = tree_cons (NULL_TREE, build_string (4, "fpsr"), (CLOBBERS)); \
+ (CLOBBERS) = tree_cons (NULL_TREE, build_string (7, "dirflag"), (CLOBBERS)); \
} while (0)
/* Stack layout; function entry, exit and calling. */
@@ -2169,7 +2171,7 @@ while (0)
#define HI_REGISTER_NAMES \
{"ax","dx","cx","bx","si","di","bp","sp", \
"st","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)","", \
- "flags","fpsr" }
+ "flags","fpsr", "dirflag" }
#define REGISTER_NAMES HI_REGISTER_NAMES
@@ -2382,6 +2384,8 @@ do { long l; \
fprintf (FILE, "%d ", REGNO (X)); \
if (REGNO (X) == FLAGS_REG) \
{ fputs ("flags", FILE); break; } \
+ if (REGNO (X) == DIRFLAG_REG) \
+ { fputs ("dirflag", FILE); break; } \
if (REGNO (X) == FPSR_REG) \
{ fputs ("fpsr", FILE); break; } \
if (REGNO (X) == ARG_POINTER_REGNUM) \
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index f9316de..6fd7c8e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -83,7 +83,7 @@
;; A basic instruction type. Refinements due to arguments to be
;; provided in other attributes.
(define_attr "type"
- "other,multi,alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch"
+ "other,multi,alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,fmov,fop,fop1,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,str,cld"
(const_string "other"))
;; The (bounding maximum) length of an instruction in bytes.
@@ -105,6 +105,8 @@
(define_attr "length_opcode" ""
(cond [(eq_attr "type" "imovx,setcc,icmov")
(const_int 3)
+ (eq_attr "type" "str,cld")
+ (const_int 1)
(and (eq_attr "type" "incdec")
(ior (match_operand:SI 1 "register_operand" "")
(match_operand:HI 1 "register_operand" "")))
@@ -127,9 +129,9 @@
;; if the instruction is complex.
(define_attr "memory" "none,load,store,both,unknown"
- (cond [(eq_attr "type" "other,multi")
+ (cond [(eq_attr "type" "other,multi,str")
(const_string "unknown")
- (eq_attr "type" "lea,fcmov,fpspc")
+ (eq_attr "type" "lea,fcmov,fpspc,cld")
(const_string "none")
(eq_attr "type" "push")
(if_then_else (match_operand 1 "memory_operand" "")
@@ -260,6 +262,12 @@
(eq_attr "type" "imul"))
11 11)
+;; Rep movs takes at least 12 cycles.
+(define_function_unit "pent_np" 1 0
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "str"))
+ 12 12)
+
; ??? IDIV for SI takes 46 cycles, for HI 30, for QI 22
(define_function_unit "pent_np" 1 0
(and (eq_attr "cpu" "pentium")
@@ -304,6 +312,11 @@
(eq_attr "memory" "store"))))
2 2)
+(define_function_unit "pent_np" 1 0
+ (and (eq_attr "cpu" "pentium")
+ (eq_attr "type" "cld"))
+ 2 2)
+
(define_function_unit "fpu" 1 0
(and (eq_attr "cpu" "pentium")
(and (eq_attr "type" "fmov")
@@ -469,9 +482,9 @@
;; cycles to decode in decoder 0.
(define_attr "ppro_uops" "one,few,many"
- (cond [(eq_attr "type" "other,multi,call,callv,fpspc")
+ (cond [(eq_attr "type" "other,multi,call,callv,fpspc,str")
(const_string "many")
- (eq_attr "type" "icmov,fcmov")
+ (eq_attr "type" "icmov,fcmov,str,cld")
(const_string "few")
(eq_attr "type" "imov")
(if_then_else (eq_attr "memory" "store,both")
@@ -496,7 +509,7 @@
(define_function_unit "ppro_p0" 1 0
(and (eq_attr "cpu" "pentiumpro")
- (eq_attr "type" "ishift,lea,ibr"))
+ (eq_attr "type" "ishift,lea,ibr,cld"))
1 1)
(define_function_unit "ppro_p0" 1 0
@@ -611,7 +624,7 @@
;; Shift instructions and certain arithmetic are issued only to X pipe.
(define_function_unit "k6_alux" 1 0
(and (eq_attr "cpu" "k6")
- (eq_attr "type" "ishift,alu1,negnot"))
+ (eq_attr "type" "ishift,alu1,negnot,cld"))
1 1)
;; The QI mode arithmetic is issued to X pipe only.
@@ -644,6 +657,12 @@
(eq_attr "memory" "load,both")))
1 1)
+(define_function_unit "k6_load" 1 0
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "str")
+ (eq_attr "memory" "load,both")))
+ 10 10)
+
;; Lea have two instructions, so latency is probably 2
(define_function_unit "k6_store" 1 0
(and (eq_attr "cpu" "k6")
@@ -652,6 +671,11 @@
(define_function_unit "k6_store" 1 0
(and (eq_attr "cpu" "k6")
+ (eq_attr "type" "str"))
+ 10 10)
+
+(define_function_unit "k6_store" 1 0
+ (and (eq_attr "cpu" "k6")
(ior (eq_attr "type" "push")
(eq_attr "memory" "store,both")))
1 1)
@@ -713,7 +737,7 @@
;; communicates with all the execution units seperately instead.
(define_attr "athlon_decode" "direct,vector"
- (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc")
+ (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str")
(const_string "vector")
(and (eq_attr "type" "push")
(match_operand 1 "memory_operand" ""))
@@ -741,11 +765,16 @@
(define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon")
- (eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov"))
+ (eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,str,cld"))
1 1)
(define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon")
+ (eq_attr "type" "str"))
+ 15 15)
+
+(define_function_unit "athlon_ieu" 3 0
+ (and (eq_attr "cpu" "athlon")
(eq_attr "type" "imul"))
4 0)
@@ -7789,11 +7818,18 @@
;; Block operation instructions
+(define_insn "cld"
+ [(set (reg:SI 19) (const_int 0))]
+ ""
+ "cld"
+ [(set_attr "type" "cld")])
+
(define_expand "movstrsi"
[(parallel [(set (match_operand:BLK 0 "memory_operand" "")
(match_operand:BLK 1 "memory_operand" ""))
(use (match_operand:SI 2 "const_int_operand" ""))
(use (match_operand:SI 3 "const_int_operand" ""))
+ (use (reg:SI 19))
(clobber (match_scratch:SI 4 ""))
(clobber (match_dup 5))
(clobber (match_dup 6))])]
@@ -7813,6 +7849,7 @@
operands[0] = change_address (operands[0], VOIDmode, addr0);
operands[1] = change_address (operands[1], VOIDmode, addr1);
+ emit_insn (gen_cld ());
}")
;; It might seem that operands 0 & 1 could use predicate register_operand.
@@ -7824,6 +7861,7 @@
(mem:BLK (match_operand:SI 1 "address_operand" "S")))
(use (match_operand:SI 2 "const_int_operand" "n"))
(use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:SI 19))
(clobber (match_scratch:SI 4 "=&c"))
(clobber (match_dup 0))
(clobber (match_dup 1))]
@@ -7832,7 +7870,6 @@
{
rtx xops[2];
- output_asm_insn (\"cld\", operands);
if (GET_CODE (operands[2]) == CONST_INT)
{
if (INTVAL (operands[2]) & ~0x03)
@@ -7855,12 +7892,14 @@
[(set_attr "type" "multi")])
(define_expand "clrstrsi"
- [(set (match_dup 3) (const_int 0))
+ [(set (reg:SI 19) (const_int 0))
+ (set (match_dup 3) (const_int 0))
(parallel [(set (match_operand:BLK 0 "memory_operand" "")
(const_int 0))
(use (match_operand:SI 1 "const_int_operand" ""))
(use (match_operand:SI 2 "const_int_operand" ""))
(use (match_dup 3))
+ (use (reg:SI 19))
(clobber (match_scratch:SI 4 ""))
(clobber (match_dup 5))])]
""
@@ -7877,6 +7916,8 @@
operands[5] = addr0;
operands[0] = gen_rtx_MEM (BLKmode, addr0);
+
+ emit_insn (gen_cld ());
}")
;; It might seem that operand 0 could use predicate register_operand.
@@ -7889,6 +7930,7 @@
(use (match_operand:SI 1 "const_int_operand" "n"))
(use (match_operand:SI 2 "immediate_operand" "i"))
(use (match_operand:SI 3 "register_operand" "a"))
+ (use (reg:SI 19))
(clobber (match_scratch:SI 4 "=&c"))
(clobber (match_dup 0))]
""
@@ -7896,7 +7938,6 @@
{
rtx xops[2];
- output_asm_insn (\"cld\", operands);
if (GET_CODE (operands[1]) == CONST_INT)
{
unsigned int count = INTVAL (operands[1]) & 0xffffffff;
@@ -7958,6 +7999,7 @@
once cc0 is dead. */
align = operands[4];
+ emit_insn (gen_cld ());
if (GET_CODE (count) == CONST_INT)
{
if (INTVAL (count) == 0)
@@ -8008,11 +8050,12 @@
(mem:BLK (match_operand:SI 1 "address_operand" "D"))))
(use (match_operand:SI 2 "register_operand" "c"))
(use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:SI 19))
(clobber (match_dup 0))
(clobber (match_dup 1))
(clobber (match_dup 2))]
""
- "cld\;repz{\;| }cmpsb"
+ "repz{\;| }cmpsb"
[(set_attr "type" "multi")
(set_attr "length" "3")])
@@ -8026,12 +8069,13 @@
(mem:BLK (match_operand:SI 1 "address_operand" "D")))
(const_int 0)))
(use (match_operand:SI 3 "immediate_operand" "i"))
+ (use (reg:SI 19))
(clobber (match_dup 0))
(clobber (match_dup 1))
(clobber (match_dup 2))]
""
;; The initial compare sets the zero flag.
- "cmp{l}\\t%2, %2\;cld\;repz{\;| }cmpsb"
+ "cmp{l}\\t%2, %2\;repz{\;| }cmpsb"
[(set_attr "type" "multi")
(set_attr "length" "5")])
@@ -8079,6 +8123,7 @@
emit_move_insn (scratch3, addr);
+ emit_insn (gen_cld ());
emit_insn (gen_strlensi_1 (scratch1, scratch3, eoschar,
align, constm1_rtx));
emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
@@ -8097,10 +8142,11 @@
(match_operand:QI 2 "general_operand" "a")
(match_operand:SI 3 "immediate_operand" "i")
(match_operand:SI 4 "immediate_operand" "0")] 0))
+ (use (reg:SI 19))
(clobber (match_dup 1))
(clobber (reg:CC 17))]
""
- "cld\;repnz{\;| }scasb"
+ "repnz{\;| }scasb"
[(set_attr "type" "multi")
(set_attr "length" "3")])