diff options
author | John Carr <jfc@gcc.gnu.org> | 1998-09-06 05:52:08 +0000 |
---|---|---|
committer | John Carr <jfc@gcc.gnu.org> | 1998-09-06 05:52:08 +0000 |
commit | a269a03c80a4920f09cf0c065a636f6cedf92490 (patch) | |
tree | c62f6e2a1d92a133be181e52cf0f6be0b8b402b4 | |
parent | f429f2c549af467352419e0468efe172f014c289 (diff) | |
download | gcc-a269a03c80a4920f09cf0c065a636f6cedf92490.zip gcc-a269a03c80a4920f09cf0c065a636f6cedf92490.tar.gz gcc-a269a03c80a4920f09cf0c065a636f6cedf92490.tar.bz2 |
final.c (final): If a label is reached only from a single jump...
(
* final.c (final): If a label is reached only from a single jump,
call NOTICE_UPDATE_CC on the jump and its predecessor before
emitting the insn after the label.
* i386.h: Add AMD K6 support.
Change TARGET_* macros to use table lookup.
(INITIALIZE_TRAMPOLINE): Improve trampoline code.
(ADJUST_COST): Change definition to call function in i386.c.
(ISSUE_RATE): Define as 2 for anything newer than an 80486.
* i386.c: Add AMD K6 support.
Add constants for feature tests used by TARGET_* macros.
(split_di): If before reload, call gen_lowpart and gen_highpart.
(x86_adjust_cost): New function.
(put_jump_code): New function.
(print_operand): New codes 'D' and 'd'.
* i386.md: New insn types. New insn attribute "memory".
Redefine scheduling parameters to use new types and add AMD K6
support. Explicitly set type of most insns.
(move insns): K6 prefers movl $0,reg to xorl reg,reg. Pentium
Pro and K6 prefer movl $1,reg to incl reg.
(adddi3, subdi3): Set cc_status.
(DImode shift patterns): Change label counters from HOST_WIDE_INT
to int; x86 can't have more than 2^31 DImode shifts per file.
(setcc): Combine all setcc patterns. Allow writing memory.
Combine all jump patterns using match_operator.
(*bzero): Name pattern. Emit multiple stos instructions when that
is faster than rep stos.
(xordi3, anddi3, iordi3): Simplify DImode logical patterns and
add define_split.
* ch/Make-lang.in: Comment ^L characters. Sun make doesn't like them.
From-SVN: r22292
-rw-r--r-- | gcc/ch/Make-lang.in | 14 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 230 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 152 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 1078 |
4 files changed, 748 insertions, 726 deletions
diff --git a/gcc/ch/Make-lang.in b/gcc/ch/Make-lang.in index 95632b0..2194741 100644 --- a/gcc/ch/Make-lang.in +++ b/gcc/ch/Make-lang.in @@ -34,7 +34,7 @@ # - making any compiler driver (eg: g++) # - the compiler proper (eg: cc1plus) # - define the names for selecting the language in LANGUAGES. - +# # define version of GNUCHILL compiler. Note: maybe we have to change the # mechanism GNUCHILL_VERSION = 1.5.2 @@ -62,7 +62,7 @@ CHILL_FLAGS_TO_PASS = \ "CHILL_LIB=$(CHILL_LIB)" \ "CC=$(CC)" \ "GNUCHILL_VERSION=$(GNUCHILL_VERSION)" - +# # Define the names for selecting languages in LANGUAGES. CHILL: chill cc1chill chill-runtime @@ -105,7 +105,7 @@ chill-runtime: stmp-headers $(GCC_PASSES) cd ch/runtime; $(MAKE) $(FLAGS_TO_PASS) $(CHILL_FLAGS_TO_PASS) GCC_FOR_TARGET="$${thisdir1}/xgcc -B$${thisdir1}/" all ; \ else true; fi ;; \ esac - +# # Build hooks: CHILL.all.build: chill @@ -123,7 +123,7 @@ chill.dvi: $(srcdir)/ch/chill.texi $(srcdir)/extend.texi $(srcdir)/invoke.texi $ TEXINPUTS=${texidir}:$(srcdir):$$TEXINPUTS tex chill.texi # FIXME: Not sure languages should do this. cp ch/chill.dvi chill.dvi - +# # Install hooks: # cc1chill is installed elsewhere as part of $(COMPILERS). @@ -166,7 +166,7 @@ CHILL.install-man: CHILL.uninstall: -rm -rf $(bindir)/$(CHILL_INSTALL_NAME) -rm -rf $(bindir)/$(CHILL_CROSS_NAME) - +# # Clean hooks: # A lot of the ancillary files are deleted by the main makefile. # We just have to delete files specific to us. @@ -183,7 +183,7 @@ CHILL.maintainer-clean: -rm -f ch/chill.info* ch/chill.dvi ch/chill.??s ch/chill.*aux # CYGNUS LOCAL: Delete locally created file. -rm -f ch/hash.h - +# # Stage hooks: # The main makefile has already created stage?/ch. 
@@ -195,7 +195,7 @@ CHILL.stage3: -mv ch/*.o stage3/ch CHILL.stage4: -mv ch/*.o stage4/ch - +# # Maintenance hooks: # This target creates the files that can be rebuilt, but go in the diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9354e79..fc8edbd 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -100,8 +100,37 @@ struct processor_costs pentiumpro_cost = { 17 /* cost of a divide/mod */ }; +struct processor_costs k6_cost = { + 1, /* cost of an add instruction */ + 1, /* cost of a lea instruction */ + 1, /* variable shift costs */ + 1, /* constant shift costs */ + 2, /* cost of starting a multiply */ + 0, /* cost of multiply per each bit set */ + 18 /* cost of a divide/mod */ +}; + struct processor_costs *ix86_cost = &pentium_cost; +/* Processor feature/optimization bitmasks. */ +#define m_386 (1<<PROCESSOR_I386) +#define m_486 (1<<PROCESSOR_I486) +#define m_PENT (1<<PROCESSOR_PENTIUM) +#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) +#define m_K6 (1<<PROCESSOR_K6) + +const int x86_use_leave = m_386 | m_K6; +const int x86_push_memory = m_386 | m_K6; +const int x86_zero_extend_with_and = m_486 | m_PENT; +const int x86_movx = m_386 | m_PPRO | m_K6; +const int x86_double_with_add = ~m_386; +const int x86_use_bit_test = m_386; +const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO; +const int x86_use_q_reg = m_PENT | m_PPRO | m_K6; +const int x86_use_any_reg = m_486; +const int x86_cmove = m_PPRO; +const int x86_deep_branch = m_PPRO| m_K6; + #define AT_BP(mode) (gen_rtx_MEM ((mode), frame_pointer_rtx)) extern FILE *asm_out_file; @@ -213,7 +242,8 @@ override_options () {PROCESSOR_I686_STRING, PROCESSOR_PENTIUMPRO, &pentiumpro_cost, 0, 0}, {PROCESSOR_PENTIUMPRO_STRING, PROCESSOR_PENTIUMPRO, - &pentiumpro_cost, 0, 0}}; + &pentiumpro_cost, 0, 0}, + {PROCESSOR_K6_STRING, PROCESSOR_K6, &k6_cost, 0, 0}}; int ptt_size = sizeof (processor_target_table) / sizeof (struct ptt); @@ -279,7 +309,7 @@ override_options () { ix86_cpu = 
processor_target_table[j].processor; ix86_cost = processor_target_table[j].cost; - if (i > j && (int) ix86_arch >= (int) PROCESSOR_PENTIUMPRO) + if (i > j && (int) ix86_arch >= (int) PROCESSOR_K6) error ("-mcpu=%s does not support -march=%s", ix86_cpu_string, ix86_arch_string); @@ -1586,6 +1616,7 @@ standard_80387_constant_p (x) /* Note that on the 80387, other constants, such as pi, are much slower to load as standard constants than to load from doubles in memory! */ + /* ??? Not true on K6: all constants are equal cost. */ #endif return 0; @@ -2933,8 +2964,8 @@ legitimize_address (x, oldx, mode) && (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4) { changed = 1; - x = gen_rtx (MULT, Pmode, force_reg (Pmode, XEXP (x, 0)), - GEN_INT (1 << log)); + x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), + GEN_INT (1 << log)); } if (GET_CODE (x) == PLUS) @@ -3185,6 +3216,114 @@ output_pic_addr_const (file, x, code) } } +static void +put_jump_code (code, reverse, file) + enum rtx_code code; + int reverse; + FILE *file; +{ + int flags = cc_prev_status.flags; + int ieee = (TARGET_IEEE_FP && (flags & CC_IN_80387)); + const char *suffix; + + if (flags & CC_Z_IN_NOT_C) + switch (code) + { + case EQ: + fputs (reverse ? "c" : "nc", file); + return; + + case NE: + fputs (reverse ? "nc" : "c", file); + return; + + default: + abort (); + } + if (ieee) + { + switch (code) + { + case LE: + suffix = reverse ? "ae" : "b"; + break; + case GT: + case LT: + case GE: + suffix = reverse ? "ne" : "e"; + break; + case EQ: + suffix = reverse ? "ne" : "e"; + break; + case NE: + suffix = reverse ? "e" : "ne"; + break; + default: + abort (); + } + fputs (suffix, file); + return; + } + if (flags & CC_TEST_AX) + abort(); + if ((flags & CC_NO_OVERFLOW) && (code == LE || code == GT)) + abort (); + if (reverse) + code = reverse_condition (code); + switch (code) + { + case EQ: + suffix = "e"; + break; + + case NE: + suffix = "ne"; + break; + + case GT: + suffix = flags & CC_IN_80387 ? 
"a" : "g"; + break; + + case GTU: + suffix = "a"; + break; + + case LT: + if (flags & CC_NO_OVERFLOW) + suffix = "s"; + else + suffix = flags & CC_IN_80387 ? "b" : "l"; + break; + + case LTU: + suffix = "b"; + break; + + case GE: + if (flags & CC_NO_OVERFLOW) + suffix = "ns"; + else + suffix = flags & CC_IN_80387 ? "ae" : "ge"; + break; + + case GEU: + suffix = "ae"; + break; + + case LE: + suffix = flags & CC_IN_80387 ? "be" : "le"; + break; + + case LEU: + suffix = "be"; + break; + + default: + abort (); + } + fputs (suffix, file); +} + /* Append the correct conditional move suffix which corresponds to CODE. */ static void @@ -3301,7 +3440,9 @@ put_condition_code (code, reverse_cc, mode, file) C -- print opcode suffix for set/cmov insn. c -- like C, but print reversed condition F -- print opcode suffix for fcmov insn. - f -- like C, but print reversed condition + f -- like F, but print reversed condition + D -- print the opcode suffix for a jump + d -- like D, but print reversed condition R -- print the prefix for register names. z -- print the opcode suffix for the size of the current operand. * -- print a star (in certain assembler syntax) @@ -3443,6 +3584,14 @@ print_operand (file, x, code) return; + case 'D': + put_jump_code (GET_CODE (x), 0, file); + return; + + case 'd': + put_jump_code (GET_CODE (x), 1, file); + return; + /* This is used by the conditional move instructions. */ case 'C': put_condition_code (GET_CODE (x), 0, MODE_INT, file); @@ -3851,7 +4000,12 @@ split_di (operands, num, lo_half, hi_half) while (num--) { rtx op = operands[num]; - if (GET_CODE (op) == REG) + if (! 
reload_completed) + { + lo_half[num] = gen_lowpart (SImode, op); + hi_half[num] = gen_highpart (SImode, op); + } + else if (GET_CODE (op) == REG) { lo_half[num] = gen_rtx_REG (SImode, REGNO (op)); hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1); @@ -5360,3 +5514,67 @@ output_int_conditional_move (which_alternative, operands) return ""; } + +int +x86_adjust_cost (insn, link, dep_insn, cost) + rtx insn, link, dep_insn; + int cost; +{ + rtx next_inst; + + if (GET_CODE (dep_insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN) + return 0; + + if (GET_CODE (dep_insn) == INSN + && GET_CODE (PATTERN (dep_insn)) == SET + && GET_CODE (SET_DEST (PATTERN (dep_insn))) == REG + && GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) == SET + && !reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)), + SET_SRC (PATTERN (insn)))) + return 0; /* ??? */ + + + switch (ix86_cpu) + { + case PROCESSOR_PENTIUM: + if (cost != 0 && is_fp_insn (insn) && is_fp_insn (dep_insn) + && !is_fp_dest (dep_insn)) + return 0; + + if (agi_dependent (insn, dep_insn)) + return 3; + + if (GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) == SET + && SET_DEST (PATTERN (insn)) == cc0_rtx + && (next_inst = next_nonnote_insn (insn)) + && GET_CODE (next_inst) == JUMP_INSN) + /* compare probably paired with jump */ + return 0; + break; + + case PROCESSOR_K6: + default: + if (!is_fp_dest (dep_insn)) + { + if(!agi_dependent (insn, dep_insn)) + return 0; + if (TARGET_486) + return 2; + } + else + if (is_fp_store (insn) && is_fp_insn (dep_insn) + && NEXT_INSN (insn) && NEXT_INSN (NEXT_INSN (insn)) + && NEXT_INSN (NEXT_INSN (NEXT_INSN (insn))) + && (GET_CODE (NEXT_INSN (insn)) == INSN) + && (GET_CODE (NEXT_INSN (NEXT_INSN (insn))) == JUMP_INSN) + && (GET_CODE (NEXT_INSN (NEXT_INSN (NEXT_INSN (insn)))) == NOTE) + && (NOTE_LINE_NUMBER (NEXT_INSN (NEXT_INSN (NEXT_INSN (insn)))) + == NOTE_INSN_LOOP_END)) + return 3; + break; + } + + return cost; +} diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h 
index 7e3f723..2042a6b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -155,18 +155,25 @@ extern int target_flags; #define TARGET_486 (ix86_cpu == PROCESSOR_I486) #define TARGET_PENTIUM (ix86_cpu == PROCESSOR_PENTIUM) #define TARGET_PENTIUMPRO (ix86_cpu == PROCESSOR_PENTIUMPRO) -#define TARGET_USE_LEAVE (ix86_cpu == PROCESSOR_I386) -#define TARGET_PUSH_MEMORY (ix86_cpu == PROCESSOR_I386) -#define TARGET_ZERO_EXTEND_WITH_AND (ix86_cpu != PROCESSOR_I386 \ - && ix86_cpu != PROCESSOR_PENTIUMPRO) -#define TARGET_DOUBLE_WITH_ADD (ix86_cpu != PROCESSOR_I386) -#define TARGET_USE_BIT_TEST (ix86_cpu == PROCESSOR_I386) -#define TARGET_UNROLL_STRLEN (ix86_cpu != PROCESSOR_I386) -#define TARGET_USE_Q_REG (ix86_cpu == PROCESSOR_PENTIUM \ - || ix86_cpu == PROCESSOR_PENTIUMPRO) -#define TARGET_USE_ANY_REG (ix86_cpu == PROCESSOR_I486) -#define TARGET_CMOVE (ix86_arch == PROCESSOR_PENTIUMPRO) -#define TARGET_DEEP_BRANCH_PREDICTION (ix86_cpu == PROCESSOR_PENTIUMPRO) +#define TARGET_K6 (ix86_cpu == PROCESSOR_K6) + +#define CPUMASK (1 << ix86_cpu) +extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and; +extern const int x86_use_bit_test, x86_cmove, x86_deep_branch; +extern const int x86_unroll_strlen, x86_use_q_reg, x86_use_any_reg; +extern const int x86_double_with_add; + +#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK) +#define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK) +#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & CPUMASK) +#define TARGET_USE_BIT_TEST (x86_use_bit_test & CPUMASK) +#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & CPUMASK) +#define TARGET_USE_Q_REG (x86_use_q_reg & CPUMASK) +#define TARGET_USE_ANY_REG (x86_use_any_reg & CPUMASK) +#define TARGET_CMOVE (x86_cmove & (1 << ix86_arch)) +#define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & CPUMASK) +#define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & CPUMASK) + #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE) #define TARGET_SWITCHES \ @@ 
-219,7 +226,8 @@ enum processor_type {PROCESSOR_I386, /* 80386 */ PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */ PROCESSOR_PENTIUM, - PROCESSOR_PENTIUMPRO}; + PROCESSOR_PENTIUMPRO, + PROCESSOR_K6}; #define PROCESSOR_I386_STRING "i386" #define PROCESSOR_I486_STRING "i486" @@ -227,28 +235,20 @@ enum processor_type #define PROCESSOR_PENTIUM_STRING "pentium" #define PROCESSOR_I686_STRING "i686" #define PROCESSOR_PENTIUMPRO_STRING "pentiumpro" +#define PROCESSOR_K6_STRING "k6" extern enum processor_type ix86_cpu; extern int ix86_arch; /* Define the default processor. This is overridden by other tm.h files. */ -#define PROCESSOR_DEFAULT \ - ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_I486) \ - ? PROCESSOR_I486 \ - : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_PENTIUM) \ - ? PROCESSOR_PENTIUM \ - : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_PENTIUMPRO) \ - ? PROCESSOR_PENTIUMPRO \ - : PROCESSOR_I386 +#define PROCESSOR_DEFAULT (enum processor_type) TARGET_CPU_DEFAULT #define PROCESSOR_DEFAULT_STRING \ - ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_I486) \ - ? PROCESSOR_I486_STRING \ - : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_PENTIUM) \ - ? PROCESSOR_PENTIUM_STRING \ - : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_PENTIUMPRO) \ - ? PROCESSOR_PENTIUMPRO_STRING \ - : PROCESSOR_I386_STRING + (PROCESSOR_DEFAULT == PROCESSOR_I486 ? PROCESSOR_I486_STRING \ + : PROCESSOR_DEFAULT == PROCESSOR_PENTIUM ? PROCESSOR_PENTIUM_STRING \ + : PROCESSOR_DEFAULT == PROCESSOR_PENTIUMPRO ? PROCESSOR_PENTIUMPRO_STRING \ + : PROCESSOR_DEFAULT == PROCESSOR_K6 ? PROCESSOR_K6_STRING \ + : PROCESSOR_I386_STRING) /* This macro is similar to `TARGET_SWITCHES' but defines names of command options that have values. Its definition is an @@ -1533,25 +1533,16 @@ do { \ /* Output assembler code for a block containing the constant parts of a trampoline, leaving space for the variable parts. 
*/ -/* On the 386, the trampoline contains three instructions: +/* On the 386, the trampoline contains two instructions: mov #STATIC,ecx - mov #FUNCTION,eax - jmp @eax */ -#define TRAMPOLINE_TEMPLATE(FILE) \ -{ \ - ASM_OUTPUT_CHAR (FILE, GEN_INT (0xb9)); \ - ASM_OUTPUT_SHORT (FILE, const0_rtx); \ - ASM_OUTPUT_SHORT (FILE, const0_rtx); \ - ASM_OUTPUT_CHAR (FILE, GEN_INT (0xb8)); \ - ASM_OUTPUT_SHORT (FILE, const0_rtx); \ - ASM_OUTPUT_SHORT (FILE, const0_rtx); \ - ASM_OUTPUT_CHAR (FILE, GEN_INT (0xff)); \ - ASM_OUTPUT_CHAR (FILE, GEN_INT (0xe0)); \ -} + jmp FUNCTION + The trampoline is generated entirely at runtime. The operand of JMP + is the address of FUNCTION relative to the instruction following the + JMP (which is 5 bytes long). */ /* Length in units of the trampoline for entering a nested function. */ -#define TRAMPOLINE_SIZE 12 +#define TRAMPOLINE_SIZE 10 /* Emit RTL insns to initialize the variable parts of a trampoline. FNADDR is an RTX for the address of the function's pure code. @@ -1559,8 +1550,14 @@ do { \ #define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \ { \ + /* Compute offset from the end of the jmp to the target function. */ \ + rtx disp = expand_binop (SImode, sub_optab, FNADDR, \ + plus_constant (TRAMP, 10), \ + NULL_RTX, 1, OPTAB_DIRECT); \ + emit_move_insn (gen_rtx_MEM (QImode, TRAMP), GEN_INT (0xb9)); \ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (TRAMP, 1)), CXT); \ - emit_move_insn (gen_rtx_MEM (SImode, plus_constant (TRAMP, 6)), FNADDR); \ + emit_move_insn (gen_rtx_MEM (QImode, plus_constant (TRAMP, 5)), GEN_INT (0xe9));\ + emit_move_insn (gen_rtx_MEM (SImode, plus_constant (TRAMP, 6)), disp); \ } /* Definitions for register eliminations. @@ -2243,70 +2240,7 @@ while (0) the same cost as a data-dependence. 
*/ #define ADJUST_COST(insn,link,dep_insn,cost) \ - { \ - rtx next_inst; \ - if (GET_CODE (dep_insn) == CALL_INSN) \ - (cost) = 0; \ - \ - else if (GET_CODE (dep_insn) == INSN \ - && GET_CODE (PATTERN (dep_insn)) == SET \ - && GET_CODE (SET_DEST (PATTERN (dep_insn))) == REG \ - && GET_CODE (insn) == INSN \ - && GET_CODE (PATTERN (insn)) == SET \ - && !reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)), \ - SET_SRC (PATTERN (insn)))) \ - { \ - (cost) = 0; \ - } \ - \ - else if (GET_CODE (insn) == JUMP_INSN) \ - { \ - (cost) = 0; \ - } \ - \ - if (TARGET_PENTIUM) \ - { \ - if (cost !=0 && is_fp_insn (insn) && is_fp_insn (dep_insn) \ - && !is_fp_dest (dep_insn)) \ - { \ - (cost) = 0; \ - } \ - \ - if (agi_dependent (insn, dep_insn)) \ - { \ - (cost) = 3; \ - } \ - else if (GET_CODE (insn) == INSN \ - && GET_CODE (PATTERN (insn)) == SET \ - && SET_DEST (PATTERN (insn)) == cc0_rtx \ - && (next_inst = next_nonnote_insn (insn)) \ - && GET_CODE (next_inst) == JUMP_INSN) \ - { /* compare probably paired with jump */ \ - (cost) = 0; \ - } \ - } \ - else \ - if (!is_fp_dest (dep_insn)) \ - { \ - if(!agi_dependent (insn, dep_insn)) \ - (cost) = 0; \ - else if (TARGET_486) \ - (cost) = 2; \ - } \ - else \ - if (is_fp_store (insn) && is_fp_insn (dep_insn) \ - && NEXT_INSN (insn) && NEXT_INSN (NEXT_INSN (insn)) \ - && NEXT_INSN (NEXT_INSN (NEXT_INSN (insn))) \ - && (GET_CODE (NEXT_INSN (insn)) == INSN) \ - && (GET_CODE (NEXT_INSN (NEXT_INSN (insn))) == JUMP_INSN) \ - && (GET_CODE (NEXT_INSN (NEXT_INSN (NEXT_INSN (insn)))) == NOTE) \ - && (NOTE_LINE_NUMBER (NEXT_INSN (NEXT_INSN (NEXT_INSN (insn)))) \ - == NOTE_INSN_LOOP_END)) \ - { \ - (cost) = 3; \ - } \ - } - + (cost) = x86_adjust_cost(insn, link, dep_insn, cost) #define ADJUST_BLOCKAGE(last_insn,insn,blockage) \ { \ @@ -2323,6 +2257,8 @@ while (0) } \ } +#define ISSUE_RATE ((int)ix86_cpu > (int)PROCESSOR_I486 ? 2 : 1) + /* Add any extra modes needed to represent the condition code. 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 69134dd..434ba1f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -71,9 +71,43 @@ ;; This shadows the processor_type enumeration, so changes must be made ;; to i386.h at the same time. -(define_attr "type" "integer,idiv,imul,fld,fpop,fpdiv,fpmul" +(define_attr "type" + "integer,binary,memory,test,compare,fcompare,idiv,imul,lea,fld,fpop,fpdiv,fpmul" (const_string "integer")) +(define_attr "memory" "none,load,store" + (cond [(eq_attr "type" "idiv,lea") + (const_string "none") + + (eq_attr "type" "fld") + (const_string "load") + + (eq_attr "type" "test") + (if_then_else (match_operand 0 "memory_operand" "") + (const_string "load") + (const_string "none")) + + (eq_attr "type" "compare,fcompare") + (if_then_else (ior (match_operand 0 "memory_operand" "") + (match_operand 1 "memory_operand" "")) + (const_string "load") + (const_string "none")) + + (and (eq_attr "type" "integer,memory,fpop") + (match_operand 0 "memory_operand" "")) + (const_string "store") + + (and (eq_attr "type" "integer,memory,fpop") + (match_operand 1 "memory_operand" "")) + (const_string "load") + + (and (eq_attr "type" "binary,imul,fpmul,fpdiv") + (ior (match_operand 1 "memory_operand" "") + (match_operand 2 "memory_operand" ""))) + (const_string "load")] + + (const_string "none"))) + ;; Functional units ; (define_function_unit NAME MULTIPLICITY SIMULTANEITY @@ -92,11 +126,11 @@ ;; Floating point (define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpop") (eq_attr "cpu" "i386,i486")) + (and (eq_attr "type" "fpop,fcompare") (eq_attr "cpu" "i386,i486")) 5 5) (define_function_unit "fp" 1 0 - (and (eq_attr "type" "fpop") (eq_attr "cpu" "pentium,pentiumpro")) + (and (eq_attr "type" "fpop,fcompare") (eq_attr "cpu" "pentium,pentiumpro")) 3 0) (define_function_unit "fp" 1 0 @@ -120,12 +154,69 @@ 10 10) (define_function_unit "fp" 1 0 - (eq_attr "type" "fld") + (and (eq_attr "type" "fld") (eq_attr "cpu" 
"!pentiumpro,k6")) 1 0) -(define_function_unit "integer" 1 0 - (and (eq_attr "type" "integer") (eq_attr "cpu" "!i386")) - 2 0) +;; K6 FPU is not pipelined. +(define_function_unit "fp" 1 0 + (and (eq_attr "type" "fpop,fpmul,fcompare") (eq_attr "cpu" "k6")) + 2 2) + +;; i386 and i486 have one integer unit, which need not be modeled + +(define_function_unit "integer" 2 0 + (and (eq_attr "type" "integer,binary,test,compare,lea") (eq_attr "cpu" "pentium,pentiumpro")) + 1 0) + +(define_function_unit "integer" 2 0 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "integer,binary,test,compare") + (eq_attr "memory" "!load"))) + 1 0) + +;; Internally, K6 converts REG OP MEM instructions into a load (2 cycles) +;; and a register operation (1 cycle). +(define_function_unit "integer" 2 0 + (and (eq_attr "cpu" "k6") + (and (eq_attr "type" "integer,binary,test,compare") + (eq_attr "memory" "load"))) + 3 0) + +;; Multiplies use one of the integer units +(define_function_unit "integer" 2 0 + (and (eq_attr "cpu" "pentium") (eq_attr "type" "imul")) + 11 11) + +(define_function_unit "integer" 2 0 + (and (eq_attr "cpu" "k6") (eq_attr "type" "imul")) + 2 2) + +(define_function_unit "integer" 2 0 + (and (eq_attr "cpu" "pentium") (eq_attr "type" "idiv")) + 25 25) + +(define_function_unit "integer" 2 0 + (and (eq_attr "cpu" "k6") (eq_attr "type" "idiv")) + 17 17) + +;; Pentium Pro and K6 have a separate load unit. +(define_function_unit "load" 1 0 + (and (eq_attr "cpu" "pentiumpro") (eq_attr "memory" "load")) + 3 0) + +(define_function_unit "load" 1 0 + (and (eq_attr "cpu" "k6") (eq_attr "memory" "load")) + 2 0) + +;; Pentium Pro and K6 have a separate store unit. 
+(define_function_unit "store" 1 0 + (and (eq_attr "cpu" "pentiumpro,k6") (eq_attr "memory" "store")) + 1 0) + +;; lea executes in the K6 store unit with 1 cycle latency +(define_function_unit "store" 1 0 + (and (eq_attr "cpu" "k6") (eq_attr "type" "lea")) + 1 0) ;; "movl MEM,REG / testl REG,REG" is faster on a 486 than "cmpl $0,MEM". @@ -143,7 +234,7 @@ ;; Processor type -- this attribute must exactly match the processor_type ;; enumeration in i386.h. -(define_attr "cpu" "i386,i486,pentium,pentiumpro" +(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6" (const (symbol_ref "ix86_cpu"))) (define_insn "tstsi_1" @@ -157,7 +248,8 @@ operands[1] = const0_rtx; return AS2 (cmp%L0,%1,%0); -}") +}" + [(set_attr "type" "test")]) (define_expand "tstsi" [(set (cc0) @@ -182,7 +274,8 @@ operands[1] = const0_rtx; return AS2 (cmp%W0,%1,%0); -}") +}" + [(set_attr "type" "test")]) (define_expand "tsthi" [(set (cc0) @@ -207,7 +300,8 @@ operands[1] = const0_rtx; return AS2 (cmp%B0,%1,%0); -}") +}" + [(set_attr "type" "test")]) (define_expand "tstqi" [(set (cc0) @@ -237,7 +331,8 @@ output_asm_insn (AS1 (fstp,%y0), operands); return output_fp_cc0_set (insn); -}") +}" + [(set_attr "type" "test")]) ;; Don't generate tstsf if generating IEEE code, since the `ftst' opcode ;; isn't IEEE compliant. @@ -271,7 +366,8 @@ output_asm_insn (AS1 (fstp,%y0), operands); return output_fp_cc0_set (insn); -}") +}" + [(set_attr "type" "test")]) ;; Don't generate tstdf if generating IEEE code, since the `ftst' opcode ;; isn't IEEE compliant. @@ -305,7 +401,8 @@ output_asm_insn (AS1 (fstp,%y0), operands); return output_fp_cc0_set (insn); -}") +}" + [(set_attr "type" "test")]) ;; Don't generate tstxf if generating IEEE code, since the `ftst' opcode ;; isn't IEEE compliant. 
@@ -331,7 +428,8 @@ (compare (match_operand:SI 0 "nonimmediate_operand" "mr,r") (match_operand:SI 1 "general_operand" "ri,mr")))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" - "* return AS2 (cmp%L0,%1,%0);") + "* return AS2 (cmp%L0,%1,%0);" + [(set_attr "type" "compare")]) (define_expand "cmpsi" [(set (cc0) @@ -354,7 +452,8 @@ (compare (match_operand:HI 0 "nonimmediate_operand" "mr,r") (match_operand:HI 1 "general_operand" "ri,mr")))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" - "* return AS2 (cmp%W0,%1,%0);") + "* return AS2 (cmp%W0,%1,%0);" + [(set_attr "type" "compare")]) (define_expand "cmphi" [(set (cc0) @@ -377,7 +476,8 @@ (compare (match_operand:QI 0 "nonimmediate_operand" "q,mq") (match_operand:QI 1 "general_operand" "qm,nq")))] "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" - "* return AS2 (cmp%B0,%1,%0);") + "* return AS2 (cmp%B0,%1,%0);" + [(set_attr "type" "compare")]) (define_expand "cmpqi" [(set (cc0) @@ -406,7 +506,8 @@ (match_operand:XF 1 "register_operand" "f")])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -416,7 +517,8 @@ (match_operand:SI 1 "nonimmediate_operand" "rm"))])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -426,7 +528,8 @@ (match_operand:XF 1 "register_operand" "f")])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -436,7 +539,8 @@ (match_operand:DF 1 "nonimmediate_operand" "fm"))])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return 
output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -446,7 +550,8 @@ (match_operand:XF 1 "register_operand" "f")])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -456,7 +561,8 @@ (match_operand:SF 1 "nonimmediate_operand" "fm"))])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -466,7 +572,8 @@ (match_operand:XF 1 "register_operand" "f")])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -474,7 +581,8 @@ (match_operand:XF 1 "register_operand" "f"))) (clobber (match_scratch:HI 2 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -484,7 +592,8 @@ (clobber (match_scratch:HI 3 "=a,a"))] "TARGET_80387 && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -494,7 +603,8 @@ (match_operand:SI 1 "nonimmediate_operand" "rm"))])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -504,7 +614,8 @@ (match_operand:DF 1 "register_operand" "f")])) (clobber (match_scratch:HI 
3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -514,7 +625,8 @@ (match_operand:SF 1 "nonimmediate_operand" "fm"))])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -524,7 +636,8 @@ (match_operand:DF 1 "register_operand" "f")])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -534,7 +647,8 @@ (match_operand:DF 1 "nonimmediate_operand" "fm")])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -542,7 +656,8 @@ (match_operand:DF 1 "register_operand" "f"))) (clobber (match_scratch:HI 2 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) ;; These two insns will never be generated by combine due to the mode of ;; the COMPARE. 
@@ -564,7 +679,7 @@ ; "TARGET_80387" ; "* return output_float_compare (insn, operands);") -(define_insn "cmpsf_cc_1" +(define_insn "*cmpsf_cc_1" [(set (cc0) (match_operator 2 "VOIDmode_compare_op" [(match_operand:SF 0 "nonimmediate_operand" "f,fm") @@ -572,7 +687,8 @@ (clobber (match_scratch:HI 3 "=a,a"))] "TARGET_80387 && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -582,7 +698,8 @@ (match_operand:SI 1 "nonimmediate_operand" "rm"))])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -592,7 +709,8 @@ (match_operand:SF 1 "register_operand" "f")])) (clobber (match_scratch:HI 3 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_insn "" [(set (cc0) @@ -600,7 +718,8 @@ (match_operand:SF 1 "register_operand" "f"))) (clobber (match_scratch:HI 2 "=a"))] "TARGET_80387" - "* return output_float_compare (insn, operands);") + "* return output_float_compare (insn, operands);" + [(set_attr "type" "fcompare")]) (define_expand "cmpxf" [(set (cc0) @@ -760,7 +879,8 @@ return AS2 (test%L0,%1,%0); return AS2 (test%L1,%0,%1); -}") +}" + [(set_attr "type" "compare")]) (define_insn "" [(set (cc0) @@ -808,7 +928,8 @@ return AS2 (test%W0,%1,%0); return AS2 (test%W1,%0,%1); -}") +}" + [(set_attr "type" "compare")]) (define_insn "" [(set (cc0) @@ -821,7 +942,8 @@ return AS2 (test%B0,%1,%0); return AS2 (test%B1,%0,%1); -}") +}" + [(set_attr "type" "compare")]) ;; move instructions. 
;; There is one for each machine mode, @@ -832,13 +954,15 @@ [(set (match_operand:SI 0 "push_operand" "=<") (match_operand:SI 1 "nonmemory_operand" "rn"))] "flag_pic" - "* return AS1 (push%L0,%1);") + "* return AS1 (push%L0,%1);" + [(set_attr "memory" "store")]) (define_insn "" [(set (match_operand:SI 0 "push_operand" "=<") (match_operand:SI 1 "nonmemory_operand" "ri"))] "!flag_pic" - "* return AS1 (push%L0,%1);") + "* return AS1 (push%L0,%1);" + [(set_attr "memory" "store")]) ;; On a 386, it is faster to push MEM directly. @@ -846,7 +970,9 @@ [(set (match_operand:SI 0 "push_operand" "=<") (match_operand:SI 1 "memory_operand" "m"))] "TARGET_PUSH_MEMORY" - "* return AS1 (push%L0,%1);") + "* return AS1 (push%L0,%1);" + [(set_attr "type" "memory") + (set_attr "memory" "load")]) ;; General case of fullword move. @@ -877,18 +1003,24 @@ ;; On i486, incl reg is faster than movl $1,reg. (define_insn "" - [(set (match_operand:SI 0 "general_operand" "=g,r") - (match_operand:SI 1 "general_operand" "rn,im"))] + [(set (match_operand:SI 0 "general_operand" "=g,r,r") + (match_operand:SI 1 "general_operand" "rn,i,m"))] "((!TARGET_MOVE || GET_CODE (operands[0]) != MEM) || (GET_CODE (operands[1]) != MEM)) && flag_pic" "* { rtx link; - if (operands[1] == const0_rtx && REG_P (operands[0])) + + /* K6: mov reg,0 is slightly faster than xor reg,reg but is 3 bytes + longer. */ + if ((ix86_cpu != PROCESSOR_K6 || optimize_size) + && operands[1] == const0_rtx && REG_P (operands[0])) return AS2 (xor%L0,%0,%0); if (operands[1] == const1_rtx + /* PPRO and K6 prefer mov to inc to reduce dependencies. */ + && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO) && (link = find_reg_note (insn, REG_WAS_0, 0)) /* Make sure the insn that stored the 0 is still present. */ && ! 
INSN_DELETED_P (XEXP (link, 0)) @@ -904,7 +1036,9 @@ return AS2 (lea%L0,%a1,%0); return AS2 (mov%L0,%1,%0); -}") +}" + [(set_attr "type" "integer,integer,memory") + (set_attr "memory" "*,*,load")]) (define_insn "" [(set (match_operand:SI 0 "general_operand" "=g,r") @@ -915,10 +1049,13 @@ "* { rtx link; - if (operands[1] == const0_rtx && REG_P (operands[0])) + if ((ix86_cpu != PROCESSOR_K6 || optimize_size) + && operands[1] == const0_rtx && REG_P (operands[0])) return AS2 (xor%L0,%0,%0); if (operands[1] == const1_rtx + /* PPRO and K6 prefer mov to inc to reduce dependencies. */ + && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO) && (link = find_reg_note (insn, REG_WAS_0, 0)) /* Make sure the insn that stored the 0 is still present. */ && ! INSN_DELETED_P (XEXP (link, 0)) @@ -931,19 +1068,25 @@ return AS1 (inc%L0,%0); return AS2 (mov%L0,%1,%0); -}") +}" + [(set_attr "type" "integer,memory") + (set_attr "memory" "*,load")]) (define_insn "" [(set (match_operand:HI 0 "push_operand" "=<") (match_operand:HI 1 "nonmemory_operand" "ri"))] "" - "* return AS1 (push%W0,%1);") + "* return AS1 (push%W0,%1);" + [(set_attr "type" "memory") + (set_attr "memory" "store")]) (define_insn "" [(set (match_operand:HI 0 "push_operand" "=<") (match_operand:HI 1 "memory_operand" "m"))] "TARGET_PUSH_MEMORY" - "* return AS1 (push%W0,%1);") + "* return AS1 (push%W0,%1);" + [(set_attr "type" "memory") + (set_attr "memory" "load")]) ;; On i486, an incl and movl are both faster than incw and movw. @@ -974,6 +1117,8 @@ return AS2 (xor%L0,%k0,%k0); if (REG_P (operands[0]) && operands[1] == const1_rtx + /* PPRO and K6 prefer mov to inc to reduce dependencies. */ + && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO) && (link = find_reg_note (insn, REG_WAS_0, 0)) /* Make sure the insn that stored the 0 is still present. */ && ! 
INSN_DELETED_P (XEXP (link, 0)) @@ -992,7 +1137,7 @@ operands[1] = i386_sext16_if_const (operands[1]); return AS2 (mov%L0,%k1,%k0); } - if (TARGET_PENTIUMPRO) + if (! TARGET_ZERO_EXTEND_WITH_AND) { /* movzwl is faster than movw on the Pentium Pro, * although not as fast as an aligned movl. */ @@ -1005,7 +1150,9 @@ } return AS2 (mov%W0,%1,%0); -}") +}" + [(set_attr "type" "integer,memory") + (set_attr "memory" "*,load")]) (define_expand "movstricthi" [(set (strict_low_part (match_operand:HI 0 "general_operand" "")) @@ -1030,10 +1177,13 @@ "* { rtx link; - if (operands[1] == const0_rtx && REG_P (operands[0])) + if ((ix86_cpu != PROCESSOR_K6 || optimize_size) + && operands[1] == const0_rtx && REG_P (operands[0])) return AS2 (xor%W0,%0,%0); if (operands[1] == const1_rtx + /* PPRO and K6 prefer mov to inc to reduce dependencies. */ + && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO) && (link = find_reg_note (insn, REG_WAS_0, 0)) /* Make sure the insn that stored the 0 is still present. */ && ! INSN_DELETED_P (XEXP (link, 0)) @@ -1046,7 +1196,8 @@ return AS1 (inc%W0,%0); return AS2 (mov%W0,%1,%0); -}") +}" + [(set_attr "type" "integer,memory")]) ;; emit_push_insn when it calls move_by_pieces ;; requires an insn to "push a byte". @@ -1096,10 +1247,12 @@ "* { rtx link; - if (operands[1] == const0_rtx && REG_P (operands[0])) - return AS2 (xor%L0,%k0,%k0); + + /* movb $0,reg8 is 2 bytes, the same as xorl reg8,reg8. + It is at least as fast as xor on any processor except a Pentium. */ if (operands[1] == const1_rtx + && ix86_cpu == PROCESSOR_PENTIUM && (link = find_reg_note (insn, REG_WAS_0, 0)) /* Make sure the insn that stored the 0 is still present. */ && ! INSN_DELETED_P (XEXP (link, 0)) @@ -1156,10 +1309,11 @@ "* { rtx link; - if (operands[1] == const0_rtx && REG_P (operands[0])) - return AS2 (xor%B0,%0,%0); + + /* movb $0,reg8 is 2 bytes, the same as xorl reg8,reg8. */ if (operands[1] == const1_rtx + && ix86_cpu == PROCESSOR_PENTIUM && ! 
NON_QI_REG_P (operands[0]) && (link = find_reg_note (insn, REG_WAS_0, 0)) /* Make sure the insn that stored the 0 is still present. */ @@ -1624,7 +1778,9 @@ (match_operand:DI 1 "general_operand" "riF,m"))] "(!TARGET_MOVE || GET_CODE (operands[0]) != MEM) || (GET_CODE (operands[1]) != MEM)" - "* return output_move_double (operands);") + "* return output_move_double (operands);" + [(set_attr "type" "integer,memory") + (set_attr "memory" "*,load")]) ;;- conversion instructions @@ -2625,7 +2781,7 @@ ;;- add instructions -(define_insn "addsidi3_1" +(define_insn "*addsidi3_1" [(set (match_operand:DI 0 "nonimmediate_operand" "=&r,r,o,!&r,!r,o,!o") (plus:DI (match_operand:DI 1 "general_operand" "0,0,0,o,riF,riF,o") (zero_extend:DI (match_operand:SI 2 "general_operand" "o,ri,ri,roi,roi,ri,ri")))) @@ -2670,8 +2826,11 @@ output_asm_insn (AS2 (add%L0,%2,%0), low); output_asm_insn (AS2 (adc%L0,%2,%0), high); + cc_status.value1 = high[0]; + cc_status.flags = CC_NO_OVERFLOW; RET; -}") +}" + [(set_attr "type" "binary")]) (define_insn "addsidi3_2" [(set (match_operand:DI 0 "nonimmediate_operand" "=&r,r,o,&r,!&r,&r,o,o,!o") @@ -2748,8 +2907,11 @@ output_asm_insn (AS2 (add%L0,%2,%0), low); output_asm_insn (AS2 (adc%L0,%2,%0), high); + cc_status.value1 = high[0]; + cc_status.flags = CC_NO_OVERFLOW; RET; -}") +}" + [(set_attr "type" "binary")]) (define_insn "adddi3" [(set (match_operand:DI 0 "general_operand" "=&r,&ro,!r,o,!&r,!o,!o") @@ -2798,6 +2960,9 @@ } } + cc_status.value1 = high[0]; + cc_status.flags = CC_NO_OVERFLOW; + if (GET_CODE (operands[3]) == REG && GET_CODE (operands[2]) != REG) { xops[0] = high[0]; @@ -2822,7 +2987,8 @@ output_asm_insn (AS2 (add%L0,%2,%0), high); RET; -}") +}" + [(set_attr "type" "binary")]) ;; On a 486, it is faster to do movl/addl than to do a single leal if ;; operands[1] and operands[2] are both registers. @@ -2887,7 +3053,8 @@ } return AS2 (add%L0,%2,%0); -}") +}" + [(set_attr "type" "binary")]) ;; addsi3 is faster, so put this after. 
@@ -2916,7 +3083,8 @@ CC_STATUS_INIT; return AS2 (lea%L0,%a1,%0); -}") +}" + [(set_attr "type" "lea")]) ;; ??? `lea' here, for three operand add? If leaw is used, only %bx, ;; %si and %di can appear in SET_SRC, and output_asm_insn might not be @@ -2986,7 +3154,8 @@ return AS1 (dec%W0,%0); return AS2 (add%W0,%2,%0); -}") +}" + [(set_attr "type" "binary")]) (define_expand "addqi3" [(set (match_operand:QI 0 "general_operand" "") @@ -3011,7 +3180,8 @@ return AS1 (dec%B0,%0); return AS2 (add%B0,%2,%0); -}") +}" + [(set_attr "type" "binary")]) ;Lennart Augustsson <augustss@cs.chalmers.se> ;says this pattern just makes slower code: @@ -3108,8 +3278,12 @@ output_asm_insn (AS2 (sub%L0,%2,%0), low); output_asm_insn (AS2 (sbb%L0,%2,%0), high); + cc_status.value1 = high[0]; + cc_status.flags = CC_NO_OVERFLOW; + RET; -}") +}" + [(set_attr "type" "binary")]) (define_insn "subdi3" [(set (match_operand:DI 0 "general_operand" "=&r,&ro,o,o,!&r,!o") @@ -3152,6 +3326,9 @@ } } + cc_status.value1 = high[0]; + cc_status.flags = CC_NO_OVERFLOW; + if (GET_CODE (operands[3]) == REG) { xops[0] = high[0]; @@ -3173,10 +3350,12 @@ } else - output_asm_insn (AS2 (sub%L0,%2,%0), high); + output_asm_insn (AS2 (sub%L0,%2,%0), high); + RET; -}") +}" + [(set_attr "type" "binary")]) (define_expand "subsi3" [(set (match_operand:SI 0 "nonimmediate_operand" "") @@ -3190,7 +3369,8 @@ (minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") (match_operand:SI 2 "general_operand" "ri,rm")))] "ix86_binary_operator_ok (MINUS, SImode, operands)" - "* return AS2 (sub%L0,%2,%0);") + "* return AS2 (sub%L0,%2,%0);" + [(set_attr "type" "binary")]) (define_expand "subhi3" [(set (match_operand:HI 0 "general_operand" "") @@ -3215,7 +3395,8 @@ return AS2 (sub%L0,%k2,%k0); } return AS2 (sub%W0,%2,%0); -}") +}" + [(set_attr "type" "binary")]) (define_expand "subqi3" [(set (match_operand:QI 0 "general_operand" "") @@ -3229,7 +3410,8 @@ (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0") (match_operand:QI 2 
"general_operand" "qn,qmn")))] "ix86_binary_operator_ok (MINUS, QImode, operands)" - "* return AS2 (sub%B0,%2,%0);") + "* return AS2 (sub%B0,%2,%0);" + [(set_attr "type" "binary")]) ;; The patterns that match these are at the end of this file. @@ -3655,7 +3837,8 @@ word_zero_and_operation: } return AS2 (and%L0,%2,%0); -}") +}" + [(set_attr "type" "binary")]) (define_insn "andhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") @@ -3733,14 +3916,16 @@ word_zero_and_operation: } return AS2 (and%W0,%2,%0); -}") +}" + [(set_attr "type" "binary")]) (define_insn "andqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") (match_operand:QI 2 "general_operand" "qn,qmn")))] "" - "* return AS2 (and%B0,%2,%0);") + "* return AS2 (and%B0,%2,%0);" + [(set_attr "type" "binary")]) /* I am nervous about these two.. add them later.. ;I presume this means that we have something in say op0= eax which is small @@ -3856,7 +4041,8 @@ byte_or_operation: } return AS2 (or%L0,%2,%0); -}") +}" + [(set_attr "type" "binary")]) (define_insn "iorhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") @@ -3940,14 +4126,16 @@ byte_or_operation: } return AS2 (or%W0,%2,%0); -}") +}" + [(set_attr "type" "binary")]) (define_insn "iorqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0") (match_operand:QI 2 "general_operand" "qn,qmn")))] "" - "* return AS2 (or%B0,%2,%0);") + "* return AS2 (or%B0,%2,%0);" + [(set_attr "type" "binary")]) ;;- xor instructions @@ -4035,7 +4223,8 @@ byte_xor_operation: } return AS2 (xor%L0,%2,%0); -}") +}" + [(set_attr "type" "binary")]) (define_insn "xorhi3" [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r") @@ -4096,115 +4285,55 @@ byte_xor_operation: } return AS2 (xor%W0,%2,%0); -}") +}" + [(set_attr "type" "binary")]) (define_insn "xorqi3" [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q") (xor:QI 
(match_operand:QI 1 "nonimmediate_operand" "%0,0") (match_operand:QI 2 "general_operand" "qn,qm")))] "" - "* return AS2 (xor%B0,%2,%0);") + "* return AS2 (xor%B0,%2,%0);" + [(set_attr "type" "binary")]) ;; logical operations for DImode - (define_insn "anddi3" - [(set (match_operand:DI 0 "general_operand" "=&r,&ro,!r,o,!&r,!o,!o") - (and:DI (match_operand:DI 1 "general_operand" "%0,0,0,0iF,or,riF,o") - (match_operand:DI 2 "general_operand" "o,riF,0,or,or,oriF,o"))) - (clobber (match_scratch:SI 3 "=X,X,X,&r,X,&r,&r"))] + [(set (match_operand:DI 0 "general_operand" "=&r,&ro") + (and:DI (match_operand:DI 1 "general_operand" "0,0") + (match_operand:DI 2 "general_operand" "oriF,riF")))] "" - "#") + "#" + [(set_attr "type" "binary")]) + (define_insn "iordi3" - [(set (match_operand:DI 0 "general_operand" "=&r,&ro,!r,o,!&r,!o,!o") - (ior:DI (match_operand:DI 1 "general_operand" "%0,0,0,0iF,or,riF,o") - (match_operand:DI 2 "general_operand" "o,riF,0,or,or,oriF,o"))) - (clobber (match_scratch:SI 3 "=X,X,X,&r,X,&r,&r"))] + [(set (match_operand:DI 0 "general_operand" "=&r,&ro") + (ior:DI (match_operand:DI 1 "general_operand" "0,0") + (match_operand:DI 2 "general_operand" "oriF,riF")))] "" - "#") - + "#" + [(set_attr "type" "binary")]) + (define_insn "xordi3" - [(set (match_operand:DI 0 "general_operand" "=&r,&ro,!r,o,!&r,!o,!o") - (xor:DI (match_operand:DI 1 "general_operand" "%0,0,0,0iF,or,riF,o") - (match_operand:DI 2 "general_operand" "o,riF,0,or,or,oriF,o"))) - (clobber (match_scratch:SI 3 "=X,X,X,&r,X,&r,&r"))] + [(set (match_operand:DI 0 "general_operand" "=&r,&ro") + (xor:DI (match_operand:DI 1 "general_operand" "0,0") + (match_operand:DI 2 "general_operand" "oriF,riF")))] "" - "#") + "#" + [(set_attr "type" "binary")]) (define_split - [(set (match_operand:DI 0 "general_operand" "=&r,&ro,!r,o,!&r,!o,!o") - (match_operator:DI 4 "ix86_logical_operator" - [(match_operand:DI 1 "general_operand" "%0,0,0,0iF,or,riF,o") - (match_operand:DI 2 "general_operand" 
"o,riF,0,or,or,oriF,o")])) - (clobber (match_scratch:SI 3 "=X,X,X,&r,X,&r,&r"))] - "reload_completed" - [(const_int 0)] - " -{ - rtx low[3], high[3], xops[7], temp; - rtx (*genfunc)() = (GET_CODE (operands[4]) == AND ? gen_andsi3 - : GET_CODE (operands[4]) == IOR ? gen_iorsi3 - : GET_CODE (operands[4]) == XOR ? gen_xorsi3 - : 0); - - if (rtx_equal_p (operands[0], operands[2])) - { - temp = operands[1]; - operands[1] = operands[2]; - operands[2] = temp; - } - - split_di (operands, 3, low, high); - if (!rtx_equal_p (operands[0], operands[1])) - { - xops[0] = high[0]; - xops[1] = low[0]; - xops[2] = high[1]; - xops[3] = low[1]; - - if (GET_CODE (operands[0]) != MEM) - { - emit_insn (gen_movsi (xops[1], xops[3])); - emit_insn (gen_movsi (xops[0], xops[2])); - } - else - { - xops[4] = high[2]; - xops[5] = low[2]; - xops[6] = operands[3]; - emit_insn (gen_movsi (xops[6], xops[3])); - emit_insn ((*genfunc) (xops[6], xops[6], xops[5])); - emit_insn (gen_movsi (xops[1], xops[6])); - emit_insn (gen_movsi (xops[6], xops[2])); - emit_insn ((*genfunc) (xops[6], xops[6], xops[4])); - emit_insn (gen_movsi (xops[0], xops[6])); - DONE; - } - } - - if (GET_CODE (operands[3]) == REG && GET_CODE (operands[2]) != REG) - { - xops[0] = high[0]; - xops[1] = low[0]; - xops[2] = high[2]; - xops[3] = low[2]; - xops[4] = operands[3]; - - emit_insn (gen_movsi (xops[4], xops[3])); - emit_insn ((*genfunc) (xops[1], xops[1], xops[4])); - emit_insn (gen_movsi (xops[4], xops[2])); - emit_insn ((*genfunc) (xops[0], xops[0], xops[4])); - } - - else - { - emit_insn ((*genfunc) (low[0], low[0], low[2])); - emit_insn ((*genfunc) (high[0], high[0], high[2])); - } - - DONE; -}") + [(set (match_operand:DI 0 "general_operand" "") + (match_operator:DI 3 "ix86_logical_operator" + [(match_operand:DI 1 "general_operand" "") + (match_operand:DI 2 "general_operand" "")]))] + "" + [(set (match_dup 4) (match_op_dup:SI 3 [(match_dup 6) (match_dup 8)])) + (set (match_dup 5) (match_op_dup:SI 3 [(match_dup 7) 
(match_dup 9)]))] + "split_di (&operands[0], 1, &operands[4], &operands[5]); + split_di (&operands[1], 1, &operands[6], &operands[7]); + split_di (&operands[2], 1, &operands[8], &operands[9]);") ;;- negation instructions @@ -4512,7 +4641,7 @@ byte_xor_operation: "* { rtx xops[4], low[1], high[1]; - static HOST_WIDE_INT ashldi_label_number; + static int ashldi_label_number; CC_STATUS_INIT; @@ -4700,7 +4829,7 @@ byte_xor_operation: "* { rtx xops[4], low[1], high[1]; - static HOST_WIDE_INT ashrdi_label_number; + static int ashrdi_label_number; CC_STATUS_INIT; @@ -4855,7 +4984,7 @@ byte_xor_operation: "* { rtx xops[4], low[1], high[1]; - static HOST_WIDE_INT lshrdi_label_number; + static int lshrdi_label_number; CC_STATUS_INIT; @@ -5228,10 +5357,6 @@ byte_xor_operation: ;; For all sCOND expanders, also expand the compare or test insn that ;; generates cc0. Generate an equality comparison if `seq' or `sne'. -;; The 386 sCOND opcodes can write to memory. But a gcc sCOND insn may -;; not have any input reloads. A MEM write might need an input reload -;; for the address of the MEM. So don't allow MEM as the SET_DEST. 
- (define_expand "seq" [(match_dup 1) (set (match_operand:QI 0 "register_operand" "") @@ -5246,18 +5371,6 @@ byte_xor_operation: operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1); }") -(define_insn "" - [(set (match_operand:QI 0 "register_operand" "=q") - (eq:QI (cc0) (const_int 0)))] - "" - "* -{ - if (cc_prev_status.flags & CC_Z_IN_NOT_C) - return AS1 (setnb,%0); - else - return AS1 (sete,%0); -}") - (define_expand "sne" [(match_dup 1) (set (match_operand:QI 0 "register_operand" "") @@ -5272,19 +5385,6 @@ byte_xor_operation: operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1); }") -(define_insn "" - [(set (match_operand:QI 0 "register_operand" "=q") - (ne:QI (cc0) (const_int 0)))] - "" - "* -{ - if (cc_prev_status.flags & CC_Z_IN_NOT_C) - return AS1 (setb,%0); - else - return AS1 (setne,%0); -} -") - (define_expand "sgt" [(match_dup 1) (set (match_operand:QI 0 "register_operand" "") @@ -5292,19 +5392,6 @@ byte_xor_operation: "" "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") -(define_insn "" - [(set (match_operand:QI 0 "register_operand" "=q") - (gt:QI (cc0) (const_int 0)))] - "" - "* -{ - if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387) - && ! 
(cc_prev_status.flags & CC_FCOMI)) - return AS1 (sete,%0); - - OUTPUT_JUMP (\"setg %0\", \"seta %0\", NULL_PTR); -}") - (define_expand "sgtu" [(match_dup 1) (set (match_operand:QI 0 "register_operand" "") @@ -5312,12 +5399,6 @@ byte_xor_operation: "" "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") -(define_insn "" - [(set (match_operand:QI 0 "register_operand" "=q") - (gtu:QI (cc0) (const_int 0)))] - "" - "* return \"seta %0\"; ") - (define_expand "slt" [(match_dup 1) (set (match_operand:QI 0 "register_operand" "") @@ -5325,19 +5406,6 @@ byte_xor_operation: "" "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") -(define_insn "" - [(set (match_operand:QI 0 "register_operand" "=q") - (lt:QI (cc0) (const_int 0)))] - "" - "* -{ - if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387) - && ! (cc_prev_status.flags & CC_FCOMI)) - return AS1 (sete,%0); - - OUTPUT_JUMP (\"setl %0\", \"setb %0\", \"sets %0\"); -}") - (define_expand "sltu" [(match_dup 1) (set (match_operand:QI 0 "register_operand" "") @@ -5345,12 +5413,6 @@ byte_xor_operation: "" "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") -(define_insn "" - [(set (match_operand:QI 0 "register_operand" "=q") - (ltu:QI (cc0) (const_int 0)))] - "" - "* return \"setb %0\"; ") - (define_expand "sge" [(match_dup 1) (set (match_operand:QI 0 "register_operand" "") @@ -5358,19 +5420,6 @@ byte_xor_operation: "" "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") -(define_insn "" - [(set (match_operand:QI 0 "register_operand" "=q") - (ge:QI (cc0) (const_int 0)))] - "" - "* -{ - if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387) - && ! 
(cc_prev_status.flags & CC_FCOMI)) - return AS1 (sete,%0); - - OUTPUT_JUMP (\"setge %0\", \"setae %0\", \"setns %0\"); -}") - (define_expand "sgeu" [(match_dup 1) (set (match_operand:QI 0 "register_operand" "") @@ -5378,12 +5427,6 @@ byte_xor_operation: "" "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") -(define_insn "" - [(set (match_operand:QI 0 "register_operand" "=q") - (geu:QI (cc0) (const_int 0)))] - "" - "* return \"setae %0\"; ") - (define_expand "sle" [(match_dup 1) (set (match_operand:QI 0 "register_operand" "") @@ -5391,19 +5434,6 @@ byte_xor_operation: "" "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") -(define_insn "" - [(set (match_operand:QI 0 "register_operand" "=q") - (le:QI (cc0) (const_int 0)))] - "" - "* -{ - if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387) - && ! (cc_prev_status.flags & CC_FCOMI)) - return AS1 (setb,%0); - - OUTPUT_JUMP (\"setle %0\", \"setbe %0\", NULL_PTR); -}") - (define_expand "sleu" [(match_dup 1) (set (match_operand:QI 0 "register_operand" "") @@ -5411,11 +5441,61 @@ byte_xor_operation: "" "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);") -(define_insn "" - [(set (match_operand:QI 0 "register_operand" "=q") - (leu:QI (cc0) (const_int 0)))] - "" - "* return \"setbe %0\"; ") +;; The 386 sCOND opcodes can write to memory. But a gcc sCOND insn may +;; not have any input reloads. A MEM write might need an input reload +;; for the address of the MEM. So don't allow MEM as the SET_DEST. 
+ +(define_insn "*setcc" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm") + (match_operator:QI 1 "comparison_operator" [(cc0) (const_int 0)]))] + "reload_completed || register_operand (operands[0], QImode)" + "* +{ + enum rtx_code code = GET_CODE (operands[1]); + if (cc_prev_status.flags & CC_TEST_AX) + { + int eq; + HOST_WIDE_INT c; + operands[2] = gen_rtx_REG (SImode, 0); + switch (code) + { + case EQ: + c = 0x4000; + eq = 0; + break; + case NE: + c = 0x4000; + eq = 1; + break; + case GT: + c = 0x4100; + eq = 1; + break; + case LT: + c = 0x100; + eq = 0; + break; + case GE: + c = 0x100; + eq = 1; + break; + case LE: + c = 0x4100; + eq = 0; + break; + default: + abort (); + } + operands[3] = GEN_INT (c); + output_asm_insn (AS2 (testl,%3,%2), operands); + return eq ? AS1 (sete,%0) : AS1 (setne, %0); + } + + if ((cc_status.flags & CC_NO_OVERFLOW) && (code == LE || code == GT)) + return (char *)0; + return AS1(set%D1,%0); +}") + ;; Basic conditional jump instructions. ;; We ignore the overflow flag for signed branch instructions. 
@@ -5440,29 +5520,6 @@ byte_xor_operation:
   operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);
 }")
 
-(define_insn ""
-  [(set (pc)
-        (if_then_else (eq (cc0)
-                          (const_int 0))
-                      (label_ref (match_operand 0 "" ""))
-                      (pc)))]
-  ""
-  "*
-{
-  if (cc_prev_status.flags & CC_Z_IN_NOT_C)
-    return \"jnc %l0\";
-  else
-  if (cc_prev_status.flags & CC_TEST_AX)
-    {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x4000);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (jne,%l0);
-    }
-
-  return \"je %l0\";
-}")
-
 (define_expand "bne"
   [(match_dup 1)
    (set (pc)
@@ -5480,28 +5537,6 @@ byte_xor_operation:
   operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);
 }")
 
-(define_insn ""
-  [(set (pc)
-        (if_then_else (ne (cc0)
-                          (const_int 0))
-                      (label_ref (match_operand 0 "" ""))
-                      (pc)))]
-  ""
-  "*
-{
-  if (cc_prev_status.flags & CC_Z_IN_NOT_C)
-    return \"jc %l0\";
-  else
-  if (cc_prev_status.flags & CC_TEST_AX)
-    {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x4000);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (je,%l0);
-    }
-
-  return \"jne %l0\";
-}")
 
 (define_expand "bgt"
   [(match_dup 1)
@@ -5513,29 +5548,6 @@ byte_xor_operation:
   ""
   "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
 
-(define_insn ""
-  [(set (pc)
-        (if_then_else (gt (cc0)
-                          (const_int 0))
-                      (label_ref (match_operand 0 "" ""))
-                      (pc)))]
-  ""
-  "*
-{
-  if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
-      && ! (cc_prev_status.flags & CC_FCOMI))
-    return AS1 (je,%l0);
-
-  if (cc_prev_status.flags & CC_TEST_AX)
-    {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x4100);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (je,%l0);
-    }
-  OUTPUT_JUMP (\"jg %l0\", \"ja %l0\", NULL_PTR);
-}")
-
 (define_expand "bgtu"
   [(match_dup 1)
    (set (pc)
@@ -5546,15 +5558,6 @@ byte_xor_operation:
   ""
   "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
 
-(define_insn ""
-  [(set (pc)
-        (if_then_else (gtu (cc0)
-                           (const_int 0))
-                      (label_ref (match_operand 0 "" ""))
-                      (pc)))]
-  ""
-  "ja %l0")
-
 (define_expand "blt"
   [(match_dup 1)
    (set (pc)
@@ -5565,28 +5568,6 @@ byte_xor_operation:
   ""
   "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
 
-(define_insn ""
-  [(set (pc)
-        (if_then_else (lt (cc0)
-                          (const_int 0))
-                      (label_ref (match_operand 0 "" ""))
-                      (pc)))]
-  ""
-  "*
-{
-  if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
-      && ! (cc_prev_status.flags & CC_FCOMI))
-    return AS1 (je,%l0);
-
-  if (cc_prev_status.flags & CC_TEST_AX)
-    {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x100);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (jne,%l0);
-    }
-  OUTPUT_JUMP (\"jl %l0\", \"jb %l0\", \"js %l0\");
-}")
 
 (define_expand "bltu"
   [(match_dup 1)
@@ -5598,15 +5579,6 @@ byte_xor_operation:
   ""
   "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
 
-(define_insn ""
-  [(set (pc)
-        (if_then_else (ltu (cc0)
-                           (const_int 0))
-                      (label_ref (match_operand 0 "" ""))
-                      (pc)))]
-  ""
-  "jb %l0")
-
 (define_expand "bge"
   [(match_dup 1)
    (set (pc)
@@ -5617,28 +5589,6 @@ byte_xor_operation:
   ""
   "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
 
-(define_insn ""
-  [(set (pc)
-        (if_then_else (ge (cc0)
-                          (const_int 0))
-                      (label_ref (match_operand 0 "" ""))
-                      (pc)))]
-  ""
-  "*
-{
-  if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
-      && ! (cc_prev_status.flags & CC_FCOMI))
-    return AS1 (je,%l0);
-  if (cc_prev_status.flags & CC_TEST_AX)
-    {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x100);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (je,%l0);
-    }
-  OUTPUT_JUMP (\"jge %l0\", \"jae %l0\", \"jns %l0\");
-}")
-
 (define_expand "bgeu"
   [(match_dup 1)
    (set (pc)
@@ -5649,15 +5599,6 @@ byte_xor_operation:
   ""
   "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
 
-(define_insn ""
-  [(set (pc)
-        (if_then_else (geu (cc0)
-                           (const_int 0))
-                      (label_ref (match_operand 0 "" ""))
-                      (pc)))]
-  ""
-  "jae %l0")
-
 (define_expand "ble"
   [(match_dup 1)
    (set (pc)
@@ -5668,29 +5609,6 @@ byte_xor_operation:
   ""
   "operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
 
-(define_insn ""
-  [(set (pc)
-        (if_then_else (le (cc0)
-                          (const_int 0))
-                      (label_ref (match_operand 0 "" ""))
-                      (pc)))]
-  ""
-  "*
-{
-  if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
-      && ! (cc_prev_status.flags & CC_FCOMI))
-    return AS1 (jb,%l0);
-  if (cc_prev_status.flags & CC_TEST_AX)
-    {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x4100);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (jne,%l0);
-    }
-
-  OUTPUT_JUMP (\"jle %l0\", \"jbe %l0\", NULL_PTR);
-}")
-
 (define_expand "bleu"
   [(match_dup 1)
    (set (pc)
@@ -5703,184 +5621,111 @@ byte_xor_operation:
 
 (define_insn ""
   [(set (pc)
-        (if_then_else (leu (cc0)
-                           (const_int 0))
-                      (label_ref (match_operand 0 "" ""))
+        (if_then_else (match_operator 0 "comparison_operator"
+                                      [(cc0) (const_int 0)])
+                      (label_ref (match_operand 1 "" ""))
                       (pc)))]
   ""
-  "jbe %l0")
-
-;; Negated conditional jump instructions.
-
-(define_insn ""
-  [(set (pc)
-        (if_then_else (eq (cc0)
-                          (const_int 0))
-                      (pc)
-                      (label_ref (match_operand 0 "" ""))))]
-  ""
   "*
 {
-  if (cc_prev_status.flags & CC_Z_IN_NOT_C)
-    return \"jc %l0\";
-  else
+  enum rtx_code code = GET_CODE (operands[0]); /* Branch to label operand 1 when comparison operand 0 holds.  */
   if (cc_prev_status.flags & CC_TEST_AX)
     {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x4000);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (je,%l0);
-    }
-  return \"jne %l0\";
-}")
-
-(define_insn ""
-  [(set (pc)
-        (if_then_else (ne (cc0)
-                          (const_int 0))
-                      (pc)
-                      (label_ref (match_operand 0 "" ""))))]
-  ""
-  "*
-{
-  if (cc_prev_status.flags & CC_Z_IN_NOT_C)
-    return \"jnc %l0\";
-  else
-  if (cc_prev_status.flags & CC_TEST_AX)
-    {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x4000);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (jne,%l0);
-    }
-  return \"je %l0\";
-}")
-
-(define_insn ""
-  [(set (pc)
-        (if_then_else (gt (cc0)
-                          (const_int 0))
-                      (pc)
-                      (label_ref (match_operand 0 "" ""))))]
-  ""
-  "*
-{
-  if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
-      && ! (cc_prev_status.flags & CC_FCOMI))
-    return AS1 (jne,%l0);
-  if (cc_prev_status.flags & CC_TEST_AX)
-    {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x4100);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (jne,%l0);
-    }
-  OUTPUT_JUMP (\"jle %l0\", \"jbe %l0\", NULL_PTR);
-}")
-
-(define_insn ""
-  [(set (pc)
-        (if_then_else (gtu (cc0)
-                           (const_int 0))
-                      (pc)
-                      (label_ref (match_operand 0 "" ""))))]
-  ""
-  "jbe %l0")
-
-(define_insn ""
-  [(set (pc)
-        (if_then_else (lt (cc0)
-                          (const_int 0))
-                      (pc)
-                      (label_ref (match_operand 0 "" ""))))]
-  ""
-  "*
-{
-  if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
-      && ! (cc_prev_status.flags & CC_FCOMI))
-    return AS1 (jne,%l0);
-  if (cc_prev_status.flags & CC_TEST_AX)
-    {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x100);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (je,%l0);
+      int eq; /* Nonzero selects je, zero selects jne.  */
+      HOST_WIDE_INT c; /* Mask passed to testl as operand 3.  */
+      operands[2] = gen_rtx_REG (SImode, 0);
+      switch (code)
+        {
+        case EQ:
+          c = 0x4000;
+          eq = 0;
+          break;
+        case NE:
+          c = 0x4000;
+          eq = 1;
+          break;
+        case GT:
+          c = 0x4100;
+          eq = 1;
+          break;
+        case LT:
+          c = 0x100;
+          eq = 0;
+          break;
+        case GE:
+          c = 0x100;
+          eq = 1;
+          break;
+        case LE:
+          c = 0x4100;
+          eq = 0;
+          break;
+        default: /* Only the six signed codes above are handled on this path.  */
+          abort ();
+        }
+      operands[3] = GEN_INT (c);
+      output_asm_insn (AS2 (testl,%3,%2), operands);
+      return eq ? AS1 (je,%l1) : AS1 (jne, %l1);
     }
+  if ((cc_status.flags & CC_NO_OVERFLOW) && (code == LE || code == GT)) /* NOTE(review): cc_status here, cc_prev_status above -- confirm.  */
+    return (char *)0;
 
-  OUTPUT_JUMP (\"jge %l0\", \"jae %l0\", \"jns %l0\");
+  return AS1(j%D0,%l1); /* Presumably 'D' prints the condition suffix for operand 0.  */
 }")
 
 (define_insn ""
   [(set (pc)
-        (if_then_else (ltu (cc0)
-                           (const_int 0))
-                      (pc)
-                      (label_ref (match_operand 0 "" ""))))]
-  ""
-  "jae %l0")
-
-(define_insn ""
-  [(set (pc)
-        (if_then_else (ge (cc0)
-                          (const_int 0))
+        (if_then_else (match_operator 0 "comparison_operator"
+                                      [(cc0) (const_int 0)])
                       (pc)
-                      (label_ref (match_operand 0 "" ""))))]
+                      (label_ref (match_operand 1 "" ""))))]
   ""
   "*
 {
-  if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
-      && ! (cc_prev_status.flags & CC_FCOMI))
-    return AS1 (jne,%l0);
+  enum rtx_code code = GET_CODE (operands[0]); /* Negated form: branch to label operand 1 when operand 0 fails.  */
   if (cc_prev_status.flags & CC_TEST_AX)
     {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x100);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (jne,%l0);
+      int eq; /* Nonzero selects je, zero selects jne; inverted relative to the non-negated pattern.  */
+      HOST_WIDE_INT c; /* Mask passed to testl as operand 3.  */
+      operands[2] = gen_rtx_REG (SImode, 0);
+      switch (code)
+        {
+        case EQ:
+          c = 0x4000;
+          eq = 1;
+          break;
+        case NE:
+          c = 0x4000;
+          eq = 0;
+          break;
+        case GT:
+          c = 0x4100;
+          eq = 0;
+          break;
+        case LT:
+          c = 0x100;
+          eq = 1;
+          break;
+        case GE:
+          c = 0x100;
+          eq = 0;
+          break;
+        case LE:
+          c = 0x4100;
+          eq = 1;
+          break;
+        default: /* Only the six signed codes above are handled on this path.  */
+          abort ();
+        }
+      operands[3] = GEN_INT (c);
+      output_asm_insn (AS2 (testl,%3,%2), operands);
+      return eq ? AS1 (je,%l1) : AS1 (jne, %l1);
     }
-  OUTPUT_JUMP (\"jl %l0\", \"jb %l0\", \"js %l0\");
-}")
-
-(define_insn ""
-  [(set (pc)
-        (if_then_else (geu (cc0)
-                           (const_int 0))
-                      (pc)
-                      (label_ref (match_operand 0 "" ""))))]
-  ""
-  "jb %l0")
-
-(define_insn ""
-  [(set (pc)
-        (if_then_else (le (cc0)
-                          (const_int 0))
-                      (pc)
-                      (label_ref (match_operand 0 "" ""))))]
-  ""
-  "*
-{
-  if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
-      && ! (cc_prev_status.flags & CC_FCOMI))
-    return AS1 (jae,%l0);
+  if ((cc_status.flags & CC_NO_OVERFLOW) && (code == LE || code == GT)) /* NOTE(review): cc_status here, cc_prev_status above -- confirm.  */
+    return (char *)0;
 
-  if (cc_prev_status.flags & CC_TEST_AX)
-    {
-      operands[1] = gen_rtx_REG (SImode, 0);
-      operands[2] = GEN_INT (0x4100);
-      output_asm_insn (AS2 (testl,%2,%1), operands);
-      return AS1 (je,%l0);
-    }
-  OUTPUT_JUMP (\"jg %l0\", \"ja %l0\", NULL_PTR);
+  return AS1(j%d0,%l1); /* Presumably 'd' prints the reversed condition suffix for operand 0.  */
 }")
-
-(define_insn ""
-  [(set (pc)
-        (if_then_else (leu (cc0)
-                           (const_int 0))
-                      (pc)
-                      (label_ref (match_operand 0 "" ""))))]
-  ""
-  "ja %l0")
 
 ;; Unconditional and other jump instructions
 
@@ -5919,7 +5764,7 @@ byte_xor_operation:
 (define_insn ""
   [(set (pc)
         (if_then_else (match_operator 0 "arithmetic_comparison_operator"
-                       [(plus:SI (match_operand:SI 1 "nonimmediate_operand" "+r,m")
+                       [(plus:SI (match_operand:SI 1 "nonimmediate_operand" "+c*r,m")
                                  (match_operand:SI 2 "general_operand" "rmi,ri"))
                         (const_int 0)])
                       (label_ref (match_operand 3 "" ""))
@@ -5931,6 +5776,11 @@ byte_xor_operation:
   "*
 {
   CC_STATUS_INIT;
+
+  if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 2 && /* Check the counter (operand 1), not the addend; register 2 is presumably %ecx per the 'c' constraint.  */
+      operands[2] == constm1_rtx && ix86_cpu == PROCESSOR_K6)
+    return \"loop %l3\";
+
   if (operands[2] == constm1_rtx)
     output_asm_insn (AS1 (dec%L1,%1), operands);
 
@@ -6682,7 +6532,7 @@ byte_xor_operation:
 ;; But strength reduction might offset the MEM expression.  So we let
 ;; reload put the address into %edi.
-(define_insn "" +(define_insn "*bzero" [(set (mem:BLK (match_operand:SI 0 "address_operand" "D")) (const_int 0)) (use (match_operand:SI 1 "const_int_operand" "n")) @@ -6698,17 +6548,35 @@ byte_xor_operation: output_asm_insn (\"cld\", operands); if (GET_CODE (operands[1]) == CONST_INT) { - if (INTVAL (operands[1]) & ~0x03) + unsigned int count = INTVAL (operands[1]) & 0xffffffff; + if (count & ~0x03) { - xops[0] = GEN_INT ((INTVAL (operands[1]) >> 2) & 0x3fffffff); + xops[0] = GEN_INT (count / 4); xops[1] = operands[4]; - output_asm_insn (AS2 (mov%L1,%0,%1), xops); + /* K6: stos takes 1 cycle, rep stos takes 8 + %ecx cycles. + 80386: 4/5+5n (+2 for set of ecx) + 80486: 5/7+5n (+1 for set of ecx) + */ + if (count / 4 < ((int) ix86_cpu < (int)PROCESSOR_PENTIUM ? 4 : 6)) + { + do #ifdef INTEL_SYNTAX - output_asm_insn (\"rep stosd\", xops); + output_asm_insn (\"stosd\", xops); #else - output_asm_insn (\"rep\;stosl\", xops); + output_asm_insn (\"stosl\", xops); #endif + while ((count -= 4) > 3); + } + else + { + output_asm_insn (AS2 (mov%L1,%0,%1), xops); +#ifdef INTEL_SYNTAX + output_asm_insn (\"rep stosd\", xops); +#else + output_asm_insn (\"rep\;stosl\", xops); +#endif + } } if (INTVAL (operands[1]) & 0x02) output_asm_insn (\"stosw\", operands); |