aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John Carr <jfc@gcc.gnu.org> 1998-09-06 05:52:08 +0000
committer John Carr <jfc@gcc.gnu.org> 1998-09-06 05:52:08 +0000
commit a269a03c80a4920f09cf0c065a636f6cedf92490 (patch)
tree c62f6e2a1d92a133be181e52cf0f6be0b8b402b4
parent f429f2c549af467352419e0468efe172f014c289 (diff)
download gcc-a269a03c80a4920f09cf0c065a636f6cedf92490.zip
gcc-a269a03c80a4920f09cf0c065a636f6cedf92490.tar.gz
gcc-a269a03c80a4920f09cf0c065a636f6cedf92490.tar.bz2
final.c (final): If a label is reached only from a single jump...
( * final.c (final): If a label is reached only from a single jump, call NOTICE_UPDATE_CC on the jump and its predecessor before emitting the insn after the label. * i386.h: Add AMD K6 support. Change TARGET_* macros to use table lookup. (INITIALIZE_TRAMPOLINE): Improve trampoline code. (ADJUST_COST): Change definition to call function in i386.c. (ISSUE_RATE): Define as 2 for anything newer than an 80486. * i386.c: Add AMD K6 support. Add constants for feature tests used by TARGET_* macros. (split_di): If before reload, call gen_lowpart and gen_highpart. (x86_adjust_cost): New function. (put_jump_code): New function. (print_operand): New codes 'D' and 'd'. * i386.md: New insn types. New insn attribute "memory". Redefine scheduling parameters to use new types and add AMD K6 support. Explicitly set type of most insns. (move insns): K6 prefers movl $0,reg to xorl reg,reg. Pentium Pro and K6 prefer movl $1,reg to incl reg. (adddi3, subdi3): Set cc_status. (DImode shift patterns): Change label counters from HOST_WIDE_INT to int; x86 can't have more than 2^31 DImode shifts per file. (setcc): Combine all setcc patterns. Allow writing memory. Combine all jump patterns using match_operator. (*bzero): Name pattern. Emit multiple stos instructions when that is faster than rep stos. (xordi3, anddi3, iordi3): Simplify DImode logical patterns and add define_split. * ch/Make-lang.in: Comment ^L characters. Sun make doesn't like them. From-SVN: r22292
-rw-r--r-- gcc/ch/Make-lang.in 14
-rw-r--r-- gcc/config/i386/i386.c 230
-rw-r--r-- gcc/config/i386/i386.h 152
-rw-r--r-- gcc/config/i386/i386.md 1078
4 files changed, 748 insertions, 726 deletions
diff --git a/gcc/ch/Make-lang.in b/gcc/ch/Make-lang.in
index 95632b0..2194741 100644
--- a/gcc/ch/Make-lang.in
+++ b/gcc/ch/Make-lang.in
@@ -34,7 +34,7 @@
# - making any compiler driver (eg: g++)
# - the compiler proper (eg: cc1plus)
# - define the names for selecting the language in LANGUAGES.
-
+#
# define version of GNUCHILL compiler. Note: maybe we have to change the
# mechanism
GNUCHILL_VERSION = 1.5.2
@@ -62,7 +62,7 @@ CHILL_FLAGS_TO_PASS = \
"CHILL_LIB=$(CHILL_LIB)" \
"CC=$(CC)" \
"GNUCHILL_VERSION=$(GNUCHILL_VERSION)"
-
+#
# Define the names for selecting languages in LANGUAGES.
CHILL: chill cc1chill chill-runtime
@@ -105,7 +105,7 @@ chill-runtime: stmp-headers $(GCC_PASSES)
cd ch/runtime; $(MAKE) $(FLAGS_TO_PASS) $(CHILL_FLAGS_TO_PASS) GCC_FOR_TARGET="$${thisdir1}/xgcc -B$${thisdir1}/" all ; \
else true; fi ;; \
esac
-
+#
# Build hooks:
CHILL.all.build: chill
@@ -123,7 +123,7 @@ chill.dvi: $(srcdir)/ch/chill.texi $(srcdir)/extend.texi $(srcdir)/invoke.texi $
TEXINPUTS=${texidir}:$(srcdir):$$TEXINPUTS tex chill.texi
# FIXME: Not sure languages should do this.
cp ch/chill.dvi chill.dvi
-
+#
# Install hooks:
# cc1chill is installed elsewhere as part of $(COMPILERS).
@@ -166,7 +166,7 @@ CHILL.install-man:
CHILL.uninstall:
-rm -rf $(bindir)/$(CHILL_INSTALL_NAME)
-rm -rf $(bindir)/$(CHILL_CROSS_NAME)
-
+#
# Clean hooks:
# A lot of the ancillary files are deleted by the main makefile.
# We just have to delete files specific to us.
@@ -183,7 +183,7 @@ CHILL.maintainer-clean:
-rm -f ch/chill.info* ch/chill.dvi ch/chill.??s ch/chill.*aux
# CYGNUS LOCAL: Delete locally created file.
-rm -f ch/hash.h
-
+#
# Stage hooks:
# The main makefile has already created stage?/ch.
@@ -195,7 +195,7 @@ CHILL.stage3:
-mv ch/*.o stage3/ch
CHILL.stage4:
-mv ch/*.o stage4/ch
-
+#
# Maintenance hooks:
# This target creates the files that can be rebuilt, but go in the
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9354e79..fc8edbd 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -100,8 +100,37 @@ struct processor_costs pentiumpro_cost = {
17 /* cost of a divide/mod */
};
+struct processor_costs k6_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 1, /* variable shift costs */
+ 1, /* constant shift costs */
+ 2, /* cost of starting a multiply */
+ 0, /* cost of multiply per each bit set */
+ 18 /* cost of a divide/mod */
+};
+
struct processor_costs *ix86_cost = &pentium_cost;
+/* Processor feature/optimization bitmasks. */
+#define m_386 (1<<PROCESSOR_I386)
+#define m_486 (1<<PROCESSOR_I486)
+#define m_PENT (1<<PROCESSOR_PENTIUM)
+#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
+#define m_K6 (1<<PROCESSOR_K6)
+
+const int x86_use_leave = m_386 | m_K6;
+const int x86_push_memory = m_386 | m_K6;
+const int x86_zero_extend_with_and = m_486 | m_PENT;
+const int x86_movx = m_386 | m_PPRO | m_K6;
+const int x86_double_with_add = ~m_386;
+const int x86_use_bit_test = m_386;
+const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO;
+const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
+const int x86_use_any_reg = m_486;
+const int x86_cmove = m_PPRO;
+const int x86_deep_branch = m_PPRO| m_K6;
+
#define AT_BP(mode) (gen_rtx_MEM ((mode), frame_pointer_rtx))
extern FILE *asm_out_file;
@@ -213,7 +242,8 @@ override_options ()
{PROCESSOR_I686_STRING, PROCESSOR_PENTIUMPRO, &pentiumpro_cost,
0, 0},
{PROCESSOR_PENTIUMPRO_STRING, PROCESSOR_PENTIUMPRO,
- &pentiumpro_cost, 0, 0}};
+ &pentiumpro_cost, 0, 0},
+ {PROCESSOR_K6_STRING, PROCESSOR_K6, &k6_cost, 0, 0}};
int ptt_size = sizeof (processor_target_table) / sizeof (struct ptt);
@@ -279,7 +309,7 @@ override_options ()
{
ix86_cpu = processor_target_table[j].processor;
ix86_cost = processor_target_table[j].cost;
- if (i > j && (int) ix86_arch >= (int) PROCESSOR_PENTIUMPRO)
+ if (i > j && (int) ix86_arch >= (int) PROCESSOR_K6)
error ("-mcpu=%s does not support -march=%s",
ix86_cpu_string, ix86_arch_string);
@@ -1586,6 +1616,7 @@ standard_80387_constant_p (x)
/* Note that on the 80387, other constants, such as pi,
are much slower to load as standard constants
than to load from doubles in memory! */
+ /* ??? Not true on K6: all constants are equal cost. */
#endif
return 0;
@@ -2933,8 +2964,8 @@ legitimize_address (x, oldx, mode)
&& (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
{
changed = 1;
- x = gen_rtx (MULT, Pmode, force_reg (Pmode, XEXP (x, 0)),
- GEN_INT (1 << log));
+ x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
+ GEN_INT (1 << log));
}
if (GET_CODE (x) == PLUS)
@@ -3185,6 +3216,114 @@ output_pic_addr_const (file, x, code)
}
}
+static void
+put_jump_code (code, reverse, file)
+ enum rtx_code code;
+ int reverse;
+ FILE *file;
+{
+ int flags = cc_prev_status.flags;
+ int ieee = (TARGET_IEEE_FP && (flags & CC_IN_80387));
+ const char *suffix;
+
+ if (flags & CC_Z_IN_NOT_C)
+ switch (code)
+ {
+ case EQ:
+ fputs (reverse ? "c" : "nc", file);
+ return;
+
+ case NE:
+ fputs (reverse ? "nc" : "c", file);
+ return;
+
+ default:
+ abort ();
+ }
+ if (ieee)
+ {
+ switch (code)
+ {
+ case LE:
+ suffix = reverse ? "ae" : "b";
+ break;
+ case GT:
+ case LT:
+ case GE:
+ suffix = reverse ? "ne" : "e";
+ break;
+ case EQ:
+ suffix = reverse ? "ne" : "e";
+ break;
+ case NE:
+ suffix = reverse ? "e" : "ne";
+ break;
+ default:
+ abort ();
+ }
+ fputs (suffix, file);
+ return;
+ }
+ if (flags & CC_TEST_AX)
+ abort();
+ if ((flags & CC_NO_OVERFLOW) && (code == LE || code == GT))
+ abort ();
+ if (reverse)
+ code = reverse_condition (code);
+ switch (code)
+ {
+ case EQ:
+ suffix = "e";
+ break;
+
+ case NE:
+ suffix = "ne";
+ break;
+
+ case GT:
+ suffix = flags & CC_IN_80387 ? "a" : "g";
+ break;
+
+ case GTU:
+ suffix = "a";
+ break;
+
+ case LT:
+ if (flags & CC_NO_OVERFLOW)
+ suffix = "s";
+ else
+ suffix = flags & CC_IN_80387 ? "b" : "l";
+ break;
+
+ case LTU:
+ suffix = "b";
+ break;
+
+ case GE:
+ if (flags & CC_NO_OVERFLOW)
+ suffix = "ns";
+ else
+ suffix = flags & CC_IN_80387 ? "ae" : "ge";
+ break;
+
+ case GEU:
+ suffix = "ae";
+ break;
+
+ case LE:
+ suffix = flags & CC_IN_80387 ? "be" : "le";
+ break;
+
+ case LEU:
+ suffix = "be";
+ break;
+
+ default:
+ abort ();
+ }
+ fputs (suffix, file);
+}
+
/* Append the correct conditional move suffix which corresponds to CODE. */
static void
@@ -3301,7 +3440,9 @@ put_condition_code (code, reverse_cc, mode, file)
C -- print opcode suffix for set/cmov insn.
c -- like C, but print reversed condition
F -- print opcode suffix for fcmov insn.
- f -- like C, but print reversed condition
+ f -- like F, but print reversed condition
+ D -- print the opcode suffix for a jump
+ d -- like D, but print reversed condition
R -- print the prefix for register names.
z -- print the opcode suffix for the size of the current operand.
* -- print a star (in certain assembler syntax)
@@ -3443,6 +3584,14 @@ print_operand (file, x, code)
return;
+ case 'D':
+ put_jump_code (GET_CODE (x), 0, file);
+ return;
+
+ case 'd':
+ put_jump_code (GET_CODE (x), 1, file);
+ return;
+
/* This is used by the conditional move instructions. */
case 'C':
put_condition_code (GET_CODE (x), 0, MODE_INT, file);
@@ -3851,7 +4000,12 @@ split_di (operands, num, lo_half, hi_half)
while (num--)
{
rtx op = operands[num];
- if (GET_CODE (op) == REG)
+ if (! reload_completed)
+ {
+ lo_half[num] = gen_lowpart (SImode, op);
+ hi_half[num] = gen_highpart (SImode, op);
+ }
+ else if (GET_CODE (op) == REG)
{
lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
@@ -5360,3 +5514,67 @@ output_int_conditional_move (which_alternative, operands)
return "";
}
+
+int
+x86_adjust_cost (insn, link, dep_insn, cost)
+ rtx insn, link, dep_insn;
+ int cost;
+{
+ rtx next_inst;
+
+ if (GET_CODE (dep_insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
+ return 0;
+
+ if (GET_CODE (dep_insn) == INSN
+ && GET_CODE (PATTERN (dep_insn)) == SET
+ && GET_CODE (SET_DEST (PATTERN (dep_insn))) == REG
+ && GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && !reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
+ SET_SRC (PATTERN (insn))))
+ return 0; /* ??? */
+
+
+ switch (ix86_cpu)
+ {
+ case PROCESSOR_PENTIUM:
+ if (cost != 0 && is_fp_insn (insn) && is_fp_insn (dep_insn)
+ && !is_fp_dest (dep_insn))
+ return 0;
+
+ if (agi_dependent (insn, dep_insn))
+ return 3;
+
+ if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && SET_DEST (PATTERN (insn)) == cc0_rtx
+ && (next_inst = next_nonnote_insn (insn))
+ && GET_CODE (next_inst) == JUMP_INSN)
+ /* compare probably paired with jump */
+ return 0;
+ break;
+
+ case PROCESSOR_K6:
+ default:
+ if (!is_fp_dest (dep_insn))
+ {
+ if(!agi_dependent (insn, dep_insn))
+ return 0;
+ if (TARGET_486)
+ return 2;
+ }
+ else
+ if (is_fp_store (insn) && is_fp_insn (dep_insn)
+ && NEXT_INSN (insn) && NEXT_INSN (NEXT_INSN (insn))
+ && NEXT_INSN (NEXT_INSN (NEXT_INSN (insn)))
+ && (GET_CODE (NEXT_INSN (insn)) == INSN)
+ && (GET_CODE (NEXT_INSN (NEXT_INSN (insn))) == JUMP_INSN)
+ && (GET_CODE (NEXT_INSN (NEXT_INSN (NEXT_INSN (insn)))) == NOTE)
+ && (NOTE_LINE_NUMBER (NEXT_INSN (NEXT_INSN (NEXT_INSN (insn))))
+ == NOTE_INSN_LOOP_END))
+ return 3;
+ break;
+ }
+
+ return cost;
+}
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7e3f723..2042a6b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -155,18 +155,25 @@ extern int target_flags;
#define TARGET_486 (ix86_cpu == PROCESSOR_I486)
#define TARGET_PENTIUM (ix86_cpu == PROCESSOR_PENTIUM)
#define TARGET_PENTIUMPRO (ix86_cpu == PROCESSOR_PENTIUMPRO)
-#define TARGET_USE_LEAVE (ix86_cpu == PROCESSOR_I386)
-#define TARGET_PUSH_MEMORY (ix86_cpu == PROCESSOR_I386)
-#define TARGET_ZERO_EXTEND_WITH_AND (ix86_cpu != PROCESSOR_I386 \
- && ix86_cpu != PROCESSOR_PENTIUMPRO)
-#define TARGET_DOUBLE_WITH_ADD (ix86_cpu != PROCESSOR_I386)
-#define TARGET_USE_BIT_TEST (ix86_cpu == PROCESSOR_I386)
-#define TARGET_UNROLL_STRLEN (ix86_cpu != PROCESSOR_I386)
-#define TARGET_USE_Q_REG (ix86_cpu == PROCESSOR_PENTIUM \
- || ix86_cpu == PROCESSOR_PENTIUMPRO)
-#define TARGET_USE_ANY_REG (ix86_cpu == PROCESSOR_I486)
-#define TARGET_CMOVE (ix86_arch == PROCESSOR_PENTIUMPRO)
-#define TARGET_DEEP_BRANCH_PREDICTION (ix86_cpu == PROCESSOR_PENTIUMPRO)
+#define TARGET_K6 (ix86_cpu == PROCESSOR_K6)
+
+#define CPUMASK (1 << ix86_cpu)
+extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
+extern const int x86_use_bit_test, x86_cmove, x86_deep_branch;
+extern const int x86_unroll_strlen, x86_use_q_reg, x86_use_any_reg;
+extern const int x86_double_with_add;
+
+#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
+#define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
+#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & CPUMASK)
+#define TARGET_USE_BIT_TEST (x86_use_bit_test & CPUMASK)
+#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & CPUMASK)
+#define TARGET_USE_Q_REG (x86_use_q_reg & CPUMASK)
+#define TARGET_USE_ANY_REG (x86_use_any_reg & CPUMASK)
+#define TARGET_CMOVE (x86_cmove & (1 << ix86_arch))
+#define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & CPUMASK)
+#define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & CPUMASK)
+
#define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
#define TARGET_SWITCHES \
@@ -219,7 +226,8 @@ enum processor_type
{PROCESSOR_I386, /* 80386 */
PROCESSOR_I486, /* 80486DX, 80486SX, 80486DX[24] */
PROCESSOR_PENTIUM,
- PROCESSOR_PENTIUMPRO};
+ PROCESSOR_PENTIUMPRO,
+ PROCESSOR_K6};
#define PROCESSOR_I386_STRING "i386"
#define PROCESSOR_I486_STRING "i486"
@@ -227,28 +235,20 @@ enum processor_type
#define PROCESSOR_PENTIUM_STRING "pentium"
#define PROCESSOR_I686_STRING "i686"
#define PROCESSOR_PENTIUMPRO_STRING "pentiumpro"
+#define PROCESSOR_K6_STRING "k6"
extern enum processor_type ix86_cpu;
extern int ix86_arch;
/* Define the default processor. This is overridden by other tm.h files. */
-#define PROCESSOR_DEFAULT \
- ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_I486) \
- ? PROCESSOR_I486 \
- : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_PENTIUM) \
- ? PROCESSOR_PENTIUM \
- : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_PENTIUMPRO) \
- ? PROCESSOR_PENTIUMPRO \
- : PROCESSOR_I386
+#define PROCESSOR_DEFAULT (enum processor_type) TARGET_CPU_DEFAULT
#define PROCESSOR_DEFAULT_STRING \
- ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_I486) \
- ? PROCESSOR_I486_STRING \
- : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_PENTIUM) \
- ? PROCESSOR_PENTIUM_STRING \
- : ((enum processor_type) TARGET_CPU_DEFAULT == PROCESSOR_PENTIUMPRO) \
- ? PROCESSOR_PENTIUMPRO_STRING \
- : PROCESSOR_I386_STRING
+ (PROCESSOR_DEFAULT == PROCESSOR_I486 ? PROCESSOR_I486_STRING \
+ : PROCESSOR_DEFAULT == PROCESSOR_PENTIUM ? PROCESSOR_PENTIUM_STRING \
+ : PROCESSOR_DEFAULT == PROCESSOR_PENTIUMPRO ? PROCESSOR_PENTIUMPRO_STRING \
+ : PROCESSOR_DEFAULT == PROCESSOR_K6 ? PROCESSOR_K6_STRING \
+ : PROCESSOR_I386_STRING)
/* This macro is similar to `TARGET_SWITCHES' but defines names of
command options that have values. Its definition is an
@@ -1533,25 +1533,16 @@ do { \
/* Output assembler code for a block containing the constant parts
of a trampoline, leaving space for the variable parts. */
-/* On the 386, the trampoline contains three instructions:
+/* On the 386, the trampoline contains two instructions:
mov #STATIC,ecx
- mov #FUNCTION,eax
- jmp @eax */
-#define TRAMPOLINE_TEMPLATE(FILE) \
-{ \
- ASM_OUTPUT_CHAR (FILE, GEN_INT (0xb9)); \
- ASM_OUTPUT_SHORT (FILE, const0_rtx); \
- ASM_OUTPUT_SHORT (FILE, const0_rtx); \
- ASM_OUTPUT_CHAR (FILE, GEN_INT (0xb8)); \
- ASM_OUTPUT_SHORT (FILE, const0_rtx); \
- ASM_OUTPUT_SHORT (FILE, const0_rtx); \
- ASM_OUTPUT_CHAR (FILE, GEN_INT (0xff)); \
- ASM_OUTPUT_CHAR (FILE, GEN_INT (0xe0)); \
-}
+ jmp FUNCTION
+ The trampoline is generated entirely at runtime. The operand of JMP
+ is the address of FUNCTION relative to the instruction following the
+ JMP (which is 5 bytes long). */
/* Length in units of the trampoline for entering a nested function. */
-#define TRAMPOLINE_SIZE 12
+#define TRAMPOLINE_SIZE 10
/* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code.
@@ -1559,8 +1550,14 @@ do { \
#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
{ \
+ /* Compute offset from the end of the jmp to the target function. */ \
+ rtx disp = expand_binop (SImode, sub_optab, FNADDR, \
+ plus_constant (TRAMP, 10), \
+ NULL_RTX, 1, OPTAB_DIRECT); \
+ emit_move_insn (gen_rtx_MEM (QImode, TRAMP), GEN_INT (0xb9)); \
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (TRAMP, 1)), CXT); \
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (TRAMP, 6)), FNADDR); \
+ emit_move_insn (gen_rtx_MEM (QImode, plus_constant (TRAMP, 5)), GEN_INT (0xe9));\
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (TRAMP, 6)), disp); \
}
/* Definitions for register eliminations.
@@ -2243,70 +2240,7 @@ while (0)
the same cost as a data-dependence. */
#define ADJUST_COST(insn,link,dep_insn,cost) \
- { \
- rtx next_inst; \
- if (GET_CODE (dep_insn) == CALL_INSN) \
- (cost) = 0; \
- \
- else if (GET_CODE (dep_insn) == INSN \
- && GET_CODE (PATTERN (dep_insn)) == SET \
- && GET_CODE (SET_DEST (PATTERN (dep_insn))) == REG \
- && GET_CODE (insn) == INSN \
- && GET_CODE (PATTERN (insn)) == SET \
- && !reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)), \
- SET_SRC (PATTERN (insn)))) \
- { \
- (cost) = 0; \
- } \
- \
- else if (GET_CODE (insn) == JUMP_INSN) \
- { \
- (cost) = 0; \
- } \
- \
- if (TARGET_PENTIUM) \
- { \
- if (cost !=0 && is_fp_insn (insn) && is_fp_insn (dep_insn) \
- && !is_fp_dest (dep_insn)) \
- { \
- (cost) = 0; \
- } \
- \
- if (agi_dependent (insn, dep_insn)) \
- { \
- (cost) = 3; \
- } \
- else if (GET_CODE (insn) == INSN \
- && GET_CODE (PATTERN (insn)) == SET \
- && SET_DEST (PATTERN (insn)) == cc0_rtx \
- && (next_inst = next_nonnote_insn (insn)) \
- && GET_CODE (next_inst) == JUMP_INSN) \
- { /* compare probably paired with jump */ \
- (cost) = 0; \
- } \
- } \
- else \
- if (!is_fp_dest (dep_insn)) \
- { \
- if(!agi_dependent (insn, dep_insn)) \
- (cost) = 0; \
- else if (TARGET_486) \
- (cost) = 2; \
- } \
- else \
- if (is_fp_store (insn) && is_fp_insn (dep_insn) \
- && NEXT_INSN (insn) && NEXT_INSN (NEXT_INSN (insn)) \
- && NEXT_INSN (NEXT_INSN (NEXT_INSN (insn))) \
- && (GET_CODE (NEXT_INSN (insn)) == INSN) \
- && (GET_CODE (NEXT_INSN (NEXT_INSN (insn))) == JUMP_INSN) \
- && (GET_CODE (NEXT_INSN (NEXT_INSN (NEXT_INSN (insn)))) == NOTE) \
- && (NOTE_LINE_NUMBER (NEXT_INSN (NEXT_INSN (NEXT_INSN (insn)))) \
- == NOTE_INSN_LOOP_END)) \
- { \
- (cost) = 3; \
- } \
- }
-
+ (cost) = x86_adjust_cost(insn, link, dep_insn, cost)
#define ADJUST_BLOCKAGE(last_insn,insn,blockage) \
{ \
@@ -2323,6 +2257,8 @@ while (0)
} \
}
+#define ISSUE_RATE ((int)ix86_cpu > (int)PROCESSOR_I486 ? 2 : 1)
+
/* Add any extra modes needed to represent the condition code.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 69134dd..434ba1f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -71,9 +71,43 @@
;; This shadows the processor_type enumeration, so changes must be made
;; to i386.h at the same time.
-(define_attr "type" "integer,idiv,imul,fld,fpop,fpdiv,fpmul"
+(define_attr "type"
+ "integer,binary,memory,test,compare,fcompare,idiv,imul,lea,fld,fpop,fpdiv,fpmul"
(const_string "integer"))
+(define_attr "memory" "none,load,store"
+ (cond [(eq_attr "type" "idiv,lea")
+ (const_string "none")
+
+ (eq_attr "type" "fld")
+ (const_string "load")
+
+ (eq_attr "type" "test")
+ (if_then_else (match_operand 0 "memory_operand" "")
+ (const_string "load")
+ (const_string "none"))
+
+ (eq_attr "type" "compare,fcompare")
+ (if_then_else (ior (match_operand 0 "memory_operand" "")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "load")
+ (const_string "none"))
+
+ (and (eq_attr "type" "integer,memory,fpop")
+ (match_operand 0 "memory_operand" ""))
+ (const_string "store")
+
+ (and (eq_attr "type" "integer,memory,fpop")
+ (match_operand 1 "memory_operand" ""))
+ (const_string "load")
+
+ (and (eq_attr "type" "binary,imul,fpmul,fpdiv")
+ (ior (match_operand 1 "memory_operand" "")
+ (match_operand 2 "memory_operand" "")))
+ (const_string "load")]
+
+ (const_string "none")))
+
;; Functional units
; (define_function_unit NAME MULTIPLICITY SIMULTANEITY
@@ -92,11 +126,11 @@
;; Floating point
(define_function_unit "fp" 1 0
- (and (eq_attr "type" "fpop") (eq_attr "cpu" "i386,i486"))
+ (and (eq_attr "type" "fpop,fcompare") (eq_attr "cpu" "i386,i486"))
5 5)
(define_function_unit "fp" 1 0
- (and (eq_attr "type" "fpop") (eq_attr "cpu" "pentium,pentiumpro"))
+ (and (eq_attr "type" "fpop,fcompare") (eq_attr "cpu" "pentium,pentiumpro"))
3 0)
(define_function_unit "fp" 1 0
@@ -120,12 +154,69 @@
10 10)
(define_function_unit "fp" 1 0
- (eq_attr "type" "fld")
+ (and (eq_attr "type" "fld") (eq_attr "cpu" "!pentiumpro,k6"))
1 0)
-(define_function_unit "integer" 1 0
- (and (eq_attr "type" "integer") (eq_attr "cpu" "!i386"))
- 2 0)
+;; K6 FPU is not pipelined.
+(define_function_unit "fp" 1 0
+ (and (eq_attr "type" "fpop,fpmul,fcompare") (eq_attr "cpu" "k6"))
+ 2 2)
+
+;; i386 and i486 have one integer unit, which need not be modeled
+
+(define_function_unit "integer" 2 0
+ (and (eq_attr "type" "integer,binary,test,compare,lea") (eq_attr "cpu" "pentium,pentiumpro"))
+ 1 0)
+
+(define_function_unit "integer" 2 0
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "integer,binary,test,compare")
+ (eq_attr "memory" "!load")))
+ 1 0)
+
+;; Internally, K6 converts REG OP MEM instructions into a load (2 cycles)
+;; and a register operation (1 cycle).
+(define_function_unit "integer" 2 0
+ (and (eq_attr "cpu" "k6")
+ (and (eq_attr "type" "integer,binary,test,compare")
+ (eq_attr "memory" "load")))
+ 3 0)
+
+;; Multiplies use one of the integer units
+(define_function_unit "integer" 2 0
+ (and (eq_attr "cpu" "pentium") (eq_attr "type" "imul"))
+ 11 11)
+
+(define_function_unit "integer" 2 0
+ (and (eq_attr "cpu" "k6") (eq_attr "type" "imul"))
+ 2 2)
+
+(define_function_unit "integer" 2 0
+ (and (eq_attr "cpu" "pentium") (eq_attr "type" "idiv"))
+ 25 25)
+
+(define_function_unit "integer" 2 0
+ (and (eq_attr "cpu" "k6") (eq_attr "type" "idiv"))
+ 17 17)
+
+;; Pentium Pro and K6 have a separate load unit.
+(define_function_unit "load" 1 0
+ (and (eq_attr "cpu" "pentiumpro") (eq_attr "memory" "load"))
+ 3 0)
+
+(define_function_unit "load" 1 0
+ (and (eq_attr "cpu" "k6") (eq_attr "memory" "load"))
+ 2 0)
+
+;; Pentium Pro and K6 have a separate store unit.
+(define_function_unit "store" 1 0
+ (and (eq_attr "cpu" "pentiumpro,k6") (eq_attr "memory" "store"))
+ 1 0)
+
+;; lea executes in the K6 store unit with 1 cycle latency
+(define_function_unit "store" 1 0
+ (and (eq_attr "cpu" "k6") (eq_attr "type" "lea"))
+ 1 0)
;; "movl MEM,REG / testl REG,REG" is faster on a 486 than "cmpl $0,MEM".
@@ -143,7 +234,7 @@
;; Processor type -- this attribute must exactly match the processor_type
;; enumeration in i386.h.
-(define_attr "cpu" "i386,i486,pentium,pentiumpro"
+(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6"
(const (symbol_ref "ix86_cpu")))
(define_insn "tstsi_1"
@@ -157,7 +248,8 @@
operands[1] = const0_rtx;
return AS2 (cmp%L0,%1,%0);
-}")
+}"
+ [(set_attr "type" "test")])
(define_expand "tstsi"
[(set (cc0)
@@ -182,7 +274,8 @@
operands[1] = const0_rtx;
return AS2 (cmp%W0,%1,%0);
-}")
+}"
+ [(set_attr "type" "test")])
(define_expand "tsthi"
[(set (cc0)
@@ -207,7 +300,8 @@
operands[1] = const0_rtx;
return AS2 (cmp%B0,%1,%0);
-}")
+}"
+ [(set_attr "type" "test")])
(define_expand "tstqi"
[(set (cc0)
@@ -237,7 +331,8 @@
output_asm_insn (AS1 (fstp,%y0), operands);
return output_fp_cc0_set (insn);
-}")
+}"
+ [(set_attr "type" "test")])
;; Don't generate tstsf if generating IEEE code, since the `ftst' opcode
;; isn't IEEE compliant.
@@ -271,7 +366,8 @@
output_asm_insn (AS1 (fstp,%y0), operands);
return output_fp_cc0_set (insn);
-}")
+}"
+ [(set_attr "type" "test")])
;; Don't generate tstdf if generating IEEE code, since the `ftst' opcode
;; isn't IEEE compliant.
@@ -305,7 +401,8 @@
output_asm_insn (AS1 (fstp,%y0), operands);
return output_fp_cc0_set (insn);
-}")
+}"
+ [(set_attr "type" "test")])
;; Don't generate tstxf if generating IEEE code, since the `ftst' opcode
;; isn't IEEE compliant.
@@ -331,7 +428,8 @@
(compare (match_operand:SI 0 "nonimmediate_operand" "mr,r")
(match_operand:SI 1 "general_operand" "ri,mr")))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
- "* return AS2 (cmp%L0,%1,%0);")
+ "* return AS2 (cmp%L0,%1,%0);"
+ [(set_attr "type" "compare")])
(define_expand "cmpsi"
[(set (cc0)
@@ -354,7 +452,8 @@
(compare (match_operand:HI 0 "nonimmediate_operand" "mr,r")
(match_operand:HI 1 "general_operand" "ri,mr")))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
- "* return AS2 (cmp%W0,%1,%0);")
+ "* return AS2 (cmp%W0,%1,%0);"
+ [(set_attr "type" "compare")])
(define_expand "cmphi"
[(set (cc0)
@@ -377,7 +476,8 @@
(compare (match_operand:QI 0 "nonimmediate_operand" "q,mq")
(match_operand:QI 1 "general_operand" "qm,nq")))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
- "* return AS2 (cmp%B0,%1,%0);")
+ "* return AS2 (cmp%B0,%1,%0);"
+ [(set_attr "type" "compare")])
(define_expand "cmpqi"
[(set (cc0)
@@ -406,7 +506,8 @@
(match_operand:XF 1 "register_operand" "f")]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -416,7 +517,8 @@
(match_operand:SI 1 "nonimmediate_operand" "rm"))]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -426,7 +528,8 @@
(match_operand:XF 1 "register_operand" "f")]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -436,7 +539,8 @@
(match_operand:DF 1 "nonimmediate_operand" "fm"))]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -446,7 +550,8 @@
(match_operand:XF 1 "register_operand" "f")]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -456,7 +561,8 @@
(match_operand:SF 1 "nonimmediate_operand" "fm"))]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -466,7 +572,8 @@
(match_operand:XF 1 "register_operand" "f")]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -474,7 +581,8 @@
(match_operand:XF 1 "register_operand" "f")))
(clobber (match_scratch:HI 2 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -484,7 +592,8 @@
(clobber (match_scratch:HI 3 "=a,a"))]
"TARGET_80387
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -494,7 +603,8 @@
(match_operand:SI 1 "nonimmediate_operand" "rm"))]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -504,7 +614,8 @@
(match_operand:DF 1 "register_operand" "f")]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -514,7 +625,8 @@
(match_operand:SF 1 "nonimmediate_operand" "fm"))]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -524,7 +636,8 @@
(match_operand:DF 1 "register_operand" "f")]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -534,7 +647,8 @@
(match_operand:DF 1 "nonimmediate_operand" "fm")]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -542,7 +656,8 @@
(match_operand:DF 1 "register_operand" "f")))
(clobber (match_scratch:HI 2 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
;; These two insns will never be generated by combine due to the mode of
;; the COMPARE.
@@ -564,7 +679,7 @@
; "TARGET_80387"
; "* return output_float_compare (insn, operands);")
-(define_insn "cmpsf_cc_1"
+(define_insn "*cmpsf_cc_1"
[(set (cc0)
(match_operator 2 "VOIDmode_compare_op"
[(match_operand:SF 0 "nonimmediate_operand" "f,fm")
@@ -572,7 +687,8 @@
(clobber (match_scratch:HI 3 "=a,a"))]
"TARGET_80387
&& (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -582,7 +698,8 @@
(match_operand:SI 1 "nonimmediate_operand" "rm"))]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -592,7 +709,8 @@
(match_operand:SF 1 "register_operand" "f")]))
(clobber (match_scratch:HI 3 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_insn ""
[(set (cc0)
@@ -600,7 +718,8 @@
(match_operand:SF 1 "register_operand" "f")))
(clobber (match_scratch:HI 2 "=a"))]
"TARGET_80387"
- "* return output_float_compare (insn, operands);")
+ "* return output_float_compare (insn, operands);"
+ [(set_attr "type" "fcompare")])
(define_expand "cmpxf"
[(set (cc0)
@@ -760,7 +879,8 @@
return AS2 (test%L0,%1,%0);
return AS2 (test%L1,%0,%1);
-}")
+}"
+ [(set_attr "type" "compare")])
(define_insn ""
[(set (cc0)
@@ -808,7 +928,8 @@
return AS2 (test%W0,%1,%0);
return AS2 (test%W1,%0,%1);
-}")
+}"
+ [(set_attr "type" "compare")])
(define_insn ""
[(set (cc0)
@@ -821,7 +942,8 @@
return AS2 (test%B0,%1,%0);
return AS2 (test%B1,%0,%1);
-}")
+}"
+ [(set_attr "type" "compare")])
;; move instructions.
;; There is one for each machine mode,
@@ -832,13 +954,15 @@
[(set (match_operand:SI 0 "push_operand" "=<")
(match_operand:SI 1 "nonmemory_operand" "rn"))]
"flag_pic"
- "* return AS1 (push%L0,%1);")
+ "* return AS1 (push%L0,%1);"
+ [(set_attr "memory" "store")])
(define_insn ""
[(set (match_operand:SI 0 "push_operand" "=<")
(match_operand:SI 1 "nonmemory_operand" "ri"))]
"!flag_pic"
- "* return AS1 (push%L0,%1);")
+ "* return AS1 (push%L0,%1);"
+ [(set_attr "memory" "store")])
;; On a 386, it is faster to push MEM directly.
@@ -846,7 +970,9 @@
[(set (match_operand:SI 0 "push_operand" "=<")
(match_operand:SI 1 "memory_operand" "m"))]
"TARGET_PUSH_MEMORY"
- "* return AS1 (push%L0,%1);")
+ "* return AS1 (push%L0,%1);"
+ [(set_attr "type" "memory")
+ (set_attr "memory" "load")])
;; General case of fullword move.
@@ -877,18 +1003,24 @@
;; On i486, incl reg is faster than movl $1,reg.
(define_insn ""
- [(set (match_operand:SI 0 "general_operand" "=g,r")
- (match_operand:SI 1 "general_operand" "rn,im"))]
+ [(set (match_operand:SI 0 "general_operand" "=g,r,r")
+ (match_operand:SI 1 "general_operand" "rn,i,m"))]
"((!TARGET_MOVE || GET_CODE (operands[0]) != MEM)
|| (GET_CODE (operands[1]) != MEM))
&& flag_pic"
"*
{
rtx link;
- if (operands[1] == const0_rtx && REG_P (operands[0]))
+
+ /* K6: mov reg,0 is slightly faster than xor reg,reg but is 3 bytes
+ longer. */
+ if ((ix86_cpu != PROCESSOR_K6 || optimize_size)
+ && operands[1] == const0_rtx && REG_P (operands[0]))
return AS2 (xor%L0,%0,%0);
if (operands[1] == const1_rtx
+ /* PPRO and K6 prefer mov to inc to reduce dependencies. */
+ && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO)
&& (link = find_reg_note (insn, REG_WAS_0, 0))
/* Make sure the insn that stored the 0 is still present. */
&& ! INSN_DELETED_P (XEXP (link, 0))
@@ -904,7 +1036,9 @@
return AS2 (lea%L0,%a1,%0);
return AS2 (mov%L0,%1,%0);
-}")
+}"
+ [(set_attr "type" "integer,integer,memory")
+ (set_attr "memory" "*,*,load")])
(define_insn ""
[(set (match_operand:SI 0 "general_operand" "=g,r")
@@ -915,10 +1049,13 @@
"*
{
rtx link;
- if (operands[1] == const0_rtx && REG_P (operands[0]))
+ if ((ix86_cpu != PROCESSOR_K6 || optimize_size)
+ && operands[1] == const0_rtx && REG_P (operands[0]))
return AS2 (xor%L0,%0,%0);
if (operands[1] == const1_rtx
+ /* PPRO and K6 prefer mov to inc to reduce dependencies. */
+ && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO)
&& (link = find_reg_note (insn, REG_WAS_0, 0))
/* Make sure the insn that stored the 0 is still present. */
&& ! INSN_DELETED_P (XEXP (link, 0))
@@ -931,19 +1068,25 @@
return AS1 (inc%L0,%0);
return AS2 (mov%L0,%1,%0);
-}")
+}"
+ [(set_attr "type" "integer,memory")
+ (set_attr "memory" "*,load")])
(define_insn ""
[(set (match_operand:HI 0 "push_operand" "=<")
(match_operand:HI 1 "nonmemory_operand" "ri"))]
""
- "* return AS1 (push%W0,%1);")
+ "* return AS1 (push%W0,%1);"
+ [(set_attr "type" "memory")
+ (set_attr "memory" "store")])
(define_insn ""
[(set (match_operand:HI 0 "push_operand" "=<")
(match_operand:HI 1 "memory_operand" "m"))]
"TARGET_PUSH_MEMORY"
- "* return AS1 (push%W0,%1);")
+ "* return AS1 (push%W0,%1);"
+ [(set_attr "type" "memory")
+ (set_attr "memory" "load")])
;; On i486, an incl and movl are both faster than incw and movw.
@@ -974,6 +1117,8 @@
return AS2 (xor%L0,%k0,%k0);
if (REG_P (operands[0]) && operands[1] == const1_rtx
+ /* PPRO and K6 prefer mov to inc to reduce dependencies. */
+ && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO)
&& (link = find_reg_note (insn, REG_WAS_0, 0))
/* Make sure the insn that stored the 0 is still present. */
&& ! INSN_DELETED_P (XEXP (link, 0))
@@ -992,7 +1137,7 @@
operands[1] = i386_sext16_if_const (operands[1]);
return AS2 (mov%L0,%k1,%k0);
}
- if (TARGET_PENTIUMPRO)
+ if (! TARGET_ZERO_EXTEND_WITH_AND)
{
/* movzwl is faster than movw on the Pentium Pro,
* although not as fast as an aligned movl. */
@@ -1005,7 +1150,9 @@
}
return AS2 (mov%W0,%1,%0);
-}")
+}"
+ [(set_attr "type" "integer,memory")
+ (set_attr "memory" "*,load")])
(define_expand "movstricthi"
[(set (strict_low_part (match_operand:HI 0 "general_operand" ""))
@@ -1030,10 +1177,13 @@
"*
{
rtx link;
- if (operands[1] == const0_rtx && REG_P (operands[0]))
+ if ((ix86_cpu != PROCESSOR_K6 || optimize_size)
+ && operands[1] == const0_rtx && REG_P (operands[0]))
return AS2 (xor%W0,%0,%0);
if (operands[1] == const1_rtx
+ /* PPRO and K6 prefer mov to inc to reduce dependencies. */
+ && (optimize_size || (int)ix86_cpu < (int)PROCESSOR_PENTIUMPRO)
&& (link = find_reg_note (insn, REG_WAS_0, 0))
/* Make sure the insn that stored the 0 is still present. */
&& ! INSN_DELETED_P (XEXP (link, 0))
@@ -1046,7 +1196,8 @@
return AS1 (inc%W0,%0);
return AS2 (mov%W0,%1,%0);
-}")
+}"
+ [(set_attr "type" "integer,memory")])
;; emit_push_insn when it calls move_by_pieces
;; requires an insn to "push a byte".
@@ -1096,10 +1247,12 @@
"*
{
rtx link;
- if (operands[1] == const0_rtx && REG_P (operands[0]))
- return AS2 (xor%L0,%k0,%k0);
+
+ /* movb $0,reg8 is 2 bytes, the same as xorl reg8,reg8.
+ It is at least as fast as xor on any processor except a Pentium. */
if (operands[1] == const1_rtx
+ && ix86_cpu == PROCESSOR_PENTIUM
&& (link = find_reg_note (insn, REG_WAS_0, 0))
/* Make sure the insn that stored the 0 is still present. */
&& ! INSN_DELETED_P (XEXP (link, 0))
@@ -1156,10 +1309,11 @@
"*
{
rtx link;
- if (operands[1] == const0_rtx && REG_P (operands[0]))
- return AS2 (xor%B0,%0,%0);
+
+ /* movb $0,reg8 is 2 bytes, the same as xorl reg8,reg8. */
if (operands[1] == const1_rtx
+ && ix86_cpu == PROCESSOR_PENTIUM
&& ! NON_QI_REG_P (operands[0])
&& (link = find_reg_note (insn, REG_WAS_0, 0))
/* Make sure the insn that stored the 0 is still present. */
@@ -1624,7 +1778,9 @@
(match_operand:DI 1 "general_operand" "riF,m"))]
"(!TARGET_MOVE || GET_CODE (operands[0]) != MEM)
|| (GET_CODE (operands[1]) != MEM)"
- "* return output_move_double (operands);")
+ "* return output_move_double (operands);"
+ [(set_attr "type" "integer,memory")
+ (set_attr "memory" "*,load")])
;;- conversion instructions
@@ -2625,7 +2781,7 @@
;;- add instructions
-(define_insn "addsidi3_1"
+(define_insn "*addsidi3_1"
[(set (match_operand:DI 0 "nonimmediate_operand" "=&r,r,o,!&r,!r,o,!o")
(plus:DI (match_operand:DI 1 "general_operand" "0,0,0,o,riF,riF,o")
(zero_extend:DI (match_operand:SI 2 "general_operand" "o,ri,ri,roi,roi,ri,ri"))))
@@ -2670,8 +2826,11 @@
output_asm_insn (AS2 (add%L0,%2,%0), low);
output_asm_insn (AS2 (adc%L0,%2,%0), high);
+ cc_status.value1 = high[0];
+ cc_status.flags = CC_NO_OVERFLOW;
RET;
-}")
+}"
+ [(set_attr "type" "binary")])
(define_insn "addsidi3_2"
[(set (match_operand:DI 0 "nonimmediate_operand" "=&r,r,o,&r,!&r,&r,o,o,!o")
@@ -2748,8 +2907,11 @@
output_asm_insn (AS2 (add%L0,%2,%0), low);
output_asm_insn (AS2 (adc%L0,%2,%0), high);
+ cc_status.value1 = high[0];
+ cc_status.flags = CC_NO_OVERFLOW;
RET;
-}")
+}"
+ [(set_attr "type" "binary")])
(define_insn "adddi3"
[(set (match_operand:DI 0 "general_operand" "=&r,&ro,!r,o,!&r,!o,!o")
@@ -2798,6 +2960,9 @@
}
}
+ cc_status.value1 = high[0];
+ cc_status.flags = CC_NO_OVERFLOW;
+
if (GET_CODE (operands[3]) == REG && GET_CODE (operands[2]) != REG)
{
xops[0] = high[0];
@@ -2822,7 +2987,8 @@
output_asm_insn (AS2 (add%L0,%2,%0), high);
RET;
-}")
+}"
+ [(set_attr "type" "binary")])
;; On a 486, it is faster to do movl/addl than to do a single leal if
;; operands[1] and operands[2] are both registers.
@@ -2887,7 +3053,8 @@
}
return AS2 (add%L0,%2,%0);
-}")
+}"
+ [(set_attr "type" "binary")])
;; addsi3 is faster, so put this after.
@@ -2916,7 +3083,8 @@
CC_STATUS_INIT;
return AS2 (lea%L0,%a1,%0);
-}")
+}"
+ [(set_attr "type" "lea")])
;; ??? `lea' here, for three operand add? If leaw is used, only %bx,
;; %si and %di can appear in SET_SRC, and output_asm_insn might not be
@@ -2986,7 +3154,8 @@
return AS1 (dec%W0,%0);
return AS2 (add%W0,%2,%0);
-}")
+}"
+ [(set_attr "type" "binary")])
(define_expand "addqi3"
[(set (match_operand:QI 0 "general_operand" "")
@@ -3011,7 +3180,8 @@
return AS1 (dec%B0,%0);
return AS2 (add%B0,%2,%0);
-}")
+}"
+ [(set_attr "type" "binary")])
;Lennart Augustsson <augustss@cs.chalmers.se>
;says this pattern just makes slower code:
@@ -3108,8 +3278,12 @@
output_asm_insn (AS2 (sub%L0,%2,%0), low);
output_asm_insn (AS2 (sbb%L0,%2,%0), high);
+ cc_status.value1 = high[0];
+ cc_status.flags = CC_NO_OVERFLOW;
+
RET;
-}")
+}"
+ [(set_attr "type" "binary")])
(define_insn "subdi3"
[(set (match_operand:DI 0 "general_operand" "=&r,&ro,o,o,!&r,!o")
@@ -3152,6 +3326,9 @@
}
}
+ cc_status.value1 = high[0];
+ cc_status.flags = CC_NO_OVERFLOW;
+
if (GET_CODE (operands[3]) == REG)
{
xops[0] = high[0];
@@ -3173,10 +3350,12 @@
}
else
- output_asm_insn (AS2 (sub%L0,%2,%0), high);
+ output_asm_insn (AS2 (sub%L0,%2,%0), high);
+
RET;
-}")
+}"
+ [(set_attr "type" "binary")])
(define_expand "subsi3"
[(set (match_operand:SI 0 "nonimmediate_operand" "")
@@ -3190,7 +3369,8 @@
(minus:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
(match_operand:SI 2 "general_operand" "ri,rm")))]
"ix86_binary_operator_ok (MINUS, SImode, operands)"
- "* return AS2 (sub%L0,%2,%0);")
+ "* return AS2 (sub%L0,%2,%0);"
+ [(set_attr "type" "binary")])
(define_expand "subhi3"
[(set (match_operand:HI 0 "general_operand" "")
@@ -3215,7 +3395,8 @@
return AS2 (sub%L0,%k2,%k0);
}
return AS2 (sub%W0,%2,%0);
-}")
+}"
+ [(set_attr "type" "binary")])
(define_expand "subqi3"
[(set (match_operand:QI 0 "general_operand" "")
@@ -3229,7 +3410,8 @@
(minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
(match_operand:QI 2 "general_operand" "qn,qmn")))]
"ix86_binary_operator_ok (MINUS, QImode, operands)"
- "* return AS2 (sub%B0,%2,%0);")
+ "* return AS2 (sub%B0,%2,%0);"
+ [(set_attr "type" "binary")])
;; The patterns that match these are at the end of this file.
@@ -3655,7 +3837,8 @@ word_zero_and_operation:
}
return AS2 (and%L0,%2,%0);
-}")
+}"
+ [(set_attr "type" "binary")])
(define_insn "andhi3"
[(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
@@ -3733,14 +3916,16 @@ word_zero_and_operation:
}
return AS2 (and%W0,%2,%0);
-}")
+}"
+ [(set_attr "type" "binary")])
(define_insn "andqi3"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
(and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
(match_operand:QI 2 "general_operand" "qn,qmn")))]
""
- "* return AS2 (and%B0,%2,%0);")
+ "* return AS2 (and%B0,%2,%0);"
+ [(set_attr "type" "binary")])
/* I am nervous about these two.. add them later..
;I presume this means that we have something in say op0= eax which is small
@@ -3856,7 +4041,8 @@ byte_or_operation:
}
return AS2 (or%L0,%2,%0);
-}")
+}"
+ [(set_attr "type" "binary")])
(define_insn "iorhi3"
[(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
@@ -3940,14 +4126,16 @@ byte_or_operation:
}
return AS2 (or%W0,%2,%0);
-}")
+}"
+ [(set_attr "type" "binary")])
(define_insn "iorqi3"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
(ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
(match_operand:QI 2 "general_operand" "qn,qmn")))]
""
- "* return AS2 (or%B0,%2,%0);")
+ "* return AS2 (or%B0,%2,%0);"
+ [(set_attr "type" "binary")])
;;- xor instructions
@@ -4035,7 +4223,8 @@ byte_xor_operation:
}
return AS2 (xor%L0,%2,%0);
-}")
+}"
+ [(set_attr "type" "binary")])
(define_insn "xorhi3"
[(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
@@ -4096,115 +4285,55 @@ byte_xor_operation:
}
return AS2 (xor%W0,%2,%0);
-}")
+}"
+ [(set_attr "type" "binary")])
(define_insn "xorqi3"
[(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
(xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
(match_operand:QI 2 "general_operand" "qn,qm")))]
""
- "* return AS2 (xor%B0,%2,%0);")
+ "* return AS2 (xor%B0,%2,%0);"
+ [(set_attr "type" "binary")])
;; logical operations for DImode
-
(define_insn "anddi3"
- [(set (match_operand:DI 0 "general_operand" "=&r,&ro,!r,o,!&r,!o,!o")
- (and:DI (match_operand:DI 1 "general_operand" "%0,0,0,0iF,or,riF,o")
- (match_operand:DI 2 "general_operand" "o,riF,0,or,or,oriF,o")))
- (clobber (match_scratch:SI 3 "=X,X,X,&r,X,&r,&r"))]
+ [(set (match_operand:DI 0 "general_operand" "=&r,&ro")
+ (and:DI (match_operand:DI 1 "general_operand" "0,0")
+ (match_operand:DI 2 "general_operand" "oriF,riF")))]
""
- "#")
+ "#"
+ [(set_attr "type" "binary")])
+
(define_insn "iordi3"
- [(set (match_operand:DI 0 "general_operand" "=&r,&ro,!r,o,!&r,!o,!o")
- (ior:DI (match_operand:DI 1 "general_operand" "%0,0,0,0iF,or,riF,o")
- (match_operand:DI 2 "general_operand" "o,riF,0,or,or,oriF,o")))
- (clobber (match_scratch:SI 3 "=X,X,X,&r,X,&r,&r"))]
+ [(set (match_operand:DI 0 "general_operand" "=&r,&ro")
+ (ior:DI (match_operand:DI 1 "general_operand" "0,0")
+ (match_operand:DI 2 "general_operand" "oriF,riF")))]
""
- "#")
-
+ "#"
+ [(set_attr "type" "binary")])
+
(define_insn "xordi3"
- [(set (match_operand:DI 0 "general_operand" "=&r,&ro,!r,o,!&r,!o,!o")
- (xor:DI (match_operand:DI 1 "general_operand" "%0,0,0,0iF,or,riF,o")
- (match_operand:DI 2 "general_operand" "o,riF,0,or,or,oriF,o")))
- (clobber (match_scratch:SI 3 "=X,X,X,&r,X,&r,&r"))]
+ [(set (match_operand:DI 0 "general_operand" "=&r,&ro")
+ (xor:DI (match_operand:DI 1 "general_operand" "0,0")
+ (match_operand:DI 2 "general_operand" "oriF,riF")))]
""
- "#")
+ "#"
+ [(set_attr "type" "binary")])
(define_split
- [(set (match_operand:DI 0 "general_operand" "=&r,&ro,!r,o,!&r,!o,!o")
- (match_operator:DI 4 "ix86_logical_operator"
- [(match_operand:DI 1 "general_operand" "%0,0,0,0iF,or,riF,o")
- (match_operand:DI 2 "general_operand" "o,riF,0,or,or,oriF,o")]))
- (clobber (match_scratch:SI 3 "=X,X,X,&r,X,&r,&r"))]
- "reload_completed"
- [(const_int 0)]
- "
-{
- rtx low[3], high[3], xops[7], temp;
- rtx (*genfunc)() = (GET_CODE (operands[4]) == AND ? gen_andsi3
- : GET_CODE (operands[4]) == IOR ? gen_iorsi3
- : GET_CODE (operands[4]) == XOR ? gen_xorsi3
- : 0);
-
- if (rtx_equal_p (operands[0], operands[2]))
- {
- temp = operands[1];
- operands[1] = operands[2];
- operands[2] = temp;
- }
-
- split_di (operands, 3, low, high);
- if (!rtx_equal_p (operands[0], operands[1]))
- {
- xops[0] = high[0];
- xops[1] = low[0];
- xops[2] = high[1];
- xops[3] = low[1];
-
- if (GET_CODE (operands[0]) != MEM)
- {
- emit_insn (gen_movsi (xops[1], xops[3]));
- emit_insn (gen_movsi (xops[0], xops[2]));
- }
- else
- {
- xops[4] = high[2];
- xops[5] = low[2];
- xops[6] = operands[3];
- emit_insn (gen_movsi (xops[6], xops[3]));
- emit_insn ((*genfunc) (xops[6], xops[6], xops[5]));
- emit_insn (gen_movsi (xops[1], xops[6]));
- emit_insn (gen_movsi (xops[6], xops[2]));
- emit_insn ((*genfunc) (xops[6], xops[6], xops[4]));
- emit_insn (gen_movsi (xops[0], xops[6]));
- DONE;
- }
- }
-
- if (GET_CODE (operands[3]) == REG && GET_CODE (operands[2]) != REG)
- {
- xops[0] = high[0];
- xops[1] = low[0];
- xops[2] = high[2];
- xops[3] = low[2];
- xops[4] = operands[3];
-
- emit_insn (gen_movsi (xops[4], xops[3]));
- emit_insn ((*genfunc) (xops[1], xops[1], xops[4]));
- emit_insn (gen_movsi (xops[4], xops[2]));
- emit_insn ((*genfunc) (xops[0], xops[0], xops[4]));
- }
-
- else
- {
- emit_insn ((*genfunc) (low[0], low[0], low[2]));
- emit_insn ((*genfunc) (high[0], high[0], high[2]));
- }
-
- DONE;
-}")
+ [(set (match_operand:DI 0 "general_operand" "")
+ (match_operator:DI 3 "ix86_logical_operator"
+ [(match_operand:DI 1 "general_operand" "")
+ (match_operand:DI 2 "general_operand" "")]))]
+ ""
+ [(set (match_dup 4) (match_op_dup:SI 3 [(match_dup 6) (match_dup 8)]))
+ (set (match_dup 5) (match_op_dup:SI 3 [(match_dup 7) (match_dup 9)]))]
+ "split_di (&operands[0], 1, &operands[4], &operands[5]);
+ split_di (&operands[1], 1, &operands[6], &operands[7]);
+ split_di (&operands[2], 1, &operands[8], &operands[9]);")
;;- negation instructions
@@ -4512,7 +4641,7 @@ byte_xor_operation:
"*
{
rtx xops[4], low[1], high[1];
- static HOST_WIDE_INT ashldi_label_number;
+ static int ashldi_label_number;
CC_STATUS_INIT;
@@ -4700,7 +4829,7 @@ byte_xor_operation:
"*
{
rtx xops[4], low[1], high[1];
- static HOST_WIDE_INT ashrdi_label_number;
+ static int ashrdi_label_number;
CC_STATUS_INIT;
@@ -4855,7 +4984,7 @@ byte_xor_operation:
"*
{
rtx xops[4], low[1], high[1];
- static HOST_WIDE_INT lshrdi_label_number;
+ static int lshrdi_label_number;
CC_STATUS_INIT;
@@ -5228,10 +5357,6 @@ byte_xor_operation:
;; For all sCOND expanders, also expand the compare or test insn that
;; generates cc0. Generate an equality comparison if `seq' or `sne'.
-;; The 386 sCOND opcodes can write to memory. But a gcc sCOND insn may
-;; not have any input reloads. A MEM write might need an input reload
-;; for the address of the MEM. So don't allow MEM as the SET_DEST.
-
(define_expand "seq"
[(match_dup 1)
(set (match_operand:QI 0 "register_operand" "")
@@ -5246,18 +5371,6 @@ byte_xor_operation:
operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);
}")
-(define_insn ""
- [(set (match_operand:QI 0 "register_operand" "=q")
- (eq:QI (cc0) (const_int 0)))]
- ""
- "*
-{
- if (cc_prev_status.flags & CC_Z_IN_NOT_C)
- return AS1 (setnb,%0);
- else
- return AS1 (sete,%0);
-}")
-
(define_expand "sne"
[(match_dup 1)
(set (match_operand:QI 0 "register_operand" "")
@@ -5272,19 +5385,6 @@ byte_xor_operation:
operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);
}")
-(define_insn ""
- [(set (match_operand:QI 0 "register_operand" "=q")
- (ne:QI (cc0) (const_int 0)))]
- ""
- "*
-{
- if (cc_prev_status.flags & CC_Z_IN_NOT_C)
- return AS1 (setb,%0);
- else
- return AS1 (setne,%0);
-}
-")
-
(define_expand "sgt"
[(match_dup 1)
(set (match_operand:QI 0 "register_operand" "")
@@ -5292,19 +5392,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (match_operand:QI 0 "register_operand" "=q")
- (gt:QI (cc0) (const_int 0)))]
- ""
- "*
-{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (sete,%0);
-
- OUTPUT_JUMP (\"setg %0\", \"seta %0\", NULL_PTR);
-}")
-
(define_expand "sgtu"
[(match_dup 1)
(set (match_operand:QI 0 "register_operand" "")
@@ -5312,12 +5399,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (match_operand:QI 0 "register_operand" "=q")
- (gtu:QI (cc0) (const_int 0)))]
- ""
- "* return \"seta %0\"; ")
-
(define_expand "slt"
[(match_dup 1)
(set (match_operand:QI 0 "register_operand" "")
@@ -5325,19 +5406,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (match_operand:QI 0 "register_operand" "=q")
- (lt:QI (cc0) (const_int 0)))]
- ""
- "*
-{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (sete,%0);
-
- OUTPUT_JUMP (\"setl %0\", \"setb %0\", \"sets %0\");
-}")
-
(define_expand "sltu"
[(match_dup 1)
(set (match_operand:QI 0 "register_operand" "")
@@ -5345,12 +5413,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (match_operand:QI 0 "register_operand" "=q")
- (ltu:QI (cc0) (const_int 0)))]
- ""
- "* return \"setb %0\"; ")
-
(define_expand "sge"
[(match_dup 1)
(set (match_operand:QI 0 "register_operand" "")
@@ -5358,19 +5420,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (match_operand:QI 0 "register_operand" "=q")
- (ge:QI (cc0) (const_int 0)))]
- ""
- "*
-{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (sete,%0);
-
- OUTPUT_JUMP (\"setge %0\", \"setae %0\", \"setns %0\");
-}")
-
(define_expand "sgeu"
[(match_dup 1)
(set (match_operand:QI 0 "register_operand" "")
@@ -5378,12 +5427,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (match_operand:QI 0 "register_operand" "=q")
- (geu:QI (cc0) (const_int 0)))]
- ""
- "* return \"setae %0\"; ")
-
(define_expand "sle"
[(match_dup 1)
(set (match_operand:QI 0 "register_operand" "")
@@ -5391,19 +5434,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (match_operand:QI 0 "register_operand" "=q")
- (le:QI (cc0) (const_int 0)))]
- ""
- "*
-{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (setb,%0);
-
- OUTPUT_JUMP (\"setle %0\", \"setbe %0\", NULL_PTR);
-}")
-
(define_expand "sleu"
[(match_dup 1)
(set (match_operand:QI 0 "register_operand" "")
@@ -5411,11 +5441,61 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (match_operand:QI 0 "register_operand" "=q")
- (leu:QI (cc0) (const_int 0)))]
- ""
- "* return \"setbe %0\"; ")
+;; The 386 sCOND opcodes can write to memory. But a gcc sCOND insn may
+;; not have any input reloads. A MEM write might need an input reload
+;; for the address of the MEM. So allow MEM as SET_DEST only after reload.
+
+(define_insn "*setcc"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
+ (match_operator:QI 1 "comparison_operator" [(cc0) (const_int 0)]))]
+ "reload_completed || register_operand (operands[0], QImode)"
+ "*
+{ /* Store the truth value of comparison operator 1, applied to cc0
+ against zero, into the QImode operand 0 with a setCC instruction.
+ Per the insn condition, operand 0 must be a register until reload
+ has completed. */
+ enum rtx_code code = GET_CODE (operands[1]);
+ if (cc_prev_status.flags & CC_TEST_AX) /* The outcome is read from bits
+ of hard register 0 instead of the flags: test a mask, then sete/setne.
+ NOTE(review): presumably %eax holding the x87 status word; confirm. */
+ {
+ int eq;
+ HOST_WIDE_INT c;
+ operands[2] = gen_rtx_REG (SImode, 0);
+ switch (code) /* c is the mask given to testl; eq = 1 means the result
+ is true when every masked bit is clear (sete), eq = 0 means it is
+ true when any masked bit is set (setne). */
+ {
+ case EQ:
+ c = 0x4000;
+ eq = 0;
+ break;
+ case NE:
+ c = 0x4000;
+ eq = 1;
+ break;
+ case GT:
+ c = 0x4100;
+ eq = 1;
+ break;
+ case LT:
+ c = 0x100;
+ eq = 0;
+ break;
+ case GE:
+ c = 0x100;
+ eq = 1;
+ break;
+ case LE:
+ c = 0x4100;
+ eq = 0;
+ break;
+ default: /* Only EQ, NE, GT, LT, GE and LE are handled on this path. */
+ abort ();
+ }
+ operands[3] = GEN_INT (c);
+ output_asm_insn (AS2 (testl,%3,%2), operands);
+ return eq ? AS1 (sete,%0) : AS1 (setne, %0);
+ }
+ /* LE and GT yield a null template when CC_NO_OVERFLOW is set.
+ NOTE(review): this reads cc_status while the test above reads
+ cc_prev_status; confirm that is intended, and confirm how the caller
+ treats a null template. */
+ if ((cc_status.flags & CC_NO_OVERFLOW) && (code == LE || code == GT))
+ return (char *)0;
+ return AS1(set%D1,%0); /* %D1 uses print_operand code D.
+ NOTE(review): presumably prints the condition suffix for operand 1;
+ confirm against print_operand. */
+}")
+
;; Basic conditional jump instructions.
;; We ignore the overflow flag for signed branch instructions.
@@ -5440,29 +5520,6 @@ byte_xor_operation:
operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);
}")
-(define_insn ""
- [(set (pc)
- (if_then_else (eq (cc0)
- (const_int 0))
- (label_ref (match_operand 0 "" ""))
- (pc)))]
- ""
- "*
-{
- if (cc_prev_status.flags & CC_Z_IN_NOT_C)
- return \"jnc %l0\";
- else
- if (cc_prev_status.flags & CC_TEST_AX)
- {
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x4000);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (jne,%l0);
- }
-
- return \"je %l0\";
-}")
-
(define_expand "bne"
[(match_dup 1)
(set (pc)
@@ -5480,28 +5537,6 @@ byte_xor_operation:
operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);
}")
-(define_insn ""
- [(set (pc)
- (if_then_else (ne (cc0)
- (const_int 0))
- (label_ref (match_operand 0 "" ""))
- (pc)))]
- ""
- "*
-{
- if (cc_prev_status.flags & CC_Z_IN_NOT_C)
- return \"jc %l0\";
- else
- if (cc_prev_status.flags & CC_TEST_AX)
- {
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x4000);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (je,%l0);
- }
-
- return \"jne %l0\";
-}")
(define_expand "bgt"
[(match_dup 1)
@@ -5513,29 +5548,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (pc)
- (if_then_else (gt (cc0)
- (const_int 0))
- (label_ref (match_operand 0 "" ""))
- (pc)))]
- ""
- "*
-{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (je,%l0);
-
- if (cc_prev_status.flags & CC_TEST_AX)
- {
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x4100);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (je,%l0);
- }
- OUTPUT_JUMP (\"jg %l0\", \"ja %l0\", NULL_PTR);
-}")
-
(define_expand "bgtu"
[(match_dup 1)
(set (pc)
@@ -5546,15 +5558,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (pc)
- (if_then_else (gtu (cc0)
- (const_int 0))
- (label_ref (match_operand 0 "" ""))
- (pc)))]
- ""
- "ja %l0")
-
(define_expand "blt"
[(match_dup 1)
(set (pc)
@@ -5565,28 +5568,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (pc)
- (if_then_else (lt (cc0)
- (const_int 0))
- (label_ref (match_operand 0 "" ""))
- (pc)))]
- ""
- "*
-{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (je,%l0);
-
- if (cc_prev_status.flags & CC_TEST_AX)
- {
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x100);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (jne,%l0);
- }
- OUTPUT_JUMP (\"jl %l0\", \"jb %l0\", \"js %l0\");
-}")
(define_expand "bltu"
[(match_dup 1)
@@ -5598,15 +5579,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (pc)
- (if_then_else (ltu (cc0)
- (const_int 0))
- (label_ref (match_operand 0 "" ""))
- (pc)))]
- ""
- "jb %l0")
-
(define_expand "bge"
[(match_dup 1)
(set (pc)
@@ -5617,28 +5589,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (pc)
- (if_then_else (ge (cc0)
- (const_int 0))
- (label_ref (match_operand 0 "" ""))
- (pc)))]
- ""
- "*
-{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (je,%l0);
- if (cc_prev_status.flags & CC_TEST_AX)
- {
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x100);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (je,%l0);
- }
- OUTPUT_JUMP (\"jge %l0\", \"jae %l0\", \"jns %l0\");
-}")
-
(define_expand "bgeu"
[(match_dup 1)
(set (pc)
@@ -5649,15 +5599,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (pc)
- (if_then_else (geu (cc0)
- (const_int 0))
- (label_ref (match_operand 0 "" ""))
- (pc)))]
- ""
- "jae %l0")
-
(define_expand "ble"
[(match_dup 1)
(set (pc)
@@ -5668,29 +5609,6 @@ byte_xor_operation:
""
"operands[1] = (*i386_compare_gen)(i386_compare_op0, i386_compare_op1);")
-(define_insn ""
- [(set (pc)
- (if_then_else (le (cc0)
- (const_int 0))
- (label_ref (match_operand 0 "" ""))
- (pc)))]
- ""
- "*
-{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (jb,%l0);
- if (cc_prev_status.flags & CC_TEST_AX)
- {
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x4100);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (jne,%l0);
- }
-
- OUTPUT_JUMP (\"jle %l0\", \"jbe %l0\", NULL_PTR);
-}")
-
(define_expand "bleu"
[(match_dup 1)
(set (pc)
@@ -5703,184 +5621,111 @@ byte_xor_operation:
(define_insn ""
[(set (pc)
- (if_then_else (leu (cc0)
- (const_int 0))
- (label_ref (match_operand 0 "" ""))
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(cc0) (const_int 0)])
+ (label_ref (match_operand 1 "" ""))
(pc)))]
""
- "jbe %l0")
-
-;; Negated conditional jump instructions.
-
-(define_insn ""
- [(set (pc)
- (if_then_else (eq (cc0)
- (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
- ""
"*
{
- if (cc_prev_status.flags & CC_Z_IN_NOT_C)
- return \"jc %l0\";
- else
+ enum rtx_code code = GET_CODE (operands[0]);
if (cc_prev_status.flags & CC_TEST_AX)
{
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x4000);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (je,%l0);
- }
- return \"jne %l0\";
-}")
-
-(define_insn ""
- [(set (pc)
- (if_then_else (ne (cc0)
- (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
- ""
- "*
-{
- if (cc_prev_status.flags & CC_Z_IN_NOT_C)
- return \"jnc %l0\";
- else
- if (cc_prev_status.flags & CC_TEST_AX)
- {
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x4000);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (jne,%l0);
- }
- return \"je %l0\";
-}")
-
-(define_insn ""
- [(set (pc)
- (if_then_else (gt (cc0)
- (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
- ""
- "*
-{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (jne,%l0);
- if (cc_prev_status.flags & CC_TEST_AX)
- {
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x4100);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (jne,%l0);
- }
- OUTPUT_JUMP (\"jle %l0\", \"jbe %l0\", NULL_PTR);
-}")
-
-(define_insn ""
- [(set (pc)
- (if_then_else (gtu (cc0)
- (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
- ""
- "jbe %l0")
-
-(define_insn ""
- [(set (pc)
- (if_then_else (lt (cc0)
- (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
- ""
- "*
-{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (jne,%l0);
- if (cc_prev_status.flags & CC_TEST_AX)
- {
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x100);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (je,%l0);
+ int eq;
+ HOST_WIDE_INT c;
+ operands[2] = gen_rtx_REG (SImode, 0);
+ switch (code)
+ {
+ case EQ:
+ c = 0x4000;
+ eq = 0;
+ break;
+ case NE:
+ c = 0x4000;
+ eq = 1;
+ break;
+ case GT:
+ c = 0x4100;
+ eq = 1;
+ break;
+ case LT:
+ c = 0x100;
+ eq = 0;
+ break;
+ case GE:
+ c = 0x100;
+ eq = 1;
+ break;
+ case LE:
+ c = 0x4100;
+ eq = 0;
+ break;
+ default:
+ abort ();
+ }
+ operands[3] = GEN_INT (c);
+ output_asm_insn (AS2 (testl,%3,%2), operands);
+ return eq ? AS1 (je,%l1) : AS1 (jne, %l1);
}
+ if ((cc_status.flags & CC_NO_OVERFLOW) && (code == LE || code == GT))
+ return (char *)0;
- OUTPUT_JUMP (\"jge %l0\", \"jae %l0\", \"jns %l0\");
+ return AS1(j%D0,%l1);
}")
(define_insn ""
[(set (pc)
- (if_then_else (ltu (cc0)
- (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
- ""
- "jae %l0")
-
-(define_insn ""
- [(set (pc)
- (if_then_else (ge (cc0)
- (const_int 0))
+ (if_then_else (match_operator 0 "comparison_operator"
+ [(cc0) (const_int 0)])
(pc)
- (label_ref (match_operand 0 "" ""))))]
+ (label_ref (match_operand 1 "" ""))))]
""
"*
{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (jne,%l0);
+ enum rtx_code code = GET_CODE (operands[0]);
if (cc_prev_status.flags & CC_TEST_AX)
{
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x100);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (jne,%l0);
+ int eq;
+ HOST_WIDE_INT c;
+ operands[2] = gen_rtx_REG (SImode, 0);
+ switch (code)
+ {
+ case EQ:
+ c = 0x4000;
+ eq = 1;
+ break;
+ case NE:
+ c = 0x4000;
+ eq = 0;
+ break;
+ case GT:
+ c = 0x4100;
+ eq = 0;
+ break;
+ case LT:
+ c = 0x100;
+ eq = 1;
+ break;
+ case GE:
+ c = 0x100;
+ eq = 0;
+ break;
+ case LE:
+ c = 0x4100;
+ eq = 1;
+ break;
+ default:
+ abort ();
+ }
+ operands[3] = GEN_INT (c);
+ output_asm_insn (AS2 (testl,%3,%2), operands);
+ return eq ? AS1 (je,%l1) : AS1 (jne, %l1);
}
- OUTPUT_JUMP (\"jl %l0\", \"jb %l0\", \"js %l0\");
-}")
-
-(define_insn ""
- [(set (pc)
- (if_then_else (geu (cc0)
- (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
- ""
- "jb %l0")
-
-(define_insn ""
- [(set (pc)
- (if_then_else (le (cc0)
- (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
- ""
- "*
-{
- if (TARGET_IEEE_FP && (cc_prev_status.flags & CC_IN_80387)
- && ! (cc_prev_status.flags & CC_FCOMI))
- return AS1 (jae,%l0);
+ if ((cc_status.flags & CC_NO_OVERFLOW) && (code == LE || code == GT))
+ return (char *)0;
- if (cc_prev_status.flags & CC_TEST_AX)
- {
- operands[1] = gen_rtx_REG (SImode, 0);
- operands[2] = GEN_INT (0x4100);
- output_asm_insn (AS2 (testl,%2,%1), operands);
- return AS1 (je,%l0);
- }
- OUTPUT_JUMP (\"jg %l0\", \"ja %l0\", NULL_PTR);
+ return AS1(j%d0,%l1);
}")
-
-(define_insn ""
- [(set (pc)
- (if_then_else (leu (cc0)
- (const_int 0))
- (pc)
- (label_ref (match_operand 0 "" ""))))]
- ""
- "ja %l0")
;; Unconditional and other jump instructions
@@ -5919,7 +5764,7 @@ byte_xor_operation:
(define_insn ""
[(set (pc)
(if_then_else (match_operator 0 "arithmetic_comparison_operator"
- [(plus:SI (match_operand:SI 1 "nonimmediate_operand" "+r,m")
+ [(plus:SI (match_operand:SI 1 "nonimmediate_operand" "+c*r,m")
(match_operand:SI 2 "general_operand" "rmi,ri"))
(const_int 0)])
(label_ref (match_operand 3 "" ""))
@@ -5931,6 +5776,11 @@ byte_xor_operation:
"*
{
CC_STATUS_INIT;
+ /* K6: emit loop when operand 1 is hard reg 2 (cf. the 'c' alternative) and the addend is -1. */
+ if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 2 &&
+ operands[2] == constm1_rtx && ix86_cpu == PROCESSOR_K6)
+ return \"loop %l3\";
+
if (operands[2] == constm1_rtx)
output_asm_insn (AS1 (dec%L1,%1), operands);
@@ -6682,7 +6532,7 @@ byte_xor_operation:
;; But strength reduction might offset the MEM expression. So we let
;; reload put the address into %edi.
-(define_insn ""
+(define_insn "*bzero"
[(set (mem:BLK (match_operand:SI 0 "address_operand" "D"))
(const_int 0))
(use (match_operand:SI 1 "const_int_operand" "n"))
@@ -6698,17 +6548,35 @@ byte_xor_operation:
output_asm_insn (\"cld\", operands);
if (GET_CODE (operands[1]) == CONST_INT)
{
- if (INTVAL (operands[1]) & ~0x03)
+ unsigned int count = INTVAL (operands[1]) & 0xffffffff;
+ if (count & ~0x03)
{
- xops[0] = GEN_INT ((INTVAL (operands[1]) >> 2) & 0x3fffffff);
+ xops[0] = GEN_INT (count / 4);
xops[1] = operands[4];
- output_asm_insn (AS2 (mov%L1,%0,%1), xops);
+ /* K6: stos takes 1 cycle, rep stos takes 8 + %ecx cycles.
+ 80386: 4/5+5n (+2 for set of ecx)
+ 80486: 5/7+5n (+1 for set of ecx)
+ */
+ if (count / 4 < ((int) ix86_cpu < (int)PROCESSOR_PENTIUM ? 4 : 6))
+ {
+ do
#ifdef INTEL_SYNTAX
- output_asm_insn (\"rep stosd\", xops);
+ output_asm_insn (\"stosd\", xops);
#else
- output_asm_insn (\"rep\;stosl\", xops);
+ output_asm_insn (\"stosl\", xops);
#endif
+ while ((count -= 4) > 3);
+ }
+ else
+ {
+ output_asm_insn (AS2 (mov%L1,%0,%1), xops);
+#ifdef INTEL_SYNTAX
+ output_asm_insn (\"rep stosd\", xops);
+#else
+ output_asm_insn (\"rep\;stosl\", xops);
+#endif
+ }
}
if (INTVAL (operands[1]) & 0x02)
output_asm_insn (\"stosw\", operands);