aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog81
-rw-r--r--gcc/Makefile.in4
-rw-r--r--gcc/config/c4x/c4x.c4
-rw-r--r--gcc/config/i386/i386.c4
-rw-r--r--gcc/config/i386/i386.md135
-rw-r--r--gcc/config/ia64/ia64-protos.h3
-rw-r--r--gcc/config/ia64/ia64.c112
-rw-r--r--gcc/config/ia64/ia64.h90
-rw-r--r--gcc/config/ia64/ia64.md118
-rw-r--r--gcc/config/rs6000/rs6000.c5
-rw-r--r--gcc/config/rs6000/rs6000.md24
-rw-r--r--gcc/doloop.c846
-rw-r--r--gcc/final.c12
-rw-r--r--gcc/flags.h2
-rw-r--r--gcc/jump.c2
-rw-r--r--gcc/loop.c306
-rw-r--r--gcc/loop.h2
-rw-r--r--gcc/recog.c2
-rw-r--r--gcc/toplev.c6
19 files changed, 1246 insertions, 512 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ad7ae07..edc2df4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,84 @@
+2000-07-30 Michael Hayes <mhayes@cygnus.com>
+ Richard Henderson <rth@cygnus.com>
+
+ * Makefile.in (OBJS): Add doloop.o.
+ * doloop.c: New file.
+
+ * final.c (insn_current_reference_address): Return 0 before final.
+ * flags.h (flag_branch_on_count_reg): Fix typos in commentary.
+ * jump.c (any_uncondjump_p): Likewise.
+ * loop.c (indirect_jump_in_function): Make static.
+ (strength_reduce): Call doloop_optimize.
+ (insert_bct, instrument_loop_bct): Remove.
+ * loop.h (doloop_optimize): Prototype.
+ * recog.c (split_all_insns): Split all INSN_P.
+ * toplev.c (flag_branch_on_count_reg): Default on.
+
+ * config/c4x/c4x.c (c4x_optimization_options): Don't set
+ flag_branch_on_count_reg.
+ * config/i386/i386.c (override_options): Likewise.
+ * config/rs6000/rs6000.c (optimization_options): Likewise.
+
+ * config/i386/i386.md (decrement_and_branch_on_count): Remove.
+ (doloop_end): New.
+	(dbra_ge): Remove, as well as all its splitters.
+
+ * config/rs6000/rs6000.md (decrement_and_branch_on_count): Remove.
+ (doloop_end): New.
+
+ * config/ia64/ia64-protos.h (ar_lc_reg_operand): Declare.
+ (ia64_register_move_cost): Declare.
+ * config/ia64/ia64.c (ar_lc_reg_operand): New.
+ (struct ia64_frame_info): Add ar_size.
+ (ia64_compute_frame_size): Set it.
+ (save_restore_insns): Save and restore ar.lc.
+ (ia64_register_move_cost): New, moved from header file. Handle
+ application registers.
+ (REG_AR_PFS, REG_AR_EC): Remove. Replace with AR_*_REGNUM numbers.
+ (emit_insn_group_barriers): Special case doloop_end_internal.
+ (ia64_epilogue_uses): Mark ar.lc live at end.
+ * config/ia64/ia64.h (AR_CCV_REGNUM, AR_LC_REGNUM): New registers.
+ (AR_EC_REGNUM, AR_PFS_REGNUM): New registers.
+ (FIRST_PSEUDO_REGISTER): Make room.
+ (AR_M_REGNO_P, AR_I_REGNO_P, AR_REGNO_P): New.
+ (FIXED_REGISTERS, CALL_USED_REGISTERS): Update.
+ (REG_ALLOC_ORDER): Update.
+ (HARD_REGNO_MODE_OK): Update.
+ (REGISTER_NAMES): Update.
+ (enum reg_class): Add AR_M_REGS and AR_I_REGS.
+ (REG_CLASS_NAMES, REG_CLASS_CONTENTS): Update.
+ (REGNO_REG_CLASS): Update.
+ (LEGITIMATE_ADDRESS_DISP): Displacement range is 9 bits, not 10.
+ (REGISTER_MOVE_COST): Move out of line.
+ (PREDICATE_CODES): Update.
+ * config/ia64/ia64.md (movdi patterns): Handle ar register classes.
+ (addsi3_plus1_alt, adddi3_plus1_alt): New.
+ (shladd_elim splitter): Allow constants in the predicate.
+ (doloop_end, doloop_end_internal): New.
+
+2000-07-30 Richard Henderson <rth@cygnus.com>
+
+ * genattrtab.c (struct insn_def): Add lineno member.
+ (struct insn_ent): Likewise.
+ (struct attr_desc): Likewise.
+ (struct delay_desc): Likewise.
+ (struct function_unit_op): Likewise.
+ (struct function_unit): Likewise.
+ (check_attr_value): Use message_with_line.
+ (check_defs): Likewise.
+ (expand_units): Likewise.
+ (check_attr_test): Take a lineno argument.
+ (gen_attr): Likewise.
+ (gen_insn): Likewise.
+ (gen_delay): Likewise.
+ (gen_unit): Likewise.
+ (main): Give it to them.
+ (convert_set_attr_alternative): Take an insn_def argument
+ instead of num_alt and insn_index.
+ (convert_set_attr): Likewise.
+ (write_test_expr): Protect INSN_ADDRESSES load
+ with INSN_ADDRESSES_SET_P.
+
2000-07-30 Richard Henderson <rth@cygnus.com>
* flow.c (init_propagate_block_info): Use pc_set.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 83213d1..ddd150a 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -689,7 +689,7 @@ OBJS = diagnostic.o \
function.o stmt.o except.o expr.o calls.o expmed.o explow.o optabs.o real.o \
builtins.o intl.o varasm.o rtl.o print-rtl.o rtlanal.o emit-rtl.o genrtl.o \
dbxout.o sdbout.o dwarfout.o dwarf2out.o xcoffout.o bitmap.o alias.o gcse.o \
- integrate.o jump.o cse.o loop.o unroll.o flow.o combine.o varray.o \
+ integrate.o jump.o cse.o loop.o doloop.o unroll.o flow.o combine.o varray.o \
regclass.o regmove.o local-alloc.o global.o reload.o reload1.o caller-save.o \
insn-peep.o reorg.o haifa-sched.o final.o recog.o reg-stack.o regrename.o \
insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o lcm.o \
@@ -1338,6 +1338,8 @@ profile.o : profile.c $(CONFIG_H) system.h $(RTL_H) $(TREE_H) flags.h \
loop.o : loop.c $(CONFIG_H) system.h $(RTL_H) flags.h $(LOOP_H) insn-config.h \
insn-flags.h $(REGS_H) hard-reg-set.h $(RECOG_H) $(EXPR_H) real.h \
$(BASIC_BLOCK_H) function.h toplev.h varray.h except.h cselib.h
+doloop.o : doloop.c $(CONFIG_H) system.h $(RTL_H) flags.h $(LOOP_H) \
+ insn-flags.h $(EXPR_H) hard-reg-set.h $(BASIC_BLOCK_H)
unroll.o : unroll.c $(CONFIG_H) system.h $(RTL_H) insn-config.h function.h \
$(INTEGRATE_H) $(REGS_H) $(RECOG_H) flags.h $(EXPR_H) $(LOOP_H) toplev.h \
hard-reg-set.h varray.h $(BASIC_BLOCK_H)
diff --git a/gcc/config/c4x/c4x.c b/gcc/config/c4x/c4x.c
index 9afe59a..261215f 100644
--- a/gcc/config/c4x/c4x.c
+++ b/gcc/config/c4x/c4x.c
@@ -295,10 +295,6 @@ c4x_optimization_options (level, size)
instructions. The benefit we gain we get by scheduling before
register allocation is probably marginal anyhow. */
flag_schedule_insns = 0;
-
- /* When optimizing, enable use of RPTB instruction. */
- if (level >= 1)
- flag_branch_on_count_reg = 1;
}
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 061d693..c5289ab 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -630,10 +630,6 @@ override_options ()
if (flag_fast_math)
target_flags &= ~MASK_IEEE_FP;
- /* If we're planning on using `loop', use it. */
- if (TARGET_USE_LOOP && optimize)
- flag_branch_on_count_reg = 1;
-
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
on by -msse. */
if (TARGET_SSE)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index eecb151..0c06124 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8327,27 +8327,32 @@
;; This is all complicated by the fact that since this is a jump insn
;; we must handle our own reloads.
-(define_expand "decrement_and_branch_on_count"
- [(parallel [(set (pc) (if_then_else
- (ne (match_operand:SI 0 "register_operand" "")
- (const_int 1))
- (label_ref (match_operand 1 "" ""))
- (pc)))
- (set (match_dup 0)
- (plus:SI (match_dup 0)
- (const_int -1)))
- (clobber (match_scratch:SI 2 ""))
- (clobber (reg:CC 17))])]
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
"TARGET_USE_LOOP"
- "")
+ "
+{
+ /* Only use cloop on innermost loops. */
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+ emit_jump_insn (gen_doloop_end_internal (operands[4], operands[0],
+ operands[0]));
+ DONE;
+}")
-(define_insn "*dbra_ne"
+(define_insn "doloop_end_internal"
[(set (pc)
- (if_then_else (ne (match_operand:SI 1 "register_operand" "c,*r,*r")
+ (if_then_else (ne (match_operand:SI 1 "register_operand" "c,?*r,?*r")
(const_int 1))
(label_ref (match_operand 0 "" ""))
(pc)))
- (set (match_operand:SI 2 "register_operand" "=1,*r,*m*r")
+ (set (match_operand:SI 2 "register_operand" "=1,1,*m*r")
(plus:SI (match_dup 1)
(const_int -1)))
(clobber (match_scratch:SI 3 "=X,X,r"))
@@ -8372,55 +8377,24 @@
(const_string "ibr")
(const_string "multi")))])
-(define_insn "*dbra_ge"
- [(set (pc)
- (if_then_else (ge (match_operand:SI 1 "register_operand" "c,*r,*r")
- (const_int 0))
- (label_ref (match_operand 0 "" ""))
- (pc)))
- (set (match_operand:SI 2 "register_operand" "=1,*r,*m*r")
- (plus:SI (match_dup 1)
- (const_int -1)))
- (clobber (match_scratch:SI 3 "=X,X,r"))
- (clobber (reg:CC 17))]
- "TARGET_USE_LOOP && find_reg_note (insn, REG_NONNEG, 0)"
- "*
-{
- if (which_alternative != 0)
- return \"#\";
- if (get_attr_length (insn) == 2)
- return \"loop\\t%l0\";
- else
- return \"dec{l}\\t%1\;jne\\t%l0\";
-}"
- [(set (attr "type")
- (if_then_else (and (eq_attr "alternative" "0")
- (and (ge (minus (match_dup 0) (pc))
- (const_int -128))
- (lt (minus (match_dup 0) (pc))
- (const_int 124))))
- (const_string "ibr")
- (const_string "multi")))
- (set_attr "ppro_uops" "many")])
-
(define_split
[(set (pc)
(if_then_else (ne (match_operand:SI 1 "register_operand" "")
(const_int 1))
(match_operand 0 "" "")
(pc)))
- (set (match_operand:SI 2 "register_operand" "")
+ (set (match_dup 1)
(plus:SI (match_dup 1)
(const_int -1)))
- (clobber (match_scratch:SI 3 ""))
+ (clobber (match_scratch:SI 2 ""))
(clobber (reg:CC 17))]
- "TARGET_USE_LOOP && reload_completed
- && ! (REGNO (operands[1]) == 2 && rtx_equal_p (operands[1], operands[2]))"
- [(set (match_dup 2) (match_dup 1))
- (parallel [(set (reg:CCZ 17)
- (compare:CCZ (plus:SI (match_dup 2) (const_int -1))
+ "TARGET_USE_LOOP
+ && reload_completed
+ && REGNO (operands[1]) != 2"
+ [(parallel [(set (reg:CCZ 17)
+ (compare:CCZ (plus:SI (match_dup 1) (const_int -1))
(const_int 0)))
- (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))])
+ (set (match_dup 1) (plus:SI (match_dup 1) (const_int -1)))])
(set (pc) (if_then_else (ne (reg:CCZ 17) (const_int 0))
(match_dup 0)
(pc)))]
@@ -8432,12 +8406,15 @@
(const_int 1))
(match_operand 0 "" "")
(pc)))
- (set (match_operand:SI 2 "memory_operand" "")
+ (set (match_operand:SI 2 "nonimmediate_operand" "")
(plus:SI (match_dup 1)
(const_int -1)))
(clobber (match_scratch:SI 3 ""))
(clobber (reg:CC 17))]
- "TARGET_USE_LOOP && reload_completed"
+ "TARGET_USE_LOOP
+ && reload_completed
+ && (! REG_P (operands[2])
+ || ! rtx_equal_p (operands[1], operands[2]))"
[(set (match_dup 3) (match_dup 1))
(parallel [(set (reg:CCZ 17)
(compare:CCZ (plus:SI (match_dup 3) (const_int -1))
@@ -8448,52 +8425,6 @@
(match_dup 0)
(pc)))]
"")
-
-(define_split
- [(set (pc)
- (if_then_else (ge (match_operand:SI 1 "register_operand" "")
- (const_int 0))
- (match_operand 0 "" "")
- (pc)))
- (set (match_operand:SI 2 "register_operand" "")
- (plus:SI (match_dup 1)
- (const_int -1)))
- (clobber (match_scratch:SI 3 ""))
- (clobber (reg:CC 17))]
- "TARGET_USE_LOOP && reload_completed
- && ! (REGNO (operands[1]) == 2 && rtx_equal_p (operands[1], operands[2]))"
- [(set (match_dup 2) (match_dup 1))
- (parallel [(set (reg:CCNO 17)
- (compare:CCNO (plus:SI (match_dup 2) (const_int -1))
- (const_int 0)))
- (set (match_dup 2) (plus:SI (match_dup 2) (const_int -1)))])
- (set (pc) (if_then_else (lt (reg:CCNO 17) (const_int 0))
- (match_dup 0)
- (pc)))]
- "")
-
-(define_split
- [(set (pc)
- (if_then_else (ge (match_operand:SI 1 "register_operand" "")
- (const_int 0))
- (match_operand 0 "" "")
- (pc)))
- (set (match_operand:SI 2 "memory_operand" "")
- (plus:SI (match_dup 1)
- (const_int -1)))
- (clobber (match_scratch:SI 3 ""))
- (clobber (reg:CC 17))]
- "TARGET_USE_LOOP && reload_completed"
- [(set (match_dup 3) (match_dup 1))
- (parallel [(set (reg:CCNO 17)
- (compare:CCNO (plus:SI (match_dup 3) (const_int -1))
- (const_int 0)))
- (set (match_dup 3) (plus:SI (match_dup 3) (const_int -1)))])
- (set (match_dup 2) (match_dup 3))
- (set (pc) (if_then_else (lt (reg:CCNO 17) (const_int 0))
- (match_dup 0)
- (pc)))]
- "")
;; Call instructions.
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h
index 00421f8..12ea516 100644
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -59,6 +59,8 @@ extern void ia64_function_prologue PARAMS((FILE *, int));
extern void ia64_funtion_epilogue PARAMS((FILE *, int));
extern int ia64_direct_return PARAMS((void));
extern int predicate_operator PARAMS((rtx, enum machine_mode));
+extern int ar_lc_reg_operand PARAMS((rtx, enum machine_mode));
+
extern int ia64_move_ok PARAMS((rtx, rtx));
extern void ia64_expand_load_address PARAMS((rtx, rtx));
@@ -102,6 +104,7 @@ extern int ia64_valid_type_attribute PARAMS((tree, tree, tree, tree));
extern void ia64_encode_section_info PARAMS((tree));
#endif /* TREE_CODE */
+extern int ia64_register_move_cost PARAMS((enum reg_class, enum reg_class));
extern int ia64_epilogue_uses PARAMS((int));
extern void ia64_file_start PARAMS((FILE *));
extern void ia64_expand_prologue PARAMS((void));
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 6b877b5..7153620 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -559,6 +559,19 @@ predicate_operator (op, mode)
return ((GET_MODE (op) == mode || mode == VOIDmode)
&& (code == EQ || code == NE));
}
+
+/* Return 1 if this is the ar.lc register. */
+
+int
+ar_lc_reg_operand (op, mode)
+ register rtx op;
+ enum machine_mode mode;
+{
+ return (GET_MODE (op) == DImode
+ && (mode == DImode || mode == VOIDmode)
+ && GET_CODE (op) == REG
+ && REGNO (op) == AR_LC_REGNUM);
+}
/* Return 1 if the operands of a move are ok. */
@@ -683,6 +696,7 @@ struct ia64_frame_info
long fr_pad_size; /* # bytes needed to align FP save area. */
long pr_size; /* # bytes needed to store predicate regs. */
long br_size; /* # bytes needed to store branch regs. */
+ long ar_size; /* # bytes needed to store AR regs. */
HARD_REG_SET mask; /* mask of saved registers. */
int initialized; /* != 0 is frame size already calculated. */
};
@@ -713,6 +727,7 @@ ia64_compute_frame_size (size)
int fr_pad_size = 0;
int pr_size = 0;
int br_size = 0;
+ int ar_size = 0;
int pretend_pad_size = 0;
int tmp;
int regno;
@@ -772,6 +787,13 @@ ia64_compute_frame_size (size)
else
fr_pad_size = 0;
+ /* AR.LC, for reasons unexplained, is call saved. */
+ if (regs_ever_live[AR_LC_REGNUM])
+ {
+ SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
+ ar_size = 8;
+ }
+
/* If we have an odd number of words of pretend arguments written to the
stack, then the FR save area will be unaligned. We pad below this area
to keep things 16 byte aligned. This needs to be kept distinct, to
@@ -780,7 +802,7 @@ ia64_compute_frame_size (size)
pretend_pad_size = current_function_pretend_args_size % 16;
/* The 16 bytes is for the scratch area. */
- tmp = (size + gr_size + fr_pad_size + fr_size + pr_size + br_size
+ tmp = (size + gr_size + fr_pad_size + fr_size + pr_size + br_size + ar_size
+ current_function_outgoing_args_size + 16);
tmp += (current_function_pretend_args_size
? current_function_pretend_args_size - 16
@@ -810,6 +832,7 @@ ia64_compute_frame_size (size)
current_frame_info.fr_pad_size = fr_pad_size;
current_frame_info.pr_size = pr_size;
current_frame_info.br_size = br_size;
+ current_frame_info.ar_size = ar_size;
COPY_HARD_REG_SET (current_frame_info.mask, mask);
current_frame_info.initialized = reload_completed;
@@ -822,8 +845,11 @@ save_restore_insns (save_p)
{
rtx insn;
- if (current_frame_info.gr_size + current_frame_info.fr_size
- + current_frame_info.br_size + current_frame_info.pr_size)
+ if (current_frame_info.gr_size
+ + current_frame_info.fr_size
+ + current_frame_info.br_size
+ + current_frame_info.pr_size
+ + current_frame_info.ar_size)
{
rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (2));
rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg);
@@ -833,6 +859,7 @@ save_restore_insns (save_p)
+ current_frame_info.fr_pad_size
+ current_frame_info.br_size
+ current_frame_info.pr_size
+ + current_frame_info.ar_size
+ current_frame_info.var_size
+ current_frame_info.pretend_size
+ current_frame_info.pretend_pad_size));
@@ -961,6 +988,29 @@ save_restore_insns (save_p)
if (save_p)
RTX_FRAME_RELATED_P (insn) = 1;
}
+
+ if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
+ {
+ rtx src, dest;
+
+ if (save_p)
+ {
+ src = gen_rtx_REG (DImode, AR_LC_REGNUM);
+ dest = gen_rtx_MEM (DImode, tmp_post_inc);
+ }
+ else
+ {
+ src = gen_rtx_MEM (DImode, tmp_post_inc);
+ dest = gen_rtx_REG (DImode, AR_LC_REGNUM);
+ }
+
+ insn = emit_insn (gen_movdi (tmp2_reg, src));
+ if (save_p)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ insn = emit_insn (gen_movdi (dest, tmp2_reg));
+ if (save_p)
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
}
}
@@ -2149,6 +2199,28 @@ ia64_print_operand (file, x, code)
return;
}
+/* Calculate the cost of moving data from a register in class FROM to
+ one in class TO. */
+
+int
+ia64_register_move_cost (from, to)
+ enum reg_class from, to;
+{
+ int from_hard, to_hard;
+ int from_gr, to_gr;
+
+ from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
+ to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
+ from_gr = (from == GENERAL_REGS);
+ to_gr = (to == GENERAL_REGS);
+
+ if (from_hard && to_hard)
+ return 8;
+ else if ((from_hard && !to_gr) || (!from_gr && to_hard))
+ return 6;
+
+ return 2;
+}
/* This function returns the register class required for a secondary
register when copying between one of the registers in CLASS, and X,
@@ -2382,14 +2454,11 @@ ia64_override_options ()
complex). */
#define REG_GP (GR_REG (1))
#define REG_RP (BR_REG (0))
-#define REG_AR_PFS (FIRST_PSEUDO_REGISTER)
#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1)
-/* ??? This will eventually need to be a hard register. */
-#define REG_AR_EC (FIRST_PSEUDO_REGISTER + 2)
/* This is used for volatile asms which may require a stop bit immediately
before and after them. */
-#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 3)
-#define NUM_REGS (FIRST_PSEUDO_REGISTER + 4)
+#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2)
+#define NUM_REGS (FIRST_PSEUDO_REGISTER + 3)
/* For each register, we keep track of how many times it has been
written in the current instruction group. If a register is written
@@ -2521,15 +2590,13 @@ rws_access_reg (regno, flags, pred)
/* Branches have several RAW exceptions that allow to avoid
barriers. */
- if (REGNO_REG_CLASS (regno) == BR_REGS || regno == REG_AR_PFS)
+ if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
/* RAW dependencies on branch regs are permissible as long
as the writer is a non-branch instruction. Since we
never generate code that uses a branch register written
by a branch instruction, handling this case is
easy. */
- /* ??? This assumes that we don't emit br.cloop, br.cexit, br.ctop,
- br.wexit, br.wtop. This is true currently. */
- return 0;
+ return 0;
if (REGNO_REG_CLASS (regno) == PR_REGS
&& ! rws_sum[regno].written_by_fp)
@@ -2678,7 +2745,7 @@ rtx_needs_barrier (x, flags, pred)
new_flags.is_write = 0;
/* ??? Why is this here? It seems unnecessary. */
need_barrier |= rws_access_reg (REG_GP, new_flags, pred);
- need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred);
+ need_barrier |= rws_access_reg (AR_EC_REGNUM, new_flags, pred);
/* Avoid multiple register writes, in case this is a pattern with
multiple CALL rtx. This avoids an abort in rws_access_reg. */
@@ -2688,7 +2755,7 @@ rtx_needs_barrier (x, flags, pred)
{
new_flags.is_write = 1;
need_barrier |= rws_access_reg (REG_RP, new_flags, pred);
- need_barrier |= rws_access_reg (REG_AR_PFS, new_flags, pred);
+ need_barrier |= rws_access_reg (AR_PFS_REGNUM, new_flags, pred);
need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
}
break;
@@ -2877,7 +2944,7 @@ rtx_needs_barrier (x, flags, pred)
/* Alloc must always be the first instruction. Currently, we
only emit it at the function start, so we don't need to worry
about emitting a stop bit before it. */
- need_barrier = rws_access_reg (REG_AR_PFS, flags, pred);
+ need_barrier = rws_access_reg (AR_PFS_REGNUM, flags, pred);
new_flags.is_write = 1;
need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
@@ -2892,7 +2959,7 @@ rtx_needs_barrier (x, flags, pred)
case 4: /* mov ar.pfs= */
new_flags.is_write = 1;
- need_barrier = rws_access_reg (REG_AR_PFS, new_flags, pred);
+ need_barrier = rws_access_reg (AR_PFS_REGNUM, new_flags, pred);
break;
case 5: /* set_bsp */
@@ -2920,10 +2987,10 @@ rtx_needs_barrier (x, flags, pred)
case RETURN:
new_flags.is_write = 0;
need_barrier = rws_access_reg (REG_RP, flags, pred);
- need_barrier |= rws_access_reg (REG_AR_PFS, flags, pred);
+ need_barrier |= rws_access_reg (AR_PFS_REGNUM, flags, pred);
new_flags.is_write = 1;
- need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred);
+ need_barrier |= rws_access_reg (AR_EC_REGNUM, new_flags, pred);
need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred);
break;
@@ -3042,6 +3109,12 @@ emit_insn_group_barriers (insns)
if (INSN_CODE (insn) == CODE_FOR_epilogue_deallocate_stack)
pat = XVECEXP (pat, 0, 0);
+ /* ??? Similarly, the pattern we use for br.cloop
+ confuses the code above. The second element of the
+ vector is representative. */
+ else if (INSN_CODE (insn) == CODE_FOR_doloop_end_internal)
+ pat = XVECEXP (pat, 0, 1);
+
memset (rws_insn, 0, sizeof (rws_insn));
need_barrier |= rtx_needs_barrier (pat, flags, 0);
@@ -3164,6 +3237,9 @@ ia64_epilogue_uses (regno)
if (regno == R_BR (0))
return 1;
+ if (regno == AR_LC_REGNUM)
+ return 1;
+
return 0;
}
diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h
index 6379ae7..ab298f4 100644
--- a/gcc/config/ia64/ia64.h
+++ b/gcc/config/ia64/ia64.h
@@ -535,12 +535,11 @@ while (0)
/* Register Basics */
/* Number of hardware registers known to the compiler.
- We have 128 general registers, 128 floating point registers, 64 predicate
- registers, 8 branch registers, and one frame pointer register. */
+ We have 128 general registers, 128 floating point registers,
+ 64 predicate registers, 8 branch registers, one frame pointer,
+ and several "application" registers. */
-/* ??? Should add ar.lc, ar.ec and probably also ar.pfs. */
-
-#define FIRST_PSEUDO_REGISTER 330
+#define FIRST_PSEUDO_REGISTER 334
/* Ranges for the various kinds of registers. */
#define ADDL_REGNO_P(REGNO) ((unsigned HOST_WIDE_INT) (REGNO) <= 3)
@@ -561,10 +560,23 @@ while (0)
#define IN_REG(REGNO) ((REGNO) + 112)
#define LOC_REG(REGNO) ((REGNO) + 32)
+#define AR_CCV_REGNUM 330
+#define AR_LC_REGNUM 331
+#define AR_EC_REGNUM 332
+#define AR_PFS_REGNUM 333
+
#define IN_REGNO_P(REGNO) ((REGNO) >= IN_REG (0) && (REGNO) <= IN_REG (7))
#define LOC_REGNO_P(REGNO) ((REGNO) >= LOC_REG (0) && (REGNO) <= LOC_REG (79))
#define OUT_REGNO_P(REGNO) ((REGNO) >= OUT_REG (0) && (REGNO) <= OUT_REG (7))
+#define AR_M_REGNO_P(REGNO) ((REGNO) == AR_CCV_REGNUM)
+#define AR_I_REGNO_P(REGNO) ((REGNO) >= AR_LC_REGNUM \
+ && (REGNO) < FIRST_PSEUDO_REGISTER)
+#define AR_REGNO_P(REGNO) ((REGNO) >= AR_CCV_REGNUM \
+ && (REGNO) < FIRST_PSEUDO_REGISTER)
+
+
+
/* ??? Don't really need two sets of macros. I like this one better because
it is less typing. */
#define R_GR(REGNO) GR_REG (REGNO)
@@ -619,14 +631,14 @@ while (0)
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
/* Branch registers. */ \
0, 0, 0, 0, 0, 0, 0, 0, \
- /*FP RA*/ \
- 1, 1, \
+ /*FP RA CCV LC EC PFS */ \
+ 1, 1, 1, 1, 1, 1 \
}
-/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered (in
- general) by function calls as well as for fixed registers. This macro
- therefore identifies the registers that are not available for general
- allocation of values that must live across function calls. */
+/* Like `FIXED_REGISTERS' but has 1 for each register that is clobbered
+ (in general) by function calls as well as for fixed registers. This
+ macro therefore identifies the registers that are not available for
+ general allocation of values that must live across function calls. */
#define CALL_USED_REGISTERS \
{ /* General registers. */ \
@@ -654,8 +666,8 @@ while (0)
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
/* Branch registers. */ \
1, 0, 0, 0, 0, 0, 1, 1, \
- /*FP RA*/ \
- 1, 1, \
+ /*FP RA CCV LC EC PFS */ \
+ 1, 1, 1, 1, 1, 1 \
}
/* Define this macro if the target machine has register windows. This C
@@ -787,10 +799,10 @@ while (0)
R_PR (0), \
/* Special branch registers. */ \
R_BR (0), \
- /* Frame pointer. Return address. */ \
+ /* Other fixed registers. */ \
FRAME_POINTER_REGNUM, RETURN_ADDRESS_POINTER_REGNUM, \
+ AR_CCV_REGNUM, AR_LC_REGNUM, AR_EC_REGNUM, AR_PFS_REGNUM \
}
-
/* How Values Fit in Registers */
@@ -815,6 +827,7 @@ while (0)
(FR_REGNO_P (REGNO) ? (MODE) != CCmode \
: PR_REGNO_P (REGNO) ? (MODE) == CCmode \
: GR_REGNO_P (REGNO) ? (MODE) != XFmode \
+ : AR_REGNO_P (REGNO) ? (MODE) == DImode \
: 1)
/* A C expression that is nonzero if it is desirable to choose register
@@ -877,6 +890,8 @@ enum reg_class
GR_REGS,
FR_REGS,
GR_AND_FR_REGS,
+ AR_M_REGS,
+ AR_I_REGS,
ALL_REGS,
LIM_REG_CLASSES
};
@@ -890,7 +905,8 @@ enum reg_class
constants. These names are used in writing some of the debugging dumps. */
#define REG_CLASS_NAMES \
{ "NO_REGS", "PR_REGS", "BR_REGS", "ADDL_REGS", "GR_REGS", \
- "FR_REGS", "GR_AND_FR_REGS", "ALL_REGS" }
+ "FR_REGS", "GR_AND_FR_REGS", "AR_M_REGS", "AR_I_REGS", \
+ "ALL_REGS" }
/* An initializer containing the contents of the register classes, as integers
which are bit masks. The Nth integer specifies the contents of class N.
@@ -901,35 +917,43 @@ enum reg_class
/* NO_REGS. */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
- 0x00000000, 0x00000000, 0x000 }, \
+ 0x00000000, 0x00000000, 0x0000 }, \
/* PR_REGS. */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
- 0xFFFFFFFF, 0xFFFFFFFF, 0x000 }, \
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x0000 }, \
/* BR_REGS. */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
- 0x00000000, 0x00000000, 0x0FF }, \
+ 0x00000000, 0x00000000, 0x00FF }, \
/* ADDL_REGS. */ \
{ 0x0000000F, 0x00000000, 0x00000000, 0x00000000, \
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
- 0x00000000, 0x00000000, 0x000 }, \
+ 0x00000000, 0x00000000, 0x0000 }, \
/* GR_REGS. */ \
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
0x00000000, 0x00000000, 0x00000000, 0x00000000, \
- 0x00000000, 0x00000000, 0x300 }, \
+ 0x00000000, 0x00000000, 0x0300 }, \
/* FR_REGS. */ \
{ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
- 0x00000000, 0x00000000, 0x000 }, \
+ 0x00000000, 0x00000000, 0x0000 }, \
/* GR_AND_FR_REGS. */ \
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
- 0x00000000, 0x00000000, 0x300 }, \
+ 0x00000000, 0x00000000, 0x0300 }, \
+ /* AR_M_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x0400 }, \
+ /* AR_I_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
+ 0x00000000, 0x00000000, 0x3800 }, \
/* ALL_REGS. */ \
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, \
- 0xFFFFFFFF, 0xFFFFFFFF, 0x3FF }, \
+ 0xFFFFFFFF, 0xFFFFFFFF, 0x3FFF }, \
}
/* A C expression whose value is a register class containing hard register
@@ -944,6 +968,8 @@ enum reg_class
: FR_REGNO_P (REGNO) ? FR_REGS \
: PR_REGNO_P (REGNO) ? PR_REGS \
: BR_REGNO_P (REGNO) ? BR_REGS \
+ : AR_M_REGNO_P (REGNO) ? AR_I_REGS \
+ : AR_I_REGNO_P (REGNO) ? AR_M_REGS \
: NO_REGS)
/* A macro whose definition is the name of the class to which a valid base
@@ -968,6 +994,8 @@ enum reg_class
: (CHAR) == 'a' ? ADDL_REGS \
: (CHAR) == 'b' ? BR_REGS \
: (CHAR) == 'c' ? PR_REGS \
+ : (CHAR) == 'd' ? AR_M_REGS \
+ : (CHAR) == 'e' ? AR_I_REGS \
: NO_REGS)
/* A C expression which is nonzero if register number NUM is suitable for use
@@ -1816,8 +1844,8 @@ do { \
&& rtx_equal_p (R, XEXP (X, 0)) \
&& (GET_CODE (XEXP (X, 1)) == REG \
|| (GET_CODE (XEXP (X, 1)) == CONST_INT \
- && INTVAL (XEXP (X, 1)) >= -512 \
- && INTVAL (XEXP (X, 1)) < 512)))
+ && INTVAL (XEXP (X, 1)) >= -256 \
+ && INTVAL (XEXP (X, 1)) < 256)))
#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \
do { \
@@ -1926,10 +1954,7 @@ do { \
one in class TO. */
#define REGISTER_MOVE_COST(FROM, TO) \
-((FROM) == BR_REGS && (TO) == BR_REGS ? 8 \
- : (((FROM) == BR_REGS && (TO) != GENERAL_REGS) \
- || ((TO) == BR_REGS && (FROM) != GENERAL_REGS)) ? 6 \
- : 2)
+ ia64_register_move_cost((FROM), (TO))
/* A C expression for the cost of moving data of mode M between a register and
memory. */
@@ -2363,7 +2388,7 @@ do { \
/* Branch registers. */ \
"b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", \
/* Frame pointer. Return address. */ \
- "sfp", "retaddr" \
+ "sfp", "retaddr", "ar.ccv", "ar.lc", "ar.ec", "ar.pfs" \
}
/* If defined, a C initializer for an array of structures containing a name and
@@ -2749,7 +2774,8 @@ do { \
{ "normal_comparison_operator", {EQ, NE, GT, LE, GTU, LEU}}, \
{ "adjusted_comparison_operator", {LT, GE, LTU, GEU}}, \
{ "call_multiple_values_operation", {PARALLEL}}, \
-{ "predicate_operator", {NE, EQ}},
+{ "predicate_operator", {NE, EQ}}, \
+{ "ar_lc_reg_operand", {REG}},
/* An alias for a machine mode name. This is the machine mode that elements of
a jump-table should have. */
diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md
index 872ef6b..f430120 100644
--- a/gcc/config/ia64/ia64.md
+++ b/gcc/config/ia64/ia64.md
@@ -368,10 +368,12 @@
(define_insn ""
[(cond_exec
(match_operator 2 "predicate_operator"
- [(match_operand:CC 3 "register_operand" "c,c,c,c,c,c,c,c")
+ [(match_operand:CC 3 "register_operand" "c,c,c,c,c,c,c,c,c,c")
(const_int 0)])
- (set (match_operand:DI 0 "register_operand" "=r,r,r, r,*f,*f, r,*b")
- (match_operand:DI 1 "nonmemory_operand" "rO,J,i,*f,rO,*f,*b,rO")))]
+ (set (match_operand:DI 0 "register_operand"
+ "=r,r,r, r,*f,*f, r,*b*e, r,*d")
+ (match_operand:DI 1 "nonmemory_operand"
+ "rO,J,i,*f,rO,*f,*b*e, rO,*d,rO")))]
"TARGET_A_STEP && ia64_move_ok (operands[0], operands[1])"
"*
{
@@ -383,6 +385,8 @@
\"(%J2) setf.sig %0 = %r1\",
\"(%J2) mov %0 = %1\",
\"(%J2) mov %0 = %1\",
+ \"(%J2) mov %0 = %r1\",
+ \"(%J2) mov %0 = %1\",
\"(%J2) mov %0 = %r1\"
};
@@ -403,14 +407,14 @@
return alt[which_alternative];
}"
- [(set_attr "type" "A,A,L,M,M,F,I,I")
+ [(set_attr "type" "A,A,L,M,M,F,I,I,M,M")
(set_attr "predicable" "no")])
(define_insn "*movdi_internal_astep"
[(set (match_operand:DI 0 "destination_operand"
- "=r,r,r,r, m, r,*f,*f,*f, Q, r,*b")
+ "=r,r,r,r, m, r,*f,*f,*f, Q, r,*b*e, r,*d")
(match_operand:DI 1 "move_operand"
- "rO,J,i,m,rO,*f,rO,*f, Q,*f,*b,rO"))]
+ "rO,J,i,m,rO,*f,rO,*f, Q,*f,*b*e, rO,*d,rO"))]
"TARGET_A_STEP && ia64_move_ok (operands[0], operands[1])"
"*
{
@@ -426,6 +430,8 @@
\"ldf8 %0 = %1%P1\",
\"stf8 %0 = %1%P0\",
\"mov %0 = %1\",
+ \"mov %0 = %r1\",
+ \"mov %0 = %1\",
\"mov %0 = %r1\"
};
@@ -435,14 +441,14 @@
return alt[which_alternative];
}"
- [(set_attr "type" "A,A,L,M,M,M,M,F,M,M,I,I")
+ [(set_attr "type" "A,A,L,M,M,M,M,F,M,M,I,I,M,M")
(set_attr "predicable" "no")])
(define_insn "*movdi_internal"
[(set (match_operand:DI 0 "destination_operand"
- "=r,r,r,r, m, r,*f,*f,*f, Q, r,*b")
+ "=r,r,r,r, m, r,*f,*f,*f, Q, r,*b*e, r,*d")
(match_operand:DI 1 "move_operand"
- "rO,J,i,m,rO,*f,rO,*f, Q,*f,*b,rO"))]
+ "rO,J,i,m,rO,*f,rO,*f, Q,*f,*b*e, rO,*d,rO"))]
"! TARGET_A_STEP && ia64_move_ok (operands[0], operands[1])"
"*
{
@@ -458,6 +464,8 @@
\"%,ldf8 %0 = %1%P1\",
\"%,stf8 %0 = %1%P0\",
\"%,mov %0 = %1\",
+ \"%,mov %0 = %r1\",
+ \"%,mov %0 = %1\",
\"%,mov %0 = %r1\"
};
@@ -467,7 +475,7 @@
return alt[which_alternative];
}"
- [(set_attr "type" "A,A,L,M,M,M,M,F,M,M,I,I")])
+ [(set_attr "type" "A,A,L,M,M,M,M,F,M,M,I,I,M,M")])
(define_split
[(set (match_operand:DI 0 "register_operand" "")
@@ -1104,6 +1112,15 @@
"add %0 = %1, %2, 1"
[(set_attr "type" "A")])
+(define_insn "*addsi3_plus1_alt"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 2))
+ (const_int 1)))]
+ ""
+ "add %0 = %1, %1, 1"
+ [(set_attr "type" "A")])
+
(define_expand "subsi3"
[(set (match_operand:SI 0 "register_operand" "")
(minus:SI (match_operand:SI 1 "reg_or_8bit_operand" "")
@@ -1299,6 +1316,18 @@
"add %0 = %1, %2, 1"
[(set_attr "type" "A")])
+;; This has some of the same problems as shladd. We let the shladd
+;; eliminator hack handle it, which results in the 1 being forced into
+;; a register, but not more ugliness here.
+(define_insn "*adddi3_plus1_alt"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 2))
+ (const_int 1)))]
+ ""
+ "add %0 = %1, %1, 1"
+ [(set_attr "type" "A")])
+
(define_insn "subdi3"
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "reg_or_8bit_operand" "rK")
@@ -1862,31 +1891,20 @@
;; doesn't succeed, then this remain a shladd pattern, and will be reloaded
;; incorrectly.
-(define_insn "*shladd_elim"
+(define_insn_and_split "*shladd_elim"
[(set (match_operand:DI 0 "register_operand" "=&r")
(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "shladd_operand" "n"))
- (match_operand:DI 3 "register_operand" "r"))
+ (match_operand:DI 3 "nonmemory_operand" "r"))
(match_operand:DI 4 "nonmemory_operand" "rI")))]
"reload_in_progress"
- "#"
- [(set_attr "type" "unknown")])
-
-;; ??? Need to emit an instruction group barrier here because this gets split
-;; after md_reorg.
-
-(define_split
- [(set (match_operand:DI 0 "register_operand" "")
- (plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "")
- (match_operand:DI 2 "shladd_operand" ""))
- (match_operand:DI 3 "register_operand" ""))
- (match_operand:DI 4 "reg_or_14bit_operand" "")))]
+ "* abort ();"
"reload_completed"
[(set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (match_dup 2))
(match_dup 3)))
- (unspec_volatile [(const_int 0)] 2)
(set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
- "")
+ ""
+ [(set_attr "type" "unknown")])
(define_insn "ashrdi3"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -2465,6 +2483,42 @@
;; ::::::::::::::::::::
;; ::
+;; :: Counted loop operations
+;; ::
+;; ::::::::::::::::::::
+
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
+ ""
+ "
+{
+ /* Only use cloop on innermost loops. */
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+ emit_jump_insn (gen_doloop_end_internal (gen_rtx_REG (DImode, AR_LC_REGNUM),
+ operands[4]));
+ DONE;
+}")
+
+(define_insn "doloop_end_internal"
+ [(set (pc) (if_then_else (ne (match_operand:DI 0 "ar_lc_reg_operand" "")
+ (const_int 0))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0) (if_then_else:DI (ne (match_dup 0) (const_int 0))
+ (match_dup 0)
+ (plus:DI (match_dup 0) (const_int -1))))]
+ ""
+ "br.cloop.sptk.few %l1"
+ [(set_attr "type" "B")
+ (set_attr "predicable" "no")])
+
+;; ::::::::::::::::::::
+;; ::
;; :: Set flag operations
;; ::
;; ::::::::::::::::::::
@@ -2706,32 +2760,32 @@
;; Errata 72 workaround.
(define_insn "*cmovdi_internal_astep"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r,*f,Q,*b,r,*f,Q,*b,r,*f,Q,*b")
+ "=r,*f,Q,*b*d*e,r,*f,Q,*b*d*e,r,*f,Q,*b*d*e")
(if_then_else:DI
(match_operator:CC 4 "predicate_operator"
[(match_operand:CC 1 "register_operand"
"c,c,c,c,c,c,c,c,c,c,c,c")
(const_int 0)])
(match_operand:DI 2 "general_operand"
- "0,0,0,0,ri*f*b,rO,*f,r,ri*f*b,rO,*f,r")
+ "0,0,0,0,ri*f*b*d*e,rO,*f,r,ri*f*b*d*e,rO,*f,r")
(match_operand:DI 3 "general_operand"
- "ri*f*b,rO,*f,r,0,0,0,0,ri*f*b,rO,*f,r")))]
+ "ri*f*b*d*e,rO,*f,r,0,0,0,0,ri*f*b*d*e,rO,*f,r")))]
"TARGET_A_STEP"
"* abort ();"
[(set_attr "predicable" "no")])
(define_insn "*cmovdi_internal"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r,m,*f,Q,*b,r,m,*f,Q,*b,r,m,*f,Q,*b")
+ "=r,m,*f,Q,*b*d*e,r,m,*f,Q,*b*d*e,r,m,*f,Q,*b*d*e")
(if_then_else:DI
(match_operator:CC 4 "predicate_operator"
[(match_operand:CC 1 "register_operand"
"c,c,c,c,c,c,c,c,c,c,c,c,c,c,c")
(const_int 0)])
(match_operand:DI 2 "general_operand"
- "0,0,0,0,0,rim*f*b,rO,rOQ,*f,r,rim*f*b,rO,rOQ,*f,r")
+ "0,0,0,0,0,rim*f*b*d*e,rO,rOQ,*f,r,rim*f*b*d*e,rO,rOQ,*f,r")
(match_operand:DI 3 "general_operand"
- "rim*f*b,rO,rOQ,*f,r,0,0,0,0,0,rim*f*b,rO,rOQ,*f,r")))]
+ "rim*f*b*d*e,rO,rOQ,*f,r,0,0,0,0,0,rim*f*b*d*e,rO,rOQ,*f,r")))]
"! TARGET_A_STEP"
"* abort ();"
[(set_attr "predicable" "no")])
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 16a55af..7644606 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -400,11 +400,6 @@ optimization_options (level, size)
int level;
int size ATTRIBUTE_UNUSED;
{
-#ifdef HAVE_decrement_and_branch_on_count
- /* When optimizing, enable use of BCT instruction. */
- if (level >= 1)
- flag_branch_on_count_reg = 1;
-#endif
}
/* Do anything needed at the start of the asm file. */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 52ef798..428fac8 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -13291,16 +13291,30 @@ operands[2] = GEN_INT (INTVAL (operands[1]) >> 32);
;; Define the subtract-one-and-jump insns, starting with the template
;; so loop.c knows what to generate.
-(define_expand "decrement_and_branch_on_count"
- [(use (match_operand 0 "register_operand" ""))
- (use (label_ref (match_operand 1 "" "")))]
+(define_expand "doloop_end"
+ [(use (match_operand 0 "" "")) ; loop pseudo
+ (use (match_operand 1 "" "")) ; iterations; zero if unknown
+ (use (match_operand 2 "" "")) ; max iterations
+ (use (match_operand 3 "" "")) ; loop level
+ (use (match_operand 4 "" ""))] ; label
""
"
{
+ /* Only use this on innermost loops. */
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
if (TARGET_POWERPC64)
- emit_jump_insn (gen_ctrdi (operands[0], operands[1]));
+ {
+ if (GET_MODE (operands[0]) != DImode)
+ FAIL;
+ emit_jump_insn (gen_ctrdi (operands[0], operands[4]));
+ }
else
- emit_jump_insn (gen_ctrsi (operands[0], operands[1]));
+ {
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+ emit_jump_insn (gen_ctrsi (operands[0], operands[4]));
+ }
DONE;
}")
diff --git a/gcc/doloop.c b/gcc/doloop.c
new file mode 100644
index 0000000..3933e3f
--- /dev/null
+++ b/gcc/doloop.c
@@ -0,0 +1,846 @@
+/* Perform doloop optimizations
+ Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+ Contributed by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include "config.h"
+#include "system.h"
+#include "rtl.h"
+#include "insn-flags.h"
+#include "flags.h"
+#include "expr.h"
+#include "loop.h"
+#include "hard-reg-set.h"
+#include "basic-block.h"
+#include "tm_p.h"
+
+
+/* This module is used to modify loops with a determinable number of
+ iterations to use special low-overhead looping instructions.
+
+ It first validates whether the loop is well behaved and has a
+ determinable number of iterations (either at compile or run-time).
+ It then modifies the loop to use a low-overhead looping pattern as
+ follows:
+
+ 1. A pseudo register is allocated as the loop iteration counter.
+
+ 2. The number of loop iterations is calculated and is stored
+ in the loop counter.
+
+ 3. At the end of the loop, the jump insn is replaced by the
+ doloop_end pattern. The compare must remain because it might be
+ used elsewhere. If the loop-variable or condition register are
+ used elsewhere, they will be eliminated by flow.
+
+ 4. An optional doloop_begin pattern is inserted at the top of the
+ loop.
+*/
+
+
+#ifdef HAVE_doloop_end
+
+static rtx doloop_condition_get
+ PARAMS ((rtx));
+static unsigned HOST_WIDE_INT doloop_iterations_max
+ PARAMS ((const struct loop_info *, enum machine_mode, int));
+static int doloop_valid_p
+ PARAMS ((const struct loop *, rtx));
+static int doloop_modify
+ PARAMS ((const struct loop *, rtx, rtx, rtx, rtx, rtx));
+static int doloop_modify_runtime
+ PARAMS ((const struct loop *, rtx, rtx, rtx, enum machine_mode, rtx));
+
+
+/* Return the loop termination condition for PATTERN or zero
+ if it is not a decrement and branch jump insn. */
+static rtx
+doloop_condition_get (pattern)
+ rtx pattern;
+{
+ rtx cmp;
+ rtx inc;
+ rtx reg;
+ rtx condition;
+
+ /* The canonical doloop pattern we expect is:
+
+ (parallel [(set (pc) (if_then_else (condition)
+ (label_ref (label))
+ (pc)))
+ (set (reg) (plus (reg) (const_int -1)))
+ (additional clobbers and uses)])
+
+ Some machines (IA-64) make the decrement conditional on
+ the condition as well, so we don't bother verifying the
+ actual decrement. In summary, the branch must be the
+ first entry of the parallel (also required by jump.c),
+ and the second entry of the parallel must be a set of
+ the loop counter register. */
+
+ if (GET_CODE (pattern) != PARALLEL)
+ return 0;
+
+ cmp = XVECEXP (pattern, 0, 0);
+ inc = XVECEXP (pattern, 0, 1);
+
+ /* Check for (set (reg) (something)). */
+ if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
+ return 0;
+
+ /* Extract loop counter register. */
+ reg = SET_DEST (inc);
+
+ /* Check for (set (pc) (if_then_else (condition)
+ (label_ref (label))
+ (pc))). */
+ if (GET_CODE (cmp) != SET
+ || SET_DEST (cmp) != pc_rtx
+ || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
+ || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
+ || XEXP (SET_SRC (cmp), 2) != pc_rtx)
+ return 0;
+
+ /* Extract loop termination condition. */
+ condition = XEXP (SET_SRC (cmp), 0);
+
+ if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
+ || GET_CODE (XEXP (condition, 1)) != CONST_INT)
+ return 0;
+
+ if (XEXP (condition, 0) == reg)
+ return condition;
+
+ if (GET_CODE (XEXP (condition, 0)) == PLUS
+ && XEXP (XEXP (condition, 0), 0) == reg)
+ return condition;
+
+ /* ??? If a machine uses a funny comparison, we could return a
+ canonicalised form here. */
+
+ return 0;
+}
+
+
+/* Return an estimate of the maximum number of loop iterations for the
+ loop specified by LOOP or zero if the loop is not normal.
+ MODE is the mode of the iteration count and NONNEG is non-zero if
+ the iteration count has been proved to be non-negative. */
+static unsigned HOST_WIDE_INT
+doloop_iterations_max (loop_info, mode, nonneg)
+ const struct loop_info *loop_info;
+ enum machine_mode mode;
+ int nonneg;
+{
+ unsigned HOST_WIDE_INT n_iterations_max;
+ enum rtx_code code;
+ rtx min_value;
+ rtx max_value;
+ HOST_WIDE_INT abs_inc;
+ int neg_inc;
+
+ neg_inc = 0;
+ abs_inc = INTVAL (loop_info->increment);
+ if (abs_inc < 0)
+ {
+ abs_inc = -abs_inc;
+ neg_inc = 1;
+ }
+
+ if (neg_inc)
+ {
+ code = swap_condition (loop_info->comparison_code);
+ min_value = loop_info->final_equiv_value;
+ max_value = loop_info->initial_equiv_value;
+ }
+ else
+ {
+ code = loop_info->comparison_code;
+ min_value = loop_info->initial_equiv_value;
+ max_value = loop_info->final_equiv_value;
+ }
+
+ /* Since the loop has a VTOP, we know that the initial test will be
+ true and thus the value of max_value should be greater than the
+ value of min_value. Thus the difference should always be positive
+ and the code must be LT, LE, LTU, LEU, or NE. Otherwise the loop is
+ not normal, e.g., `for (i = 0; i < 10; i--)'. */
+ switch (code)
+ {
+ case LTU:
+ case LEU:
+ {
+ unsigned HOST_WIDE_INT umax;
+ unsigned HOST_WIDE_INT umin;
+
+ if (GET_CODE (min_value) == CONST_INT)
+ umin = INTVAL (min_value);
+ else
+ umin = 0;
+
+ if (GET_CODE (max_value) == CONST_INT)
+ umax = INTVAL (max_value);
+ else
+ umax = (2U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
+
+ n_iterations_max = umax - umin;
+ break;
+ }
+
+ case LT:
+ case LE:
+ {
+ HOST_WIDE_INT smax;
+ HOST_WIDE_INT smin;
+
+ if (GET_CODE (min_value) == CONST_INT)
+ smin = INTVAL (min_value);
+ else
+ smin = -(1U << (GET_MODE_BITSIZE (mode) - 1));
+
+ if (GET_CODE (max_value) == CONST_INT)
+ smax = INTVAL (max_value);
+ else
+ smax = (1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
+
+ n_iterations_max = smax - smin;
+ break;
+ }
+
+ case NE:
+ if (GET_CODE (min_value) == CONST_INT
+ && GET_CODE (max_value) == CONST_INT)
+ n_iterations_max = INTVAL (max_value) - INTVAL (min_value);
+ else
+ /* We need to conservatively assume that we might have the maximum
+ number of iterations without any additional knowledge. */
+ n_iterations_max = (2U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
+ break;
+
+ default:
+ return 0;
+ }
+
+ n_iterations_max /= abs_inc;
+
+ /* If we know that the iteration count is non-negative then adjust
+ n_iterations_max if it is so large that it appears negative. */
+ if (nonneg && n_iterations_max > (1U << (GET_MODE_BITSIZE (mode) - 1)))
+ n_iterations_max = (1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
+
+ return n_iterations_max;
+}
+
+
+/* Return non-zero if the loop specified by LOOP is suitable for
+ the use of special low-overhead looping instructions. */
+static int
+doloop_valid_p (loop, jump_insn)
+ const struct loop *loop;
+ rtx jump_insn;
+{
+ const struct loop_info *loop_info = LOOP_INFO (loop);
+
+ /* The loop must have a conditional jump at the end. */
+ if (! any_condjump_p (jump_insn)
+ || ! onlyjump_p (jump_insn))
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Invalid jump at loop end.\n");
+ return 0;
+ }
+
+ /* Give up if a loop has been completely unrolled. */
+ if (loop_info->n_iterations == loop_info->unroll_number)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Loop completely unrolled.\n");
+ return 0;
+ }
+
+ /* The loop must have a single exit target. A break or return
+ statement within a loop will generate multiple loop exits.
+ Another example of a loop that currently generates multiple exit
+ targets is for (i = 0; i < (foo ? 8 : 4); i++) { }. */
+ if (loop_info->has_multiple_exit_targets)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Loop has multiple exit targets.\n");
+ return 0;
+ }
+
+ /* An indirect jump may jump out of the loop. */
+ if (loop_info->has_indirect_jump)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Indirect jump in function.\n");
+ return 0;
+ }
+
+ /* A called function may clobber any special registers required for
+ low-overhead looping. */
+ if (loop_info->has_call)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Function call in loop.\n");
+ return 0;
+ }
+
+ /* Some targets (e.g., PPC) use the count register for branch on table
+ instructions. ??? This should be a target specific check. */
+ if (loop_info->has_tablejump)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Computed branch in the loop.\n");
+ return 0;
+ }
+
+ if (! loop_info->increment)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Could not determine iteration info.\n");
+ return 0;
+ }
+
+ if (GET_CODE (loop_info->increment) != CONST_INT)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Increment not an integer constant.\n");
+ return 0;
+ }
+
+ /* There is no guarantee that a NE loop will terminate if the
+ absolute increment is not unity. ??? We could compute this
+ condition at run-time and have an additional jump around the loop
+ to ensure an infinite loop. */
+ if (loop_info->comparison_code == NE
+ && INTVAL (loop_info->increment) != -1
+ && INTVAL (loop_info->increment) != 1)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: NE loop with non-unity increment.\n");
+ return 0;
+ }
+
+ /* Check for loops that may not terminate under special conditions. */
+ if (! loop_info->n_iterations
+ && ((loop_info->comparison_code == LEU
+ && INTVAL (loop_info->increment) > 0)
+ || (loop_info->comparison_code == GEU
+ && INTVAL (loop_info->increment) < 0)))
+ {
+ /* If the comparison is LEU and the comparison value is UINT_MAX
+ then the loop will not terminate. Similarly, if the
+ comparison code is GEU and the initial value is 0, the loop
+ will not terminate.
+
+ Note that with LE and GE, the loop behaviour can be
+ implementation dependent if an overflow occurs, say between
+ INT_MAX and INT_MAX + 1. We thus don't have to worry about
+ these two cases.
+
+ ??? We could compute these conditions at run-time and have an
+ additional jump around the loop to ensure an infinite loop.
+ However, it is very unlikely that this is the intended
+ behaviour of the loop and checking for these rare boundary
+ conditions would pessimize all other code. */
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Possible infinite iteration case ignored.\n");
+ }
+
+ return 1;
+}
+
+
+/* Modify the loop to use the low-overhead looping insn where LOOP
+ describes the loop, ITERATIONS is an RTX containing the desired
+ number of loop iterations, ITERATIONS_MAX is a CONST_INT specifying
+ the maximum number of loop iterations, and DOLOOP_INSN is the
+ low-overhead looping insn to emit at the end of the loop. This
+ returns non-zero if it was successful. */
+static int
+doloop_modify (loop, iterations, iterations_max,
+ doloop_seq, start_label, condition)
+ const struct loop *loop;
+ rtx iterations;
+ rtx iterations_max;
+ rtx doloop_seq;
+ rtx start_label;
+ rtx condition;
+{
+ rtx counter_reg;
+ rtx count;
+ rtx sequence;
+ rtx jump_insn;
+ int nonneg = 0;
+ int decrement_count;
+
+ jump_insn = prev_nonnote_insn (loop->end);
+
+ if (loop_dump_stream)
+ {
+ fprintf (loop_dump_stream, "Doloop: Inserting doloop pattern (");
+ if (GET_CODE (iterations) == CONST_INT)
+ fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
+ INTVAL (iterations));
+ else
+ fputs ("runtime", loop_dump_stream);
+ fputs (" iterations).", loop_dump_stream);
+ }
+
+ /* Discard original jump to continue loop. The original compare
+ result may still be live, so it cannot be discarded explicitly. */
+ delete_insn (jump_insn);
+
+ /* Emit the label that will delimit the start of the loop. */
+ emit_label_after (start_label, loop->start);
+ LABEL_NUSES (start_label)++;
+
+ counter_reg = XEXP (condition, 0);
+ if (GET_CODE (counter_reg) == PLUS)
+ counter_reg = XEXP (counter_reg, 0);
+
+ start_sequence ();
+
+ count = iterations;
+ decrement_count = 0;
+ switch (GET_CODE (condition))
+ {
+ case NE:
+ /* Currently only NE tests against zero and one are supported. */
+ if (XEXP (condition, 1) == const0_rtx)
+ decrement_count = 1;
+ else if (XEXP (condition, 1) != const1_rtx)
+ abort ();
+ break;
+
+ case GE:
+ /* Currently only GE tests against zero are supported. */
+ if (XEXP (condition, 1) != const0_rtx)
+ abort ();
+
+ /* The iteration count needs decrementing for a GE test. */
+ decrement_count = 1;
+
+ /* Determine if the iteration counter will be non-negative.
+ Note that the maximum value loaded is iterations_max - 1. */
+ if ((unsigned HOST_WIDE_INT) INTVAL (iterations_max)
+ <= (1U << (GET_MODE_BITSIZE (GET_MODE (counter_reg)) - 1)))
+ nonneg = 1;
+ break;
+
+ /* Abort if an invalid doloop pattern has been generated. */
+ default:
+ abort();
+ }
+
+ if (decrement_count)
+ {
+ if (GET_CODE (count) == CONST_INT)
+ count = GEN_INT (INTVAL (count) - 1);
+ else
+ count = expand_binop (GET_MODE (counter_reg), sub_optab,
+ count, GEN_INT (1),
+ 0, 0, OPTAB_LIB_WIDEN);
+ }
+
+ /* Insert initialization of the count register into the loop header. */
+ convert_move (counter_reg, count, 1);
+ sequence = gen_sequence ();
+ end_sequence ();
+ emit_insn_before (sequence, loop->start);
+
+ /* Some targets (e.g., C4x) need to initialize special looping
+ registers. */
+#ifdef HAVE_doloop_begin
+ {
+ rtx init;
+
+ init = gen_doloop_begin (counter_reg,
+ GET_CODE (iterations) == CONST_INT
+ ? iterations : const0_rtx, iterations_max,
+ GEN_INT (loop->level));
+ if (init)
+ {
+ start_sequence ();
+ emit_insn (init);
+ sequence = gen_sequence ();
+ end_sequence ();
+ emit_insn_after (sequence, loop->start);
+ }
+ }
+#endif
+
+ /* Insert the new low-overhead looping insn. */
+ emit_jump_insn_before (doloop_seq, loop->end);
+ jump_insn = prev_nonnote_insn (loop->end);
+ JUMP_LABEL (jump_insn) = start_label;
+
+ /* Add a REG_NONNEG note if the actual or estimated maximum number
+ of iterations is non-negative. */
+ if (nonneg)
+ {
+ REG_NOTES (jump_insn)
+ = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
+ }
+ return 1;
+}
+
+
+/* Handle the more complex case, where the bounds are not known at
+ compile time. In this case we generate a run_time calculation of
+ the number of iterations. We rely on the existence of a run-time
+ guard to ensure that the loop executes at least once, i.e.,
+ initial_value obeys the loop comparison condition. If a guard is
+ not present, we emit one. The loop to modify is described by LOOP.
+ ITERATIONS_MAX is a CONST_INT specifying the estimated maximum
+ number of loop iterations. DOLOOP_INSN is the low-overhead looping
+ insn to insert. Returns non-zero if loop successfully modified. */
+static int
+doloop_modify_runtime (loop, iterations_max,
+ doloop_seq, start_label, mode, condition)
+ const struct loop *loop;
+ rtx iterations_max;
+ rtx doloop_seq;
+ rtx start_label;
+ enum machine_mode mode;
+ rtx condition;
+{
+ const struct loop_info *loop_info = LOOP_INFO (loop);
+ HOST_WIDE_INT abs_inc;
+ int neg_inc;
+ rtx diff;
+ rtx sequence;
+ rtx iterations;
+ rtx initial_value;
+ rtx final_value;
+ rtx increment;
+ int unsigned_p;
+ enum rtx_code comparison_code;
+
+ increment = loop_info->increment;
+ initial_value = loop_info->initial_value;
+ final_value = loop_info->final_value;
+
+ neg_inc = 0;
+ abs_inc = INTVAL (increment);
+ if (abs_inc < 0)
+ {
+ abs_inc = -abs_inc;
+ neg_inc = 1;
+ }
+
+ comparison_code = loop_info->comparison_code;
+ unsigned_p = (comparison_code == LTU
+ || comparison_code == LEU
+ || comparison_code == GTU
+ || comparison_code == GEU
+ || comparison_code == NE);
+
+ /* The number of iterations (prior to any loop unrolling) is given by:
+ (abs (final - initial) + abs_inc - 1) / abs_inc.
+
+ However, it is possible for the summation to overflow, and a
+ safer method is:
+
+ abs (final - initial) / abs_inc + (abs (final - initial) % abs_inc) != 0
+
+ If the loop has been unrolled, then the loop body has been
+ preconditioned to iterate a multiple of unroll_number times.
+ The number of iterations of the loop body is simply:
+ abs (final - initial) / (abs_inc * unroll_number).
+
+ The division and modulo operations can be avoided by requiring
+ that the increment is a power of 2 (precondition_loop_p enforces
+ this requirement). Nevertheless, the RTX_COSTS should be checked
+ to see if a fast divmod is available. */
+
+ start_sequence ();
+ /* abs (final - initial) */
+ diff = expand_binop (mode, sub_optab,
+ copy_rtx (neg_inc ? initial_value : final_value),
+ copy_rtx (neg_inc ? final_value : initial_value),
+ NULL_RTX, unsigned_p, OPTAB_LIB_WIDEN);
+
+ if (loop_info->unroll_number == 1)
+ {
+ if (abs_inc != 1)
+ {
+ int shift_count;
+ rtx extra;
+ rtx label;
+
+ shift_count = exact_log2 (abs_inc);
+ if (shift_count < 0)
+ abort ();
+
+ /* abs (final - initial) / abs_inc */
+ iterations = expand_binop (GET_MODE (diff), lshr_optab,
+ diff, GEN_INT (shift_count),
+ NULL_RTX, 1,
+ OPTAB_LIB_WIDEN);
+
+ /* abs (final - initial) % abs_inc */
+ extra = expand_binop (GET_MODE (iterations), and_optab,
+ diff, GEN_INT (abs_inc - 1),
+ NULL_RTX, 1,
+ OPTAB_LIB_WIDEN);
+
+ /* If (abs (final - initial) % abs_inc == 0) jump past
+ following increment instruction. */
+ label = gen_label_rtx();
+ emit_cmp_and_jump_insns (extra, const0_rtx, EQ, NULL_RTX,
+ GET_MODE (extra), 0, 0, label);
+ JUMP_LABEL (get_last_insn ()) = label;
+ LABEL_NUSES (label)++;
+
+ /* Increment the iteration count by one. */
+ iterations = expand_binop (GET_MODE (iterations), add_optab,
+ iterations, GEN_INT (1),
+ iterations, 1,
+ OPTAB_LIB_WIDEN);
+
+ emit_label (label);
+ }
+ else
+ iterations = diff;
+ }
+ else
+ {
+ int shift_count;
+
+ /* precondition_loop_p has preconditioned the loop so that the
+ iteration count of the loop body is always a power of 2.
+ Since we won't get an overflow calculating the loop count,
+ the code we emit is simpler. */
+ shift_count = exact_log2 (loop_info->unroll_number * abs_inc);
+ if (shift_count < 0)
+ abort ();
+
+ iterations = expand_binop (GET_MODE (diff), lshr_optab,
+ diff, GEN_INT (shift_count),
+ NULL_RTX, 1,
+ OPTAB_LIB_WIDEN);
+ }
+
+
+ /* If there is a NOTE_INSN_LOOP_VTOP, we have a `for' or `while'
+ style loop, with a loop exit test at the start. Thus, we can
+ assume that the loop condition was true when the loop was
+ entered.
+
+ `do-while' loops require special treatment since the exit test is
+ not executed before the start of the loop. We need to determine
+ if the loop will terminate after the first pass and to limit the
+ iteration count to one if necessary. */
+ if (! loop->vtop)
+ {
+ rtx label;
+
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream, "Doloop: Do-while loop.\n");
+
+ /* A `do-while' loop must iterate at least once. If the
+ iteration count is bogus, we set the iteration count to 1.
+ Note that if the loop has been unrolled, then the loop body
+ is guaranteed to execute at least once. */
+ if (loop_info->unroll_number == 1)
+ {
+ /* Emit insns to test if the loop will immediately
+ terminate and to set the iteration count to 1 if true. */
+ label = gen_label_rtx();
+ emit_cmp_and_jump_insns (copy_rtx (initial_value),
+ copy_rtx (loop_info->comparison_value),
+ comparison_code, NULL_RTX, mode, 0, 0,
+ label);
+ JUMP_LABEL (get_last_insn ()) = label;
+ LABEL_NUSES (label)++;
+ emit_move_insn (iterations, const1_rtx);
+ emit_label (label);
+ }
+ }
+
+ sequence = gen_sequence ();
+ end_sequence ();
+ emit_insn_before (sequence, loop->start);
+
+ return doloop_modify (loop, iterations, iterations_max, doloop_seq,
+ start_label, condition);
+}
+
+
+/* This is the main entry point. Process loop described by LOOP
+ validating that the loop is suitable for conversion to use a low
+ overhead looping instruction, replacing the jump insn where
+ suitable. We distinguish between loops with compile-time bounds
+ and those with run-time bounds. Information from LOOP is used to
+ compute the number of iterations and to determine whether the loop
+ is a candidate for this optimization. Returns non-zero if loop
+ successfully modified. */
+int
+doloop_optimize (loop)
+ const struct loop *loop;
+{
+ struct loop_info *loop_info = LOOP_INFO (loop);
+ rtx initial_value;
+ rtx final_value;
+ rtx increment;
+ rtx jump_insn;
+ enum machine_mode mode;
+ unsigned HOST_WIDE_INT n_iterations;
+ unsigned HOST_WIDE_INT n_iterations_max;
+ rtx doloop_seq, doloop_pat, doloop_reg;
+ rtx iterations;
+ rtx iterations_max;
+ rtx start_label;
+ rtx condition;
+
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Processing loop %d, enclosed levels %d.\n",
+ loop->num, loop->level);
+
+ jump_insn = prev_nonnote_insn (loop->end);
+
+ /* Check that loop is a candidate for a low-overhead looping insn. */
+ if (! doloop_valid_p (loop, jump_insn))
+ return 0;
+
+ /* Determine if the loop can be safely, and profitably,
+ preconditioned. While we don't precondition the loop in a loop
+ unrolling sense, this test ensures that the loop is well behaved
+ and that the increment is a constant integer. */
+ if (! precondition_loop_p (loop, &initial_value, &final_value,
+ &increment, &mode))
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Cannot precondition loop.\n");
+ return 0;
+ }
+
+ /* Determine or estimate the maximum number of loop iterations. */
+ n_iterations = loop_info->n_iterations;
+ if (n_iterations)
+ {
+ /* This is the simple case where the initial and final loop
+ values are constants. */
+ n_iterations_max = n_iterations;
+ }
+ else
+ {
+ int nonneg = find_reg_note (jump_insn, REG_NONNEG, 0) != 0;
+
+ /* This is the harder case where the initial and final loop
+ values may not be constants. */
+ n_iterations_max = doloop_iterations_max (loop_info, mode, nonneg);
+
+ if (! n_iterations_max)
+ {
+ /* We have something like `for (i = 0; i < 10; i--)'. */
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Not normal loop.\n");
+ return 0;
+ }
+ }
+
+ /* Account for loop unrolling in the iteration count. This will
+ have no effect if loop_iterations could not determine the number
+ of iterations. */
+ n_iterations /= loop_info->unroll_number;
+ n_iterations_max /= loop_info->unroll_number;
+
+ if (n_iterations && n_iterations < 3)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Too few iterations (%ld) to be profitable.\n",
+ (long int) n_iterations);
+ return 0;
+ }
+
+ iterations = GEN_INT (n_iterations);
+ iterations_max = GEN_INT (n_iterations_max);
+
+ /* Generate looping insn. If the pattern FAILs then give up trying
+ to modify the loop since there is some aspect the back-end does
+ not like. */
+ start_label = gen_label_rtx ();
+ doloop_reg = gen_reg_rtx (mode);
+ doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
+ GEN_INT (loop->level), start_label);
+ if (! doloop_seq && mode != word_mode)
+ {
+ PUT_MODE (doloop_reg, word_mode);
+ doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
+ GEN_INT (loop->level), start_label);
+ }
+ if (! doloop_seq)
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Target unwilling to use doloop pattern!\n");
+ return 0;
+ }
+
+ /* A raw define_insn may yield a plain pattern. If a sequence
+ was involved, the last must be the jump instruction. */
+ if (GET_CODE (doloop_seq) == SEQUENCE)
+ {
+ doloop_pat = XVECEXP (doloop_seq, 0, XVECLEN (doloop_seq, 0) - 1);
+ if (GET_CODE (doloop_pat) == JUMP_INSN)
+ doloop_pat = PATTERN (doloop_pat);
+ else
+ doloop_pat = NULL_RTX;
+ }
+ else
+ doloop_pat = doloop_seq;
+
+ if (! doloop_pat
+ || ! (condition = doloop_condition_get (doloop_pat)))
+ {
+ if (loop_dump_stream)
+ fprintf (loop_dump_stream,
+ "Doloop: Unrecognizable doloop pattern!\n");
+ return 0;
+ }
+
+ if (n_iterations != 0)
+ /* Handle the simpler case, where we know the iteration count at
+ compile time. */
+ return doloop_modify (loop, iterations, iterations_max, doloop_seq,
+ start_label, condition);
+ else
+ /* Handle the harder case, where we must add additional runtime tests. */
+ return doloop_modify_runtime (loop, iterations_max, doloop_seq,
+ start_label, mode, condition);
+}
+
+#endif /* HAVE_doloop_end */
diff --git a/gcc/final.c b/gcc/final.c
index 1fc72c9..7bdd403 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -918,9 +918,14 @@ int
insn_current_reference_address (branch)
rtx branch;
{
- rtx dest;
- rtx seq = NEXT_INSN (PREV_INSN (branch));
- int seq_uid = INSN_UID (seq);
+ rtx dest, seq;
+ int seq_uid;
+
+ if (! INSN_ADDRESSES_SET_P ())
+ return 0;
+
+ seq = NEXT_INSN (PREV_INSN (branch));
+ seq_uid = INSN_UID (seq);
if (GET_CODE (branch) != JUMP_INSN)
/* This can happen for example on the PA; the objective is to know the
offset to address something in front of the start of the function.
@@ -929,6 +934,7 @@ insn_current_reference_address (branch)
any alignment we'd encounter, so we skip the call to align_fuzz. */
return insn_current_address;
dest = JUMP_LABEL (branch);
+
/* BRANCH has no proper alignment chain set, so use SEQ. */
if (INSN_SHUID (branch) < INSN_SHUID (dest))
{
diff --git a/gcc/flags.h b/gcc/flags.h
index 3dc5c7e..de5b099 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -375,7 +375,7 @@ extern int flag_schedule_speculative;
extern int flag_schedule_speculative_load;
extern int flag_schedule_speculative_load_dangerous;
-/* flag_on_branch_count_reg means try to replace add-1,compare,branch tupple
+/* flag_branch_on_count_reg means try to replace add-1,compare,branch tuple
by a cheaper branch, on a count register. */
extern int flag_branch_on_count_reg;
diff --git a/gcc/jump.c b/gcc/jump.c
index 9928b51..98a7c43 100644
--- a/gcc/jump.c
+++ b/gcc/jump.c
@@ -2217,7 +2217,7 @@ any_uncondjump_p (insn)
/* Return true when insn is a conditional jump. This function works for
instructions containing PC sets in PARALLELs. The instruction may have
various other effects so before removing the jump you must verify
- safe_to_remove_jump_p.
+ onlyjump_p.
Note that unlike condjump_p it returns false for unconditional jumps. */
diff --git a/gcc/loop.c b/gcc/loop.c
index 2fb4855..f812b2e 100644
--- a/gcc/loop.c
+++ b/gcc/loop.c
@@ -327,16 +327,8 @@ typedef struct rtx_pair {
&& INSN_LUID (INSN) >= INSN_LUID (START) \
&& INSN_LUID (INSN) <= INSN_LUID (END))
-#ifdef HAVE_decrement_and_branch_on_count
-/* Test whether BCT applicable and safe. */
-static void insert_bct PARAMS ((struct loop *));
-
-/* Auxiliary function that inserts the BCT pattern into the loop. */
-static void instrument_loop_bct PARAMS ((rtx, rtx, rtx));
-#endif /* HAVE_decrement_and_branch_on_count */
-
/* Indirect_jump_in_function is computed once per function. */
-int indirect_jump_in_function = 0;
+static int indirect_jump_in_function;
static int indirect_jump_in_function_p PARAMS ((rtx));
static int compute_luids PARAMS ((rtx, rtx, int));
@@ -5025,12 +5017,10 @@ strength_reduce (loop, insn_count, flags)
&& unrolled_insn_copies <= insn_count))
unroll_loop (loop, insn_count, end_insert_before, 1);
-#ifdef HAVE_decrement_and_branch_on_count
- /* Instrument the loop with BCT insn. */
- if (HAVE_decrement_and_branch_on_count && (flags & LOOP_BCT)
- && flag_branch_on_count_reg)
- insert_bct (loop);
-#endif /* HAVE_decrement_and_branch_on_count */
+#ifdef HAVE_doloop_end
+ if (HAVE_doloop_end && (flags & LOOP_BCT) && flag_branch_on_count_reg)
+ doloop_optimize (loop);
+#endif /* HAVE_doloop_end */
if (loop_dump_stream)
fprintf (loop_dump_stream, "\n");
@@ -9187,6 +9177,7 @@ canonicalize_condition (insn, cond, reverse, earliest, want_reg)
return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
}
+
/* Given a jump insn JUMP, return the condition that will cause it to branch
to its JUMP_LABEL. If the condition cannot be understood, or is an
inequality floating-point comparison which needs to be reversed, 0 will
@@ -9242,291 +9233,6 @@ get_condition_for_loop (loop, x)
XEXP (comparison, 1), XEXP (comparison, 0));
}
-#ifdef HAVE_decrement_and_branch_on_count
-/* Instrument loop for insertion of bct instruction. We distinguish between
- loops with compile-time bounds and those with run-time bounds.
- Information from loop_iterations() is used to compute compile-time bounds.
- Run-time bounds should use loop preconditioning, but currently ignored.
- */
-
-static void
-insert_bct (loop)
- struct loop *loop;
-{
- unsigned HOST_WIDE_INT n_iterations;
- rtx loop_start = loop->start;
- rtx loop_end = loop->end;
- struct loop_info *loop_info = LOOP_INFO (loop);
- int loop_num = loop->num;
-
-#if 0
- int increment_direction, compare_direction;
- /* If the loop condition is <= or >=, the number of iteration
- is 1 more than the range of the bounds of the loop. */
- int add_iteration = 0;
- enum machine_mode loop_var_mode = word_mode;
-#endif
-
- /* It's impossible to instrument a competely unrolled loop. */
- if (loop_info->unroll_number == loop_info->n_iterations)
- return;
-
- /* Make sure that the count register is not in use. */
- if (loop_info->used_count_register)
- {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct %d: BCT instrumentation failed: count register already in use\n",
- loop_num);
- return;
- }
-
- /* Make sure that the function has no indirect jumps. */
- if (indirect_jump_in_function)
- {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct %d: BCT instrumentation failed: indirect jump in function\n",
- loop_num);
- return;
- }
-
- /* Make sure that the last loop insn is a conditional jump. */
- if (GET_CODE (PREV_INSN (loop_end)) != JUMP_INSN
- || ! onlyjump_p (PREV_INSN (loop_end))
- || ! any_condjump_p (PREV_INSN (loop_end)))
- {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct %d: BCT instrumentation failed: invalid jump at loop end\n",
- loop_num);
- return;
- }
-
- /* Make sure that the loop does not contain a function call
- (the count register might be altered by the called function). */
- if (loop_info->has_call)
- {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct %d: BCT instrumentation failed: function call in loop\n",
- loop_num);
- return;
- }
-
- /* Make sure that the loop does not jump via a table.
- (the count register might be used to perform the branch on table). */
- if (loop_info->has_tablejump)
- {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct %d: BCT instrumentation failed: computed branch in the loop\n",
- loop_num);
- return;
- }
-
- /* Account for loop unrolling in instrumented iteration count. */
- if (loop_info->unroll_number > 1)
- n_iterations = loop_info->n_iterations / loop_info->unroll_number;
- else
- n_iterations = loop_info->n_iterations;
-
- if (n_iterations != 0 && n_iterations < 3)
- {
- /* Allow an enclosing outer loop to benefit if possible. */
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct %d: Too few iterations to benefit from BCT optimization\n",
- loop_num);
- return;
- }
-
- /* Try to instrument the loop. */
-
- /* Handle the simpler case, where the bounds are known at compile time. */
- if (n_iterations > 0)
- {
- struct loop *outer_loop;
- struct loop_info *outer_loop_info;
-
- /* Mark all enclosing loops that they cannot use count register. */
- for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
- {
- outer_loop_info = LOOP_INFO (outer_loop);
- outer_loop_info->used_count_register = 1;
- }
- instrument_loop_bct (loop_start, loop_end, GEN_INT (n_iterations));
- return;
- }
-
- /* Handle the more complex case, that the bounds are NOT known
- at compile time. In this case we generate run_time calculation
- of the number of iterations. */
-
- if (loop_info->iteration_var == 0)
- {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct %d: BCT Runtime Instrumentation failed: no loop iteration variable found\n",
- loop_num);
- return;
- }
-
- if (GET_MODE_CLASS (GET_MODE (loop_info->iteration_var)) != MODE_INT
- || GET_MODE_SIZE (GET_MODE (loop_info->iteration_var)) != UNITS_PER_WORD)
- {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct %d: BCT Runtime Instrumentation failed: loop variable not integer\n",
- loop_num);
- return;
- }
-
- /* With runtime bounds, if the compare is of the form '!=' we give up */
- if (loop_info->comparison_code == NE)
- {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct %d: BCT Runtime Instrumentation failed: runtime bounds with != comparison\n",
- loop_num);
- return;
- }
-/* Use common loop preconditioning code instead. */
-#if 0
- else
- {
- /* We rely on the existence of run-time guard to ensure that the
- loop executes at least once. */
- rtx sequence;
- rtx iterations_num_reg;
-
- unsigned HOST_WIDE_INT increment_value_abs
- = INTVAL (increment) * increment_direction;
-
- /* make sure that the increment is a power of two, otherwise (an
- expensive) divide is needed. */
- if (exact_log2 (increment_value_abs) == -1)
- {
- if (loop_dump_stream)
- fprintf (loop_dump_stream,
- "insert_bct: not instrumenting BCT because the increment is not power of 2\n");
- return;
- }
-
- /* compute the number of iterations */
- start_sequence ();
- {
- rtx temp_reg;
-
- /* Again, the number of iterations is calculated by:
- ;
- ; compare-val - initial-val + (increment -1) + additional-iteration
- ; num_iterations = -----------------------------------------------------------------
- ; increment
- */
- /* ??? Do we have to call copy_rtx here before passing rtx to
- expand_binop? */
- if (compare_direction > 0)
- {
- /* <, <= :the loop variable is increasing */
- temp_reg = expand_binop (loop_var_mode, sub_optab,
- comparison_value, initial_value,
- NULL_RTX, 0, OPTAB_LIB_WIDEN);
- }
- else
- {
- temp_reg = expand_binop (loop_var_mode, sub_optab,
- initial_value, comparison_value,
- NULL_RTX, 0, OPTAB_LIB_WIDEN);
- }
-
- if (increment_value_abs - 1 + add_iteration != 0)
- temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg,
- GEN_INT (increment_value_abs - 1
- + add_iteration),
- NULL_RTX, 0, OPTAB_LIB_WIDEN);
-
- if (increment_value_abs != 1)
- iterations_num_reg = expand_binop (loop_var_mode, asr_optab,
- temp_reg,
- GEN_INT (exact_log2 (increment_value_abs)),
- NULL_RTX, 0, OPTAB_LIB_WIDEN);
- else
- iterations_num_reg = temp_reg;
- }
- sequence = gen_sequence ();
- end_sequence ();
- emit_insn_before (sequence, loop_start);
- instrument_loop_bct (loop_start, loop_end, iterations_num_reg);
- }
-
- return;
-#endif /* Complex case */
-}
-
-/* Instrument loop by inserting a bct in it as follows:
- 1. A new counter register is created.
- 2. In the head of the loop the new variable is initialized to the value
- passed in the loop_num_iterations parameter.
- 3. At the end of the loop, comparison of the register with 0 is generated.
- The created comparison follows the pattern defined for the
- decrement_and_branch_on_count insn, so this insn will be generated.
- 4. The branch on the old variable are deleted. The compare must remain
- because it might be used elsewhere. If the loop-variable or condition
- register are used elsewhere, they will be eliminated by flow. */
-
-static void
-instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
- rtx loop_start, loop_end;
- rtx loop_num_iterations;
-{
- rtx counter_reg;
- rtx start_label;
- rtx sequence;
-
- if (HAVE_decrement_and_branch_on_count)
- {
- if (loop_dump_stream)
- {
- fputs ("instrument_bct: Inserting BCT (", loop_dump_stream);
- if (GET_CODE (loop_num_iterations) == CONST_INT)
- fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
- INTVAL (loop_num_iterations));
- else
- fputs ("runtime", loop_dump_stream);
- fputs (" iterations)", loop_dump_stream);
- }
-
- /* Discard original jump to continue loop. Original compare result
- may still be live, so it cannot be discarded explicitly. */
- delete_insn (PREV_INSN (loop_end));
-
- /* Insert the label which will delimit the start of the loop. */
- start_label = gen_label_rtx ();
- emit_label_after (start_label, loop_start);
-
- /* Insert initialization of the count register into the loop header. */
- start_sequence ();
- counter_reg = gen_reg_rtx (word_mode);
- emit_insn (gen_move_insn (counter_reg, loop_num_iterations));
- sequence = gen_sequence ();
- end_sequence ();
- emit_insn_before (sequence, loop_start);
-
- /* Insert new comparison on the count register instead of the
- old one, generating the needed BCT pattern (that will be
- later recognized by assembly generation phase). */
- sequence = emit_jump_insn_before (
- gen_decrement_and_branch_on_count (counter_reg, start_label),
- loop_end);
-
- if (GET_CODE (sequence) != JUMP_INSN)
- abort ();
- JUMP_LABEL (sequence) = start_label;
- LABEL_NUSES (start_label)++;
- }
-}
-#endif /* HAVE_decrement_and_branch_on_count */
/* Scan the function and determine whether it has indirect (computed) jumps.
diff --git a/gcc/loop.h b/gcc/loop.h
index 1409b49..7d4c7ce 100644
--- a/gcc/loop.h
+++ b/gcc/loop.h
@@ -254,3 +254,5 @@ int loop_insn_first_p PARAMS ((rtx, rtx));
typedef rtx (*loop_insn_callback ) PARAMS ((struct loop *, rtx, int, int));
void for_each_insn_in_loop PARAMS ((struct loop *, loop_insn_callback));
+/* Forward declarations for non-static functions declared in doloop.c. */
+int doloop_optimize PARAMS ((const struct loop *));
diff --git a/gcc/recog.c b/gcc/recog.c
index e079b7e..9bbd10b 100644
--- a/gcc/recog.c
+++ b/gcc/recog.c
@@ -2626,7 +2626,7 @@ split_all_insns (upd_life)
/* Can't use `next_real_insn' because that might go across
CODE_LABELS and short-out basic blocks. */
next = NEXT_INSN (insn);
- if (GET_CODE (insn) != INSN)
+ if (! INSN_P (insn))
;
/* Don't split no-op move insns. These should silently
diff --git a/gcc/toplev.c b/gcc/toplev.c
index cb60d51..2e06176 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -746,9 +746,9 @@ int flag_schedule_speculative_load_dangerous = 0;
int flag_single_precision_constant;
-/* flag_on_branch_count_reg means try to replace add-1,compare,branch tupple
- by a cheaper branch, on a count register. */
-int flag_branch_on_count_reg;
+/* flag_branch_on_count_reg means try to replace add-1,compare,branch tuple
+ by a cheaper branch on a count register. */
+int flag_branch_on_count_reg = 1;
/* -finhibit-size-directive inhibits output of .size for ELF.
This is used only for compiling crtstuff.c,