aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff Law <law@gcc.gnu.org>1994-11-10 21:53:33 -0700
committerJeff Law <law@gcc.gnu.org>1994-11-10 21:53:33 -0700
commit279c9bde625781e47709a2a5900c3c56966e67e7 (patch)
treef47e4f49d247da38fa20c4e200c92e0ecdfdb123
parentba25ac36a7b77df8d3021f270fa28d64997d3d65 (diff)
downloadgcc-279c9bde625781e47709a2a5900c3c56966e67e7.zip
gcc-279c9bde625781e47709a2a5900c3c56966e67e7.tar.gz
gcc-279c9bde625781e47709a2a5900c3c56966e67e7.tar.bz2
* Automatic generation of inline long call sequences when needed.
* pa.h (TARGET_LONG_CALLS): Delete. Replace all uses with TARGET_PORTABLE_RUNTIME. (TARGET_MILLICODE_LONG_CALLS): New target flag. * pa.c (output_function_prologue): Keep track of the total number of code bytes emitted for each source file. (output_call): Handle TARGET_PORTABLE_RUNTIME and millicode calls for TARGET_MILLICODE_LONG_CALLS. Emit an inline long-call if needed. If emitting an inline long-call, perform argument relocations before the call if they are needed, and unfill the delay slot of the call if necessary. * pa.md (define_delay for millicode): Disable the delay slot if TARGET_MILLICODE_LONG_CALLS. (millicode insns and indirect calls): Properly compute length for both TARGET_PORTABLE_RUNTIME and TARGET_MILLICODE_LONG_CALLS. (call_internal_symref): Properly compute the length when more than 240000 bytes of code have already been output. Take TARGET_MILLICODE_LONG_CALLS into account in the length computation. From-SVN: r8422
-rw-r--r--gcc/config/pa/pa.c145
-rw-r--r--gcc/config/pa/pa.h29
-rw-r--r--gcc/config/pa/pa.md112
3 files changed, 225 insertions, 61 deletions
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index 2ec55ba..21d207f 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -52,6 +52,11 @@ static int gr_saved, fr_saved;
static rtx find_addr_reg ();
+/* Keep track of the number of bytes we have output in the CODE subspaces
+ during this compilation so we'll know when to emit inline long-calls. */
+
+unsigned int total_code_bytes;
+
/* Return non-zero only if OP is a register of mode MODE,
or CONST0_RTX. */
int
@@ -72,7 +77,7 @@ call_operand_address (op, mode)
rtx op;
enum machine_mode mode;
{
- return (CONSTANT_P (op) && ! TARGET_LONG_CALLS);
+ return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}
/* Return 1 if X contains a symbolic expression. We know these
@@ -2044,6 +2049,19 @@ output_function_prologue (file, size)
if (profile_flag)
ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
hp_profile_labelno);
+
+ if (insn_addresses)
+ {
+ unsigned int old_total = total_code_bytes;
+
+ total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
+ total_code_bytes += FUNCTION_BOUNDARY /BITS_PER_UNIT;
+
+ /* Be prepared to handle overflows. */
+ total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
+ }
+ else
+ total_code_bytes = -1;
}
void
@@ -3760,9 +3778,9 @@ output_movb (operands, insn, which_alternative, reverse_comparison)
RETURN_POINTER is the register which will hold the return address.
%r2 for most calls, %r31 for millicode calls.
- When TARGET_LONG_CALLS is true, output_call is only called for
- millicode calls. In addition, no delay slots are available when
- TARGET_LONG_CALLS is true. */
+ When TARGET_MILLICODE_LONG_CALLS is true, then we have to assume
+ that two instruction sequences must be used to reach the millicode
+ routines (including dyncall!). */
char *
output_call (insn, call_dest, return_pointer)
@@ -3775,21 +3793,124 @@ output_call (insn, call_dest, return_pointer)
rtx xoperands[4];
rtx seq_insn;
- /* Handle common case -- empty delay slot or no jump in the delay slot. */
- if (dbr_sequence_length () == 0
+ /* Handle long millicode calls for mod, div, and mul. */
+ if (TARGET_PORTABLE_RUNTIME
+ || (TARGET_MILLICODE_LONG_CALLS && REGNO (return_pointer) == 31))
+ {
+ xoperands[0] = call_dest;
+ xoperands[1] = return_pointer;
+ output_asm_insn ("ldil L%%%0,%%r29", xoperands);
+ output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
+ output_asm_insn ("blr 0,%r1\n\tbv,n 0(%%r29)\n\tnop", xoperands);
+ return "";
+ }
+
+ /* Handle common case -- empty delay slot or no jump in the delay slot,
+ and we're sure that the branch will reach the beginning of the $CODE$
+ subspace. */
+ if ((dbr_sequence_length () == 0
+ && get_attr_length (insn) == 8)
|| (dbr_sequence_length () != 0
- && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN))
+ && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
+ && get_attr_length (insn) == 4))
{
xoperands[0] = call_dest;
xoperands[1] = return_pointer;
- if (TARGET_LONG_CALLS)
+ output_asm_insn ("bl %0,%r1%#", xoperands);
+ return "";
+ }
+
+ /* This call may not reach the beginning of the $CODE$ subspace. */
+ if (get_attr_length (insn) > 8)
+ {
+ int delay_insn_deleted = 0;
+ rtx xoperands[2];
+ rtx link;
+
+ /* We need to emit an inline long-call branch. Furthermore,
+ because we're changing a named function call into an indirect
+ function call well after the parameters have been set up, we
+ need to make sure any FP args appear in both the integer
+ and FP registers. Also, we need to move any delay slot insn
+ out of the delay slot -- Yuk! */
+ if (dbr_sequence_length () != 0
+ && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
{
- output_asm_insn ("ldil L%%%0,%%r29", xoperands);
- output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
- output_asm_insn ("blr 0,%r1\n\tbv,n 0(%%r29)\n\tnop", xoperands);
+ /* A non-jump insn in the delay slot. By definition we can
+ emit this insn before the call (and in fact before argument
+ relocating). */
+ final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
+
+ /* Now delete the delay insn. */
+ PUT_CODE (NEXT_INSN (insn), NOTE);
+ NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
+ delay_insn_deleted = 1;
+ }
+
+ /* Now copy any FP arguments into integer registers. */
+ for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
+ {
+ int arg_mode, regno;
+ rtx use = XEXP (link, 0);
+ if (! (GET_CODE (use) == USE
+ && GET_CODE (XEXP (use, 0)) == REG
+ && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
+ continue;
+
+ arg_mode = GET_MODE (XEXP (use, 0));
+ regno = REGNO (XEXP (use, 0));
+ /* Is it a floating point register? */
+ if (regno >= 32 && regno <= 39)
+ {
+ /* Copy from the FP register into an integer register
+ (via memory). */
+ if (arg_mode == SFmode)
+ {
+ xoperands[0] = XEXP (use, 0);
+ xoperands[1] = gen_rtx (REG, SImode, 26 - (regno - 32) / 2);
+ output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
+ output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
+ }
+ else
+ {
+ xoperands[0] = XEXP (use, 0);
+ xoperands[1] = gen_rtx (REG, DImode, 25 - (regno - 34) / 2);
+ output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
+ output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
+ output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
+ }
+
+ }
+ }
+
+ /* Now emit the inline long-call. */
+ xoperands[0] = call_dest;
+ output_asm_insn ("ldil L%%%0,%%r22\n\tldo R%%%0(%%r22),%%r22", xoperands);
+
+ /* If TARGET_MILLICODE_LONG_CALLS, then we must use a long-call sequence
+ to call dyncall! */
+ if (TARGET_MILLICODE_LONG_CALLS)
+ {
+ output_asm_insn ("ldil L%%$$dyncall,%%r31", xoperands);
+ output_asm_insn ("ldo R%%$$dyncall(%%r31),%%r31", xoperands);
+ output_asm_insn ("blr 0,%%r2\n\tbv,n 0(%%r31)\n\tnop", xoperands);
}
else
- output_asm_insn ("bl %0,%r1%#", xoperands);
+ output_asm_insn ("bl $$dyncall,%%r31\n\tcopy %%r31,%%r2", xoperands);
+
+ /* If we had a jump in the call's delay slot, output it now. */
+ if (dbr_sequence_length () != 0
+ && !delay_insn_deleted)
+ {
+ xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
+ output_asm_insn ("b,n %0", xoperands);
+
+ /* Now delete the delay insn. */
+ PUT_CODE (NEXT_INSN (insn), NOTE);
+ NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
+ }
return "";
}
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index 5d6e9911..218a97d 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -28,6 +28,9 @@ enum cmp_type /* comparison type */
CMP_MAX /* max comparison type */
};
+/* For long call handling. */
+extern unsigned int total_code_bytes;
+
/* Print subsidiary information on the compiler version in use. */
#define TARGET_VERSION fprintf (stderr, " (hppa)");
@@ -57,13 +60,18 @@ extern int target_flags;
/* Allow unconditional jumps in the delay slots of call instructions. */
#define TARGET_JUMP_IN_DELAY (target_flags & 8)
-/* Force all function calls to indirect addressing via a register. This
- avoids lossage when the function is very far away from the current PC.
+/* In rare cases, a millicode call via "bl" can not be turned into
+ a millicode call using "ble" (when SHLIB_INFO subspace is very large).
+
+ This option forces just millicode calls to use inline long-calls.
+ This is far more efficient than the old long-call option which forced
+ every function to be called indirectly (as is still the case for
+ TARGET_PORTABLE_RUNTIME).
??? What about simple jumps, they can suffer from the same problem.
Would require significant surgery in pa.md. */
-#define TARGET_LONG_CALLS (target_flags & 16)
+#define TARGET_MILLICODE_LONG_CALLS (target_flags & 16)
/* Disable indexed addressing modes. */
@@ -73,7 +81,8 @@ extern int target_flags;
HP wants everyone to use for ELF objects. If at all possible you want
to avoid this since it's a performance loss for non-prototyped code.
- Note TARGET_PORTABLE_RUNTIME also implies TARGET_LONG_CALLS. */
+ Note TARGET_PORTABLE_RUNTIME also forces all calls to use inline
+ long-call stubs which is quite expensive. */
#define TARGET_PORTABLE_RUNTIME (target_flags & 64)
@@ -100,8 +109,8 @@ extern int target_flags;
{"no-fast-indirect-calls", -4},\
{"jump-in-delay", 8}, \
{"no-jump-in-delay", -8}, \
- {"long-calls", 16}, \
- {"no-long-calls", -16}, \
+ {"millicode-long-calls", 16},\
+ {"no-millicode-long-calls", -16},\
{"disable-indexing", 32}, \
{"no-disable-indexing", -32},\
{"portable-runtime", 64+16},\
@@ -832,9 +841,7 @@ struct hppa_args {int words, nargs_prototype; };
The caller must make a distinction between calls to explicitly named
functions and calls through pointers to functions -- the conventions
are different! Calls through pointers to functions only use general
- registers for the first four argument words. Note the indirect function
- calling conventions are in effect during TARGET_LONG_CALLS, but
- current_call_is_indirect will not be set in such situations.
+ registers for the first four argument words.
Of course all this is different for the portable runtime model
HP wants everyone to use for ELF. Ugh. Here's a quick description
@@ -869,12 +876,12 @@ struct hppa_args {int words, nargs_prototype; };
|| !FLOAT_MODE_P (MODE) || (CUM).nargs_prototype > 0) \
? gen_rtx (REG, (MODE), \
(FUNCTION_ARG_SIZE ((MODE), (TYPE)) > 1 \
- ? (((!(current_call_is_indirect || TARGET_LONG_CALLS) \
+ ? (((!current_call_is_indirect \
|| TARGET_PORTABLE_RUNTIME) \
&& (MODE) == DFmode) \
? ((CUM).words ? 38 : 34) \
: ((CUM).words ? 23 : 25)) \
- : (((!(current_call_is_indirect || TARGET_LONG_CALLS) \
+ : (((!current_call_is_indirect \
|| TARGET_PORTABLE_RUNTIME) \
&& (MODE) == SFmode) \
? (32 + 2 * (CUM).words) \
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 60459ac..db6cf97 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -93,10 +93,11 @@
[(eq_attr "in_call_delay" "true") (nil) (nil)])
;; millicode call delay slot description. Note it disallows delay slot
-;; when TARGET_LONG_CALLS is true.
+;; when TARGET_PORTABLE_RUNTIME or TARGET_MILLICODE_LONG_CALLS is true.
(define_delay (eq_attr "type" "milli")
[(and (eq_attr "in_call_delay" "true")
- (eq (symbol_ref "TARGET_LONG_CALLS") (const_int 0)))
+ (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
+ (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS") (const_int 0))))
(nil) (nil)])
;; Unconditional branch, return and other similar instructions.
@@ -2268,10 +2269,13 @@
""
"* return output_mul_insn (0, insn);"
[(set_attr "type" "milli")
- (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
- (const_int 0))
- (const_int 4)
- (const_int 24)))])
+ (set (attr "length")
+ (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
+ (const_int 0)))
+ (const_int 4)
+ (const_int 24)))])
;;; Division and mod.
(define_expand "divsi3"
@@ -2318,10 +2322,13 @@
"*
return output_div_insn (operands, 0, insn);"
[(set_attr "type" "milli")
- (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
- (const_int 0))
- (const_int 4)
- (const_int 24)))])
+ (set (attr "length")
+ (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
+ (const_int 0)))
+ (const_int 4)
+ (const_int 24)))])
(define_expand "udivsi3"
[(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
@@ -2367,10 +2374,13 @@
"*
return output_div_insn (operands, 1, insn);"
[(set_attr "type" "milli")
- (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
- (const_int 0))
- (const_int 4)
- (const_int 24)))])
+ (set (attr "length")
+ (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
+ (const_int 0)))
+ (const_int 4)
+ (const_int 24)))])
(define_expand "modsi3"
[(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
@@ -2412,10 +2422,13 @@
"*
return output_mod_insn (0, insn);"
[(set_attr "type" "milli")
- (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
- (const_int 0))
- (const_int 4)
- (const_int 24)))])
+ (set (attr "length")
+ (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
+ (const_int 0)))
+ (const_int 4)
+ (const_int 24)))])
(define_expand "umodsi3"
[(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
@@ -2457,10 +2470,13 @@
"*
return output_mod_insn (1, insn);"
[(set_attr "type" "milli")
- (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
- (const_int 0))
- (const_int 4)
- (const_int 24)))])
+ (set (attr "length")
+ (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
+ (const_int 0)))
+ (const_int 4)
+ (const_int 24)))])
;;- and instructions
;; We define DImode `and` so with DImode `not` we can get
@@ -3143,7 +3159,7 @@
rtx op;
rtx call_insn;
- if (TARGET_LONG_CALLS)
+ if (TARGET_PORTABLE_RUNTIME)
op = force_reg (SImode, XEXP (operands[0], 0));
else
op = XEXP (operands[0], 0);
@@ -3185,14 +3201,21 @@
(match_operand 1 "" "i"))
(clobber (reg:SI 2))
(use (const_int 0))]
- "! TARGET_LONG_CALLS"
+ "! TARGET_PORTABLE_RUNTIME"
"*
{
output_arg_descriptor (insn);
return output_call (insn, operands[0], gen_rtx (REG, SImode, 2));
}"
[(set_attr "type" "call")
- (set_attr "length" "4")])
+ (set (attr "length")
+ (if_then_else (lt (plus (symbol_ref "total_code_bytes") (pc))
+ (const_int 240000))
+ (const_int 4)
+ (if_then_else (ne (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
+ (const_int 0))
+ (const_int 64)
+ (const_int 52))))])
(define_insn "call_internal_reg"
[(call (mem:SI (match_operand:SI 0 "register_operand" "r"))
@@ -3206,16 +3229,20 @@
return \"blr 0,%%r2\;bv,n 0(%r0)\;ldo 4(%%r2),%%r2\";
/* Yuk! bl may not be able to reach $$dyncall. */
- if (TARGET_LONG_CALLS)
+ if (TARGET_PORTABLE_RUNTIME || TARGET_MILLICODE_LONG_CALLS)
return \"copy %r0,%%r22\;ldil L%%$$dyncall,%%r31\;ldo R%%$$dyncall(%%r31),%%r31\;blr 0,%%r2\;bv,n 0(%%r31)\;nop\";
else
return \"copy %r0,%%r22\;.CALL\\tARGW0=GR\;bl $$dyncall,%%r31\;copy %%r31,%%r2\";
}"
[(set_attr "type" "dyncall")
- (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
- (const_int 0))
- (const_int 12)
- (const_int 24)))])
+ ;; 12 bytes (3 insns) when neither long-call flag is set; else 24 (6 insns).
+ (set (attr "length")
+ (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
+ (const_int 0)))
+ (const_int 12)
+ (const_int 24)))])
(define_expand "call_value"
[(parallel [(set (match_operand 0 "" "")
@@ -3228,7 +3254,7 @@
rtx op;
rtx call_insn;
- if (TARGET_LONG_CALLS)
+ if (TARGET_PORTABLE_RUNTIME)
op = force_reg (SImode, XEXP (operands[1], 0));
else
op = XEXP (operands[1], 0);
@@ -3275,14 +3301,21 @@
(clobber (reg:SI 2))
(use (const_int 0))]
;;- Don't use operand 1 for most machines.
- "! TARGET_LONG_CALLS"
+ "! TARGET_PORTABLE_RUNTIME"
"*
{
output_arg_descriptor (insn);
return output_call (insn, operands[1], gen_rtx (REG, SImode, 2));
}"
[(set_attr "type" "call")
- (set_attr "length" "4")])
+ (set (attr "length")
+ (if_then_else (lt (plus (symbol_ref "total_code_bytes") (pc))
+ (const_int 240000))
+ (const_int 4)
+ (if_then_else (ne (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
+ (const_int 0))
+ (const_int 64)
+ (const_int 52))))])
(define_insn "call_value_internal_reg"
[(set (match_operand 0 "" "=rf")
@@ -3297,16 +3330,20 @@
return \"blr 0,%%r2\;bv,n 0(%r1)\;ldo 4(%%r2),%%r2\";
/* Yuk! bl may not be able to reach $$dyncall. */
- if (TARGET_LONG_CALLS)
+ if (TARGET_PORTABLE_RUNTIME || TARGET_MILLICODE_LONG_CALLS)
return \"copy %r1,%%r22\;ldil L%%$$dyncall,%%r31\;ldo R%%$$dyncall(%%r31),%%r31\;blr 0,%%r2\;bv,n 0(%%r31)\;nop\";
else
return \"copy %r1,%%r22\;.CALL\\tARGW0=GR\;bl $$dyncall,%%r31\;copy %%r31,%%r2\";
}"
[(set_attr "type" "dyncall")
- (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
- (const_int 0))
- (const_int 12)
- (const_int 24)))])
+ ;; 12 bytes (3 insns) when neither long-call flag is set; else 24 (6 insns).
+ (set (attr "length")
+ (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
+ (const_int 0))
+ (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
+ (const_int 0)))
+ (const_int 12)
+ (const_int 24)))])
;; Call subroutine returning any type.