about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- gcc/ChangeLog 15
-rw-r--r-- gcc/config/pa/pa.h 132
-rw-r--r-- gcc/config/pa/pa.md 114
3 files changed, 183 insertions, 78 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 955e05f..6e4bac8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2003-12-20 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
+
+ * pa.h (TRAMPOLINE_TEMPLATE): Shorten sequence when generating PA
+ 2.0 code.
+ (TRAMPOLINE_CODE_SIZE, MIN_CACHELINE_SIZE): New defines.
+ (INITIALIZE_TRAMPOLINE): Rework to pass line length, and aligned start
+ and end addresses to I and D cache instruction patterns.
+ * pa.md (anddi3, iordi3): Change predicates of operands 1 and 2 to
+ and_operand and ior_operand, respectively. When generating 64-bit
+ code, only one operand needs to be a register operand.
+ (xordi3): Change predicates of operands 1 and 2 to register_operand.
+ (one_cmpldi2): Change predicate of operand 1 to register_operand.
+ (dcacheflush, icacheflush): Revise to flush an arbitrary number of
+ cache lines.
+
2003-12-20 Josef Zlomek <zlomekj@suse.cz>
PR optimization/13430, PR optimization/12322
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index 5c33a3b..312d58c 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -1003,10 +1003,20 @@ extern int may_call_alloca;
fputs ("\tdepwi 0,31,2,%r21\n", FILE); \
fputs ("\tldw 4(%r21),%r19\n", FILE); \
fputs ("\tldw 0(%r21),%r21\n", FILE); \
- fputs ("\tldsid (%r21),%r1\n", FILE); \
- fputs ("\tmtsp %r1,%sr0\n", FILE); \
- fputs ("\tbe 0(%sr0,%r21)\n", FILE); \
- fputs ("\tldw 40(%r22),%r29\n", FILE); \
+ if (TARGET_PA_20) \
+ { \
+ fputs ("\tbve (%r21)\n", FILE); \
+ fputs ("\tldw 40(%r22),%r29\n", FILE); \
+ fputs ("\t.word 0\n", FILE); \
+ fputs ("\t.word 0\n", FILE); \
+ } \
+ else \
+ { \
+ fputs ("\tldsid (%r21),%r1\n", FILE); \
+ fputs ("\tmtsp %r1,%sr0\n", FILE); \
+ fputs ("\tbe 0(%sr0,%r21)\n", FILE); \
+ fputs ("\tldw 40(%r22),%r29\n", FILE); \
+ } \
fputs ("\t.word 0\n", FILE); \
fputs ("\t.word 0\n", FILE); \
fputs ("\t.word 0\n", FILE); \
@@ -1029,16 +1039,21 @@ extern int may_call_alloca;
} \
}
-/* Length in units of the trampoline for entering a nested function.
+/* Length in units of the trampoline for entering a nested function. */
+
+#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 52)
- Flush the cache entries corresponding to the first and last addresses
- of the trampoline. This is necessary as the trampoline may cross two
- cache lines.
+/* Length in units of the trampoline instruction code. */
- If the code part of the trampoline ever grows to > 32 bytes, then it
- will become necessary to hack on the cacheflush pattern in pa.md. */
+#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
-#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 52)
+/* Minimum length of a cache line. A length of 16 will work on all
+ PA-RISC processors. All PA 1.1 processors have a cache line of
+ 32 bytes. Most but not all PA 2.0 processors have a cache line
+ of 64 bytes. As cache flushes are expensive and we don't support
+ PA 1.0, we use a minimum length of 32. */
+
+#define MIN_CACHELINE_SIZE 32
/* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code.
@@ -1048,54 +1063,85 @@ extern int may_call_alloca;
Move the static chain value to trampoline template at offset 40.
Move the trampoline address to trampoline template at offset 44.
Move r19 to trampoline template at offset 48. The latter two
- words create a plabel for the indirect call to the trampoline. */
+ words create a plabel for the indirect call to the trampoline.
+
+ A similar sequence is used for the 64-bit port but the plabel is
+ at the beginning of the trampoline.
+
+ Finally, the cache entries for the trampoline code are flushed.
+ This is necessary to ensure that the trampoline instruction sequence
+ is written to memory prior to any attempts at prefetching the code
+ sequence. */
#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
{ \
+ rtx start_addr = gen_reg_rtx (Pmode); \
+ rtx end_addr = gen_reg_rtx (Pmode); \
+ rtx line_length = gen_reg_rtx (Pmode); \
+ rtx tmp; \
+ \
if (!TARGET_64BIT) \
{ \
- rtx start_addr, end_addr; \
+ tmp = memory_address (Pmode, plus_constant ((TRAMP), 36)); \
+ emit_move_insn (gen_rtx_MEM (Pmode, tmp), (FNADDR)); \
+ tmp = memory_address (Pmode, plus_constant ((TRAMP), 40)); \
+ emit_move_insn (gen_rtx_MEM (Pmode, tmp), (CXT)); \
\
- start_addr = memory_address (Pmode, plus_constant ((TRAMP), 36)); \
- emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (FNADDR)); \
- start_addr = memory_address (Pmode, plus_constant ((TRAMP), 40)); \
- emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (CXT)); \
- start_addr = memory_address (Pmode, plus_constant ((TRAMP), 44)); \
- emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (TRAMP)); \
- start_addr = memory_address (Pmode, plus_constant ((TRAMP), 48)); \
- emit_move_insn (gen_rtx_MEM (Pmode, start_addr), \
+ /* Create a fat pointer for the trampoline. */ \
+ tmp = memory_address (Pmode, plus_constant ((TRAMP), 44)); \
+ emit_move_insn (gen_rtx_MEM (Pmode, tmp), (TRAMP)); \
+ tmp = memory_address (Pmode, plus_constant ((TRAMP), 48)); \
+ emit_move_insn (gen_rtx_MEM (Pmode, tmp), \
gen_rtx_REG (Pmode, 19)); \
+ \
/* fdc and fic only use registers for the address to flush, \
- they do not accept integer displacements. */ \
- start_addr = force_reg (Pmode, (TRAMP)); \
- end_addr = force_reg (Pmode, plus_constant ((TRAMP), 32)); \
- emit_insn (gen_dcacheflush (start_addr, end_addr)); \
- emit_insn (gen_icacheflush (start_addr, end_addr, start_addr, \
+ they do not accept integer displacements. We align the \
+ start and end addresses to the beginning of their respective \
+ cache lines to minimize the number of lines flushed. */ \
+ tmp = force_reg (Pmode, (TRAMP)); \
+ emit_insn (gen_andsi3 (start_addr, tmp, \
+ GEN_INT (-MIN_CACHELINE_SIZE))); \
+ tmp = force_reg (Pmode, \
+ plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1)); \
+ emit_insn (gen_andsi3 (end_addr, tmp, \
+ GEN_INT (-MIN_CACHELINE_SIZE))); \
+ emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); \
+ emit_insn (gen_dcacheflush (start_addr, end_addr, line_length)); \
+ emit_insn (gen_icacheflush (start_addr, end_addr, line_length, \
+ gen_reg_rtx (Pmode), \
gen_reg_rtx (Pmode), \
gen_reg_rtx (Pmode))); \
} \
else \
{ \
- rtx start_addr, end_addr; \
+ tmp = memory_address (Pmode, plus_constant ((TRAMP), 56)); \
+ emit_move_insn (gen_rtx_MEM (Pmode, tmp), (FNADDR)); \
+ tmp = memory_address (Pmode, plus_constant ((TRAMP), 64)); \
+ emit_move_insn (gen_rtx_MEM (Pmode, tmp), (CXT)); \
\
- start_addr = memory_address (Pmode, plus_constant ((TRAMP), 56)); \
- emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (FNADDR)); \
- start_addr = memory_address (Pmode, plus_constant ((TRAMP), 64)); \
- emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (CXT)); \
/* Create a fat pointer for the trampoline. */ \
- end_addr = force_reg (Pmode, plus_constant ((TRAMP), 32)); \
- start_addr = memory_address (Pmode, plus_constant ((TRAMP), 16)); \
- emit_move_insn (gen_rtx_MEM (Pmode, start_addr), end_addr); \
- end_addr = gen_rtx_REG (Pmode, 27); \
- start_addr = memory_address (Pmode, plus_constant ((TRAMP), 24)); \
- emit_move_insn (gen_rtx_MEM (Pmode, start_addr), end_addr); \
+ tmp = memory_address (Pmode, plus_constant ((TRAMP), 16)); \
+ emit_move_insn (gen_rtx_MEM (Pmode, tmp), \
+ force_reg (Pmode, plus_constant ((TRAMP), 32))); \
+ tmp = memory_address (Pmode, plus_constant ((TRAMP), 24)); \
+ emit_move_insn (gen_rtx_MEM (Pmode, tmp), \
+ gen_rtx_REG (Pmode, 27)); \
+ \
/* fdc and fic only use registers for the address to flush, \
- they do not accept integer displacements. PA 2.0 cache \
- lines are 64 bytes. */ \
- start_addr = force_reg (Pmode, (TRAMP)); \
- end_addr = force_reg (Pmode, plus_constant ((TRAMP), 64)); \
- emit_insn (gen_dcacheflush (start_addr, end_addr)); \
- emit_insn (gen_icacheflush (start_addr, end_addr, start_addr, \
+ they do not accept integer displacements. We align the \
+ start and end addresses to the beginning of their respective \
+ cache lines to minimize the number of lines flushed. */ \
+ tmp = force_reg (Pmode, plus_constant ((TRAMP), 32)); \
+ emit_insn (gen_anddi3 (start_addr, tmp, \
+ GEN_INT (-MIN_CACHELINE_SIZE))); \
+ tmp = force_reg (Pmode, \
+ plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1)); \
+ emit_insn (gen_anddi3 (end_addr, tmp, \
+ GEN_INT (-MIN_CACHELINE_SIZE))); \
+ emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); \
+ emit_insn (gen_dcacheflush (start_addr, end_addr, line_length)); \
+ emit_insn (gen_icacheflush (start_addr, end_addr, line_length, \
+ gen_reg_rtx (Pmode), \
gen_reg_rtx (Pmode), \
gen_reg_rtx (Pmode))); \
} \
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 310a248..1635677 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -5322,15 +5322,25 @@
(define_expand "anddi3"
[(set (match_operand:DI 0 "register_operand" "")
- (and:DI (match_operand:DI 1 "arith_double_operand" "")
- (match_operand:DI 2 "arith_double_operand" "")))]
+ (and:DI (match_operand:DI 1 "and_operand" "")
+ (match_operand:DI 2 "and_operand" "")))]
""
"
{
- if (! register_operand (operands[1], DImode)
- || ! register_operand (operands[2], DImode))
- /* Let GCC break this into word-at-a-time operations. */
- FAIL;
+ if (TARGET_64BIT)
+ {
+ /* One operand must be a register operand. */
+ if (!register_operand (operands[1], DImode)
+ && !register_operand (operands[2], DImode))
+ FAIL;
+ }
+ else
+ {
+ /* Both operands must be register operands. */
+ if (!register_operand (operands[1], DImode)
+ || !register_operand (operands[2], DImode))
+ FAIL;
+ }
}")
(define_insn ""
@@ -5391,15 +5401,25 @@
(define_expand "iordi3"
[(set (match_operand:DI 0 "register_operand" "")
- (ior:DI (match_operand:DI 1 "arith_double_operand" "")
- (match_operand:DI 2 "arith_double_operand" "")))]
+ (ior:DI (match_operand:DI 1 "ior_operand" "")
+ (match_operand:DI 2 "ior_operand" "")))]
""
"
{
- if (! register_operand (operands[1], DImode)
- || ! register_operand (operands[2], DImode))
- /* Let GCC break this into word-at-a-time operations. */
- FAIL;
+ if (TARGET_64BIT)
+ {
+ /* One operand must be a register operand. */
+ if (!register_operand (operands[1], DImode)
+ && !register_operand (operands[2], DImode))
+ FAIL;
+ }
+ else
+ {
+ /* Both operands must be register operands. */
+ if (!register_operand (operands[1], DImode)
+ || !register_operand (operands[2], DImode))
+ FAIL;
+ }
}")
(define_insn ""
@@ -5462,15 +5482,11 @@
(define_expand "xordi3"
[(set (match_operand:DI 0 "register_operand" "")
- (xor:DI (match_operand:DI 1 "arith_double_operand" "")
- (match_operand:DI 2 "arith_double_operand" "")))]
+ (xor:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))]
""
"
{
- if (! register_operand (operands[1], DImode)
- || ! register_operand (operands[2], DImode))
- /* Let GCC break this into word-at-a-time operations. */
- FAIL;
}")
(define_insn ""
@@ -5532,12 +5548,10 @@
(define_expand "one_cmpldi2"
[(set (match_operand:DI 0 "register_operand" "")
- (not:DI (match_operand:DI 1 "arith_double_operand" "")))]
+ (not:DI (match_operand:DI 1 "register_operand" "")))]
""
"
{
- if (! register_operand (operands[1], DImode))
- FAIL;
}")
(define_insn ""
@@ -8828,29 +8842,59 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
return \"\";
}")
-;; Flush the I and D cache line found at the address in operand 0.
+;; Flush the I and D cache lines from the start address (operand0)
+;; to the end address (operand1). No lines are flushed if the end
+;; address is less than the start address (unsigned).
+;;
+;; Because the range of memory flushed is variable and the size of
+;; a MEM can only be a CONST_INT, the patterns specify that they
+;; perform an unspecified volatile operation on all memory.
+;;
+;; The address range for an icache flush must lie within a single
+;; space on targets with non-equivalent space registers.
+;;
;; This is used by the trampoline code for nested functions.
-;; So long as the trampoline itself is less than 32 bytes this
-;; is sufficient.
-
+;;
+;; Operand 0 contains the start address.
+;; Operand 1 contains the end address.
+;; Operand 2 contains the line length to use.
+;; Operand 3 contains the start address (clobbered).
+;; Operands 4 and 5 (icacheflush) are clobbered scratch registers.
(define_insn "dcacheflush"
- [(unspec_volatile [(const_int 1)] 0)
- (use (mem:SI (match_operand 0 "pmode_register_operand" "r")))
- (use (mem:SI (match_operand 1 "pmode_register_operand" "r")))]
+ [(const_int 1)
+ (unspec_volatile [(mem:BLK (scratch))] 0)
+ (use (match_operand 0 "pmode_register_operand" "r"))
+ (use (match_operand 1 "pmode_register_operand" "r"))
+ (use (match_operand 2 "pmode_register_operand" "r"))
+ (clobber (match_scratch 3 "=&0"))]
""
- "fdc 0(%0)\;fdc 0(%1)\;sync"
+ "*
+{
+ if (TARGET_64BIT)
+ return \"cmpb,*<<=,n %3,%1,.\;fdc,m %2(%3)\;sync\";
+ else
+ return \"cmpb,<<=,n %3,%1,.\;fdc,m %2(%3)\;sync\";
+}"
[(set_attr "type" "multi")
(set_attr "length" "12")])
(define_insn "icacheflush"
- [(unspec_volatile [(const_int 2)] 0)
- (use (mem:SI (match_operand 0 "pmode_register_operand" "r")))
- (use (mem:SI (match_operand 1 "pmode_register_operand" "r")))
+ [(const_int 2)
+ (unspec_volatile [(mem:BLK (scratch))] 0)
+ (use (match_operand 0 "pmode_register_operand" "r"))
+ (use (match_operand 1 "pmode_register_operand" "r"))
(use (match_operand 2 "pmode_register_operand" "r"))
- (clobber (match_operand 3 "pmode_register_operand" "=&r"))
- (clobber (match_operand 4 "pmode_register_operand" "=&r"))]
+ (clobber (match_scratch 3 "=&0"))
+ (clobber (match_operand 4 "pmode_register_operand" "=&r"))
+ (clobber (match_operand 5 "pmode_register_operand" "=&r"))]
""
- "mfsp %%sr0,%4\;ldsid (%2),%3\;mtsp %3,%%sr0\;fic 0(%%sr0,%0)\;fic 0(%%sr0,%1)\;sync\;mtsp %4,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop"
+ "*
+{
+ if (TARGET_64BIT)
+ return \"mfsp %%sr0,%5\;ldsid (%3),%4\;mtsp %4,%%sr0\;cmpb,*<<=,n %3,%1,.\;fic,m %2(%%sr0,%3)\;sync\;mtsp %5,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop\";
+ else
+ return \"mfsp %%sr0,%5\;ldsid (%3),%4\;mtsp %4,%%sr0\;cmpb,<<=,n %3,%1,.\;fic,m %2(%%sr0,%3)\;sync\;mtsp %5,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop\";
+}"
[(set_attr "type" "multi")
(set_attr "length" "52")])