diff options
author | Richard Henderson <rth@cygnus.com> | 2000-08-14 14:01:24 -0700 |
---|---|---|
committer | Richard Henderson <rth@gcc.gnu.org> | 2000-08-14 14:01:24 -0700 |
commit | 3f622353270a85d3945641069fb027d0ec9cd787 (patch) | |
tree | 866f9e8520c80581c82643ab70fdd2401ba6d0a1 /gcc/config/ia64 | |
parent | b6767a49b4b4ad83418462aa67ab7432a382e51a (diff) | |
download | gcc-3f622353270a85d3945641069fb027d0ec9cd787.zip gcc-3f622353270a85d3945641069fb027d0ec9cd787.tar.gz gcc-3f622353270a85d3945641069fb027d0ec9cd787.tar.bz2 |
configure.in (ia64-*): Set float_format for i386 long double.
* configure.in (ia64-*): Set float_format for i386 long double.
* real.c (GET_REAL): Treat 128-bit INTEL_EXTENDED_IEEE_FORMAT
as we would for i386 XFmode.
(PUT_REAL): Likewise.
(endian, ereal_atof, real_value_truncate): Likewise.
(ereal_isneg, toe64, etens, make_nan): Likewise.
* real.h (REAL_VALUE_TO_TARGET_LONG_DOUBLE): Likewise.
* config/ia64/ia64-protos.h: Update.
* config/ia64/ia64.c (general_tfmode_operand): New.
(destination_tfmode_operand): New.
(tfreg_or_fp01_operand): New.
(ia64_split_timode): New.
(spill_tfmode_operand): New.
(ia64_expand_prologue): Use TFmode not XFmode.
(ia64_expand_epilogue): Likewise.
(ia64_function_arg): Likewise.
(ia64_function_arg_advance): Likewise.
(ia64_return_in_memory): Likewise.
(ia64_function_value): Likewise.
(ia64_print_operand): Likewise.
(ia64_register_move_cost): Set GR<->FR to 5.
(ia64_secondary_reload_class): Get GR for TImode memory op.
* config/ia64/ia64.h (ROUND_TYPE_SIZE): Remove.
(ROUND_TYPE_ALIGN): Remove.
(LONG_DOUBLE_TYPE_SIZE): Set to 128.
(INTEL_EXTENDED_IEEE_FORMAT): Define.
(HARD_REGNO_NREGS): Use TFmode, not XFmode.
(HARD_REGNO_MODE_OK): Likewise. Disallow TImode in FRs.
(MODES_TIEABLE_P): Use TFmode, not XFmode.
(CLASS_MAX_NREGS): Likewise.
(ASM_OUTPUT_LONG_DOUBLE): Output by 4 byte hunks.
(PREDICATE_CODES): Update.
* config/ia64/ia64.md (movti): New.
(movti_internal): Use a clobber for memory alternatives.
(reload_inti, reload_outti): New.
(movsfcc_astep): Predicate properly.
(movdfcc_astep): Likewise.
(movxf): Remove.
(movtf): New.
(extendsftf2, extenddftf2): New.
(trunctfsf2, trunctfdf2): New.
(floatditf2, fix_trunctfdi2): New.
(floatunsditf2, fixuns_trunctfdi2): New.
(addtf3, subtf3, multf3, abstf2): New.
(negtf2, nabstf2, mintf3, maxtf3): New.
(maddtf3, msubtf3, nmultf3, nmaddtf3): New.
(cmptf): New.
(fr_spill): Use TFmode, not XFmode.
(fr_restore): Likewise.
* config/ia64/lib1funcs.asm (__divtf3): New.
* config/ia64/t-ia64 (LIB1ASMFUNCS): Add it.
From-SVN: r35689
Diffstat (limited to 'gcc/config/ia64')
-rw-r--r-- | gcc/config/ia64/ia64-protos.h | 8 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.c | 175 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.h | 80 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.md | 414 | ||||
-rw-r--r-- | gcc/config/ia64/lib1funcs.asm | 42 | ||||
-rw-r--r-- | gcc/config/ia64/t-ia64 | 2 |
6 files changed, 599 insertions, 122 deletions
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index 30f1d61..af12060 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -59,9 +59,14 @@ extern int ia64_direct_return PARAMS((void)); extern int predicate_operator PARAMS((rtx, enum machine_mode)); extern int ar_lc_reg_operand PARAMS((rtx, enum machine_mode)); extern int ar_ccv_reg_operand PARAMS((rtx, enum machine_mode)); +extern int general_tfmode_operand PARAMS((rtx, enum machine_mode)); +extern int destination_tfmode_operand PARAMS((rtx, enum machine_mode)); +extern int tfreg_or_fp01_operand PARAMS((rtx, enum machine_mode)); extern int ia64_move_ok PARAMS((rtx, rtx)); extern rtx ia64_gp_save_reg PARAMS((int)); +extern rtx ia64_split_timode PARAMS((rtx[], rtx, rtx)); +extern rtx spill_tfmode_operand PARAMS((rtx, int)); extern void ia64_expand_load_address PARAMS((rtx, rtx)); extern void ia64_expand_fetch_and_op PARAMS ((enum fetchop_code, @@ -112,6 +117,3 @@ extern void ia64_output_end_prologue PARAMS((FILE *)); extern void ia64_init_builtins PARAMS((void)); extern void ia64_override_options PARAMS((void)); extern int ia64_dbx_register_number PARAMS((int)); - -/* ??? Flag defined in toplev.c, for ia64.md -fssa hack. */ -extern int flag_ssa; diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index f090402..44bc8d8 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -570,6 +570,46 @@ ar_ccv_reg_operand (op, mode) && GET_CODE (op) == REG && REGNO (op) == AR_CCV_REGNUM); } + +/* Like general_operand, but don't allow (mem (addressof)). */ + +int +general_tfmode_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! general_operand (op, mode)) + return 0; + if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF) + return 0; + return 1; +} + +/* Similarly. */ + +int +destination_tfmode_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! 
destination_operand (op, mode)) + return 0; + if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == ADDRESSOF) + return 0; + return 1; +} + +/* Similarly. */ + +int +tfreg_or_fp01_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_CODE (op) == SUBREG) + return 0; + return reg_or_fp01_operand (op, mode); +} /* Return 1 if the operands of a move are ok. */ @@ -681,6 +721,106 @@ ia64_gp_save_reg (setjmp_p) return save; } + +/* Split a post-reload TImode reference into two DImode components. */ + +rtx +ia64_split_timode (out, in, scratch) + rtx out[2]; + rtx in, scratch; +{ + switch (GET_CODE (in)) + { + case REG: + out[0] = gen_rtx_REG (DImode, REGNO (in)); + out[1] = gen_rtx_REG (DImode, REGNO (in) + 1); + return NULL_RTX; + + case MEM: + { + HOST_WIDE_INT offset; + rtx base = XEXP (in, 0); + rtx offset_rtx; + + switch (GET_CODE (base)) + { + case REG: + out[0] = change_address (in, DImode, NULL_RTX); + break; + case POST_MODIFY: + base = XEXP (base, 0); + out[0] = change_address (in, DImode, NULL_RTX); + break; + + /* Since we're changing the mode, we need to change to POST_MODIFY + as well to preserve the size of the increment. Either that or + do the update in two steps, but we've already got this scratch + register handy so let's use it. */ + case POST_INC: + base = XEXP (base, 0); + out[0] = change_address (in, DImode, + gen_rtx_POST_MODIFY (Pmode, base,plus_constant (base, 16))); + break; + case POST_DEC: + base = XEXP (base, 0); + out[0] = change_address (in, DImode, + gen_rtx_POST_MODIFY (Pmode, base,plus_constant (base, -16))); + break; + default: + abort (); + } + + if (scratch == NULL_RTX) + abort (); + out[1] = change_address (in, DImode, scratch); + return gen_adddi3 (scratch, base, GEN_INT (8)); + } + + case CONST_INT: + case CONST_DOUBLE: + split_double (in, &out[0], &out[1]); + return NULL_RTX; + + default: + abort (); + } +} + +/* ??? Fixing GR->FR TFmode moves during reload is hard. 
You need to go + through memory plus an extra GR scratch register. Except that you can + either get the first from SECONDARY_MEMORY_NEEDED or the second from + SECONDARY_RELOAD_CLASS, but not both. + + We got into problems in the first place by allowing a construct like + (subreg:TF (reg:TI)), which we got from a union containing a long double. + This solution attempts to prevent this situation from occurring. When + we see something like the above, we spill the inner register to memory. */ + +rtx +spill_tfmode_operand (in, force) + rtx in; + int force; +{ + if (GET_CODE (in) == SUBREG + && GET_MODE (SUBREG_REG (in)) == TImode + && GET_CODE (SUBREG_REG (in)) == REG) + { + rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE); + return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0))); + } + else if (force && GET_CODE (in) == REG) + { + rtx mem = gen_mem_addressof (in, NULL_TREE); + return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0))); + } + else if (GET_CODE (in) == MEM + && GET_CODE (XEXP (in, 0)) == ADDRESSOF) + { + return change_address (in, TFmode, copy_to_reg (XEXP (in, 0))); + } + else + return in; +} /* Begin the assembly file. */ @@ -1702,7 +1842,7 @@ ia64_expand_prologue () { if (cfa_off & 15) abort (); - reg = gen_rtx_REG (XFmode, regno); + reg = gen_rtx_REG (TFmode, regno); do_spill (gen_fr_spill_x, reg, cfa_off, reg); cfa_off -= 16; } @@ -1867,7 +2007,7 @@ ia64_expand_epilogue () { if (cfa_off & 15) abort (); - reg = gen_rtx_REG (XFmode, regno); + reg = gen_rtx_REG (TFmode, regno); do_restore (gen_fr_restore_x, reg, cfa_off); cfa_off -= 16; } @@ -2304,7 +2444,6 @@ ia64_function_arg (cum, mode, type, named, incoming) gen_rtx_REG (hfa_mode, (FR_ARG_FIRST + fp_regs)), GEN_INT (offset)); - /* ??? Padding for XFmode type? 
*/ offset += hfa_size; args_byte_size += hfa_size; fp_regs++; @@ -2484,7 +2623,6 @@ ia64_function_arg_advance (cum, mode, type, named) for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) { - /* ??? Padding for XFmode type? */ offset += hfa_size; args_byte_size += hfa_size; fp_regs++; @@ -2586,7 +2724,6 @@ ia64_return_in_memory (valtype) { int hfa_size = GET_MODE_SIZE (hfa_mode); - /* ??? Padding for XFmode type? */ if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) return 1; else @@ -2629,7 +2766,6 @@ ia64_function_value (valtype, func) loc[i] = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), GEN_INT (offset)); - /* ??? Padding for XFmode type? */ offset += hfa_size; } @@ -2782,19 +2918,10 @@ ia64_print_operand (file, x, code) case POST_INC: value = GET_MODE_SIZE (GET_MODE (x)); - - /* ??? This is for ldf.fill and stf.spill which use XFmode, - but which actually need 16 bytes increments. Perhaps we - can change them to use TFmode instead. Or don't use - POST_DEC/POST_INC for them. */ - if (value == 12) - value = 16; break; case POST_DEC: value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x)); - if (value == -12) - value = -16; break; } @@ -2930,17 +3057,28 @@ ia64_register_move_cost (from, to) { int from_hard, to_hard; int from_gr, to_gr; + int from_fr, to_fr; from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS); to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS); from_gr = (from == GENERAL_REGS); to_gr = (to == GENERAL_REGS); + from_fr = (from == FR_REGS); + to_fr = (to == FR_REGS); if (from_hard && to_hard) return 8; else if ((from_hard && !to_gr) || (!from_gr && to_hard)) return 6; + /* ??? Moving from FR<->GR must be more expensive than 2, so that we get + secondary memory reloads for TFmode moves. Unfortunately, we don't + have the mode here, so we can't check that. 
*/ + /* Moreover, we have to make this at least as high as MEMORY_MOVE_COST + to avoid spectacularly poor register class preferencing for TFmode. */ + else if (from_fr != to_fr) + return 5; + return 2; } @@ -3018,6 +3156,13 @@ ia64_secondary_reload_class (class, mode, x) return GR_REGS; break; + case GR_REGS: + /* Since we have no offsettable memory addresses, we need a temporary + to hold the address of the second word. */ + if (mode == TImode) + return GR_REGS; + break; + default: break; } diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h index bb1e038..90717b8 100644 --- a/gcc/config/ia64/ia64.h +++ b/gcc/config/ia64/ia64.h @@ -383,23 +383,6 @@ while (0) a field, not crossing a boundary for it. */ #define PCC_BITFIELD_TYPE_MATTERS 1 -/* Define this macro as an expression for the overall size of a structure - (given by STRUCT as a tree node) when the size computed from the fields is - SIZE and the alignment is ALIGN. - - The default is to round SIZE up to a multiple of ALIGN. */ -/* ??? Might need this for 80-bit double-extended floats. */ -/* #define ROUND_TYPE_SIZE(STRUCT, SIZE, ALIGN) */ - -/* Define this macro as an expression for the alignment of a structure (given - by STRUCT as a tree node) if the alignment computed in the usual way is - COMPUTED and the alignment explicitly specified was SPECIFIED. - - The default is to use SPECIFIED if it is larger; otherwise, use the smaller - of COMPUTED and `BIGGEST_ALIGNMENT' */ -/* ??? Might need this for 80-bit double-extended floats. */ -/* #define ROUND_TYPE_ALIGN(STRUCT, COMPUTED, SPECIFIED) */ - /* An integer expression for the size in bits of the largest integer machine mode that should actually be used. */ @@ -465,8 +448,11 @@ while (0) /* A C expression for the size in bits of the type `long double' on the target machine. If you don't define this, the default is two words. */ -/* ??? We have an 80 bit extended double format. 
*/ -#define LONG_DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE 128 + +/* Tell real.c that this is the 80-bit Intel extended float format + packaged in a 128-bit entity. */ +#define INTEL_EXTENDED_IEEE_FORMAT /* An expression whose value is 1 or 0, according to whether the type `char' should be signed or unsigned by default. The user can always override this @@ -812,7 +798,6 @@ while (0) /* A C expression for the number of consecutive hard registers, starting at register number REGNO, required to hold a value of mode MODE. */ -/* ??? x86 80-bit FP values only require 1 register. */ /* ??? We say that CCmode values require two registers. This allows us to easily store the normal and inverted values. We use CCImode to indicate a single predicate register. */ @@ -821,19 +806,20 @@ while (0) ((REGNO) == PR_REG (0) && (MODE) == DImode ? 64 \ : PR_REGNO_P (REGNO) && (MODE) == CCmode ? 2 \ : PR_REGNO_P (REGNO) && (MODE) == CCImode ? 1 \ - : FR_REGNO_P (REGNO) && (MODE) == XFmode ? 1 \ + : FR_REGNO_P (REGNO) && (MODE) == TFmode ? 1 \ : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) /* A C expression that is nonzero if it is permissible to store a value of mode MODE in hard register number REGNO (or in several registers starting with that one). */ -#define HARD_REGNO_MODE_OK(REGNO, MODE) \ - (FR_REGNO_P (REGNO) ? GET_MODE_CLASS (MODE) != MODE_CC \ +#define HARD_REGNO_MODE_OK(REGNO, MODE) \ + (FR_REGNO_P (REGNO) ? GET_MODE_CLASS (MODE) != MODE_CC && (MODE) != TImode \ : PR_REGNO_P (REGNO) ? GET_MODE_CLASS (MODE) == MODE_CC \ - : GR_REGNO_P (REGNO) ? (MODE) != XFmode && (MODE) != CCImode \ + : GR_REGNO_P (REGNO) ? (MODE) != CCImode && (MODE) != TFmode \ : AR_REGNO_P (REGNO) ? (MODE) == DImode \ - : 1) + : BR_REGNO_P (REGNO) ? 
(MODE) == DImode \ + : 0) /* A C expression that is nonzero if it is desirable to choose register allocation so as to avoid move instructions between a value of mode MODE1 @@ -846,11 +832,11 @@ while (0) INTEGRAL_MODE_P or FLOAT_MODE_P and the other is not. Otherwise, it is true. */ /* Don't tie integer and FP modes, as that causes us to get integer registers - allocated for FP instructions. XFmode only supported in FP registers at - the moment, so we can't tie it with any other modes. */ + allocated for FP instructions. TFmode only supported in FP registers so + we can't tie it with any other modes. */ #define MODES_TIEABLE_P(MODE1, MODE2) \ ((GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) \ - && (((MODE1) == XFmode) == ((MODE2) == XFmode))) + && (((MODE1) == TFmode) == ((MODE2) == TFmode))) /* Define this macro if the compiler should avoid copies to/from CCmode registers. You should only define this macro if support fo copying to/from @@ -1044,10 +1030,16 @@ enum reg_class registers of CLASS1 can only be copied to registers of class CLASS2 by storing a register of CLASS1 into memory and loading that memory location into a register of CLASS2. */ -/* ??? We may need this for XFmode moves between FR and GR regs. Using - getf.sig/getf.exp almost works, but the result in the GR regs is not - properly formatted and has two extra bits. */ -/* #define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, M) */ + +#if 0 +/* ??? May need this, but since we've disallowed TFmode in GR_REGS, + I'm not quite sure how it could be invoked. The normal problems + with unions should be solved with the addressof fiddling done by + movtf and friends. */ +#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \ + ((MODE) == TFmode && (((CLASS1) == GR_REGS && (CLASS2) == FR_REGS) \ + || ((CLASS1) == FR_REGS && (CLASS2) == GR_REGS))) +#endif /* A C expression for the maximum number of consecutive registers of class CLASS needed to hold a value of mode MODE. 
@@ -1055,7 +1047,7 @@ enum reg_class #define CLASS_MAX_NREGS(CLASS, MODE) \ ((MODE) == CCmode && (CLASS) == PR_REGS ? 2 \ - : ((CLASS) == FR_REGS && (MODE) == XFmode) ? 1 \ + : ((CLASS) == FR_REGS && (MODE) == TFmode) ? 1 \ : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) /* If defined, gives a class of registers that cannot be used as the @@ -1786,11 +1778,7 @@ do { \ on the machine mode of the memory reference it is used for or if the address is valid for some modes but not others. */ -/* ??? Strictly speaking this isn't true, because we can use any increment with - any mode. Unfortunately, the RTL implies that the increment depends on the - mode, so we need this for now. */ - -#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \ +#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \ if (GET_CODE (ADDR) == POST_DEC || GET_CODE (ADDR) == POST_INC) \ goto LABEL; @@ -1996,20 +1984,17 @@ do { \ /* Output of Data. */ /* A C statement to output to the stdio stream STREAM an assembler instruction - to assemble a floating-point constant of `XFmode', `DFmode', `SFmode', + to assemble a floating-point constant of `TFmode', `DFmode', `SFmode', respectively, whose value is VALUE. */ -/* ??? This has not been tested. Long doubles are really 10 bytes not 12 - bytes on ia64. */ - /* ??? Must reverse the word order for big-endian code? */ #define ASM_OUTPUT_LONG_DOUBLE(FILE, VALUE) \ do { \ long t[3]; \ REAL_VALUE_TO_TARGET_LONG_DOUBLE (VALUE, t); \ - fprintf (FILE, "\tdata8 0x%08lx, 0x%08lx, 0x%08lx\n", \ - t[0] & 0xffffffff, t[1] & 0xffffffff, t[2] & 0xffffffff); \ + fprintf (FILE, "\tdata4 0x%08lx, 0x%08lx, 0x%08lx, 0x%08lx\n", \ + t[0] & 0xffffffff, t[1] & 0xffffffff, t[2] & 0xffffffff, 0); \ } while (0) /* ??? Must reverse the word order for big-endian code? 
*/ @@ -2667,13 +2652,16 @@ do { \ CONSTANT_P_RTX}}, \ { "shladd_operand", {CONST_INT}}, \ { "fetchadd_operand", {CONST_INT}}, \ -{ "reg_or_fp01_operand", {SUBREG, REG, CONST_DOUBLE, CONSTANT_P_RTX}}, \ +{ "reg_or_fp01_operand", {SUBREG, REG, CONST_DOUBLE}}, \ { "normal_comparison_operator", {EQ, NE, GT, LE, GTU, LEU}}, \ { "adjusted_comparison_operator", {LT, GE, LTU, GEU}}, \ { "call_multiple_values_operation", {PARALLEL}}, \ { "predicate_operator", {NE, EQ}}, \ { "ar_lc_reg_operand", {REG}}, \ -{ "ar_ccv_reg_operand", {REG}}, +{ "ar_ccv_reg_operand", {REG}}, \ +{ "general_tfmode_operand", {SUBREG, REG, CONST_DOUBLE, MEM}}, \ +{ "destination_tfmode_operand", {SUBREG, REG, MEM}}, \ +{ "tfreg_or_fp01_operand", {REG, CONST_DOUBLE}}, /* An alias for a machine mode name. This is the machine mode that elements of a jump-table should have. */ diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md index 4a68623..0734936 100644 --- a/gcc/config/ia64/ia64.md +++ b/gcc/config/ia64/ia64.md @@ -22,8 +22,6 @@ ;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. -;; ??? Add support for long double XFmode patterns. - ;; ??? register_operand accepts (subreg:DI (mem:SI X)) which forces later ;; reload. This will be fixed once scheduling support is turned on. @@ -575,25 +573,115 @@ "addl %0 = @ltoff(%1), gp" [(set_attr "type" "A")]) -;; ??? These patterns exist to make SSA happy. We can get TImode values -;; because of structure moves generated for parameter and return value -;; loads and stores. +;; With no offsettable memory references, we've got to have a scratch +;; around to play with the second word. +(define_expand "movti" + [(parallel [(set (match_operand:TI 0 "general_operand" "") + (match_operand:TI 1 "general_operand" "")) + (clobber (match_scratch:DI 2 ""))])] + "" + " +{ + if (! reload_in_progress && ! reload_completed + && ! 
ia64_move_ok (operands[0], operands[1])) + operands[1] = force_reg (TImode, operands[1]); +}") + +(define_insn_and_split "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,r,m") + (match_operand:TI 1 "general_operand" "ri,m,r")) + (clobber (match_scratch:DI 2 "=X,&r,&r"))] + "ia64_move_ok (operands[0], operands[1])" + "#" + "reload_completed" + [(const_int 0)] + " +{ + rtx adj1, adj2, in[2], out[2]; + int first; + + adj1 = ia64_split_timode (in, operands[1], operands[2]); + adj2 = ia64_split_timode (out, operands[0], operands[2]); + + first = 0; + if (reg_overlap_mentioned_p (out[0], in[1])) + { + if (reg_overlap_mentioned_p (out[1], in[0])) + abort (); + first = 1; + } + + if (adj1 && adj2) + abort (); + if (adj1) + emit_insn (adj1); + if (adj2) + emit_insn (adj2); + emit_insn (gen_rtx_SET (VOIDmode, out[first], in[first])); + emit_insn (gen_rtx_SET (VOIDmode, out[!first], in[!first])); + DONE; +}" + [(set_attr "type" "unknown") + (set_attr "predicable" "no")]) -(define_insn "*movti_internal" - [(set (match_operand:TI 0 "register_operand" "=r") - (match_operand:TI 1 "register_operand" "r"))] - "flag_ssa" +;; ??? SSA creates these. Can't allow memories since we don't have +;; the scratch register. Fortunately combine will know how to add +;; the clobber and scratch. 
+(define_insn_and_split "*movti_internal_reg" + [(set (match_operand:TI 0 "register_operand" "=r") + (match_operand:TI 1 "nonmemory_operand" "ri"))] + "" "#" + "reload_completed" + [(const_int 0)] + " +{ + rtx in[2], out[2]; + int first; + + ia64_split_timode (in, operands[1], NULL_RTX); + ia64_split_timode (out, operands[0], NULL_RTX); + + first = 0; + if (reg_overlap_mentioned_p (out[0], in[1])) + { + if (reg_overlap_mentioned_p (out[1], in[0])) + abort (); + first = 1; + } + + emit_insn (gen_rtx_SET (VOIDmode, out[first], in[first])); + emit_insn (gen_rtx_SET (VOIDmode, out[!first], in[!first])); + DONE; +}" [(set_attr "type" "unknown") (set_attr "predicable" "no")]) -(define_split - [(set (match_operand:TI 0 "register_operand" "") - (match_operand:TI 1 "register_operand" ""))] - "flag_ssa && reload_completed" - [(set (subreg:DI (match_dup 0) 0) (subreg:DI (match_dup 1) 0)) - (set (subreg:DI (match_dup 0) 1) (subreg:DI (match_dup 1) 1))] - "") +(define_expand "reload_inti" + [(parallel [(set (match_operand:TI 0 "register_operand" "=r") + (match_operand:TI 1 "" "m")) + (clobber (match_operand:DI 2 "register_operand" "=&r"))])] + "" + " +{ + /* ??? Should now be enforced by tweaks to push_secondary_reload. */ + if (reg_overlap_mentioned_p (operands[2], operands[0]) + || reg_overlap_mentioned_p (operands[2], operands[1])) + abort (); +}") + +(define_expand "reload_outti" + [(parallel [(set (match_operand:TI 0 "" "=m") + (match_operand:TI 1 "register_operand" "r")) + (clobber (match_operand:DI 2 "register_operand" "=&r"))])] + "" + " +{ + /* ??? Should now be enforced by tweaks to push_secondary_reload. 
*/ + if (reg_overlap_mentioned_p (operands[2], operands[0]) + || reg_overlap_mentioned_p (operands[2], operands[1])) + abort (); +}") ;; Floating Point Moves ;; @@ -621,10 +709,10 @@ (match_operand:SF 1 "nonmemory_operand" "fG,fG,*r,*r")))] "TARGET_A_STEP && ia64_move_ok (operands[0], operands[1])" "@ - mov %0 = %F1 - getf.s %0 = %F1 - setf.s %0 = %1 - mov %0 = %1" + (%J2) mov %0 = %F1 + (%J2) getf.s %0 = %F1 + (%J2) setf.s %0 = %1 + (%J2) mov %0 = %1" [(set_attr "type" "F,M,M,A") (set_attr "predicable" "no")]) @@ -680,10 +768,10 @@ (match_operand:DF 1 "nonmemory_operand" "fG,fG,*r,*r")))] "TARGET_A_STEP && ia64_move_ok (operands[0], operands[1])" "@ - mov %0 = %F1 - getf.d %0 = %F1 - setf.d %0 = %1 - mov %0 = %1" + (%J2) mov %0 = %F1 + (%J2) getf.d %0 = %F1 + (%J2) setf.d %0 = %1 + (%J2) mov %0 = %1" [(set_attr "type" "F,M,M,A") (set_attr "predicable" "no")]) @@ -718,35 +806,96 @@ st8%Q0 %0 = %1%P0" [(set_attr "type" "F,M,M,M,M,A,M,M")]) -(define_expand "movxf" - [(set (match_operand:XF 0 "general_operand" "") - (match_operand:XF 1 "general_operand" ""))] +;; With no offsettable memory references, we've got to have a scratch +;; around to play with the second word if the variable winds up in GRs. +(define_expand "movtf" + [(set (match_operand:TF 0 "general_operand" "") + (match_operand:TF 1 "general_operand" ""))] "" " { - if (! reload_in_progress && ! reload_completed - && ! ia64_move_ok (operands[0], operands[1])) - operands[1] = force_reg (XFmode, operands[1]); + /* We must support TFmode loads into general registers for stdarg/vararg + and unprototyped calls. We split them into DImode loads for convenience. + We don't need TFmode stores from general regs, because a stdarg/vararg + routine does a block store to memory of unnamed arguments. */ + if (GET_CODE (operands[0]) == REG + && GR_REGNO_P (REGNO (operands[0]))) + { + /* We're hoping to transform everything that deals with TFmode + quantities and GR registers early in the compiler. 
*/ + if (no_new_pseudos) + abort (); + + /* Struct to register can just use TImode instead. */ + if ((GET_CODE (operands[1]) == SUBREG + && GET_MODE (SUBREG_REG (operands[1])) == TImode) + || (GET_CODE (operands[1]) == REG + && GR_REGNO_P (REGNO (operands[1])))) + { + emit_move_insn (gen_rtx_REG (TImode, REGNO (operands[0])), + SUBREG_REG (operands[1])); + DONE; + } + + if (GET_CODE (operands[1]) == CONST_DOUBLE) + { + emit_move_insn (gen_rtx_REG (DImode, REGNO (operands[0])), + operand_subword (operands[1], 0, 0, DImode)); + emit_move_insn (gen_rtx_REG (DImode, REGNO (operands[0]) + 1), + operand_subword (operands[1], 1, 0, DImode)); + DONE; + } + + /* If the quantity is in a register not known to be GR, spill it. */ + if (register_operand (operands[1], TFmode)) + operands[1] = spill_tfmode_operand (operands[1], 1); + + if (GET_CODE (operands[1]) == MEM) + { + rtx out[2]; + + out[WORDS_BIG_ENDIAN] = gen_rtx_REG (DImode, REGNO (operands[0])); + out[!WORDS_BIG_ENDIAN] = gen_rtx_REG (DImode, REGNO (operands[0])+1); + + emit_move_insn (out[0], change_address (operands[1], DImode, NULL)); + emit_move_insn (out[1], + change_address (operands[1], DImode, + plus_constant (XEXP (operands[1], 0), + 8))); + DONE; + } + + abort (); + } + + if (! reload_in_progress && ! reload_completed) + { + operands[0] = spill_tfmode_operand (operands[0], 0); + operands[1] = spill_tfmode_operand (operands[1], 0); + + if (! ia64_move_ok (operands[0], operands[1])) + operands[1] = force_reg (TFmode, operands[1]); + } }") ;; ??? There's no easy way to mind volatile acquire/release semantics. ;; Errata 72 workaround. 
-(define_insn "*movxfcc_astep" +(define_insn "*movtfcc_astep" [(cond_exec (match_operator 2 "predicate_operator" [(match_operand:CC 3 "register_operand" "c") (const_int 0)]) - (set (match_operand:XF 0 "register_operand" "=f") - (match_operand:XF 1 "nonmemory_operand" "fG")))] + (set (match_operand:TF 0 "register_operand" "=f") + (match_operand:TF 1 "nonmemory_operand" "fG")))] "TARGET_A_STEP && ia64_move_ok (operands[0], operands[1])" - "mov %0 = %F1" + "(%J2) mov %0 = %F1" [(set_attr "type" "F") (set_attr "predicable" "no")]) -(define_insn "*movxf_internal_astep" - [(set (match_operand:XF 0 "destination_operand" "=f,f, m") - (match_operand:XF 1 "general_operand" "fG,m,fG"))] +(define_insn "*movtf_internal_astep" + [(set (match_operand:TF 0 "destination_tfmode_operand" "=f,f, m") + (match_operand:TF 1 "general_tfmode_operand" "fG,m,fG"))] "TARGET_A_STEP && ia64_move_ok (operands[0], operands[1])" "@ mov %0 = %F1 @@ -755,9 +904,9 @@ [(set_attr "type" "F,M,M") (set_attr "predicable" "no")]) -(define_insn "*movxf_internal" - [(set (match_operand:XF 0 "destination_operand" "=f,f, m") - (match_operand:XF 1 "general_operand" "fG,m,fG"))] +(define_insn "*movtf_internal" + [(set (match_operand:TF 0 "destination_tfmode_operand" "=f,f, m") + (match_operand:TF 1 "general_tfmode_operand" "fG,m,fG"))] "! 
TARGET_A_STEP && ia64_move_ok (operands[0], operands[1])" "@ mov %0 = %F1 @@ -843,6 +992,26 @@ "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;" [(set_attr "type" "F")]) +(define_insn_and_split "extendsftf2" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (float_extend:TF (match_operand:SF 1 "register_operand" "0,f")))] + "" + "mov %0 = %1" + "reload_completed" + [(set (match_dup 0) (float_extend:TF (match_dup 1)))] + "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;" + [(set_attr "type" "F")]) + +(define_insn_and_split "extenddftf2" + [(set (match_operand:TF 0 "register_operand" "=f,f") + (float_extend:TF (match_operand:DF 1 "register_operand" "0,f")))] + "" + "mov %0 = %1" + "reload_completed" + [(set (match_dup 0) (float_extend:TF (match_dup 1)))] + "if (true_regnum (operands[0]) == true_regnum (operands[1])) DONE;" + [(set_attr "type" "F")]) + (define_insn "truncdfsf2" [(set (match_operand:SF 0 "register_operand" "=f") (float_truncate:SF (match_operand:DF 1 "register_operand" "f")))] @@ -850,25 +1019,25 @@ "fnorm.s %0 = %1%B0" [(set_attr "type" "F")]) -(define_insn "truncxfsf2" +(define_insn "trunctfsf2" [(set (match_operand:SF 0 "register_operand" "=f") - (float_truncate:SF (match_operand:XF 1 "register_operand" "f")))] + (float_truncate:SF (match_operand:TF 1 "register_operand" "f")))] "" "fnorm.s %0 = %1%B0" [(set_attr "type" "F")]) -(define_insn "truncxfdf2" +(define_insn "trunctfdf2" [(set (match_operand:DF 0 "register_operand" "=f") - (float_truncate:DF (match_operand:XF 1 "register_operand" "f")))] + (float_truncate:DF (match_operand:TF 1 "register_operand" "f")))] "" "fnorm.d %0 = %1%B0" [(set_attr "type" "F")]) ;; Convert between signed integer types and floating point. 
-(define_insn "floatdixf2" - [(set (match_operand:XF 0 "register_operand" "=f") - (float:XF (match_operand:DI 1 "register_operand" "f")))] +(define_insn "floatditf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (float:TF (match_operand:DI 1 "register_operand" "f")))] "" "fcvt.xf %0 = %1" [(set_attr "type" "F")]) @@ -887,6 +1056,13 @@ "fcvt.fx.trunc %0 = %1%B0" [(set_attr "type" "F")]) +(define_insn "fix_trunctfdi2" + [(set (match_operand:DI 0 "register_operand" "=f") + (fix:DI (match_operand:TF 1 "register_operand" "f")))] + "" + "fcvt.fx.trunc %0 = %1%B0" + [(set_attr "type" "F")]) + ;; Convert between unsigned integer types and floating point. (define_insn "floatunsdisf2" @@ -903,6 +1079,13 @@ "fcvt.xuf.d %0 = %1%B0" [(set_attr "type" "F")]) +(define_insn "floatunsditf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (unsigned_float:TF (match_operand:DI 1 "register_operand" "f")))] + "" + "fcvt.xuf %0 = %1%B0" + [(set_attr "type" "F")]) + (define_insn "fixuns_truncsfdi2" [(set (match_operand:DI 0 "register_operand" "=f") (unsigned_fix:DI (match_operand:SF 1 "register_operand" "f")))] @@ -917,6 +1100,12 @@ "fcvt.fxu.trunc %0 = %1%B0" [(set_attr "type" "F")]) +(define_insn "fixuns_trunctfdi2" + [(set (match_operand:DI 0 "register_operand" "=f") + (unsigned_fix:DI (match_operand:TF 1 "register_operand" "f")))] + "" + "fcvt.fxu.trunc %0 = %1%B0" + [(set_attr "type" "F")]) ;; :::::::::::::::::::: ;; :: @@ -1702,7 +1891,111 @@ "" "fnma.d %0 = %1, %2, %F3%B0" [(set_attr "type" "F")]) + +;; :::::::::::::::::::: +;; :: +;; :: 80 bit floating point arithmetic (TFmode) +;; :: +;; :::::::::::::::::::: + +(define_insn "addtf3" + [(set (match_operand:TF 0 "register_operand" "=f") + (plus:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))] + "" + "fadd %0 = %F1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "subtf3" + [(set (match_operand:TF 0 "register_operand" "=f") + (minus:TF (match_operand:TF 1 
"tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))] + "" + "fsub %0 = %F1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "multf3" + [(set (match_operand:TF 0 "register_operand" "=f") + (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))] + "" + "fmpy %0 = %F1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "abstf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (abs:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")))] + "" + "fabs %0 = %F1%B0" + [(set_attr "type" "F")]) + +(define_insn "negtf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (neg:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG")))] + "" + "fneg %0 = %F1%B0" + [(set_attr "type" "F")]) + +(define_insn "*nabstf2" + [(set (match_operand:TF 0 "register_operand" "=f") + (neg:TF (abs:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG"))))] + "" + "fnegabs %0 = %F1%B0" + [(set_attr "type" "F")]) + +(define_insn "mintf3" + [(set (match_operand:TF 0 "register_operand" "=f") + (smin:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))] + "" + "fmin %0 = %F1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "maxtf3" + [(set (match_operand:TF 0 "register_operand" "=f") + (smax:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")))] + "" + "fmax %0 = %F1, %F2%B0" + [(set_attr "type" "F")]) + +(define_insn "*maddtf3" + [(set (match_operand:TF 0 "register_operand" "=f") + (plus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG")) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))] + "" + "fma %0 = %F1, %F2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*msubtf3" + [(set (match_operand:TF 0 "register_operand" "=f") + (minus:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 
"tfreg_or_fp01_operand" "fG")) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))] + "" + "fms %0 = %F1, %F2, %F3%B0" + [(set_attr "type" "F")]) + +(define_insn "*nmultf3" + [(set (match_operand:TF 0 "register_operand" "=f") + (neg:TF (mult:TF (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))))] + "" + "fnmpy %0 = %F1, %F2%B0" + [(set_attr "type" "F")]) +;; ??? Is it possible to canonicalize the pattern below as (minus (reg) (mult))? + +(define_insn "*nmaddtf3" + [(set (match_operand:TF 0 "register_operand" "=f") + (plus:TF (neg:TF (mult:TF + (match_operand:TF 1 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 2 "tfreg_or_fp01_operand" "fG"))) + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")))] + "" + "fnma %0 = %F1, %F2, %F3%B0" + [(set_attr "type" "F")]) ;; :::::::::::::::::::: ;; :: @@ -2038,13 +2331,11 @@ DONE; }") -;; ??? Enable this for XFmode support. - -(define_expand "cmpxf" +(define_expand "cmptf" [(set (cc0) - (compare (match_operand:XF 0 "reg_or_fp01_operand" "") - (match_operand:XF 1 "reg_or_fp01_operand" "")))] - "0" + (compare (match_operand:TF 0 "tfreg_or_fp01_operand" "") + (match_operand:TF 1 "tfreg_or_fp01_operand" "")))] + "" " { ia64_compare_op0 = operands[0]; @@ -2108,6 +2399,15 @@ "fcmp.%D1 %0, %I0 = %F2, %F3" [(set_attr "type" "F")]) +(define_insn "*cmptf_internal" + [(set (match_operand:CC 0 "register_operand" "=c") + (match_operator:CC 1 "comparison_operator" + [(match_operand:TF 2 "tfreg_or_fp01_operand" "fG") + (match_operand:TF 3 "tfreg_or_fp01_operand" "fG")]))] + "" + "fcmp.%D1 %0, %I0 = %F2, %F3" + [(set_attr "type" "F")]) + ;; ??? Can this pattern be generated? 
(define_insn "*bit_zero" @@ -3383,15 +3683,15 @@ [(set_attr "type" "M")]) (define_insn "fr_spill" - [(set (match_operand:XF 0 "memory_operand" "=m") - (unspec:XF [(match_operand:XF 1 "register_operand" "f")] 3))] + [(set (match_operand:TF 0 "memory_operand" "=m") + (unspec:TF [(match_operand:TF 1 "register_operand" "f")] 3))] "" "stf.spill %0 = %1%P0" [(set_attr "type" "M")]) (define_insn "fr_restore" - [(set (match_operand:XF 0 "register_operand" "=f") - (unspec:XF [(match_operand:XF 1 "memory_operand" "m")] 4))] + [(set (match_operand:TF 0 "register_operand" "=f") + (unspec:TF [(match_operand:TF 1 "memory_operand" "m")] 4))] "" "ldf.fill %0 = %1%P1" [(set_attr "type" "M")]) diff --git a/gcc/config/ia64/lib1funcs.asm b/gcc/config/ia64/lib1funcs.asm index e5fb7d7..76e37e9 100644 --- a/gcc/config/ia64/lib1funcs.asm +++ b/gcc/config/ia64/lib1funcs.asm @@ -1,3 +1,45 @@ +#ifdef L__divtf3 +// Compute an 80-bit IEEE double-extended quotient. +// +// From the Intel IA-64 Optimization Guide, choose the minimum latency +// alternative. +// +// farg0 holds the dividend. farg1 holds the divisor. The quotient is returned in fret0. + + .text + .align 16 + .global __divtf3 + .proc __divtf3 +__divtf3: + frcpa f10, p6 = farg0, farg1 + ;; +(p6) fnma.s1 f11 = farg1, f10, f1 + ;; +(p6) fma.s1 f12 = f11, f10, f10 +(p6) fma.s1 f11 = f11, f11, f0 + ;; +(p6) fma.s1 f11 = f11, f12, f12 + ;; +(p6) fnma.s1 f12 = farg1, f11, f1 +(p6) fma.s1 f10 = farg0, f10, f0 + ;; +(p6) fma.s1 f11 = f12, f11, f11 +(p6) fnma.s1 f12 = farg1, f10, farg0 + ;; +(p6) fma.s1 f10 = f12, f11, f10 +(p6) fnma.s1 f12 = farg1, f11, f1 + ;; +(p6) fnma.s1 f8 = farg1, f10, farg0 +(p6) fma.s1 f9 = f12, f11, f11 + ;; +(p6) fma f10 = f8, f9, f10 + ;; + mov fret0 = f10 + br.ret.sptk rp + ;; + .endp __divtf3 +#endif + #ifdef L__divdf3 // Compute a 64-bit IEEE double quotient. 
// diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64 index 5f59c5b..1f1e61af 100644 --- a/gcc/config/ia64/t-ia64 +++ b/gcc/config/ia64/t-ia64 @@ -8,7 +8,7 @@ LIB1ASMSRC = ia64/lib1funcs.asm # ??? We change the names of the DImode div/mod files so that they won't # accidentally be overridden by libgcc2.c files. We used to use __ia64 as # a prefix, now we use __ as the prefix. -LIB1ASMFUNCS = __divdf3 __divsf3 \ +LIB1ASMFUNCS = __divtf3 __divdf3 __divsf3 \ __divdi3 __moddi3 __udivdi3 __umoddi3 \ __divsi3 __modsi3 __udivsi3 __umodsi3 __save_stack_nonlocal \ __nonlocal_goto __restore_stack_nonlocal __trampoline |