Diffstat (limited to 'gcc/config/ia64/ia64.c')
-rw-r--r-- | gcc/config/ia64/ia64.c | 3279
1 file changed, 3279 insertions, 0 deletions
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c new file mode 100644 index 0000000..f7e9ebd --- /dev/null +++ b/gcc/config/ia64/ia64.c @@ -0,0 +1,3279 @@ +/* Definitions of target machine for GNU compiler. + Copyright (C) 1999 Cygnus Solutions. + Contributed by James E. Wilson <wilson@cygnus.com> and + David Mosberger <davidm@hpl.hp.com>. + +This file is part of GNU CC. + +GNU CC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU CC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + +#include <stdio.h> +#include <ctype.h> +#include "config.h" +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "regs.h" +#include "hard-reg-set.h" +#include "real.h" +#include "insn-config.h" +#include "conditions.h" +#include "insn-flags.h" +#include "output.h" +#include "insn-attr.h" +#include "flags.h" +#include "recog.h" +#include "expr.h" +#include "obstack.h" +#include "except.h" +#include "function.h" +#include "ggc.h" +#include "basic-block.h" + +/* This is used for communication between ASM_OUTPUT_LABEL and + ASM_OUTPUT_LABELREF. */ +int ia64_asm_output_label = 0; + +/* Define the information needed to generate branch and scc insns. This is + stored from the compare operation. */ +struct rtx_def * ia64_compare_op0; +struct rtx_def * ia64_compare_op1; + +/* Register number where ar.pfs was saved in the prologue, or zero + if it was not saved. */ + +int ia64_arpfs_regno; + +/* Register number where rp was saved in the prologue, or zero if it was + not saved. */ + +int ia64_rp_regno; + +/* Register number where frame pointer was saved in the prologue, or zero + if it was not saved. */ + +int ia64_fp_regno; + +/* Number of input and local registers used. This is needed for the .regstk + directive, and also for debugging info. */ + +int ia64_input_regs; +int ia64_local_regs; + +/* If true, then we must emit a .regstk directive. */ + +int ia64_need_regstk; + +/* Register names for ia64_expand_prologue. */ +char *ia64_reg_numbers[96] = +{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", + "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", + "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", + "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63", + "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71", + "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79", + "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87", + "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95", + "r96", "r97", "r98", "r99", "r100","r101","r102","r103", + "r104","r105","r106","r107","r108","r109","r110","r111", + "r112","r113","r114","r115","r116","r117","r118","r119", + "r120","r121","r122","r123","r124","r125","r126","r127"}; + +/* ??? These strings could be shared with REGISTER_NAMES. */ +char *ia64_input_reg_names[8] = +{ "in0", "in1", "in2", "in3", "in4", "in5", "in6", "in7" }; + +/* ??? These strings could be shared with REGISTER_NAMES. 
*/ +char *ia64_local_reg_names[80] = +{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7", + "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15", + "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23", + "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31", + "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39", + "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47", + "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55", + "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63", + "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71", + "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" }; + +/* ??? These strings could be shared with REGISTER_NAMES. */ +char *ia64_output_reg_names[8] = +{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" }; + +/* String used with the -mfixed-range= option. */ +const char *ia64_fixed_range_string; + +/* Variables which are this size or smaller are put in the sdata/sbss + sections. */ + +int ia64_section_threshold; + +/* Return 1 if OP is a valid operand for the MEM of a CALL insn. */ + +int +call_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (mode != GET_MODE (op)) + return 0; + + return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG + || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG)); +} + +/* Return 1 if OP refers to a symbol in the sdata section. */ + +int +sdata_symbolic_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + switch (GET_CODE (op)) + { + case SYMBOL_REF: + return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR; + + case CONST: + return (GET_CODE (XEXP (op, 0)) == PLUS + && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF + && XSTR (XEXP (XEXP (op, 0), 0), 0)[0] == SDATA_NAME_FLAG_CHAR); + default: + break; + } + + return 0; +} + +/* Return 1 if OP refers to a symbol. */ + +int +symbolic_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + switch (GET_CODE (op)) + { + case CONST: + case SYMBOL_REF: + case LABEL_REF: + return 1; + + default: + break; + } + return 0; +} + +/* Return 1 if OP refers to a function. */ + +int +function_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op)) + return 1; + else + return 0; +} + +/* Return 1 if OP is setjmp or a similar function. */ + +/* ??? This is an unsatisfying solution. Should rethink. */ + +int +setjmp_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + char *name; + int retval = 0; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + name = XSTR (op, 0); + + /* The following code is borrowed from special_function_p in calls.c. */ + + /* Disregard prefix _, __ or __x. */ + if (name[0] == '_') + { + if (name[1] == '_' && name[2] == 'x') + name += 3; + else if (name[1] == '_') + name += 2; + else + name += 1; + } + + if (name[0] == 's') + { + retval + = ((name[1] == 'e' + && (! strcmp (name, "setjmp") + || ! strcmp (name, "setjmp_syscall"))) + || (name[1] == 'i' + && ! strcmp (name, "sigsetjmp")) + || (name[1] == 'a' + && ! strcmp (name, "savectx"))); + } + else if ((name[0] == 'q' && name[1] == 's' + && ! strcmp (name, "qsetjmp")) + || (name[0] == 'v' && name[1] == 'f' + && ! strcmp (name, "vfork"))) + retval = 1; + + return retval; +} + +/* Return 1 if OP is a general operand, but when pic exclude symbolic + operands. */ + +/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and LABEL_REF + from PREDICATE_CODES. 
*/ + +int +move_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + if (! TARGET_NO_PIC && symbolic_operand (op, mode)) + return 0; + + return general_operand (op, mode); +} + +/* Return 1 if OP is a register operand, or zero. */ + +int +reg_or_0_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (op == const0_rtx || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or a 6 bit immediate operand. */ + +int +reg_or_6bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or an 8 bit immediate operand. */ + +int +reg_or_8bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate + operand. */ + +int +reg_or_8bit_adjusted_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or is valid for both an 8 bit + immediate and an 8 bit adjusted immediate operand. This is necessary + because when we emit a compare, we don't know what the condition will be, + so we need the union of the immediates accepted by GT and LT. */ + +int +reg_or_8bit_and_adjusted_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)) + && CONST_OK_FOR_L (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or a 14 bit immediate operand. */ + +int +reg_or_14bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a register operand, or a 22 bit immediate operand. */ + +int +reg_or_22bit_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if OP is a 6 bit immediate operand. */ + +int +shift_count_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op))) + || GET_CODE (op) == CONSTANT_P_RTX); +} + +/* Return 1 if OP is a 5 bit immediate operand. */ + +int +shift_32bit_count_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_INT + && (INTVAL (op) >= 0 && INTVAL (op) < 32)) + || GET_CODE (op) == CONSTANT_P_RTX); +} + +/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand. */ + +int +shladd_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) == 2 || INTVAL (op) == 4 + || INTVAL (op) == 8 || INTVAL (op) == 16)); +} + +/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand. 
*/ + +int +fetchadd_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return (GET_CODE (op) == CONST_INT + && (INTVAL (op) == -16 || INTVAL (op) == -8 || + INTVAL (op) == -4 || INTVAL (op) == -1 || + INTVAL (op) == 1 || INTVAL (op) == 4 || + INTVAL (op) == 8 || INTVAL (op) == 16)); +} + +/* Return 1 if OP is a floating-point constant zero, one, or a register. */ + +int +reg_or_fp01_operand (op, mode) + rtx op; + enum machine_mode mode; +{ + return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op)) + || GET_CODE (op) == CONSTANT_P_RTX + || register_operand (op, mode)); +} + +/* Return 1 if this is a comparison operator, which accepts an normal 8-bit + signed immediate operand. */ + +int +normal_comparison_operator (op, mode) + register rtx op; + enum machine_mode mode; +{ + enum rtx_code code = GET_CODE (op); + return ((mode == VOIDmode || GET_MODE (op) == mode) + && (code == EQ || code == NE + || code == GT || code == LE || code == GTU || code == LEU)); +} + +/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit + signed immediate operand. */ + +int +adjusted_comparison_operator (op, mode) + register rtx op; + enum machine_mode mode; +{ + enum rtx_code code = GET_CODE (op); + return ((mode == VOIDmode || GET_MODE (op) == mode) + && (code == LT || code == GE || code == LTU || code == GEU)); +} + +/* Return 1 if OP is a call returning an HFA. It is known to be a PARALLEL + and the first section has already been tested. */ + +int +call_multiple_values_operation (op, mode) + rtx op; + enum machine_mode mode ATTRIBUTE_UNUSED; +{ + int count = XVECLEN (op, 0) - 2; + int i; + int dest_regno; + + /* Perform a quick check so we don't block up below. */ + if (count <= 1 + || GET_CODE (XVECEXP (op, 0, 0)) != SET + || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG + || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != CALL) + return 0; + + dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0))); + + for (i = 1; i < count; i++) + { + rtx elt = XVECEXP (op, 0, i + 2); + + if (GET_CODE (elt) != SET + || GET_CODE (SET_SRC (elt)) != CALL + || GET_CODE (SET_DEST (elt)) != REG + || REGNO (SET_DEST (elt)) != dest_regno + i) + return 0; + } + + return 1; +} + + +/* Structure to be filled in by ia64_compute_frame_size with register + save masks and offsets for the current function. */ + +struct ia64_frame_info +{ + long total_size; /* # bytes that the entire frame takes up. */ + long var_size; /* # bytes that variables take up. */ + long args_size; /* # bytes that outgoing arguments take up. */ + long pretend_size; /* # bytes that stdarg arguments take up. */ + long pretend_pad_size; /* # bytes padding to align stdarg args. */ + long extra_size; /* # bytes of extra gunk. */ + long gr_size; /* # bytes needed to store general regs. */ + long fr_size; /* # bytes needed to store FP regs. */ + long fr_pad_size; /* # bytes needed to align FP save area. */ + long pr_size; /* # bytes needed to store predicate regs. */ + long br_size; /* # bytes needed to store branch regs. */ + HARD_REG_SET mask; /* mask of saved registers. */ + int initialized; /* != 0 is frame size already calculated. */ +}; + +/* Current frame information calculated by compute_frame_size. */ +struct ia64_frame_info current_frame_info; + +/* Helper function for INITIAL_ELIMINATION_OFFSET. Return the offset from the + frame pointer where b0 is saved. 
*/ + +int +ia64_rap_fp_offset () +{ + return - current_frame_info.br_size; +} + +/* Returns the number of bytes offset between the frame pointer and the stack + pointer for the current function. SIZE is the number of bytes of space + needed for local variables. */ +unsigned int +ia64_compute_frame_size (size) + int size; +{ + int total_size; + int extra_size; + int gr_size = 0; + int fr_size = 0; + int fr_pad_size = 0; + int pr_size = 0; + int br_size = 0; + int pretend_pad_size = 0; + int tmp; + int regno; + HARD_REG_SET mask; + + CLEAR_HARD_REG_SET (mask); + + /* Calculate space needed for general registers. */ + /* We never need to save any of the stacked registers, which are regs + 32 to 127. */ + for (regno = GR_REG (0); regno <= GR_REG (31); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + gr_size += 8; + } + + /* Allocate space to save/restore the unat from. */ + if (gr_size != 0 + || current_function_varargs || current_function_stdarg) + gr_size += 8; + + /* Calculate space needed for FP registers. */ + for (regno = FR_REG (0); regno <= FR_REG (127); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + fr_size += 16; + } + + /* Calculate space needed for predicate registers. */ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + pr_size = 8; + } + + /* Calculate space needed for branch registers. */ + for (regno = BR_REG (0); regno <= BR_REG (7); regno++) + if (regs_ever_live[regno] && ! call_used_regs[regno]) + { + SET_HARD_REG_BIT (mask, regno); + br_size += 8; + } + + /* The FR save area needs to be 16-byte aligned. */ + if (fr_size) + { + tmp = (size + fr_size + pr_size + br_size); + fr_pad_size = IA64_STACK_ALIGN (tmp) - tmp; + } + else + fr_pad_size = 0; + + /* If we have an odd number of words of pretend arguments written to the + stack, then the FR save area will be unaligned. We pad below this area + to keep things 16 byte aligned. This needs to be kept distinct, to + avoid confusing it with padding added below the GR save area, which does + not affect the FR area alignment. */ + pretend_pad_size = current_function_pretend_args_size % 16; + + /* The 16 bytes is for the scratch area. */ + tmp = (size + gr_size + fr_pad_size + fr_size + pr_size + br_size + + current_function_outgoing_args_size + 16); + tmp += (current_function_pretend_args_size + ? current_function_pretend_args_size - 16 + : 0) + pretend_pad_size; + total_size = IA64_STACK_ALIGN (tmp); + extra_size = total_size - tmp + 16; + + /* If this is a leaf routine (BR_REG (0) is not live), and if there is no + stack space needed for register saves, then don't allocate the 16 byte + scratch area. */ + if (total_size == 16 && ! regs_ever_live[BR_REG (0)]) + { + total_size = 0; + extra_size = 0; + } + + current_frame_info.total_size = total_size; + current_frame_info.var_size = size; + current_frame_info.args_size = current_function_outgoing_args_size; + current_frame_info.pretend_size + = (current_function_pretend_args_size + ? 
current_function_pretend_args_size - 16 + : 0); + current_frame_info.pretend_pad_size = pretend_pad_size; + current_frame_info.extra_size = extra_size; + current_frame_info.gr_size = gr_size; + current_frame_info.fr_size = fr_size; + current_frame_info.fr_pad_size = fr_pad_size; + current_frame_info.pr_size = pr_size; + current_frame_info.br_size = br_size; + COPY_HARD_REG_SET (current_frame_info.mask, mask); + current_frame_info.initialized = reload_completed; + + return total_size; +} + +void +save_restore_insns (save_p) + int save_p; +{ + rtx insn; + + if (current_frame_info.gr_size + current_frame_info.fr_size + + current_frame_info.br_size + current_frame_info.pr_size) + { + rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (2)); + rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg); + rtx tmp2_reg = gen_rtx_REG (DImode, GR_REG (3)); + int offset = (current_frame_info.total_size + - (current_frame_info.gr_size + current_frame_info.fr_size + + current_frame_info.fr_pad_size + + current_frame_info.br_size + + current_frame_info.pr_size + + current_frame_info.var_size + + current_frame_info.pretend_size + + current_frame_info.pretend_pad_size)); + rtx offset_rtx; + int regno; + + /* If there is a frame pointer, then we use it instead of the stack + pointer, so that the stack pointer does not need to be valid when + the epilogue starts. See EXIT_IGNORE_STACK. */ + if (frame_pointer_needed) + offset = offset - current_frame_info.total_size; + + if (CONST_OK_FOR_I (offset)) + offset_rtx = GEN_INT (offset); + else + { + offset_rtx = tmp_reg; + insn = emit_insn (gen_movdi (tmp_reg, GEN_INT (offset))); + RTX_FRAME_RELATED_P (insn) = 1; + } + insn = emit_insn (gen_adddi3 (tmp_reg, + (frame_pointer_needed ? frame_pointer_rtx + : stack_pointer_rtx), + offset_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + + /* Must save/restore ar.unat if any GR is spilled/restored. */ + if (current_frame_info.gr_size != 0 + || current_function_varargs || current_function_stdarg) + { + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + if (save_p) + { + insn = emit_insn (gen_unat_spill (tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movdi (mem, tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + insn = emit_insn (gen_movdi (tmp2_reg, mem)); + RTX_FRAME_RELATED_P (insn) = 1; + /* The restore happens after the last ld8.fill instruction. */ + } + } + + for (regno = GR_REG (0); regno <= GR_REG (127); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + if (save_p) + insn = emit_insn (gen_gr_spill (mem, + gen_rtx_REG (DImode, regno))); + else + insn = emit_insn (gen_gr_restore (gen_rtx_REG (DImode, regno), + mem)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Now restore the unat register if necessary. */ + if ((current_frame_info.gr_size != 0 + || current_function_varargs || current_function_stdarg) + && ! save_p) + emit_insn (gen_unat_restore (tmp2_reg)); + + for (regno = FR_REG (0); regno <= FR_REG (127); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx mem = gen_rtx_MEM (XFmode, tmp_post_inc); + if (save_p) + insn = emit_insn (gen_fr_spill (mem, + gen_rtx_REG (XFmode, regno))); + else + insn = emit_insn (gen_fr_restore (gen_rtx_REG (XFmode, regno), + mem)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* If one is used, we save/restore all of them. 
*/ + for (regno = PR_REG (0); regno <= PR_REG (63); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + if (save_p) + { + insn = emit_insn (gen_pr_spill (tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movdi (mem, tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + insn = emit_insn (gen_movdi (tmp2_reg, mem)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_pr_restore (tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + break; + } + + for (regno = BR_REG (0); regno <= BR_REG (7); regno++) + if (TEST_HARD_REG_BIT (current_frame_info.mask, regno)) + { + rtx src, dest; + + if (save_p) + { + src = gen_rtx_REG (DImode, regno); + dest = gen_rtx_MEM (DImode, tmp_post_inc); + } + else + { + src = gen_rtx_MEM (DImode, tmp_post_inc); + dest = gen_rtx_REG (DImode, regno); + } + + insn = emit_insn (gen_movdi (tmp2_reg, src)); + RTX_FRAME_RELATED_P (insn) = 1; + insn = emit_insn (gen_movdi (dest, tmp2_reg)); + RTX_FRAME_RELATED_P (insn) = 1; + } + } +} + + +/* Called after register allocation to add any instructions needed for the + prologue. Using a prologue insn is favored compared to putting all of the + instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. + + Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1 + so that the debug info generation code can handle them properly. */ + +/* ??? Get inefficient code when the frame size is larger than can fit in an + adds instruction. */ + +/* ??? Add support for allocating temporaries from the output registers if + they do not need to live past call instructions. */ + +/* ??? If the function does not return, then we don't need to save the rp + and ar.pfs registers. */ + +/* ??? If this is a leaf function, then fp/rp/ar.pfs should be put in the + low 32 regs. */ + +/* ??? Should not reserve a local register for rp/ar.pfs. Should + instead check to see if any local registers are unused, and if so, + allocate them to rp/ar.pfs in that order. Not sure what to do about + fp, we may still need to reserve a local register for it. */ + +void +ia64_expand_prologue () +{ + rtx insn, offset; + int i, locals, inputs, outputs, rotates; + int frame_size = ia64_compute_frame_size (get_frame_size ()); + int leaf_function; + int epilogue_p; + edge e; + + /* ??? This seems like a leaf_function_p bug. It calls get_insns which + returns the first insn of the current sequence, not the first insn + of the function. We work around this by pushing to the topmost + sequence first. */ + push_topmost_sequence (); + leaf_function = leaf_function_p (); + pop_topmost_sequence (); + + /* ??? If there is no epilogue, then we don't need some prologue insns. We + need to avoid emitting the dead prologue insns, because flow will complain + about them. */ + if (optimize) + { + for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next) + if ((e->flags & EDGE_FAKE) == 0 + && (e->flags & EDGE_FALLTHRU) != 0) + break; + epilogue_p = (e != NULL); + } + else + epilogue_p = 1; + + /* Find the highest local register used. */ + /* We have only 80 local registers, because we reserve 8 for the inputs + and 8 for the outputs. */ + + for (i = LOC_REG (79); i >= LOC_REG (0); i--) + if (regs_ever_live[i]) + break; + locals = i - LOC_REG (0) + 1; + + /* Likewise for inputs. 
*/ + + for (i = IN_REG (7); i >= IN_REG (0); i--) + if (regs_ever_live[i]) + break; + inputs = i - IN_REG (0) + 1; + +#if 0 + /* If the function was declared with syscall_linkage, then we may need to + preserve all declared input registers, even if they weren't used. + Currently, syscall_linkage does not have this effect. */ + + if (lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + inputs = MAX (inputs, current_function_args_info.words); +#endif + + /* Likewise for outputs. */ + + for (i = OUT_REG (7); i >= OUT_REG (0); i--) + if (regs_ever_live[i]) + break; + outputs = i - OUT_REG (0) + 1; + + /* When -p profiling, we need one output register for the mcount argument. + Likwise for -a profiling for the bb_init_func argument. For -ax + profiling, we need two output registers for the two bb_init_trace_func + arguments. */ + if (profile_flag || profile_block_flag == 1) + outputs = MAX (outputs, 1); + else if (profile_block_flag == 2) + outputs = MAX (outputs, 2); + + /* Leaf functions should not use any output registers. */ + if (leaf_function && outputs != 0) + abort (); + + /* No rotating register support as yet. */ + + rotates = 0; + + /* Allocate two extra locals for saving/restoring rp and ar.pfs. Also + allocate one local for use as the frame pointer if frame_pointer_needed + is true. */ + locals += 2 + frame_pointer_needed; + + /* Save these values in global registers for debugging info. */ + ia64_input_regs = inputs; + ia64_local_regs = locals; + + /* Set the local, input, and output register names. We need to do this + for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in + half. If we use in/loc/out register names, then we get assembler errors + in crtn.S because there is no alloc insn or regstk directive in there. + We give in/loc/out names to unused registers, to make invalid uses of + them easy to spot. */ + if (! TARGET_REG_NAMES) + { + for (i = 0; i < 8; i++) + { + if (i < inputs) + reg_names[IN_REG (i)] = ia64_reg_numbers[i]; + else + reg_names[IN_REG (i)] = ia64_input_reg_names[i]; + } + for (i = 0; i < 80; i++) + { + if (i < locals) + reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i]; + else + reg_names[LOC_REG (i)] = ia64_local_reg_names[i]; + } + for (i = 0; i < 8; i++) + { + if (i < outputs) + reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i]; + else + reg_names[OUT_REG (i)] = ia64_output_reg_names[i]; + } + } + + /* Set the frame pointer register name now that it is known, and the + local register names are known. */ + if (frame_pointer_needed) + { + reg_names[FRAME_POINTER_REGNUM] + = reg_names[LOC_REG (locals - 3)]; + ia64_fp_regno = LOC_REG (inputs + locals - 3); + } + else + ia64_fp_regno = 0; + + /* We don't need an alloc instruction if this is a leaf function, and the + locals and outputs are both zero sized. Since we have already allocated + two locals for rp and ar.pfs, we check for two locals. */ + if (locals == 2 && outputs == 0 && leaf_function) + { + /* If there is no alloc, but there are input registers used, then we + need a .regstk directive. 
*/ + if (TARGET_REG_NAMES) + ia64_need_regstk = 1; + else + ia64_need_regstk = 0; + + ia64_arpfs_regno = 0; + ia64_rp_regno = 0; + } + else + { + ia64_need_regstk = 0; + + ia64_arpfs_regno = LOC_REG (locals - 1); + ia64_rp_regno = LOC_REG (locals - 2); + reg_names[RETURN_ADDRESS_REGNUM] = reg_names[ia64_rp_regno]; + + emit_insn (gen_alloc (gen_rtx_REG (DImode, ia64_arpfs_regno), + GEN_INT (inputs), GEN_INT (locals), + GEN_INT (outputs), GEN_INT (rotates))); + + /* ??? FIXME ??? We don't need to save BR_REG (0) if this is a leaf + function. We also don't need to allocate a local reg for it then. */ + /* ??? Likewise if there is no epilogue. */ + if (epilogue_p) + emit_move_insn (gen_rtx_REG (DImode, ia64_rp_regno), + gen_rtx_REG (DImode, BR_REG (0))); + } + + /* Set up frame pointer and stack pointer. */ + if (frame_pointer_needed) + { + insn = emit_insn (gen_movdi (hard_frame_pointer_rtx, stack_pointer_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + } + if (frame_size != 0) + { + if (CONST_OK_FOR_I (-frame_size)) + offset = GEN_INT (-frame_size); + else + { + offset = gen_rtx_REG (DImode, GR_REG (2)); + insn = emit_insn (gen_movdi (offset, GEN_INT (-frame_size))); + RTX_FRAME_RELATED_P (insn) = 1; + } + /* If there is a frame pointer, then we need to make the stack pointer + decrement depend on the frame pointer, so that the stack pointer + update won't be moved past fp-relative stores to the frame. */ + if (frame_pointer_needed) + insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx, + stack_pointer_rtx, + offset, + hard_frame_pointer_rtx)); + else + insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + offset)); + RTX_FRAME_RELATED_P (insn) = 1; + } + + /* Save registers to frame. */ + save_restore_insns (1); +} + +/* Called after register allocation to add any instructions needed for the + epilogue. Using a epilogue insn is favored compared to putting all of the + instructions in the FUNCTION_PROLOGUE macro, since it allows the scheduler + to intermix instructions with the saves of the caller saved registers. In + some cases, it might be necessary to emit a barrier instruction as the last + insn to prevent such scheduling. */ + +void +ia64_expand_epilogue () +{ + /* Restore registers from frame. */ + save_restore_insns (0); + + /* ??? The gen_epilogue_deallocate_stack call below does not work. This + is mainly because there is no fp+offset addressing mode, so most loads + from the frame do not actually use the frame pointer; they use a pseudo + computed from the frame pointer. The same problem exists with the + stack pointer when there is no frame pointer. I think this can be + fixed only by making the dependency analysis code in sched smarter, so + that it recognizes references to the frame, and makes succeeding stack + pointer updates anti-dependent on them. */ + emit_insn (gen_blockage ()); + + if (frame_pointer_needed) + { + /* If there is a frame pointer, then we need to make the stack pointer + restore depend on the frame pointer, so that the stack pointer + restore won't be moved up past fp-relative loads from the frame. 
*/ + emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx, + hard_frame_pointer_rtx)); + } + else + { + int frame_size = current_frame_info.total_size; + rtx offset; + + if (frame_size != 0) + { + if (CONST_OK_FOR_I (frame_size)) + offset = GEN_INT (frame_size); + else + { + offset = gen_rtx_REG (DImode, GR_REG (2)); + emit_insn (gen_movdi (offset, GEN_INT (frame_size))); + } + emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + offset)); + } + } + + if (ia64_arpfs_regno) + emit_insn (gen_pfs_restore (gen_rtx_REG (DImode, ia64_arpfs_regno))); + + if (ia64_rp_regno) + emit_move_insn (gen_rtx_REG (DImode, BR_REG (0)), + gen_rtx_REG (DImode, ia64_rp_regno)); + + emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0)))); +} + +/* Emit the function prologue. */ + +void +ia64_function_prologue (file, size) + FILE *file; + int size; +{ + if (ia64_need_regstk) + fprintf (file, "\t.regstk %d, 0, 0, 0\n", ia64_input_regs); + + /* ??? Emit .body directive. GNU as ignores .body currently. */ +} + +/* Emit the function epilogue. */ + +void +ia64_function_epilogue (file, size) + FILE *file; + int size; +{ +} + +/* Return 1 if br.ret can do all the work required to return from a + function. */ + +int +ia64_direct_return () +{ + return (reload_completed && ! frame_pointer_needed + && ia64_compute_frame_size (get_frame_size ())); +} + + +/* Do any needed setup for a variadic function. CUM has not been updated + for the last named argument which has type TYPE and mode MODE. */ +void +ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time) + CUMULATIVE_ARGS cum; + int int_mode; + tree type; + int * pretend_size; + int second_time; +{ + /* If this is a stdarg function, then don't save the current argument. */ + int offset = ! current_function_varargs; + + if (cum.words < MAX_ARGUMENT_SLOTS) + { + if (! second_time) + { + int i; + int first_reg = GR_ARG_FIRST + cum.words + offset; + rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (16)); + rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg); + rtx mem = gen_rtx_MEM (DImode, tmp_post_inc); + rtx insn; + + /* We must emit st8.spill insns instead of st8 because we might + be saving non-argument registers, and non-argument registers might + not contain valid values. */ + emit_move_insn (tmp_reg, virtual_incoming_args_rtx); + for (i = first_reg; i < GR_ARG_FIRST + 8; i++) + { + insn = emit_insn (gen_gr_spill (mem, gen_rtx_REG (DImode, i))); + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, tmp_reg, 0); + } + } + *pretend_size = ((MAX_ARGUMENT_SLOTS - cum.words - offset) + * UNITS_PER_WORD); + } +} + +/* Check whether TYPE is a homogeneous floating point aggregate. If + it is, return the mode of the floating point type that appears + in all leafs. If it is not, return VOIDmode. + + An aggregate is a homogeneous floating point aggregate is if all + fields/elements in it have the same floating point type (e.g, + SFmode). 128-bit quad-precision floats are excluded. 
*/ + +static enum machine_mode +hfa_element_mode (type, nested) + tree type; + int nested; +{ + enum machine_mode element_mode = VOIDmode; + enum machine_mode mode; + enum tree_code code = TREE_CODE (type); + int know_element_mode = 0; + tree t; + + switch (code) + { + case VOID_TYPE: case INTEGER_TYPE: case ENUMERAL_TYPE: + case BOOLEAN_TYPE: case CHAR_TYPE: case POINTER_TYPE: + case OFFSET_TYPE: case REFERENCE_TYPE: case METHOD_TYPE: + case FILE_TYPE: case SET_TYPE: case LANG_TYPE: + case FUNCTION_TYPE: + return VOIDmode; + + /* Fortran complex types are supposed to be HFAs, so we need to handle + gcc's COMPLEX_TYPEs as HFAs. We need to exclude the integral complex + types though. */ + case COMPLEX_TYPE: + if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT) + return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type)) + * BITS_PER_UNIT, MODE_FLOAT, 0); + else + return VOIDmode; + + case REAL_TYPE: + /* We want to return VOIDmode for raw REAL_TYPEs, but the actual + mode if this is contained within an aggregate. */ + if (nested) + return TYPE_MODE (type); + else + return VOIDmode; + + case ARRAY_TYPE: + return TYPE_MODE (TREE_TYPE (type)); + + case RECORD_TYPE: + case UNION_TYPE: + case QUAL_UNION_TYPE: + for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t)) + { + if (TREE_CODE (t) != FIELD_DECL) + continue; + + mode = hfa_element_mode (TREE_TYPE (t), 1); + if (know_element_mode) + { + if (mode != element_mode) + return VOIDmode; + } + else if (GET_MODE_CLASS (mode) != MODE_FLOAT) + return VOIDmode; + else + { + know_element_mode = 1; + element_mode = mode; + } + } + return element_mode; + + default: + /* If we reach here, we probably have some front-end specific type + that the backend doesn't know about. This can happen via the + aggregate_value_p call in init_function_start. All we can do is + ignore unknown tree types. */ + return VOIDmode; + } + + return VOIDmode; +} + +/* Return rtx for register where argument is passed, or zero if it is passed + on the stack. */ + +/* ??? 128-bit quad-precision floats are always passed in general + registers. */ + +rtx +ia64_function_arg (cum, mode, type, named, incoming) + CUMULATIVE_ARGS *cum; + enum machine_mode mode; + tree type; + int named; + int incoming; +{ + int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST); + int words = (((mode == BLKmode ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + int offset = 0; + enum machine_mode hfa_mode = VOIDmode; + + /* Arguments larger than 8 bytes start at the next even boundary. */ + if (words > 1 && (cum->words & 1)) + offset = 1; + + /* If all argument slots are used, then it must go on the stack. */ + if (cum->words + offset >= MAX_ARGUMENT_SLOTS) + return 0; + + /* Check for and handle homogeneous FP aggregates. */ + if (type) + hfa_mode = hfa_element_mode (type, 0); + + /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas + and unprototyped hfas are passed specially. */ + if (hfa_mode != VOIDmode && (! cum->prototype || named)) + { + rtx loc[16]; + int i = 0; + int fp_regs = cum->fp_regs; + int int_regs = cum->words + offset; + int hfa_size = GET_MODE_SIZE (hfa_mode); + int byte_size; + int args_byte_size; + + /* If prototyped, pass it in FR regs then GR regs. + If not prototyped, pass it in both FR and GR regs. + + If this is an SFmode aggregate, then it is possible to run out of + FR regs while GR regs are still left. In that case, we pass the + remaining part in the GR regs. */ + + /* Fill the FP regs. We do this always. 
We stop if we reach the end + of the argument, the last FP register, or the last argument slot. */ + + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); + args_byte_size = int_regs * UNITS_PER_WORD; + offset = 0; + for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS + && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (hfa_mode, (FR_ARG_FIRST + + fp_regs)), + GEN_INT (offset)); + /* ??? Padding for XFmode type? */ + offset += hfa_size; + args_byte_size += hfa_size; + fp_regs++; + } + + /* If no prototype, then the whole thing must go in GR regs. */ + if (! cum->prototype) + offset = 0; + /* If this is an SFmode aggregate, then we might have some left over + that needs to go in GR regs. */ + else if (byte_size != offset) + int_regs += offset / UNITS_PER_WORD; + + /* Fill in the GR regs. We must use DImode here, not the hfa mode. */ + + for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++) + { + enum machine_mode gr_mode = DImode; + + /* If we have an odd 4 byte hunk because we ran out of FR regs, + then this goes in a GR reg left adjusted/little endian, right + adjusted/big endian. */ + /* ??? Currently this is handled wrong, because 4-byte hunks are + always right adjusted/little endian. */ + if (offset & 0x4) + gr_mode = SImode; + /* If we have an even 4 byte hunk because the aggregate is a + multiple of 4 bytes in size, then this goes in a GR reg right + adjusted/little endian. */ + else if (byte_size - offset == 4) + gr_mode = SImode; + + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (gr_mode, (basereg + + int_regs)), + GEN_INT (offset)); + offset += GET_MODE_SIZE (gr_mode); + int_regs++; + } + + /* If we ended up using just one location, just return that one loc. */ + if (i == 1) + return XEXP (loc[0], 0); + else + return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); + } + + /* Integral and aggregates go in general registers. If we have run out of + FR registers, then FP values must also go in general registers. This can + happen when we have a SFmode HFA. */ + else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS) + return gen_rtx_REG (mode, basereg + cum->words + offset); + + /* If there is a prototype, then FP values go in a FR register when + named, and in a GR registeer when unnamed. */ + else if (cum->prototype) + { + if (! named) + return gen_rtx_REG (mode, basereg + cum->words + offset); + else + return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs); + } + /* If there is no prototype, then FP values go in both FR and GR + registers. */ + else + { + rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, (FR_ARG_FIRST + + cum->fp_regs)), + const0_rtx); + rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (mode, + (basereg + cum->words + + offset)), + const0_rtx); + + return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg)); + } +} + +/* Return number of words, at the beginning of the argument, that must be + put in registers. 0 is the argument is entirely in registers or entirely + in memory. */ + +int +ia64_function_arg_partial_nregs (cum, mode, type, named) + CUMULATIVE_ARGS *cum; + enum machine_mode mode; + tree type; + int named; +{ + int words = (((mode == BLKmode ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + int offset = 0; + + /* Arguments larger than 8 bytes start at the next even boundary. 
*/ + if (words > 1 && (cum->words & 1)) + offset = 1; + + /* If all argument slots are used, then it must go on the stack. */ + if (cum->words + offset >= MAX_ARGUMENT_SLOTS) + return 0; + + /* It doesn't matter whether the argument goes in FR or GR regs. If + it fits within the 8 argument slots, then it goes entirely in + registers. If it extends past the last argument slot, then the rest + goes on the stack. */ + + if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS) + return 0; + + return MAX_ARGUMENT_SLOTS - cum->words - offset; +} + +/* Update CUM to point after this argument. This is patterned after + ia64_function_arg. */ + +void +ia64_function_arg_advance (cum, mode, type, named) + CUMULATIVE_ARGS *cum; + enum machine_mode mode; + tree type; + int named; +{ + int words = (((mode == BLKmode ? int_size_in_bytes (type) + : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1) + / UNITS_PER_WORD); + int offset = 0; + enum machine_mode hfa_mode = VOIDmode; + + /* If all arg slots are already full, then there is nothing to do. */ + if (cum->words >= MAX_ARGUMENT_SLOTS) + return; + + /* Arguments larger than 8 bytes start at the next even boundary. */ + if (words > 1 && (cum->words & 1)) + offset = 1; + + cum->words += words + offset; + + /* Check for and handle homogeneous FP aggregates. */ + if (type) + hfa_mode = hfa_element_mode (type, 0); + + /* Unnamed prototyped hfas are passed as usual. Named prototyped hfas + and unprototyped hfas are passed specially. */ + if (hfa_mode != VOIDmode && (! cum->prototype || named)) + { + int fp_regs = cum->fp_regs; + /* This is the original value of cum->words + offset. */ + int int_regs = cum->words - words; + int hfa_size = GET_MODE_SIZE (hfa_mode); + int byte_size; + int args_byte_size; + + /* If prototyped, pass it in FR regs then GR regs. + If not prototyped, pass it in both FR and GR regs. + + If this is an SFmode aggregate, then it is possible to run out of + FR regs while GR regs are still left. In that case, we pass the + remaining part in the GR regs. */ + + /* Fill the FP regs. We do this always. We stop if we reach the end + of the argument, the last FP register, or the last argument slot. */ + + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); + args_byte_size = int_regs * UNITS_PER_WORD; + offset = 0; + for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS + && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));) + { + /* ??? Padding for XFmode type? */ + offset += hfa_size; + args_byte_size += hfa_size; + fp_regs++; + } + + cum->fp_regs = fp_regs; + } + + /* Integral and aggregates go in general registers. If we have run out of + FR registers, then FP values must also go in general registers. This can + happen when we have a SFmode HFA. */ + else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS) + return; + + /* If there is a prototype, then FP values go in a FR register when + named, and in a GR registeer when unnamed. */ + else if (cum->prototype) + { + if (! named) + return; + else + /* ??? Complex types should not reach here. */ + cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + } + /* If there is no prototype, then FP values go in both FR and GR + registers. */ + else + /* ??? Complex types should not reach here. */ + cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1); + + return; +} + +/* Implement va_start. 
*/ + +void +ia64_va_start (stdarg_p, valist, nextarg) + int stdarg_p; + tree valist; + rtx nextarg; +{ + int arg_words; + int ofs; + + arg_words = current_function_args_info.words; + + if (stdarg_p) + ofs = 0; + else + ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0); + + nextarg = plus_constant (nextarg, ofs); + std_expand_builtin_va_start (1, valist, nextarg); +} + +/* Implement va_arg. */ + +rtx +ia64_va_arg (valist, type) + tree valist, type; +{ + HOST_WIDE_INT size; + tree t; + + /* Arguments larger than 8 bytes are 16 byte aligned. */ + size = int_size_in_bytes (type); + if (size > UNITS_PER_WORD) + { + t = build (PLUS_EXPR, TREE_TYPE (valist), valist, + build_int_2 (2 * UNITS_PER_WORD - 1, 0)); + t = build (BIT_AND_EXPR, TREE_TYPE (t), t, + build_int_2 (-2 * UNITS_PER_WORD, -1)); + t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + } + + return std_expand_builtin_va_arg (valist, type); +} + +/* Return 1 if function return value returned in memory. Return 0 if it is + in a register. */ + +int +ia64_return_in_memory (valtype) + tree valtype; +{ + enum machine_mode mode; + enum machine_mode hfa_mode; + int byte_size; + + mode = TYPE_MODE (valtype); + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); + + /* Hfa's with up to 8 elements are returned in the FP argument registers. */ + + hfa_mode = hfa_element_mode (valtype, 0); + if (hfa_mode != VOIDmode) + { + int hfa_size = GET_MODE_SIZE (hfa_mode); + + /* ??? Padding for XFmode type? */ + if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS) + return 1; + else + return 0; + } + + else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS) + return 1; + else + return 0; +} + +/* Return rtx for register that holds the function return value. */ + +rtx +ia64_function_value (valtype, func) + tree valtype; + tree func; +{ + enum machine_mode mode; + enum machine_mode hfa_mode; + + mode = TYPE_MODE (valtype); + hfa_mode = hfa_element_mode (valtype, 0); + + if (hfa_mode != VOIDmode) + { + rtx loc[8]; + int i; + int hfa_size; + int byte_size; + int offset; + + hfa_size = GET_MODE_SIZE (hfa_mode); + byte_size = ((mode == BLKmode) + ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode)); + offset = 0; + for (i = 0; offset < byte_size; i++) + { + loc[i] = gen_rtx_EXPR_LIST (VOIDmode, + gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i), + GEN_INT (offset)); + /* ??? Padding for XFmode type? */ + offset += hfa_size; + } + + if (i == 1) + return XEXP (loc[0], 0); + else + return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc)); + } + else if (FLOAT_TYPE_P (valtype)) + return gen_rtx_REG (mode, FR_ARG_FIRST); + else + return gen_rtx_REG (mode, GR_RET_FIRST); +} + +/* Print a memory address as an operand to reference that memory location. */ + +/* ??? Do we need this? It gets used only for 'a' operands. We could perhaps + also call this from ia64_print_operand for memory addresses. */ + +void +ia64_print_operand_address (stream, address) + FILE * stream; + rtx address; +{ +} + +/* Print an operand to a assembler instruction. + B Work arounds for hardware bugs. + C Swap and print a comparison operator. + D Print an FP comparison operator. + E Print 32 - constant, for SImode shifts as extract. + F A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or + a floating point register emitted normally. + I Invert a predicate register by adding 1. + O Append .acq for volatile load. + P Postincrement of a MEM. 
+ Q Append .rel for volatile store. + S Shift amount for shladd instruction. + T Print an 8-bit sign extended number (K) as a 32-bit unsigned number + for Intel assembler. + U Print an 8-bit sign extended number (K) as a 64-bit unsigned number + for Intel assembler. + r Print register name, or constant 0 as r0. HP compatibility for + Linux kernel. */ +void +ia64_print_operand (file, x, code) + FILE * file; + rtx x; + int code; +{ + switch (code) + { + /* XXX Add other codes here. */ + + case 0: + /* Handled below. */ + break; + + case 'B': + if (TARGET_A_STEP) + fputs (" ;; nop 0 ;; nop 0 ;;", file); + return; + + case 'C': + { + enum rtx_code c = swap_condition (GET_CODE (x)); + fputs (GET_RTX_NAME (c), file); + return; + } + + case 'D': + fputs (GET_CODE (x) == NE ? "neq" : GET_RTX_NAME (GET_CODE (x)), file); + return; + + case 'E': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x)); + return; + + case 'F': + if (x == CONST0_RTX (GET_MODE (x))) + fputs (reg_names [FR_REG (0)], file); + else if (x == CONST1_RTX (GET_MODE (x))) + fputs (reg_names [FR_REG (1)], file); + else if (GET_CODE (x) == REG) + fputs (reg_names [REGNO (x)], file); + else + abort (); + return; + + case 'I': + fputs (reg_names [REGNO (x) + 1], file); + return; + + case 'O': + if (MEM_VOLATILE_P (x)) + fputs(".acq", file); + return; + + case 'P': + { + int value; + + if (GET_CODE (XEXP (x, 0)) != POST_INC + && GET_CODE (XEXP (x, 0)) != POST_DEC) + return; + + fputs (", ", file); + + value = GET_MODE_SIZE (GET_MODE (x)); + + /* ??? This is for ldf.fill and stf.spill which use XFmode, but which + actually need 16 bytes increments. Perhaps we can change them + to use TFmode instead. Or don't use POST_DEC/POST_INC for them. + Currently, there are no other uses of XFmode, so hacking it here + is no problem. */ + if (value == 12) + value = 16; + + if (GET_CODE (XEXP (x, 0)) == POST_DEC) + value = -value; + + fprintf (file, "%d", value); + return; + } + + case 'Q': + if (MEM_VOLATILE_P (x)) + fputs(".rel", file); + return; + + case 'S': + fprintf (file, HOST_WIDE_INT_PRINT_DEC, exact_log2 (INTVAL (x))); + return; + + case 'T': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + fprintf (file, "0x%x", INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'U': + if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT) + { + char *prefix = "0x"; + if (INTVAL (x) & 0x80000000) + { + fprintf (file, "0xffffffff"); + prefix = ""; + } + fprintf (file, "%s%x", prefix, INTVAL (x) & 0xffffffff); + return; + } + break; + + case 'r': + /* If this operand is the constant zero, write it as zero. */ + if (GET_CODE (x) == REG) + fputs (reg_names[REGNO (x)], file); + else if (x == CONST0_RTX (GET_MODE (x))) + fputs ("r0", file); + else + output_operand_lossage ("invalid %%r value"); + return; + + default: + output_operand_lossage ("ia64_print_operand: unknown code"); + return; + } + + switch (GET_CODE (x)) + { + /* This happens for the spill/restore instructions. */ + case POST_INC: + x = XEXP (x, 0); + /* ... fall through ... */ + + case REG: + fputs (reg_names [REGNO (x)], file); + break; + + case MEM: + { + rtx addr = XEXP (x, 0); + if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) + addr = XEXP (addr, 0); + fprintf (file, "[%s]", reg_names [REGNO (addr)]); + break; + } + + default: + output_addr_const (file, x); + break; + } + + return; +} + + + +/* This function returns the register class required for a secondary + register when copying between one of the registers in CLASS, and X, + using MODE. 
A return value of NO_REGS means that no secondary register + is required. */ + +enum reg_class +ia64_secondary_reload_class (class, mode, x) + enum reg_class class; + enum machine_mode mode; + rtx x; +{ + int regno = -1; + + if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG) + regno = true_regnum (x); + + /* ??? This is required because of a bad gcse/cse/global interaction. + We end up with two pseudos with overlapping lifetimes both of which are + equiv to the same constant, and both which need to be in BR_REGS. This + results in a BR_REGS to BR_REGS copy which doesn't exist. To reproduce, + return NO_REGS here, and compile divdi3 in libgcc2.c. This seems to be + a cse bug. cse_basic_block_end changes depending on the path length, + which means the qty_first_reg check in make_regs_eqv can give different + answers at different times. */ + /* ??? At some point I'll probably need a reload_indi pattern to handle + this. */ + if (class == BR_REGS && BR_REGNO_P (regno)) + return GR_REGS; + + /* This is needed if a pseudo used as a call_operand gets spilled to a + stack slot. */ + if (class == BR_REGS && GET_CODE (x) == MEM) + return GR_REGS; + + /* This can happen when a paradoxical subreg is an operand to the muldi3 + pattern. */ + /* ??? This shouldn't be necessary after instruction scheduling is enabled, + because paradoxical subregs are not accepted by register_operand when + INSN_SCHEDULING is defined. Or alternatively, stop the paradoxical subreg + stupidity in the *_operand functions in recog.c. */ + if ((class == FR_REGS || class == FR_INT_REGS || class == FR_FP_REGS) + && GET_CODE (x) == MEM + && (GET_MODE (x) == SImode || GET_MODE (x) == HImode + || GET_MODE (x) == QImode)) + return GR_REGS; + + /* This can happen because of the ior/and/etc patterns that accept FP + registers as operands. If the third operand is a constant, then it + needs to be reloaded into a FP register. */ + if ((class == FR_REGS || class == FR_INT_REGS || class == FR_FP_REGS) + && GET_CODE (x) == CONST_INT) + return GR_REGS; + + /* Moving a integer from an FP register to memory requires a general register + as an intermediary. This is not necessary if we are moving a DImode + subreg of a DFmode value from an FP register to memory, since stfd will + do the right thing in this case. */ + if (class == FR_INT_REGS && GET_CODE (x) == MEM && GET_MODE (x) == DImode) + return GR_REGS; + + /* ??? This happens if we cse/gcse a CCmode value across a call, and the + function has a nonlocal goto. This is because global does not allocate + call crossing pseudos to hard registers when current_function_has_ + nonlocal_goto is true. This is relatively common for C++ programs that + use exceptions. To reproduce, return NO_REGS and compile libstdc++. */ + if (class == PR_REGS && GET_CODE (x) == MEM) + return GR_REGS; + + return NO_REGS; +} + + +/* Emit text to declare externally defined variables and functions, because + the Intel assembler does not support undefined externals. */ + +void +ia64_asm_output_external (file, decl, name) + FILE *file; + tree decl; + char *name; +{ + int save_referenced; + + /* GNU as does not need anything here. */ + if (TARGET_GNU_AS) + return; + + /* ??? The Intel assembler creates a reference that needs to be satisfied by + the linker when we do this, so we need to be careful not to do this for + builtin functions which have no library equivalent. 
Unfortunately, we + can't tell here whether or not a function will actually be called by + expand_expr, so we pull in library functions even if we may not need + them later. */ + if (! strcmp (name, "__builtin_next_arg") + || ! strcmp (name, "alloca") + || ! strcmp (name, "__builtin_constant_p") + || ! strcmp (name, "__builtin_args_info")) + return; + + /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and + restore it. */ + save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)); + if (TREE_CODE (decl) == FUNCTION_DECL) + { + fprintf (file, "\t%s\t ", TYPE_ASM_OP); + assemble_name (file, name); + putc (',', file); + fprintf (file, TYPE_OPERAND_FMT, "function"); + putc ('\n', file); + } + ASM_GLOBALIZE_LABEL (file, name); + TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced; +} + +/* Parse the -mfixed-range= option string. */ + +static void +fix_range (str) + char *str; +{ + int i, first, last; + char *dash, *comma; + + /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and + REG2 are either register names or register numbers. The effect + of this option is to mark the registers in the range from REG1 to + REG2 as ``fixed'' so they won't be used by the compiler. This is + used, e.g., to ensure that kernel mode code doesn't use f32-f127. */ + + while (1) + { + dash = strchr (str, '-'); + if (!dash) + { + warning ("value of -mfixed-range must have form REG1-REG2"); + return; + } + *dash = '\0'; + + comma = strchr (dash + 1, ','); + if (comma) + *comma = '\0'; + + first = decode_reg_name (str); + if (first < 0) + { + warning ("unknown register name: %s", str); + return; + } + + last = decode_reg_name (dash + 1); + if (last < 0) + { + warning ("unknown register name: %s", dash + 1); + return; + } + + *dash = '-'; + + if (first > last) + { + warning ("%s-%s is an empty range", str, dash + 1); + return; + } + + for (i = first; i <= last; ++i) + fixed_regs[i] = call_used_regs[i] = 1; + + if (!comma) + break; + + *comma = ','; + str = comma + 1; + } +} + +/* Called to register all of our global variables with the garbage + collector. */ + +static void +ia64_add_gc_roots () +{ + ggc_add_rtx_root (&ia64_compare_op0, 1); + ggc_add_rtx_root (&ia64_compare_op1, 1); +} + +/* Handle TARGET_OPTIONS switches. */ + +void +ia64_override_options () +{ + if (ia64_fixed_range_string) + fix_range (ia64_fixed_range_string); + + ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE; + + ia64_add_gc_roots (); +} + +/* The following collection of routines emit instruction group stop bits as + necessary to avoid dependencies. */ + +/* Need to track some additional registers as far as serialization is + concerned so we can properly handle br.call and br.ret. We could + make these registers visible to gcc, but since these registers are + never explicitly used in gcc generated code, it seems wasteful to + do so (plus it would make the call and return patterns needlessly + complex). */ +#define REG_GP (GR_REG (1)) +#define REG_RP (BR_REG (0)) +#define REG_AR_PFS (FIRST_PSEUDO_REGISTER) +#define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) +/* ??? This will eventually need to be a hard register. */ +#define REG_AR_EC (FIRST_PSEUDO_REGISTER + 2) +/* This is used for volatile asms which may require a stop bit immediately + before and after them. 
*/ +#define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 3) +#define NUM_REGS (FIRST_PSEUDO_REGISTER + 4) + +/* For each register, we keep track of how many times it has been + written in the current instruction group. If a register is written + unconditionally (no qualifying predicate), WRITE_COUNT is set to 2 + and FIRST_PRED is ignored. If a register is written if its + qualifying predicate P is true, we set WRITE_COUNT to 1 and + FIRST_PRED to P. Later on, the same register may be written again + by the complement of P (P+1 if P is even, P-1 otherwise) and when + this happens, WRITE_COUNT gets set to 2. The result of this is + that whenever an insn attempts to write a register whose + WRITE_COUNT is two, we need to issue an insn group barrier first. */ +struct reg_write_state +{ + char write_count; + char written_by_fp; /* Was register written by a floating-point insn? */ + short first_pred; /* 0 means ``no predicate'' */ +}; + +/* Cumulative info for the current instruction group. */ +struct reg_write_state rws_sum[NUM_REGS]; +/* Info for the current instruction. This gets copied to rws_sum after a + stop bit is emitted. */ +struct reg_write_state rws_insn[NUM_REGS]; + +/* Misc flags needed to compute RAW/WAW dependencies while we are traversing + RTL for one instruction. */ +struct reg_flags +{ + unsigned int is_write : 1; /* Is register being written? */ + unsigned int is_fp : 1; /* Is register used as part of an fp op? */ + unsigned int is_branch : 1; /* Is register used as part of a branch? */ +}; + +/* Update *RWS for REGNO, which is being written by the current instruction, + with predicate PRED, and associated register flags in FLAGS. */ + +static void +rws_update (rws, regno, flags, pred) + struct reg_write_state *rws; + int regno; + struct reg_flags flags; + int pred; +{ + rws[regno].write_count += pred ? 1 : 2; + rws[regno].written_by_fp |= flags.is_fp; + rws[regno].first_pred = pred; +} + +/* Handle an access to register REGNO of type FLAGS using predicate register + PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates + a dependency with an earlier instruction in the same group. */ + +static int +rws_access_reg (regno, flags, pred) + int regno; + struct reg_flags flags; + int pred; +{ + int need_barrier = 0; + int is_predicate_reg; + + if (regno >= NUM_REGS) + abort (); + + if (flags.is_write) + { + /* One insn writes same reg multiple times? */ + if (rws_insn[regno].write_count > 0) + abort (); + + /* Update info for current instruction. */ + rws_update (rws_insn, regno, flags, pred); + + /* ??? This is necessary because predicate regs require two hard + registers. However, this should be using HARD_REGNO_NREGS so that + it works for all multi-reg hard registers, instead of only for + predicate registers. */ + is_predicate_reg = REGNO_REG_CLASS (regno) == PR_REGS; + if (is_predicate_reg) + rws_update (rws_insn, regno + 1, flags, pred); + + switch (rws_sum[regno].write_count) + { + case 0: + /* The register has not been written yet. */ + rws_update (rws_sum, regno, flags, pred); + if (is_predicate_reg) + rws_update (rws_sum, regno + 1, flags, pred); + break; + + case 1: + /* The register has been written via a predicate. If this is + not a complementary predicate, then we need a barrier. */ + /* ??? This assumes that P and P+1 are always complementary + predicates for P even.
*/ + if ((rws_sum[regno].first_pred ^ 1) != pred) + need_barrier = 1; + rws_update (rws_sum, regno, flags, pred); + if (is_predicate_reg) + rws_update (rws_sum, regno + 1, flags, pred); + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + need_barrier = 1; + break; + + default: + abort (); + } + } + else + { + if (flags.is_branch) + { + /* Branches have several RAW exceptions that allow us to avoid + barriers. */ + + if (REGNO_REG_CLASS (regno) == BR_REGS || regno == REG_AR_PFS) + /* RAW dependencies on branch regs are permissible as long + as the writer is a non-branch instruction. Since we + never generate code that uses a branch register written + by a branch instruction, handling this case is + easy. */ + /* ??? This assumes that we don't emit br.cloop, br.cexit, br.ctop, + br.wexit, br.wtop. This is true currently. */ + return 0; + + if (REGNO_REG_CLASS (regno) == PR_REGS + && ! rws_sum[regno].written_by_fp) + /* The predicates of a branch are available within the + same insn group as long as the predicate was written by + something other than a floating-point instruction. */ + return 0; + } + + switch (rws_sum[regno].write_count) + { + case 0: + /* The register has not been written yet. */ + break; + + case 1: + /* The register has been written via a predicate. If this is + not a complementary predicate, then we need a barrier. */ + /* ??? This assumes that P and P+1 are always complementary + predicates for P even. */ + if ((rws_sum[regno].first_pred ^ 1) != pred) + need_barrier = 1; + break; + + case 2: + /* The register has been unconditionally written already. We + need a barrier. */ + need_barrier = 1; + break; + + default: + abort (); + } + } + + return need_barrier; +} + +/* Handle an access to rtx X of type FLAGS using predicate register PRED. + Return 1 if this access creates a dependency with an earlier instruction + in the same group. */ + +static int +rtx_needs_barrier (x, flags, pred) + rtx x; + struct reg_flags flags; + int pred; +{ + int i, j; + int is_complemented = 0; + int need_barrier = 0; + const char *format_ptr; + struct reg_flags new_flags; + rtx src, dst; + rtx cond = 0; + + if (! x) + return 0; + + new_flags = flags; + + switch (GET_CODE (x)) + { + case SET: + src = SET_SRC (x); + switch (GET_CODE (src)) + { + case CALL: + /* We don't need to worry about the result registers that + get written by subroutine call. */ + need_barrier = rtx_needs_barrier (src, flags, pred); + return need_barrier; + + case IF_THEN_ELSE: + if (SET_DEST (x) == pc_rtx) + { + /* X is a conditional branch. */ + /* ??? This seems redundant, as the caller sets this bit for + all JUMP_INSNs. */ + new_flags.is_branch = 1; + need_barrier = rtx_needs_barrier (src, new_flags, pred); + return need_barrier; + } + else + { + /* X is a conditional move. */ + cond = XEXP (src, 0); + if (GET_CODE (cond) == EQ) + is_complemented = 1; + cond = XEXP (cond, 0); + if (GET_CODE (cond) != REG + && REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) + abort (); + + if (XEXP (src, 1) == SET_DEST (x) + || XEXP (src, 2) == SET_DEST (x)) + { + /* X is a conditional move that conditionally writes the + destination. */ + + /* We need another complement in this case. */ + if (XEXP (src, 1) == SET_DEST (x)) + is_complemented = ! is_complemented; + + pred = REGNO (cond); + if (is_complemented) + ++pred; + } + + /* ??? If this is a conditional write to the dest, then this + instruction does not actually read one source.
This probably + doesn't matter, because that source is also the dest. */ + /* ??? Multiple writes to predicate registers are allowed + if they are all AND type compares, or if they are all OR + type compares. We do not generate such instructions + currently. */ + } + /* ... fall through ... */ + + default: + if (GET_RTX_CLASS (GET_CODE (src)) == '<' + && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT) + /* Set new_flags.is_fp to 1 so that we know we're dealing + with a floating point comparison when processing the + destination of the SET. */ + new_flags.is_fp = 1; + break; + } + need_barrier = rtx_needs_barrier (src, flags, pred); + /* This instruction unconditionally uses a predicate register. */ + if (cond) + need_barrier |= rws_access_reg (REGNO (cond), flags, 0); + + dst = SET_DEST (x); + if (GET_CODE (dst) == ZERO_EXTRACT) + { + need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); + dst = XEXP (dst, 0); + } + new_flags.is_write = 1; + need_barrier |= rtx_needs_barrier (dst, new_flags, pred); + break; + + case CALL: + new_flags.is_write = 0; + /* ??? Why is this here? It seems unnecessary. */ + need_barrier |= rws_access_reg (REG_GP, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred); + + /* Avoid multiple register writes, in case this is a pattern with + multiple CALL rtx. This avoids an abort in rws_access_reg. */ + /* ??? This assumes that no rtx other than CALL/RETURN sets REG_AR_CFM, + and that we don't have predicated calls/returns. */ + if (! rws_insn[REG_AR_CFM].write_count) + { + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REG_RP, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_PFS, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); + } + break; + + case CLOBBER: +#if 0 + case USE: + /* We must handle USE here in case it occurs within a PARALLEL. + For instance, the mov ar.pfs= instruction has a USE which requires + a barrier between it and an immediately preceding alloc. */ +#endif + /* Clobber & use are for earlier compiler-phases only. */ + break; + + case ASM_OPERANDS: + case ASM_INPUT: + /* We always emit stop bits for traditional asms. We emit stop bits + for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ + if (GET_CODE (x) != ASM_OPERANDS + || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) + { + /* Avoid writing the register multiple times if we have multiple + asm outputs. This avoids an abort in rws_access_reg. */ + if (! rws_insn[REG_VOLATILE].write_count) + { + new_flags.is_write = 1; + rws_access_reg (REG_VOLATILE, new_flags, pred); + } + return 1; + } + + /* For all ASM_OPERANDS, we must traverse the vector of input operands. + We cannot just fall through here since then we would be confused + by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate + traditional asms, unlike their normal usage. */ + + for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) + if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) + need_barrier = 1; + break; + + case PARALLEL: + for (i = XVECLEN (x, 0) - 1; i >= 0; --i) + if (rtx_needs_barrier (XVECEXP (x, 0, i), flags, pred)) + need_barrier = 1; + break; + + case SUBREG: + x = SUBREG_REG (x); + /* FALLTHRU */ + case REG: + need_barrier = rws_access_reg (REGNO (x), flags, pred); + break; + + case MEM: + /* Find the regs used in memory address computation.
*/ + new_flags.is_write = 0; + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + break; + + case CONST_INT: case CONST_DOUBLE: + case SYMBOL_REF: case LABEL_REF: case CONST: + break; + + /* Operators with side-effects. */ + case POST_INC: case POST_DEC: + if (GET_CODE (XEXP (x, 0)) != REG) + abort (); + + new_flags.is_write = 0; + need_barrier = rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred); + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred); + break; + + /* Handle common unary and binary ops for efficiency. */ + case COMPARE: case PLUS: case MINUS: case MULT: case DIV: + case MOD: case UDIV: case UMOD: case AND: case IOR: + case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: + case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: + case NE: case EQ: case GE: case GT: case LE: + case LT: case GEU: case GTU: case LEU: case LTU: + need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); + need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); + break; + + case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: + case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: + case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: + case SQRT: case FFS: + need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); + break; + + case UNSPEC: + switch (XINT (x, 1)) + { + /* ??? For the st8.spill/ld8.fill instructions, we can ignore unat + dependencies as long as we don't have both a spill and fill in + the same instruction group. We need to check for that. */ + case 1: /* st8.spill */ + case 2: /* ld8.fill */ + case 3: /* stf.spill */ + case 4: /* ldf.spill */ + case 8: /* popcnt */ + need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + break; + + case 5: /* mov =pr */ + /* This reads all predicate registers. */ + for (i = PR_REG (1); i < PR_REG (64); i++) + need_barrier |= rws_access_reg (i, flags, pred); + break; + + case 6: /* mov pr= */ + /* This writes all predicate registers. */ + new_flags.is_write = 1; + /* We need to skip by two, because rws_access_reg always writes + to two predicate registers at a time. */ + /* ??? Strictly speaking, we shouldn't be counting writes to pr0. */ + for (i = PR_REG (0); i < PR_REG (64); i += 2) + need_barrier |= rws_access_reg (i, new_flags, pred); + break; + + case 7: + abort (); + + /* ??? Should track unat reads and writes. */ + case 9: /* mov =ar.unat */ + case 10: /* mov ar.unat= */ + break; + case 11: /* mov ar.ccv= */ + break; + case 12: /* mf */ + break; + case 13: /* cmpxchg_acq */ + break; + case 14: /* val_compare_and_swap */ + break; + case 15: /* lock_release */ + break; + case 16: /* lock_test_and_set */ + break; + case 17: /* _and_fetch */ + break; + case 18: /* fetch_and_ */ + break; + case 19: /* fetchadd_acq */ + break; + default: + abort (); + } + break; + + case UNSPEC_VOLATILE: + switch (XINT (x, 1)) + { + case 0: /* alloc */ + /* Alloc must always be the first instruction. Currently, we + only emit it at the function start, so we don't need to worry + about emitting a stop bit before it. 
*/ + need_barrier = rws_access_reg (REG_AR_PFS, flags, pred); + + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); + return need_barrier; + + case 1: /* blockage */ + case 2: /* insn group barrier */ + return 0; + + case 3: /* flush_cache */ + return rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); + + case 4: /* mov ar.pfs= */ + new_flags.is_write = 1; + need_barrier = rws_access_reg (REG_AR_PFS, new_flags, pred); + break; + + default: + abort (); + } + break; + + case RETURN: + new_flags.is_write = 0; + need_barrier = rws_access_reg (REG_RP, flags, pred); + need_barrier |= rws_access_reg (REG_AR_PFS, flags, pred); + + new_flags.is_write = 1; + need_barrier |= rws_access_reg (REG_AR_EC, new_flags, pred); + need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); + break; + + default: + format_ptr = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + switch (format_ptr[i]) + { + case '0': /* unused field */ + case 'i': /* integer */ + case 'n': /* note */ + case 'w': /* wide integer */ + case 's': /* pointer to string */ + case 'S': /* optional pointer to string */ + break; + + case 'e': + if (rtx_needs_barrier (XEXP (x, i), flags, pred)) + need_barrier = 1; + break; + + case 'E': + for (j = XVECLEN (x, i) - 1; j >= 0; --j) + if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) + need_barrier = 1; + break; + + default: + abort (); + } + } + return need_barrier; +} + +/* INSNS is a chain of instructions. Scan the chain, and insert stop bits + as necessary to eliminate dependencies. */ + +static void +emit_insn_group_barriers (insns) + rtx insns; +{ + int need_barrier = 0; + int exception_nesting; + struct reg_flags flags; + rtx insn, prev_insn; + + memset (rws_sum, 0, sizeof (rws_sum)); + + prev_insn = 0; + for (insn = insns; insn; insn = NEXT_INSN (insn)) + { + memset (&flags, 0, sizeof (flags)); + switch (GET_CODE (insn)) + { + case NOTE: + switch (NOTE_LINE_NUMBER (insn)) + { + case NOTE_INSN_EH_REGION_BEG: + exception_nesting++; + break; + + case NOTE_INSN_EH_REGION_END: + exception_nesting--; + break; + + case NOTE_INSN_EPILOGUE_BEG: + break; + + default: + break; + } + break; + + case JUMP_INSN: + case CALL_INSN: + flags.is_branch = 1; + case INSN: + if (GET_CODE (PATTERN (insn)) == USE) + /* Don't care about USE "insns"---those are used to + indicate to the optimizer that it shouldn't get rid of + certain operations. */ + break; + else + { + memset (rws_insn, 0, sizeof (rws_insn)); + need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); + + /* Check to see if the previous instruction was a volatile + asm. */ + if (! need_barrier) + need_barrier = rws_access_reg (REG_VOLATILE, flags, 0); + + if (need_barrier) + { + /* PREV_INSN null can happen if the very first insn is a + volatile asm. */ + if (prev_insn) + emit_insn_after (gen_insn_group_barrier (), prev_insn); + memcpy (rws_sum, rws_insn, sizeof (rws_sum)); + } + need_barrier = 0; + prev_insn = insn; + } + break; + + case BARRIER: + /* A barrier doesn't imply an instruction group boundary. */ + break; + + case CODE_LABEL: + /* Leave prev_insn alone so the barrier gets generated in front + of the label, if one is needed. */ + break; + + default: + abort (); + } + } +} + +/* Perform machine dependent operations on the rtl chain INSNS. */ + +void +ia64_reorg (insns) + rtx insns; +{ + emit_insn_group_barriers (insns); +} + +/* Return true if REGNO is used by the epilogue.
*/ + +int +ia64_epilogue_uses (regno) + int regno; +{ + /* For functions defined with the syscall_linkage attribute, all input + registers are marked as live at all function exits. This prevents the + register allocator from using the input registers, which in turn makes it + possible to restart a system call after an interrupt without having to + save/restore the input registers. */ + + if (IN_REGNO_P (regno) + && (regno < IN_REG (current_function_args_info.words)) + && lookup_attribute ("syscall_linkage", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) + return 1; + + return 0; +} + +/* Return true if IDENTIFIER is a valid attribute for TYPE. */ + +int +ia64_valid_type_attribute (type, attributes, identifier, args) + tree type; + tree attributes ATTRIBUTE_UNUSED; + tree identifier; + tree args; +{ + /* We only support an attribute for function calls. */ + + if (TREE_CODE (type) != FUNCTION_TYPE + && TREE_CODE (type) != METHOD_TYPE) + return 0; + + /* The "syscall_linkage" attribute says the callee is a system call entry + point. This affects ia64_epilogue_uses. */ + + if (is_attribute_p ("syscall_linkage", identifier)) + return args == NULL_TREE; + + return 0; +} + +/* For ia64, SYMBOL_REF_FLAG set means that it is a function. + + We add @ to the name if this goes in small data/bss. We can only put + a variable in small data/bss if it is defined in this module or a module + that we are statically linked with. We can't check the second condition, + but TREE_STATIC gives us the first one. */ + +/* ??? If we had IPA, we could check the second condition. We could support + programmer added section attributes if the variable is not defined in this + module. */ + +/* ??? See the v850 port for a cleaner way to do this. */ + +/* ??? We could also support own long data here. Generating movl/add/ld8 + instead of addl,ld8/ld8. This makes the code bigger, but should make the + code faster because there is one less load. This also includes incomplete + types which can't go in sdata/sbss. */ + +/* ??? See select_section. We must put short own readonly variables in + sdata/sbss instead of the more natural rodata, because we can't perform + the DECL_READONLY_SECTION test here. */ + +extern struct obstack * saveable_obstack; + +void +ia64_encode_section_info (decl) + tree decl; +{ + if (TREE_CODE (decl) == FUNCTION_DECL) + SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1; + /* We assume that -fpic is used only to create a shared library (dso). + With -fpic, no global data can ever be sdata. + Without -fpic, global common uninitialized data can never be sdata, since + it can unify with a real definition in a dso. */ + /* ??? Actually, we can put globals in sdata, as long as we don't use gprel + to access them. The linker may then be able to do linker relaxation to + optimize references to them. Currently sdata implies use of gprel. */ + else if (! TARGET_NO_SDATA + && TREE_CODE (decl) == VAR_DECL + && TREE_STATIC (decl) + && ! (TREE_PUBLIC (decl) + && (flag_pic + || (DECL_COMMON (decl) + && (DECL_INITIAL (decl) == 0 + || DECL_INITIAL (decl) == error_mark_node)))) + /* Either the variable must be declared without a section attribute, + or the section must be sdata or sbss. */ + && (DECL_SECTION_NAME (decl) == 0 + || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".sdata") + || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), + ".sbss"))) + { + int size = int_size_in_bytes (TREE_TYPE (decl)); + char *str = XSTR (XEXP (DECL_RTL (decl), 0), 0); + int reloc; + + /* ??? 
We should redeclare CTOR_LIST, DTOR_END so that we don't have to + special case them here. Currently we put them in ctor/dtors sections + behind the compiler's back. We should use section attributes + instead. */ + if (! strcmp (str, "__CTOR_LIST__") + || ! strcmp (str, "__DTOR_END__")) + ; + + /* If this is an incomplete type with size 0, then we can't put it in + sdata because it might be too big when completed. */ + else if (size > 0 && size <= ia64_section_threshold) + { + int len = strlen (str); + char *newstr = obstack_alloc (saveable_obstack, len + 2); + + strcpy (newstr + 1, str); + *newstr = SDATA_NAME_FLAG_CHAR; + XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr; + } + } +} + +#define def_builtin(name, type, code) \ + builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR) + +struct builtin_description +{ + enum insn_code icode; + const char *name; + enum ia64_builtins code; + enum rtx_code comparison; + unsigned int flag; +}; + +/* All 32 bit intrinsics that take 2 arguments. */ +static struct builtin_description bdesc_2argsi[] = +{ + { CODE_FOR_fetch_and_add_si, "__sync_fetch_and_add_si", IA64_BUILTIN_FETCH_AND_ADD_SI, 0, 0 }, + { CODE_FOR_fetch_and_sub_si, "__sync_fetch_and_sub_si", IA64_BUILTIN_FETCH_AND_SUB_SI, 0, 0 }, + { CODE_FOR_fetch_and_or_si, "__sync_fetch_and_or_si", IA64_BUILTIN_FETCH_AND_OR_SI, 0, 0 }, + { CODE_FOR_fetch_and_and_si, "__sync_fetch_and_and_si", IA64_BUILTIN_FETCH_AND_AND_SI, 0, 0 }, + { CODE_FOR_fetch_and_xor_si, "__sync_fetch_and_xor_si", IA64_BUILTIN_FETCH_AND_XOR_SI, 0, 0 }, + { CODE_FOR_fetch_and_nand_si, "__sync_fetch_and_nand_si", IA64_BUILTIN_FETCH_AND_NAND_SI, 0, 0 }, + { CODE_FOR_add_and_fetch_si, "__sync_add_and_fetch_si", IA64_BUILTIN_ADD_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_sub_and_fetch_si, "__sync_sub_and_fetch_si", IA64_BUILTIN_SUB_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_or_and_fetch_si, "__sync_or_and_fetch_si", IA64_BUILTIN_OR_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_and_and_fetch_si, "__sync_and_and_fetch_si", IA64_BUILTIN_AND_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_xor_and_fetch_si, "__sync_xor_and_fetch_si", IA64_BUILTIN_XOR_AND_FETCH_SI, 0, 0 }, + { CODE_FOR_nand_and_fetch_si, "__sync_nand_and_fetch_si", IA64_BUILTIN_NAND_AND_FETCH_SI, 0, 0 } +}; + +/* All 64 bit intrinsics that take 2 arguments. 
*/ +static struct builtin_description bdesc_2argdi[] = +{ + { CODE_FOR_fetch_and_add_di, "__sync_fetch_and_add_di", IA64_BUILTIN_FETCH_AND_ADD_DI, 0, 0 }, + { CODE_FOR_fetch_and_sub_di, "__sync_fetch_and_sub_di", IA64_BUILTIN_FETCH_AND_SUB_DI, 0, 0 }, + { CODE_FOR_fetch_and_or_di, "__sync_fetch_and_or_di", IA64_BUILTIN_FETCH_AND_OR_DI, 0, 0 }, + { CODE_FOR_fetch_and_and_di, "__sync_fetch_and_and_di", IA64_BUILTIN_FETCH_AND_AND_DI, 0, 0 }, + { CODE_FOR_fetch_and_xor_di, "__sync_fetch_and_xor_di", IA64_BUILTIN_FETCH_AND_XOR_DI, 0, 0 }, + { CODE_FOR_fetch_and_nand_di, "__sync_fetch_and_nand_di", IA64_BUILTIN_FETCH_AND_NAND_DI, 0, 0 }, + { CODE_FOR_add_and_fetch_di, "__sync_add_and_fetch_di", IA64_BUILTIN_ADD_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_sub_and_fetch_di, "__sync_sub_and_fetch_di", IA64_BUILTIN_SUB_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_or_and_fetch_di, "__sync_or_and_fetch_di", IA64_BUILTIN_OR_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_and_and_fetch_di, "__sync_and_and_fetch_di", IA64_BUILTIN_AND_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_xor_and_fetch_di, "__sync_xor_and_fetch_di", IA64_BUILTIN_XOR_AND_FETCH_DI, 0, 0 }, + { CODE_FOR_nand_and_fetch_di, "__sync_nand_and_fetch_di", IA64_BUILTIN_NAND_AND_FETCH_DI, 0, 0 } +}; + +void +ia64_init_builtins () +{ + int i; + struct builtin_description *d; + + tree psi_type_node = build_pointer_type (integer_type_node); + tree pdi_type_node = build_pointer_type (long_integer_type_node); + tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE); + + + /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */ + tree si_ftype_psi_si_si + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, psi_type_node, + tree_cons (NULL_TREE, integer_type_node, + tree_cons (NULL_TREE, integer_type_node, + endlink)))); + + /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */ + tree di_ftype_pdi_di_di + = build_function_type (long_integer_type_node, + tree_cons (NULL_TREE, pdi_type_node, + tree_cons (NULL_TREE, long_integer_type_node, + tree_cons (NULL_TREE, long_integer_type_node, + endlink)))); + /* __sync_synchronize */ + tree void_ftype_void + = build_function_type (void_type_node, endlink); + + /* __sync_lock_test_and_set_si */ + tree si_ftype_psi_si + = build_function_type (integer_type_node, + tree_cons (NULL_TREE, psi_type_node, + tree_cons (NULL_TREE, integer_type_node, endlink))); + + /* __sync_lock_test_and_set_di */ + tree di_ftype_pdi_di + = build_function_type (long_integer_type_node, + tree_cons (NULL_TREE, pdi_type_node, + tree_cons (NULL_TREE, long_integer_type_node, endlink))); + + /* __sync_lock_release_si */ + tree void_ftype_psi + = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node, endlink)); + + /* __sync_lock_release_di */ + tree void_ftype_pdi + = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node, endlink)); + + def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI); + + def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI); + + def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI); + + def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI); + + def_builtin ("__sync_synchronize", void_ftype_void, IA64_BUILTIN_SYNCHRONIZE); + + def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si, IA64_BUILTIN_LOCK_TEST_AND_SET_SI); + + def_builtin 
("__sync_lock_test_and_set_di", di_ftype_pdi_di, IA64_BUILTIN_LOCK_TEST_AND_SET_DI); + + def_builtin ("__sync_lock_release_si", void_ftype_psi, IA64_BUILTIN_LOCK_RELEASE_SI); + + def_builtin ("__sync_lock_release_di", void_ftype_pdi, IA64_BUILTIN_LOCK_RELEASE_DI); + + /* Add all builtins that are operations on two args. */ + for (i=0, d = bdesc_2argsi; i < sizeof(bdesc_2argsi) / sizeof *d; i++, d++) + def_builtin (d->name, si_ftype_psi_si, d->code); + for (i=0, d = bdesc_2argdi; i < sizeof(bdesc_2argdi) / sizeof *d; i++, d++) + def_builtin (d->name, di_ftype_pdi_di, d->code); +} + +/* Expand fetch_and_op intrinsics. The basic code sequence is: + + mf + ldsz return = [ptr]; + tmp = return; + do { + oldval = tmp; + ar.ccv = tmp; + tmp <op>= value; + cmpxchgsz.acq tmp = [ptr], tmp + } while (tmp != oldval) +*/ +void +ia64_expand_fetch_and_op (code, mode, operands) + enum fetchop_code code; + enum machine_mode mode; + rtx operands[]; +{ + rtx oldval, newlabel; + rtx tmp_reg = gen_rtx_REG (mode, GR_REG(0)); + rtx mfreg = gen_rtx_MEM (BLKmode, tmp_reg); + RTX_UNCHANGING_P (mfreg) = 1; + emit_insn (gen_mf (mfreg)); + tmp_reg = gen_reg_rtx (mode); + oldval = gen_reg_rtx (mode); + + if (mode == SImode) + { + emit_insn (gen_movsi (operands[0], operands[1])); + emit_insn (gen_movsi (tmp_reg, operands[0])); + } + else + { + emit_insn (gen_movdi (operands[0], operands[1])); + emit_insn (gen_movdi (tmp_reg, operands[0])); + } + + newlabel = gen_label_rtx (); + emit_label (newlabel); + if (mode == SImode) + { + emit_insn (gen_movsi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_si (tmp_reg)); + } + else + { + emit_insn (gen_movdi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_di (tmp_reg)); + } + + /* Perform the specific operation. */ + switch (code) + { + case IA64_ADD_OP: + { + rtx reg; + if (GET_CODE (operands[2]) == CONST_INT) + reg = gen_reg_rtx (mode); + else + reg = operands[2]; + if (mode == SImode) + { + if (reg != operands[2]) + emit_insn (gen_movsi (reg, operands[2])); + emit_insn (gen_addsi3 (tmp_reg, tmp_reg, reg)); + } + else + { + if (reg != operands[2]) + emit_insn (gen_movdi (reg, operands[2])); + emit_insn (gen_adddi3 (tmp_reg, tmp_reg, reg)); + } + break; + } + + case IA64_SUB_OP: + if (mode == SImode) + emit_insn (gen_subsi3 (tmp_reg, tmp_reg, operands[2])); + else + emit_insn (gen_subdi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_OR_OP: + emit_insn (gen_iordi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_AND_OP: + emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_XOR_OP: + emit_insn (gen_xordi3 (tmp_reg, tmp_reg, operands[2])); + break; + + case IA64_NAND_OP: + emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2])); + if (mode == SImode) + emit_insn (gen_one_cmplsi2 (tmp_reg, operands[0])); + else + emit_insn (gen_one_cmpldi2 (tmp_reg, operands[0])); + break; + + default: + break; + } + + if (mode == SImode) + emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], tmp_reg)); + else + emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], tmp_reg)); + + emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel); +} + +/* Expand op_and_fetch intrinsics.
The basic code sequence is: + + mf + ldsz return = [ptr]; + do { + oldval = tmp; + ar.ccv = tmp; + return = tmp + value; + cmpxchgsz.acq tmp = [ptr], return + } while (tmp != oldval) +*/ +void +ia64_expand_op_and_fetch (code, mode, operands) + enum fetchop_code code; + enum machine_mode mode; + rtx operands[]; +{ + rtx oldval, newlabel; + rtx tmp_reg, tmp2_reg = gen_rtx_REG (mode, GR_REG(0)); + rtx mfreg = gen_rtx_MEM (BLKmode, tmp2_reg); + RTX_UNCHANGING_P (mfreg) = 1; + + emit_insn (gen_mf (mfreg)); + tmp_reg = gen_reg_rtx (mode); + if (mode == SImode) + emit_insn (gen_movsi (tmp_reg, operands[1])); + else + emit_insn (gen_movdi (tmp_reg, operands[1])); + + newlabel = gen_label_rtx (); + emit_label (newlabel); + oldval = gen_reg_rtx (mode); + if (mode == SImode) + { + emit_insn (gen_movsi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_si (tmp_reg)); + } + else + { + emit_insn (gen_movdi (oldval, tmp_reg)); + emit_insn (gen_ccv_restore_di (tmp_reg)); + } + + /* Perform the specific operation. */ + switch (code) + { + case IA64_ADD_OP: + if (mode == SImode) + emit_insn (gen_addsi3 (operands[0], tmp_reg, operands[2])); + else + emit_insn (gen_adddi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_SUB_OP: + if (mode == SImode) + emit_insn (gen_subsi3 (operands[0], tmp_reg, operands[2])); + else + emit_insn (gen_subdi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_OR_OP: + emit_insn (gen_iordi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_AND_OP: + emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_XOR_OP: + emit_insn (gen_xordi3 (operands[0], tmp_reg, operands[2])); + break; + + case IA64_NAND_OP: + emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2])); + if (mode == SImode) + emit_insn (gen_one_cmplsi2 (operands[0], operands[0])); + else + emit_insn (gen_one_cmpldi2 (operands[0], operands[0])); + break; + + default: + break; + } + + if (mode == SImode) + emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], operands[0])); + else + emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], operands[0])); + + emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel); +} + +/* Expand val_ and bool_compare_and_swap. For val_ we want: + + ar.ccv = oldval + mf + cmpxchgsz.acq ret = [ptr], newval, ar.ccv + return ret + + For bool_ it's the same except return ret == oldval. +*/ +static rtx +ia64_expand_compare_and_swap (icode, arglist, target, boolcode) + enum insn_code icode; + tree arglist; + rtx target; + int boolcode; +{ + tree arg0, arg1, arg2; + rtx newlabel, newlabel2, op0, op1, op2, pat; + enum machine_mode tmode, mode0, mode1, mode2; + + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + mode2 = insn_data[icode].operand[3].mode; + + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + RTX_UNCHANGING_P (op0) = 1; + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) + op2 = copy_to_mode_reg (mode2, op2); + if (target == 0 + || GET_MODE (target) != tmode + || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + pat = GEN_FCN (icode) (target, op0, op1, op2); + if (! pat) + return 0; + emit_insn (pat); + if (boolcode) + { + if (tmode == SImode) + { + emit_insn (gen_cmpsi (target, op1)); + emit_insn (gen_seq (gen_lowpart (DImode, target))); + } + else + { + emit_insn (gen_cmpdi (target, op1)); + emit_insn (gen_seq (target)); + } + } + return target; +} + +/* Expand all intrinsics that take 2 arguments. */ +static rtx +ia64_expand_binop_builtin (icode, arglist, target) + enum insn_code icode; + tree arglist; + rtx target; +{ + rtx pat; + tree arg0 = TREE_VALUE (arglist); + tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + +rtx +ia64_expand_builtin (exp, target, subtarget, mode, ignore) + tree exp; + rtx target; + rtx subtarget; + enum machine_mode mode; + int ignore; +{ + rtx op0, op1, op2, op3, pat; + rtx tmp_reg; + rtx newlabel, newlabel2; + tree arg0, arg1, arg2, arg3; + tree arglist = TREE_OPERAND (exp, 1); + tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); + int fcode = DECL_FUNCTION_CODE (fndecl); + enum machine_mode tmode, mode0, mode1, mode2, mode3; + enum insn_code icode; + int boolcode = 0; + int i; + struct builtin_description *d; + + switch (fcode) + { + case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI: + return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si, arglist, target, 1); + case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI: + return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si, arglist, target, 0); + case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI: + return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di, arglist, target, 1); + case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI: + return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di, arglist, target, 0); + case IA64_BUILTIN_SYNCHRONIZE: + /* Pass a volatile memory operand. */ + tmp_reg = gen_rtx_REG (DImode, GR_REG(0)); + target = gen_rtx_MEM (BLKmode, tmp_reg); + emit_insn (gen_mf (target)); + return 0; + + case IA64_BUILTIN_LOCK_TEST_AND_SET_SI: + icode = CODE_FOR_lock_test_and_set_si; + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + RTX_UNCHANGING_P (op0) = 1; + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! 
pat) + return 0; + emit_insn (pat); + return target; + + case IA64_BUILTIN_LOCK_TEST_AND_SET_DI: + icode = CODE_FOR_lock_test_and_set_di; + arg0 = TREE_VALUE (arglist); + arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); + tmode = insn_data[icode].operand[0].mode; + mode0 = insn_data[icode].operand[1].mode; + mode1 = insn_data[icode].operand[2].mode; + op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); + RTX_UNCHANGING_P (op0) = 1; + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; + + case IA64_BUILTIN_LOCK_RELEASE_SI: + arg0 = TREE_VALUE (arglist); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op0 = gen_rtx_MEM (SImode, copy_to_mode_reg (Pmode, op0)); + MEM_VOLATILE_P (op0) = 1; + emit_insn (gen_movsi (op0, GEN_INT(0))); + return 0; + + case IA64_BUILTIN_LOCK_RELEASE_DI: + arg0 = TREE_VALUE (arglist); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); + op0 = gen_rtx_MEM (DImode, copy_to_mode_reg (Pmode, op0)); + MEM_VOLATILE_P (op0) = 1; + emit_insn (gen_movdi (op0, GEN_INT(0))); + return 0; + + default: + break; + } + + /* Expand all 32 bit intrinsics that take 2 arguments. */ + for (i=0, d = bdesc_2argsi; i < sizeof (bdesc_2argsi) / sizeof *d; i++, d++) + if (d->code == fcode) + return ia64_expand_binop_builtin (d->icode, arglist, target); + + /* Expand all 64 bit intrinsics that take 2 arguments. */ + for (i=0, d = bdesc_2argdi; i < sizeof (bdesc_2argdi) / sizeof *d; i++, d++) + if (d->code == fcode) + return ia64_expand_binop_builtin (d->icode, arglist, target); + + fail: + return 0; +} |
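For reference, a minimal C-level sketch (not part of the patch) of what the fetch_and_op sequence documented above computes for the add case, written in terms of the __sync_val_compare_and_swap_si builtin that ia64_init_builtins registers. The function name fetch_and_add_si_example is invented here purely for illustration.

/* Illustration only: the documented mf / load / compare-and-swap retry
   loop, expressed as plain C.  Returns the value seen in *ptr just
   before the add took effect, matching __sync_fetch_and_add_si.  */
static int
fetch_and_add_si_example (int *ptr, int value)
{
  int oldval, seen;

  seen = *ptr;                  /* ldsz return = [ptr]  */
  do
    {
      oldval = seen;            /* remember the value we expect to find  */
      /* Store oldval + value only if *ptr still holds oldval;
         the builtin returns the previous contents of *ptr.  */
      seen = __sync_val_compare_and_swap_si (ptr, oldval, oldval + value);
    }
  while (seen != oldval);       /* another update raced us; retry  */

  return oldval;
}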