-rw-r--r--  gcc/ChangeLog                          |  34
-rw-r--r--  gcc/config/ia64/ia64-modes.def         |   8
-rw-r--r--  gcc/config/ia64/ia64-protos.h          |   7
-rw-r--r--  gcc/config/ia64/ia64.c                 | 421
-rw-r--r--  gcc/config/ia64/ia64.h                 |  47
-rw-r--r--  gcc/config/ia64/ia64.md                |  12
-rw-r--r--  gcc/config/ia64/itanium1.md            |  19
-rw-r--r--  gcc/config/ia64/itanium2.md            |  23
-rw-r--r--  gcc/config/ia64/predicates.md          |  14
-rw-r--r--  gcc/config/ia64/vect.md                | 996
-rw-r--r--  gcc/testsuite/ChangeLog                |  10
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-30.c    |   4
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect-8.c     |   4
-rw-r--r--  gcc/testsuite/gcc.dg/vect/vect.exp     |   2
-rw-r--r--  gcc/testsuite/lib/target-supports.exp  |   9
15 files changed, 1544 insertions, 66 deletions
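
As context for the diff below (illustrative, not part of the patch): the new V8QI/V4HI/V2SI/V2SF modes let the tree vectorizer target the ia64 parallel-arithmetic instructions. A byte-wise loop such as the following should map onto the new V8QImode add pattern (padd1) when compiled with -O2 -ftree-vectorize for an ia64 target; the array names and sizes here are hypothetical:

/* Hypothetical example, not from the patch: with the new vect.md patterns
   the inner loop below can be vectorized into padd1 on V8QImode values
   instead of eight scalar byte adds.  */
#define N 64
unsigned char a[N], b[N], c[N];

void
add_bytes (void)
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = a[i] + b[i];
}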
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ec06ccb..ec51b64 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,39 @@ 2005-01-03 Richard Henderson <rth@redhat.com> + * config/ia64/ia64.c (TARGET_VECTOR_MODE_SUPPORTED_P): New. + (ia64_const_ok_for_letter_p): New. + (ia64_const_double_ok_for_letter_p): New. + (ia64_extra_constraint): New. + (ia64_expand_vecint_compare): New. + (ia64_expand_vcondu_v2si): New. + (ia64_expand_vecint_cmov): New. + (ia64_expand_vecint_minmax): New. + (ia64_print_operand): Add 'v'. + (ia64_preferred_reload_class): New. + (ia64_vector_mode_supported_p): New. + * config/ia64/ia64.h (UNITS_PER_SIMD_WORD): New. + (PREFERRED_RELOAD_CLASS): Move to function. + (CONST_OK_FOR_LETTER_P): Move to function. + (CONST_DOUBLE_OK_FOR_LETTER_P): Move to function. + (CONSTRAINT_OK_FOR_Q, CONSTRAINT_OK_FOR_R): Remove. + (CONSTRAINT_OK_FOR_S, CONSTRAINT_OK_FOR_T): Remove. + (EXTRA_CONSTRAINT): Move to function. + * config/ia64/ia64.md: Include vect.md. + (itanium_class): Add mmalua. + (type): Handle it. + * config/ia64/itanium1.md (1_mmalua): New. Add it to bypasses. + (1b_mmalua): New. + * config/ia64/itanium2.md (2_mmalua, 2b_mmalua): Similarly. + * config/ia64/predicates.md (gr_reg_or_0_operand): Accept any + CONST0_RTX. + (const_int_2bit_operand): New. + (fr_reg_or_0_operand): New. + * config/ia64/ia64-modes.def: Add vector modes. + * config/ia64/ia64-protos.h: Update. + * config/ia64/vect.md: New file. + +2005-01-03 Richard Henderson <rth@redhat.com> + * simplify-rtx.c (simplify_binary_operation): Handle VEC_CONCAT. 2005-01-03 Uros Bizjak <uros@kss-loka.si> diff --git a/gcc/config/ia64/ia64-modes.def b/gcc/config/ia64/ia64-modes.def index 6e10ac0..3611638 100644 --- a/gcc/config/ia64/ia64-modes.def +++ b/gcc/config/ia64/ia64-modes.def @@ -66,3 +66,11 @@ INT_MODE (OI, 32); so that flow doesn't do something stupid. */ CC_MODE (CCI); + +/* Vector modes. 
*/ +VECTOR_MODES (INT, 4); /* V4QI V2HI */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +VECTOR_MODE (INT, QI, 16); +VECTOR_MODE (INT, HI, 8); +VECTOR_MODE (INT, SI, 4); +VECTOR_MODE (FLOAT, SF, 2); diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h index 30ea5e1..5b4b935 100644 --- a/gcc/config/ia64/ia64-protos.h +++ b/gcc/config/ia64/ia64-protos.h @@ -34,6 +34,10 @@ extern int ia64_st_address_bypass_p (rtx, rtx); extern int ia64_ld_address_bypass_p (rtx, rtx); extern int ia64_produce_address_p (rtx); +extern bool ia64_const_ok_for_letter_p (HOST_WIDE_INT, char); +extern bool ia64_const_double_ok_for_letter_p (rtx, char); +extern bool ia64_extra_constraint (rtx, char); + extern rtx ia64_expand_move (rtx, rtx); extern int ia64_move_ok (rtx, rtx); extern int addp4_optimize_ok (rtx, rtx); @@ -42,6 +46,8 @@ extern int ia64_depz_field_mask (rtx, rtx); extern void ia64_split_tmode_move (rtx[]); extern rtx spill_xfmode_operand (rtx, int); extern rtx ia64_expand_compare (enum rtx_code, enum machine_mode); +extern void ia64_expand_vecint_cmov (rtx[]); +extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]); extern void ia64_expand_call (rtx, rtx, rtx, int); extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int); extern void ia64_reload_gp (void); @@ -57,6 +63,7 @@ extern int ia64_hard_regno_rename_ok (int, int); extern void ia64_initialize_trampoline (rtx, rtx, rtx); extern void ia64_print_operand_address (FILE *, rtx); extern void ia64_print_operand (FILE *, rtx, int); +extern enum reg_class ia64_preferred_reload_class (rtx, enum reg_class); extern enum reg_class ia64_secondary_reload_class (enum reg_class, enum machine_mode, rtx); extern void ia64_output_dwarf_dtprel (FILE*, int, rtx); diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 68e6ddb..a6197c0 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -279,6 +279,7 @@ static void ia64_encode_section_info (tree, rtx, int); static rtx ia64_struct_value_rtx (tree, int); static tree ia64_gimplify_va_arg (tree, tree, tree *, tree *); static bool ia64_scalar_mode_supported_p (enum machine_mode mode); +static bool ia64_vector_mode_supported_p (enum machine_mode mode); /* Table of valid machine attributes. */ @@ -423,6 +424,8 @@ static const struct attribute_spec ia64_attribute_table[] = #undef TARGET_SCALAR_MODE_SUPPORTED_P #define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p /* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur in an order different from the specified program order. */ @@ -472,7 +475,8 @@ ia64_get_addr_area (tree decl) } static tree -ia64_handle_model_attribute (tree *node, tree name, tree args, int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) +ia64_handle_model_attribute (tree *node, tree name, tree args, + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) { ia64_addr_area addr_area = ADDR_AREA_NORMAL; ia64_addr_area area; @@ -554,6 +558,103 @@ ia64_encode_section_info (tree decl, rtx rtl, int first) ia64_encode_addr_area (decl, XEXP (rtl, 0)); } +/* Implement CONST_OK_FOR_LETTER_P. 
*/ + +bool +ia64_const_ok_for_letter_p (HOST_WIDE_INT value, char c) +{ + switch (c) + { + case 'I': + return CONST_OK_FOR_I (value); + case 'J': + return CONST_OK_FOR_J (value); + case 'K': + return CONST_OK_FOR_K (value); + case 'L': + return CONST_OK_FOR_L (value); + case 'M': + return CONST_OK_FOR_M (value); + case 'N': + return CONST_OK_FOR_N (value); + case 'O': + return CONST_OK_FOR_O (value); + case 'P': + return CONST_OK_FOR_P (value); + default: + return false; + } +} + +/* Implement CONST_DOUBLE_OK_FOR_LETTER_P. */ + +bool +ia64_const_double_ok_for_letter_p (rtx value, char c) +{ + switch (c) + { + case 'G': + return CONST_DOUBLE_OK_FOR_G (value); + default: + return false; + } +} + +/* Implement EXTRA_CONSTRAINT. */ + +bool +ia64_extra_constraint (rtx value, char c) +{ + switch (c) + { + case 'Q': + /* Non-volatile memory for FP_REG loads/stores. */ + return memory_operand(value, VOIDmode) && !MEM_VOLATILE_P (value); + + case 'R': + /* 1..4 for shladd arguments. */ + return (GET_CODE (value) == CONST_INT + && INTVAL (value) >= 1 && INTVAL (value) <= 4); + + case 'S': + /* Non-post-inc memory for asms and other unsavory creatures. */ + return (GET_CODE (value) == MEM + && GET_RTX_CLASS (GET_CODE (XEXP (value, 0))) != RTX_AUTOINC + && (reload_in_progress || memory_operand (value, VOIDmode))); + + case 'T': + /* Symbol ref to small-address-area. */ + return (GET_CODE (value) == SYMBOL_REF + && SYMBOL_REF_SMALL_ADDR_P (value)); + + case 'U': + /* Vector zero. */ + return value == CONST0_RTX (GET_MODE (value)); + + case 'W': + /* An integer vector, such that conversion to an integer yields a + value appropriate for an integer 'J' constraint. */ + if (GET_CODE (value) == CONST_VECTOR + && GET_MODE_CLASS (GET_MODE (value)) == MODE_VECTOR_INT) + { + value = simplify_subreg (DImode, value, GET_MODE (value), 0); + return ia64_const_ok_for_letter_p (INTVAL (value), 'J'); + } + return false; + + case 'Y': + /* A V2SF vector containing elements that satisfy 'G'. */ + return + (GET_CODE (value) == CONST_VECTOR + && GET_MODE (value) == V2SFmode + && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 0), 'G') + && ia64_const_double_ok_for_letter_p (XVECEXP (value, 0, 1), 'G')); + + default: + return false; + } +} + /* Return 1 if the operands of a move are ok. */ int @@ -1166,6 +1267,264 @@ ia64_expand_compare (enum rtx_code code, enum machine_mode mode) return gen_rtx_fmt_ee (code, mode, cmp, const0_rtx); } +/* Generate an integral vector comparison. */ + +static bool +ia64_expand_vecint_compare (enum rtx_code code, enum machine_mode mode, + rtx dest, rtx op0, rtx op1) +{ + bool negate = false; + rtx x; + + switch (code) + { + case EQ: + case GT: + break; + + case NE: + code = EQ; + negate = true; + break; + + case LE: + code = GT; + negate = true; + break; + + case GE: + negate = true; + /* FALLTHRU */ + + case LT: + x = op0; + op0 = op1; + op1 = x; + code = GT; + break; + + case GTU: + case GEU: + case LTU: + case LEU: + { + rtx w0h, w0l, w1h, w1l, ch, cl; + enum machine_mode wmode; + rtx (*unpack_l) (rtx, rtx, rtx); + rtx (*unpack_h) (rtx, rtx, rtx); + rtx (*pack) (rtx, rtx, rtx); + + /* We don't have native unsigned comparisons, but we can generate + them better than generic code can. 
*/ + + if (mode == V2SImode) + abort (); + else if (mode == V8QImode) + { + wmode = V4HImode; + pack = gen_pack2_sss; + unpack_l = gen_unpack1_l; + unpack_h = gen_unpack1_h; + } + else if (mode == V4HImode) + { + wmode = V2SImode; + pack = gen_pack4_sss; + unpack_l = gen_unpack2_l; + unpack_h = gen_unpack2_h; + } + else + abort (); + + /* Unpack into wider vectors, zero extending the elements. */ + + w0l = gen_reg_rtx (wmode); + w0h = gen_reg_rtx (wmode); + w1l = gen_reg_rtx (wmode); + w1h = gen_reg_rtx (wmode); + emit_insn (unpack_l (gen_lowpart (mode, w0l), op0, CONST0_RTX (mode))); + emit_insn (unpack_h (gen_lowpart (mode, w0h), op0, CONST0_RTX (mode))); + emit_insn (unpack_l (gen_lowpart (mode, w1l), op1, CONST0_RTX (mode))); + emit_insn (unpack_h (gen_lowpart (mode, w1h), op1, CONST0_RTX (mode))); + + /* Compare in the wider mode. */ + + cl = gen_reg_rtx (wmode); + ch = gen_reg_rtx (wmode); + code = signed_condition (code); + ia64_expand_vecint_compare (code, wmode, cl, w0l, w1l); + negate = ia64_expand_vecint_compare (code, wmode, ch, w0h, w1h); + + /* Repack into a single narrower vector. */ + + emit_insn (pack (dest, cl, ch)); + } + return negate; + + default: + abort (); + } + + x = gen_rtx_fmt_ee (code, mode, op0, op1); + emit_insn (gen_rtx_SET (VOIDmode, dest, x)); + + return negate; +} + +static void +ia64_expand_vcondu_v2si (enum rtx_code code, rtx operands[]) +{ + rtx dl, dh, bl, bh, op1l, op1h, op2l, op2h, op4l, op4h, op5l, op5h, x; + + /* In this case, we extract the two SImode quantities and generate + normal comparisons for each of them. */ + + op1l = gen_lowpart (SImode, operands[1]); + op2l = gen_lowpart (SImode, operands[2]); + op4l = gen_lowpart (SImode, operands[4]); + op5l = gen_lowpart (SImode, operands[5]); + + op1h = gen_reg_rtx (SImode); + op2h = gen_reg_rtx (SImode); + op4h = gen_reg_rtx (SImode); + op5h = gen_reg_rtx (SImode); + + emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op1h), + gen_lowpart (DImode, operands[1]), GEN_INT (32))); + emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op2h), + gen_lowpart (DImode, operands[2]), GEN_INT (32))); + emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op4h), + gen_lowpart (DImode, operands[4]), GEN_INT (32))); + emit_insn (gen_lshrdi3 (gen_lowpart (DImode, op5h), + gen_lowpart (DImode, operands[5]), GEN_INT (32))); + + bl = gen_reg_rtx (BImode); + x = gen_rtx_fmt_ee (code, BImode, op4l, op5l); + emit_insn (gen_rtx_SET (VOIDmode, bl, x)); + + bh = gen_reg_rtx (BImode); + x = gen_rtx_fmt_ee (code, BImode, op4h, op5h); + emit_insn (gen_rtx_SET (VOIDmode, bh, x)); + + /* With the results of the comparisons, emit conditional moves. */ + + dl = gen_reg_rtx (SImode); + x = gen_rtx_IF_THEN_ELSE (SImode, bl, op1l, op2l); + emit_insn (gen_rtx_SET (VOIDmode, dl, x)); + + dh = gen_reg_rtx (SImode); + x = gen_rtx_IF_THEN_ELSE (SImode, bh, op1h, op2h); + emit_insn (gen_rtx_SET (VOIDmode, dh, x)); + + /* Merge the two partial results back into a vector. */ + + x = gen_rtx_VEC_CONCAT (V2SImode, dl, dh); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); +} + +/* Emit an integral vector conditional move. */ + +void +ia64_expand_vecint_cmov (rtx operands[]) +{ + enum machine_mode mode = GET_MODE (operands[0]); + enum rtx_code code = GET_CODE (operands[3]); + bool negate; + rtx cmp, x, ot, of; + + /* Since we don't have unsigned V2SImode comparisons, it's more efficient + to special-case them entirely. 
*/ + if (mode == V2SImode + && (code == GTU || code == GEU || code == LEU || code == LTU)) + { + ia64_expand_vcondu_v2si (code, operands); + return; + } + + cmp = gen_reg_rtx (mode); + negate = ia64_expand_vecint_compare (code, mode, cmp, + operands[4], operands[5]); + + ot = operands[1+negate]; + of = operands[2-negate]; + + if (ot == CONST0_RTX (mode)) + { + if (of == CONST0_RTX (mode)) + { + emit_move_insn (operands[0], ot); + return; + } + + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, of); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + } + else if (of == CONST0_RTX (mode)) + { + x = gen_rtx_AND (mode, cmp, ot); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + } + else + { + rtx t, f; + + t = gen_reg_rtx (mode); + x = gen_rtx_AND (mode, cmp, operands[1+negate]); + emit_insn (gen_rtx_SET (VOIDmode, t, x)); + + f = gen_reg_rtx (mode); + x = gen_rtx_NOT (mode, cmp); + x = gen_rtx_AND (mode, x, operands[2-negate]); + emit_insn (gen_rtx_SET (VOIDmode, f, x)); + + x = gen_rtx_IOR (mode, t, f); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + } +} + +/* Emit an integral vector min or max operation. Return true if all done. */ + +bool +ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode, + rtx operands[]) +{ + rtx xops[5]; + + /* These four combinations are supported directly. */ + if (mode == V8QImode && (code == UMIN || code == UMAX)) + return false; + if (mode == V4HImode && (code == SMIN || code == SMAX)) + return false; + + /* Everything else implemented via vector comparisons. */ + xops[0] = operands[0]; + xops[4] = xops[1] = operands[1]; + xops[5] = xops[2] = operands[2]; + + switch (code) + { + case UMIN: + code = LTU; + break; + case UMAX: + code = GTU; + break; + case SMIN: + code = LT; + break; + case SMAX: + code = GT; + break; + default: + abort (); + } + xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]); + + ia64_expand_vecint_cmov (xops); + return true; +} + /* Emit the appropriate sequence for a call. */ void @@ -3613,7 +3972,9 @@ ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED, U Print an 8-bit sign extended number (K) as a 64-bit unsigned number for Intel assembler. r Print register name, or constant 0 as r0. HP compatibility for - Linux kernel. */ + Linux kernel. + v Print vector constant value as an 8-byte integer value. */ + void ia64_print_operand (FILE * file, rtx x, int code) { @@ -3771,6 +4132,11 @@ ia64_print_operand (FILE * file, rtx x, int code) output_operand_lossage ("invalid %%r value"); return; + case 'v': + gcc_assert (GET_CODE (x) == CONST_VECTOR); + x = simplify_subreg (DImode, x, GET_MODE (x), 0); + break; + case '+': { const char *which; @@ -3994,6 +4360,39 @@ ia64_register_move_cost (enum machine_mode mode, enum reg_class from, return 2; } +/* Implement PREFERRED_RELOAD_CLASS. Place additional restrictions on CLASS + to use when copying X into that class. */ + +enum reg_class +ia64_preferred_reload_class (rtx x, enum reg_class class) +{ + switch (class) + { + case FR_REGS: + /* Don't allow volatile mem reloads into floating point registers. + This is defined to force reload to choose the r/m case instead + of the f/f case when reloading (set (reg fX) (mem/v)). */ + if (MEM_P (x) && MEM_VOLATILE_P (x)) + return NO_REGS; + + /* Force all unrecognized constants into the constant pool. 
*/ + if (CONSTANT_P (x)) + return NO_REGS; + break; + + case AR_M_REGS: + case AR_I_REGS: + if (!OBJECT_P (x)) + return NO_REGS; + break; + + default: + break; + } + + return class; +} + /* This function returns the register class required for a secondary register when copying between one of the registers in CLASS, and X, using MODE. A return value of NO_REGS means that no secondary register @@ -8587,4 +8986,22 @@ ia64_scalar_mode_supported_p (enum machine_mode mode) } } +static bool +ia64_vector_mode_supported_p (enum machine_mode mode) +{ + switch (mode) + { + case V8QImode: + case V4HImode: + case V2SImode: + return true; + + case V2SFmode: + return true; + + default: + return false; + } +} + #include "gt-ia64.h" diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h index c38854b..21f9dc3 100644 --- a/gcc/config/ia64/ia64.h +++ b/gcc/config/ia64/ia64.h @@ -338,6 +338,8 @@ extern const char *ia64_tune_string; #define UNITS_PER_WORD 8 +#define UNITS_PER_SIMD_WORD UNITS_PER_WORD + #define POINTER_SIZE (TARGET_ILP32 ? 32 : 64) /* A C expression whose value is zero if pointers that need to be extended @@ -1024,18 +1026,8 @@ enum reg_class The value is a register class; perhaps CLASS, or perhaps another, smaller class. */ -/* Don't allow volatile mem reloads into floating point registers. This - is defined to force reload to choose the r/m case instead of the f/f case - when reloading (set (reg fX) (mem/v)). - - Do not reload expressions into AR regs. */ - #define PREFERRED_RELOAD_CLASS(X, CLASS) \ - (CLASS == FR_REGS && GET_CODE (X) == MEM && MEM_VOLATILE_P (X) ? NO_REGS \ - : CLASS == FR_REGS && GET_CODE (X) == CONST_DOUBLE ? NO_REGS \ - : !OBJECT_P (X) \ - && (CLASS == AR_M_REGS || CLASS == AR_I_REGS) ? NO_REGS \ - : CLASS) + ia64_preferred_reload_class (X, CLASS) /* You should define this macro to indicate to the reload phase that it may need to allocate at least one register for a reload in addition to the @@ -1106,15 +1098,7 @@ enum reg_class #define CONST_OK_FOR_P(VALUE) ((VALUE) == 0 || (VALUE) == -1) #define CONST_OK_FOR_LETTER_P(VALUE, C) \ -((C) == 'I' ? CONST_OK_FOR_I (VALUE) \ - : (C) == 'J' ? CONST_OK_FOR_J (VALUE) \ - : (C) == 'K' ? CONST_OK_FOR_K (VALUE) \ - : (C) == 'L' ? CONST_OK_FOR_L (VALUE) \ - : (C) == 'M' ? CONST_OK_FOR_M (VALUE) \ - : (C) == 'N' ? CONST_OK_FOR_N (VALUE) \ - : (C) == 'O' ? CONST_OK_FOR_O (VALUE) \ - : (C) == 'P' ? CONST_OK_FOR_P (VALUE) \ - : 0) + ia64_const_ok_for_letter_p (VALUE, C) /* A C expression that defines the machine-dependent operand constraint letters (`G', `H') that specify particular ranges of `const_double' values. */ @@ -1125,33 +1109,14 @@ enum reg_class || (VALUE) == CONST1_RTX (GET_MODE (VALUE))) #define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \ - ((C) == 'G' ? CONST_DOUBLE_OK_FOR_G (VALUE) : 0) + ia64_const_double_ok_for_letter_p (VALUE, C) /* A C expression that defines the optional machine-dependent constraint letters (`Q', `R', `S', `T', `U') that can be used to segregate specific types of operands, usually memory references, for the target machine. */ -/* Non-volatile memory for FP_REG loads/stores. */ -#define CONSTRAINT_OK_FOR_Q(VALUE) \ - (memory_operand((VALUE), VOIDmode) && ! MEM_VOLATILE_P (VALUE)) -/* 1..4 for shladd arguments. */ -#define CONSTRAINT_OK_FOR_R(VALUE) \ - (GET_CODE (VALUE) == CONST_INT && INTVAL (VALUE) >= 1 && INTVAL (VALUE) <= 4) -/* Non-post-inc memory for asms and other unsavory creatures. 
*/ -#define CONSTRAINT_OK_FOR_S(VALUE) \ - (GET_CODE (VALUE) == MEM \ - && GET_RTX_CLASS (GET_CODE (XEXP ((VALUE), 0))) != RTX_AUTOINC \ - && (reload_in_progress || memory_operand ((VALUE), VOIDmode))) -/* Symbol ref to small-address-area: */ -#define CONSTRAINT_OK_FOR_T(VALUE) \ - (GET_CODE (VALUE) == SYMBOL_REF && SYMBOL_REF_SMALL_ADDR_P (VALUE)) - #define EXTRA_CONSTRAINT(VALUE, C) \ - ((C) == 'Q' ? CONSTRAINT_OK_FOR_Q (VALUE) \ - : (C) == 'R' ? CONSTRAINT_OK_FOR_R (VALUE) \ - : (C) == 'S' ? CONSTRAINT_OK_FOR_S (VALUE) \ - : (C) == 'T' ? CONSTRAINT_OK_FOR_T (VALUE) \ - : 0) + ia64_extra_constraint (VALUE, C) /* Basic Stack Layout */ diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md index 15650c5..8ce878d 100644 --- a/gcc/config/ia64/ia64.md +++ b/gcc/config/ia64/ia64.md @@ -121,9 +121,9 @@ (define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld, fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld, - chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0, - syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop,nop_b,nop_f, - nop_i,nop_m,nop_x,lfetch,pre_cycle" + chk_s,long_i,mmalua,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf, + st,syst_m0, syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop, + nop_b,nop_f,nop_i,nop_m,nop_x,lfetch,pre_cycle" (const_string "unknown")) ;; chk_s has an I and an M form; use type A for convenience. @@ -132,7 +132,8 @@ (eq_attr "itanium_class" "rse_m,syst_m,syst_m0") (const_string "M") (eq_attr "itanium_class" "frar_m,toar_m,frfr,tofr") (const_string "M") (eq_attr "itanium_class" "lfetch") (const_string "M") - (eq_attr "itanium_class" "chk_s,ialu,icmp,ilog") (const_string "A") + (eq_attr "itanium_class" "chk_s,ialu,icmp,ilog,mmalua") + (const_string "A") (eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy") (const_string "F") (eq_attr "itanium_class" "fcvtfx,nop_f") (const_string "F") (eq_attr "itanium_class" "frar_i,toar_i,frbr,tobr") (const_string "I") @@ -6036,3 +6037,6 @@ "addp4_optimize_ok (operands[1], operands[2])" "addp4 %0 = %1, %2" [(set_attr "itanium_class" "ialu")]) + +;; Vector operations +(include "vect.md") diff --git a/gcc/config/ia64/itanium1.md b/gcc/config/ia64/itanium1.md index 4f01560..f4fcd1c 100644 --- a/gcc/config/ia64/itanium1.md +++ b/gcc/config/ia64/itanium1.md @@ -589,6 +589,11 @@ (and (and (eq_attr "cpu" "itanium") (eq_attr "itanium_class" "ilog")) (eq (symbol_ref "bundling_p") (const_int 0))) "1_A") +(define_insn_reservation "1_mmalua" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmalua")) + (eq (symbol_ref "bundling_p") (const_int 0))) + "1_A") (define_insn_reservation "1_ishf" 1 (and (and (eq_attr "cpu" "itanium") (eq_attr "itanium_class" "ishf")) @@ -920,7 +925,7 @@ ;; There is only one insn `mov ar.pfs =' for toar_i. (define_bypass 0 "1_tobr,1_topr,1_toar_i" "1_br,1_scall") -(define_bypass 3 "1_ialu,1_ialu_addr" "1_mmmul,1_mmshf") +(define_bypass 3 "1_ialu,1_ialu_addr" "1_mmmul,1_mmshf,1_mmalua") ;; ??? howto describe ialu for I slot only. We use ialu_addr for that ;;(define_bypass 2 "1_ialu" "1_ld" "ia64_ld_address_bypass_p") ;; ??? howto describe ialu st/address for I slot only. We use ialu_addr @@ -940,7 +945,7 @@ ;; Intel docs say only LD, ST, IALU, ILOG, ISHF consumers have latency 4, ;; but HP engineers say any non-MM operation. 
-(define_bypass 4 "1_mmmul,1_mmshf" +(define_bypass 4 "1_mmmul,1_mmshf,1_mmalua" "1_br,1_fcmp,1_fcvtfx,1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,\ 1_frbr,1_frfr,1_frpr,1_ialu,1_icmp,1_ilog,1_ishf,1_ld,1_chk_s,\ 1_long_i,1_rse_m,1_sem,1_stf,1_st,1_syst_m0,1_syst_m,\ @@ -958,13 +963,13 @@ ;; We don't use here fcmp because scall may be predicated. (define_bypass 0 "1_fcvtfx,1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,\ 1_frbr,1_frfr,1_frpr,1_ialu,1_ialu_addr,1_ilog,1_ishf,\ - 1_ld,1_long_i,1_mmmul,1_mmshf,1_mmshfi,1_toar_m,1_tofr,\ - 1_xmpy,1_xtd" "1_scall") + 1_ld,1_long_i,2_mmalua,1_mmmul,1_mmshf,1_mmshfi,1_toar_m,\ + 1_tofr,1_xmpy,1_xtd" "1_scall") (define_bypass 0 "1_unknown,1_ignore,1_stop_bit,1_br,1_fcmp,1_fcvtfx,\ 1_fld,1_fmac,1_fmisc,1_frar_i,1_frar_m,1_frbr,1_frfr,\ 1_frpr,1_ialu,1_ialu_addr,1_icmp,1_ilog,1_ishf,1_ld,\ - 1_chk_s,1_long_i,1_mmmul,1_mmshf,1_mmshfi,1_nop,\ + 1_chk_s,1_long_i,1_mmalua,1_mmmul,1_mmshf,1_mmshfi,1_nop,\ 1_nop_b,1_nop_f,1_nop_i,1_nop_m,1_nop_x,1_rse_m,1_scall,\ 1_sem,1_stf,1_st,1_syst_m0,1_syst_m,1_tbit,1_toar_i,\ 1_toar_m,1_tobr,1_tofr,1_topr,1_xmpy,1_xtd,1_lfetch" @@ -1458,6 +1463,10 @@ (and (and (eq_attr "cpu" "itanium") (eq_attr "itanium_class" "ilog")) (ne (symbol_ref "bundling_p") (const_int 0))) "1b_A") +(define_insn_reservation "1b_mmalua" 2 + (and (and (eq_attr "cpu" "itanium") + (eq_attr "itanium_class" "mmalua")) + (ne (symbol_ref "bundling_p") (const_int 0))) "1b_A") (define_insn_reservation "1b_ishf" 1 (and (and (eq_attr "cpu" "itanium") (eq_attr "itanium_class" "ishf")) diff --git a/gcc/config/ia64/itanium2.md b/gcc/config/ia64/itanium2.md index b78b616..ad24d19 100644 --- a/gcc/config/ia64/itanium2.md +++ b/gcc/config/ia64/itanium2.md @@ -786,6 +786,10 @@ (and (and (eq_attr "cpu" "itanium2") (eq_attr "itanium_class" "ilog")) (eq (symbol_ref "bundling_p") (const_int 0))) "2_A") +(define_insn_reservation "2_mmalua" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmalua")) + (eq (symbol_ref "bundling_p") (const_int 0))) "2_A") ;; Latency time ??? (define_insn_reservation "2_ishf" 1 (and (and (eq_attr "cpu" "itanium2") @@ -1016,23 +1020,24 @@ (define_bypass 0 "2_tbit" "2_br,2_scall") (define_bypass 2 "2_ld" "2_ld" "ia64_ld_address_bypass_p") (define_bypass 2 "2_ld" "2_st" "ia64_st_address_bypass_p") -(define_bypass 2 "2_ld" "2_mmmul,2_mmshf") -(define_bypass 3 "2_ilog" "2_mmmul,2_mmshf") -(define_bypass 3 "2_ialu" "2_mmmul,2_mmshf") -(define_bypass 3 "2_mmmul,2_mmshf" "2_ialu,2_ilog,2_ishf,2_st,2_ld") +(define_bypass 2 "2_ld" "2_mmalua,2_mmmul,2_mmshf") +(define_bypass 3 "2_ilog" "2_mmalua,2_mmmul,2_mmshf") +(define_bypass 3 "2_ialu" "2_mmalua,2_mmmul,2_mmshf") +(define_bypass 3 "2_mmalua,2_mmmul,2_mmshf" "2_ialu,2_ilog,2_ishf,2_st,2_ld") (define_bypass 6 "2_tofr" "2_frfr,2_stf") (define_bypass 7 "2_fmac" "2_frfr,2_stf") ;; We don't use here fcmp because scall may be predicated. 
(define_bypass 0 "2_fcvtfx,2_fld,2_fmac,2_fmisc,2_frar_i,2_frar_m,\ 2_frbr,2_frfr,2_frpr,2_ialu,2_ilog,2_ishf,2_ld,2_long_i,\ - 2_mmmul,2_mmshf,2_mmshfi,2_toar_m,2_tofr,2_xmpy,2_xtd" + 2_mmalua,2_mmmul,2_mmshf,2_mmshfi,2_toar_m,2_tofr,\ + 2_xmpy,2_xtd" "2_scall") (define_bypass 0 "2_unknown,2_ignore,2_stop_bit,2_br,2_fcmp,2_fcvtfx,2_fld,\ 2_fmac,2_fmisc,2_frar_i,2_frar_m,2_frbr,2_frfr,2_frpr,\ - 2_ialu,2_icmp,2_ilog,2_ishf,2_ld,2_chk_s,\ - 2_long_i,2_mmmul,2_mmshf,2_mmshfi,2_nop,2_nop_b,2_nop_f,\ + 2_ialu,2_icmp,2_ilog,2_ishf,2_ld,2_chk_s,2_long_i,\ + 2_mmalua,2_mmmul,2_mmshf,2_mmshfi,2_nop,2_nop_b,2_nop_f,\ 2_nop_i,2_nop_m,2_nop_x,2_rse_m,2_scall,2_sem,2_stf,2_st,\ 2_syst_m0,2_syst_m,2_tbit,2_toar_i,2_toar_m,2_tobr,2_tofr,\ 2_topr,2_xmpy,2_xtd,2_lfetch" "2_ignore") @@ -1586,6 +1591,10 @@ (and (and (eq_attr "cpu" "itanium2") (eq_attr "itanium_class" "ilog")) (ne (symbol_ref "bundling_p") (const_int 0))) "2b_A") +(define_insn_reservation "2b_mmalua" 2 + (and (and (eq_attr "cpu" "itanium2") + (eq_attr "itanium_class" "mmalua")) + (ne (symbol_ref "bundling_p") (const_int 0))) "2b_A") ;; Latency time ??? (define_insn_reservation "2b_ishf" 1 (and (and (eq_attr "cpu" "itanium2") diff --git a/gcc/config/ia64/predicates.md b/gcc/config/ia64/predicates.md index 6166612..7c9a76f 100644 --- a/gcc/config/ia64/predicates.md +++ b/gcc/config/ia64/predicates.md @@ -236,8 +236,8 @@ ;; True if OP is a GR register operand, or zero. (define_predicate "gr_reg_or_0_operand" (ior (match_operand 0 "gr_register_operand") - (and (match_code "const_int") - (match_test "op == const0_rtx")))) + (and (match_code "const_int,const_double,const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))")))) ;; True if OP is a GR register operand, or a 5 bit immediate operand. (define_predicate "gr_reg_or_5bit_operand" @@ -320,6 +320,10 @@ INTVAL (op) == 1 || INTVAL (op) == 4 || INTVAL (op) == 8 || INTVAL (op) == 16"))) +;; True if OP is 0..3. +(define_predicate "const_int_2bit_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 3"))) ;; True if OP is a floating-point constant zero, one, or a register. (define_predicate "fr_reg_or_fp01_operand" @@ -332,6 +336,12 @@ (and (match_operand 0 "fr_reg_or_fp01_operand") (not (match_code "subreg")))) +;; True if OP is a constant zero, or a register. +(define_predicate "fr_reg_or_0_operand" + (ior (match_operand 0 "fr_register_operand") + (and (match_code "const_double,const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))")))) + ;; True if this is a comparison operator, which accepts a normal 8-bit ;; signed immediate operand. (define_predicate "normal_comparison_operator" diff --git a/gcc/config/ia64/vect.md b/gcc/config/ia64/vect.md new file mode 100644 index 0000000..a42ad2a --- /dev/null +++ b/gcc/config/ia64/vect.md @@ -0,0 +1,996 @@ +;; IA-64 machine description for vector operations. +;; Copyright (C) 2004 +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING. 
If not, write to +;; the Free Software Foundation, 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + + +;; Integer vector operations + +(define_mode_macro VECINT [V8QI V4HI V2SI]) +(define_mode_macro VECINT12 [V8QI V4HI]) +(define_mode_macro VECINT24 [V4HI V2SI]) +(define_mode_attr vecsize [(V8QI "1") (V4HI "2") (V2SI "4")]) + +(define_expand "mov<mode>" + [(set (match_operand:VECINT 0 "general_operand" "") + (match_operand:VECINT 1 "general_operand" ""))] + "" +{ + rtx op1 = ia64_expand_move (operands[0], operands[1]); + if (!op1) + DONE; + operands[1] = op1; +}) + +(define_insn "*mov<mode>_internal" + [(set (match_operand:VECINT 0 "destination_operand" + "=r,r,r,r,m ,*f ,*f,Q ,r ,*f") + (match_operand:VECINT 1 "move_operand" + "rU,W,i,m,rU,U*f,Q ,*f,*f,r "))] + "ia64_move_ok (operands[0], operands[1])" + "@ + mov %0 = %r1 + addl %0 = %v1, r0 + movl %0 = %v1 + ld8%O1 %0 = %1%P1 + st8%Q0 %0 = %r1%P0 + mov %0 = %F1 + ldf8 %0 = %1%P1 + stf8 %0 = %1%P0 + getf.sig %0 = %1 + setf.sig %0 = %1" + [(set_attr "itanium_class" "ialu,ialu,long_i,ld,st,fmisc,fld,stf,frfr,tofr")]) + +(define_insn "one_cmpl<mode>2" + [(set (match_operand:VECINT 0 "gr_register_operand" "=r") + (not:VECINT (match_operand:VECINT 1 "gr_register_operand" "r")))] + "" + "andcm %0 = -1, %1" + [(set_attr "itanium_class" "ilog")]) + +(define_insn "and<mode>3" + [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f") + (and:VECINT + (match_operand:VECINT 1 "grfr_register_operand" "r,*f") + (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))] + "" + "@ + and %0 = %2, %1 + fand %0 = %2, %1" + [(set_attr "itanium_class" "ilog,fmisc")]) + +(define_insn "*andnot<mode>" + [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f") + (and:VECINT + (not:VECINT (match_operand:VECINT 1 "grfr_register_operand" "r,*f")) + (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))] + "" + "@ + andcm %0 = %2, %1 + fandcm %0 = %2, %1" + [(set_attr "itanium_class" "ilog,fmisc")]) + +(define_insn "ior<mode>3" + [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f") + (ior:VECINT + (match_operand:VECINT 1 "grfr_register_operand" "r,*f") + (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))] + "" + "@ + or %0 = %2, %1 + for %0 = %2, %1" + [(set_attr "itanium_class" "ilog,fmisc")]) + +(define_insn "xor<mode>3" + [(set (match_operand:VECINT 0 "grfr_register_operand" "=r,*f") + (xor:VECINT + (match_operand:VECINT 1 "grfr_register_operand" "r,*f") + (match_operand:VECINT 2 "grfr_reg_or_8bit_operand" "r,*f")))] + "" + "@ + xor %0 = %2, %1 + fxor %0 = %2, %1" + [(set_attr "itanium_class" "ilog,fmisc")]) + +(define_insn "neg<mode>2" + [(set (match_operand:VECINT 0 "gr_register_operand" "=r") + (neg:VECINT (match_operand:VECINT 1 "gr_register_operand" "r")))] + "" + "psub<vecsize> %0 = r0, %1" + [(set_attr "itanium_class" "mmalua")]) + +(define_insn "add<mode>3" + [(set (match_operand:VECINT 0 "gr_register_operand" "=r") + (plus:VECINT (match_operand:VECINT 1 "gr_register_operand" "r") + (match_operand:VECINT 2 "gr_register_operand" "r")))] + "" + "padd<vecsize> %0 = %1, %2" + [(set_attr "itanium_class" "mmalua")]) + +(define_insn "*ssadd<mode>3" + [(set (match_operand:VECINT12 0 "gr_register_operand" "=r") + (ss_plus:VECINT12 + (match_operand:VECINT12 1 "gr_register_operand" "r") + (match_operand:VECINT12 2 "gr_register_operand" "r")))] + "" + "padd<vecsize>.sss %0 = %1, %2" + [(set_attr "itanium_class" "mmalua")]) + +(define_insn "*usadd<mode>3" + [(set (match_operand:VECINT12 0 "gr_register_operand" 
"=r") + (us_plus:VECINT12 + (match_operand:VECINT12 1 "gr_register_operand" "r") + (match_operand:VECINT12 2 "gr_register_operand" "r")))] + "" + "padd<vecsize>.uuu %0 = %1, %2" + [(set_attr "itanium_class" "mmalua")]) + +(define_insn "sub<mode>3" + [(set (match_operand:VECINT 0 "gr_register_operand" "=r") + (minus:VECINT (match_operand:VECINT 1 "gr_register_operand" "r") + (match_operand:VECINT 2 "gr_register_operand" "r")))] + "" + "psub<vecsize> %0 = %1, %2" + [(set_attr "itanium_class" "mmalua")]) + +(define_insn "*sssub<mode>3" + [(set (match_operand:VECINT12 0 "gr_register_operand" "=r") + (ss_minus:VECINT12 + (match_operand:VECINT12 1 "gr_register_operand" "r") + (match_operand:VECINT12 2 "gr_register_operand" "r")))] + "" + "psub<vecsize>.sss %0 = %1, %2" + [(set_attr "itanium_class" "mmalua")]) + +(define_insn "*ussub<mode>3" + [(set (match_operand:VECINT12 0 "gr_register_operand" "=r") + (us_minus:VECINT12 + (match_operand:VECINT12 1 "gr_register_operand" "r") + (match_operand:VECINT12 2 "gr_register_operand" "r")))] + "" + "psub<vecsize>.uuu %0 = %1, %2" + [(set_attr "itanium_class" "mmalua")]) + +(define_expand "mulv8qi3" + [(set (match_operand:V8QI 0 "gr_register_operand" "") + (mult:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") + (match_operand:V8QI 2 "gr_register_operand" "r")))] + "" +{ + rtx l1, h1, l2, h2, lm, hm, lz, hz; + + l1 = gen_reg_rtx (V4HImode); + h1 = gen_reg_rtx (V4HImode); + l2 = gen_reg_rtx (V4HImode); + h2 = gen_reg_rtx (V4HImode); + + /* Zero-extend the QImode elements into two words of HImode elements. */ + emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1), + operands[1], CONST0_RTX (V8QImode))); + emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2), + operands[2], CONST0_RTX (V8QImode))); + emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1), + operands[1], CONST0_RTX (V8QImode))); + emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2), + operands[2], CONST0_RTX (V8QImode))); + + /* Multiply. */ + lm = gen_reg_rtx (V4HImode); + hm = gen_reg_rtx (V4HImode); + emit_insn (gen_mulv4hi3 (lm, l1, l2)); + emit_insn (gen_mulv4hi3 (hm, h1, h2)); + + /* Zap the high order bytes of the HImode elements. There are several + ways that this could be done. On Itanium2, there's 1 cycle latency + moving between the ALU units and the PALU units, so using AND would + be 3 cycles latency into the eventual pack insn, whereas using MIX + is only 2 cycles. */ + lz = gen_reg_rtx (V4HImode); + hz = gen_reg_rtx (V4HImode); + emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz), + gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode))); + emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz), + gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode))); + + /* Repack the HImode elements as QImode elements. 
*/ + emit_insn (gen_pack2_sss (operands[0], lz, hz)); + DONE; +}) + +(define_insn "mulv4hi3" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (mult:V4HI (match_operand:V4HI 1 "gr_register_operand" "r") + (match_operand:V4HI 2 "gr_register_operand" "r")))] + "" + "pmpyshr2 %0 = %1, %2, 0" + [(set_attr "itanium_class" "mmalua")]) + +(define_expand "umax<mode>3" + [(set (match_operand:VECINT 0 "gr_register_operand" "") + (smax:VECINT (match_operand:VECINT 1 "gr_register_operand" "") + (match_operand:VECINT 2 "gr_register_operand" "")))] + "" +{ + if (ia64_expand_vecint_minmax (UMAX, <MODE>mode, operands)) + DONE; +}) + +(define_expand "smax<mode>3" + [(set (match_operand:VECINT 0 "gr_register_operand" "") + (smax:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "") + (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))] + "" +{ + if (ia64_expand_vecint_minmax (SMAX, <MODE>mode, operands)) + DONE; +}) + +(define_expand "umin<mode>3" + [(set (match_operand:VECINT 0 "gr_register_operand" "") + (umin:VECINT (match_operand:VECINT 1 "gr_register_operand" "") + (match_operand:VECINT 2 "gr_register_operand" "")))] + "" +{ + if (ia64_expand_vecint_minmax (UMIN, <MODE>mode, operands)) + DONE; +}) + +(define_expand "smin<mode>3" + [(set (match_operand:VECINT 0 "gr_register_operand" "") + (smin:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "") + (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))] + "" +{ + if (ia64_expand_vecint_minmax (SMIN, <MODE>mode, operands)) + DONE; +}) + +(define_insn "*umaxv8qi3" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (umax:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") + (match_operand:V8QI 2 "gr_register_operand" "r")))] + "" + "pmax1.u %0 = %1, %2" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*smaxv4hi3" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (smax:V4HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")))] + "" + "pmax2 %0 = %r1, %r2" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*uminv8qi3" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (umin:V8QI (match_operand:V8QI 1 "gr_register_operand" "r") + (match_operand:V8QI 2 "gr_register_operand" "r")))] + "" + "pmin1.u %0 = %1, %2" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*sminv4hi3" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (smin:V4HI (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")))] + "" + "pmin2 %0 = %r1, %r2" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "ashl<mode>3" + [(set (match_operand:VECINT24 0 "gr_register_operand" "=r") + (ashift:VECINT24 + (match_operand:VECINT24 1 "gr_register_operand" "r") + (match_operand:VECINT24 2 "gr_reg_or_5bit_operand" "rn")))] + "" + "pshl<vecsize> %0 = %1, %2" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "ashr<mode>3" + [(set (match_operand:VECINT24 0 "gr_register_operand" "=r") + (ashiftrt:VECINT24 + (match_operand:VECINT24 1 "gr_register_operand" "r") + (match_operand:VECINT24 2 "gr_reg_or_5bit_operand" "rn")))] + "" + "pshr<vecsize> %0 = %1, %2" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "lshr<mode>3" + [(set (match_operand:VECINT24 0 "gr_register_operand" "=r") + (lshiftrt:VECINT24 + (match_operand:VECINT24 1 "gr_register_operand" "r") + (match_operand:VECINT24 2 "gr_reg_or_5bit_operand" "rn")))] + "" + "pshr<vecsize>.u %0 = %1, %2" + [(set_attr "itanium_class" "mmshf")]) + +(define_expand "vcond<mode>" + 
[(set (match_operand:VECINT 0 "gr_register_operand" "") + (if_then_else:VECINT + (match_operator 3 "" + [(match_operand:VECINT 4 "gr_reg_or_0_operand" "") + (match_operand:VECINT 5 "gr_reg_or_0_operand" "")]) + (match_operand:VECINT 1 "gr_reg_or_0_operand" "") + (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))] + "" +{ + ia64_expand_vecint_cmov (operands); + DONE; +}) + +(define_expand "vcondu<mode>" + [(set (match_operand:VECINT 0 "gr_register_operand" "") + (if_then_else:VECINT + (match_operator 3 "" + [(match_operand:VECINT 4 "gr_reg_or_0_operand" "") + (match_operand:VECINT 5 "gr_reg_or_0_operand" "")]) + (match_operand:VECINT 1 "gr_reg_or_0_operand" "") + (match_operand:VECINT 2 "gr_reg_or_0_operand" "")))] + "" +{ + ia64_expand_vecint_cmov (operands); + DONE; +}) + +(define_insn "*cmpeq_<mode>" + [(set (match_operand:VECINT 0 "gr_register_operand" "=r") + (eq:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "rU") + (match_operand:VECINT 2 "gr_reg_or_0_operand" "rU")))] + "" + "pcmp<vecsize>.eq %0 = %r1, %r2" + [(set_attr "itanium_class" "mmalua")]) + +(define_insn "*cmpgt_<mode>" + [(set (match_operand:VECINT 0 "gr_register_operand" "=r") + (gt:VECINT (match_operand:VECINT 1 "gr_reg_or_0_operand" "rU") + (match_operand:VECINT 2 "gr_reg_or_0_operand" "rU")))] + "" + "pcmp<vecsize>.gt %0 = %r1, %r2" + [(set_attr "itanium_class" "mmalua")]) + +(define_insn "pack2_sss" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_concat:V8QI + (ss_truncate:V4QI + (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")) + (ss_truncate:V4QI + (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))] + "" + "pack2.sss %0 = %r1, %r2" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*pack2_uss" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_concat:V8QI + (us_truncate:V4QI + (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")) + (us_truncate:V4QI + (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))] + "" + "pack2.uss %0 = %r1, %r2" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "pack4_sss" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (vec_concat:V4HI + (ss_truncate:V2HI + (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")) + (ss_truncate:V2HI + (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))))] + "" + "pack4.sss %0 = %r1, %r2" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "unpack1_l" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3) + (const_int 8) + (const_int 9) + (const_int 10) + (const_int 11)])))] + "" + "unpack1.l %0 = %r2, %r1" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "unpack1_h" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 4) + (const_int 5) + (const_int 6) + (const_int 7) + (const_int 12) + (const_int 13) + (const_int 14) + (const_int 15)])))] + "" + "unpack1.h %0 = %r2, %r1" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "mix1_r" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 0) + (const_int 8) + (const_int 2) + (const_int 
10) + (const_int 4) + (const_int 12) + (const_int 6) + (const_int 14)])))] + "" + "mix1.r %0 = %r2, %r1" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*mix1_l" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_select:V8QI + (vec_concat:V16QI + (match_operand:V8QI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V8QI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 1) + (const_int 9) + (const_int 3) + (const_int 11) + (const_int 5) + (const_int 13) + (const_int 7) + (const_int 15)])))] + "" + "mix1.l %0 = %r2, %r1" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*mux1_rev" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_select:V8QI + (match_operand:V8QI 1 "gr_register_operand" "r") + (parallel [(const_int 7) + (const_int 6) + (const_int 5) + (const_int 4) + (const_int 3) + (const_int 2) + (const_int 1) + (const_int 0)])))] + "" + "mux1 %0 = %1, @rev" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*mux1_mix" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_select:V8QI + (match_operand:V8QI 1 "gr_register_operand" "r") + (parallel [(const_int 0) + (const_int 4) + (const_int 2) + (const_int 6) + (const_int 1) + (const_int 5) + (const_int 3) + (const_int 7)])))] + "" + "mux1 %0 = %1, @mix" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*mux1_shuf" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_select:V8QI + (match_operand:V8QI 1 "gr_register_operand" "r") + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5) + (const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])))] + "" + "mux1 %0 = %1, @shuf" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*mux1_alt" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_select:V8QI + (match_operand:V8QI 1 "gr_register_operand" "r") + (parallel [(const_int 0) + (const_int 2) + (const_int 4) + (const_int 6) + (const_int 1) + (const_int 3) + (const_int 5) + (const_int 7)])))] + "" + "mux1 %0 = %1, @alt" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*mux1_brcst_v8qi" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_select:V8QI + (match_operand:V8QI 1 "gr_register_operand" "r") + (parallel [(const_int 0) + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0) + (const_int 0)])))] + "" + "mux1 %0 = %1, @brcst" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*mux1_brcst_qi" + [(set (match_operand:V8QI 0 "gr_register_operand" "=r") + (vec_duplicate:V8QI + (match_operand:QI 1 "gr_register_operand" "r")))] + "" + "mux1 %0 = %1, @brcst" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "unpack2_l" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 0) + (const_int 4) + (const_int 1) + (const_int 5)])))] + "" + "unpack2.l %0 = %r2, %r1" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "unpack2_h" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 2) + (const_int 6) + (const_int 3) + (const_int 7)])))] + "" + "unpack2.h %0 = %r2, %r1" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*mix2_r" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (vec_select:V4HI + 
(vec_concat:V8HI + (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 0) + (const_int 4) + (const_int 2) + (const_int 6)])))] + "" + "mix2.r %0 = %r2, %r1" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*mix2_l" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (vec_select:V4HI + (vec_concat:V8HI + (match_operand:V4HI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V4HI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 1) + (const_int 5) + (const_int 3) + (const_int 7)])))] + "" + "mix2.l %0 = %r2, %r1" + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*mux2" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (vec_select:V4HI + (match_operand:V4HI 1 "gr_register_operand" "r") + (parallel [(match_operand 2 "const_int_2bit_operand" "") + (match_operand 3 "const_int_2bit_operand" "") + (match_operand 4 "const_int_2bit_operand" "") + (match_operand 5 "const_int_2bit_operand" "")])))] + "" +{ + int mask; + mask = INTVAL (operands[2]); + mask |= INTVAL (operands[3]) << 2; + mask |= INTVAL (operands[4]) << 4; + mask |= INTVAL (operands[5]) << 6; + operands[2] = GEN_INT (mask); + return "%,mux2 %0 = %1, %2"; +} + [(set_attr "itanium_class" "mmshf")]) + +(define_insn "*mux2_brcst_hi" + [(set (match_operand:V4HI 0 "gr_register_operand" "=r") + (vec_duplicate:V4HI + (match_operand:HI 1 "gr_register_operand" "r")))] + "" + "mux2 %0 = %1, 0" + [(set_attr "itanium_class" "mmshf")]) + +;; Note that mix4.r performs the exact same operation. +(define_insn "*unpack4_l" + [(set (match_operand:V2SI 0 "gr_register_operand" "=r") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 0) + (const_int 2)])))] + "" + "unpack4.l %0 = %r2, %r1" + [(set_attr "itanium_class" "mmshf")]) + +;; Note that mix4.l performs the exact same operation. 
+(define_insn "*unpack4_h" + [(set (match_operand:V2SI 0 "gr_register_operand" "=r") + (vec_select:V2SI + (vec_concat:V4SI + (match_operand:V2SI 1 "gr_reg_or_0_operand" "rU") + (match_operand:V2SI 2 "gr_reg_or_0_operand" "rU")) + (parallel [(const_int 1) + (const_int 3)])))] + "" + "unpack4.h %0 = %r2, %r1" + [(set_attr "itanium_class" "mmshf")]) + +(define_expand "vec_initv2si" + [(match_operand:V2SF 0 "gr_register_operand" "") + (match_operand 1 "" "")] + "" +{ + rtx op1 = XVECEXP (operands[1], 0, 0); + rtx op2 = XVECEXP (operands[1], 0, 1); + rtx x; + + if (GET_CODE (op1) == CONST_INT && GET_CODE (op2) == CONST_INT) + { + x = gen_rtx_CONST_VECTOR (V2SImode, XVEC (operands[1], 0)); + emit_move_insn (operands[0], x); + DONE; + } + + if (!gr_reg_or_0_operand (op1, SImode)) + op1 = force_reg (SImode, op1); + if (!gr_reg_or_0_operand (op2, SImode)) + op2 = force_reg (SImode, op2); + + x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*vecinit_v2si" + [(set (match_operand:V2SI 0 "gr_register_operand" "=r") + (vec_concat:V2SI + (match_operand:SI 1 "gr_reg_or_0_operand" "rO") + (match_operand:SI 2 "gr_reg_or_0_operand" "rO")))] + "" + "unpack4.l %0 = %r2, %r1" + [(set_attr "itanium_class" "mmshf")]) + +;; Missing operations +;; padd.uus +;; pavg +;; pavgsub +;; pmpy +;; pmpyshr, general form +;; psad +;; pshladd +;; pshradd +;; psub.uus +;; vec_set<mode> +;; vec_extract<mode> +;; vec_init<mode> + +;; Floating point vector operations + +(define_expand "movv2sf" + [(set (match_operand:V2SF 0 "general_operand" "") + (match_operand:V2SF 1 "general_operand" ""))] + "" +{ + rtx op1 = ia64_expand_move (operands[0], operands[1]); + if (!op1) + DONE; + operands[1] = op1; +}) + +(define_insn "*movv2sf_internal" + [(set (match_operand:V2SF 0 "destination_operand" + "=f,f,f,Q,*r ,*r,*r,*r,m ,f ,*r") + (match_operand:V2SF 1 "move_operand" + "fU,Y,Q,f,U*r,W ,i ,m ,*r,*r,f "))] + "ia64_move_ok (operands[0], operands[1])" +{ + static const char * const alt[] = { + "%,mov %0 = %F1", + "%,fpack %0 = %F2, %F1", + "%,ldf8 %0 = %1%P1", + "%,stf8 %0 = %1%P0", + "%,mov %0 = %r1", + "%,addl %0 = %v1, r0", + "%,movl %0 = %v1", + "%,ld8%O1 %0 = %1%P1", + "%,st8%Q0 %0 = %r1%P0", + "%,setf.sig %0 = %1", + "%,getf.sig %0 = %1" + }; + + if (which_alternative == 1) + { + operands[2] = XVECEXP (operands[1], 0, 1); + operands[1] = XVECEXP (operands[1], 0, 0); + } + + return alt[which_alternative]; +} + [(set_attr "itanium_class" "fmisc,fmisc,fld,stf,ialu,ialu,long_i,ld,st,tofr,frfr")]) + +(define_insn "absv2sf2" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (abs:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")))] + "" + "fpabs %0 = %1" + [(set_attr "itanium_class" "fmisc")]) + +(define_insn "negv2sf2" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (neg:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")))] + "" + "fpneg %0 = %1" + [(set_attr "itanium_class" "fmisc")]) + +(define_insn "*negabsv2sf2" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (neg:V2SF + (abs:V2SF (match_operand:V2SF 1 "fr_register_operand" "f"))))] + "" + "fpnegabs %0 = %1" + [(set_attr "itanium_class" "fmisc")]) + +(define_expand "addv2sf3" + [(set (match_operand:V2SF 0 "fr_register_operand" "") + (plus:V2SF + (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "") + (match_dup 3)) + (match_operand:V2SF 2 "fr_register_operand" "")))] + "" +{ + rtvec v = gen_rtvec (2, CONST1_RTX (SFmode), CONST1_RTX (SFmode)); + operands[3] = 
force_reg (V2SFmode, gen_rtx_CONST_VECTOR (V2SFmode, v)); +}) + +(define_expand "subv2sf3" + [(set (match_operand:V2SF 0 "fr_register_operand" "") + (minus:V2SF + (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "") + (match_dup 3)) + (match_operand:V2SF 2 "fr_register_operand" "")))] + "" +{ + rtvec v = gen_rtvec (2, CONST1_RTX (SFmode), CONST1_RTX (SFmode)); + operands[3] = force_reg (V2SFmode, gen_rtx_CONST_VECTOR (V2SFmode, v)); +}) + +(define_insn "mulv2sf3" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") + (match_operand:V2SF 2 "fr_register_operand" "f")))] + "" + "fpmpy %0 = %1, %2" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*fpma" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (plus:V2SF + (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") + (match_operand:V2SF 2 "fr_register_operand" "f")) + (match_operand:V2SF 3 "fr_register_operand" "f")))] + "" + "fpma %0 = %1, %2, %3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*fpms" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (minus:V2SF + (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") + (match_operand:V2SF 2 "fr_register_operand" "f")) + (match_operand:V2SF 3 "fr_register_operand" "f")))] + "" + "fpms %0 = %1, %2, %3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*fpnmpy" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (neg:V2SF + (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") + (match_operand:V2SF 2 "fr_register_operand" "f"))))] + "" + "fpnmpy %0 = %1, %2" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "*fpnma" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (plus:V2SF + (neg:V2SF + (mult:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") + (match_operand:V2SF 2 "fr_register_operand" "f"))) + (match_operand:V2SF 3 "fr_register_operand" "f")))] + "" + "fpnma %0 = %1, %2, %3" + [(set_attr "itanium_class" "fmac")]) + +(define_insn "smaxv2sf2" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (smax:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") + (match_operand:V2SF 2 "fr_register_operand" "f")))] + "" + "fpmax %0 = %1, %2" + [(set_attr "itanium_class" "fmisc")]) + +(define_insn "sminv2sf2" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (smin:V2SF (match_operand:V2SF 1 "fr_register_operand" "f") + (match_operand:V2SF 2 "fr_register_operand" "f")))] + "" + "fpmin %0 = %1, %2" + [(set_attr "itanium_class" "fmisc")]) + +(define_expand "vcondv2sf" + [(set (match_operand:V2SF 0 "fr_register_operand" "") + (if_then_else:V2SF + (match_operator 3 "" + [(match_operand:V2SF 4 "fr_reg_or_0_operand" "") + (match_operand:V2SF 5 "fr_reg_or_0_operand" "")]) + (match_operand:V2SF 1 "fr_reg_or_0_operand" "") + (match_operand:V2SF 2 "fr_reg_or_0_operand" "")))] + "" +{ + rtx x, cmp; + + PUT_MODE (operands[3], V2SFmode); + switch (GET_CODE (operands[3])) + { + case EQ: + case NE: + case LT: + case LE: + case UNORDERED: + case ORDERED: + break; + + case GT: + case GE: + x = XEXP (operands[3], 0); + XEXP (operands[3], 0) = XEXP (operands[3], 1); + XEXP (operands[3], 1) = x; + PUT_CODE (operands[3], swap_condition (GET_CODE (operands[3]))); + break; + + default: + abort (); + } + + cmp = gen_reg_rtx (V2SFmode); + emit_insn (gen_rtx_SET (VOIDmode, cmp, operands[3])); + + x = gen_rtx_IF_THEN_ELSE (V2SFmode, cmp, operands[1], operands[2]); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*fpcmp" 
+ [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (if_then_else:V2SF + (match_operand:V2SF 1 "fr_register_operand" "f") + (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU") + (match_operand:V2SF 3 "fr_reg_or_0_operand" "fU")))] + "" + "fselect %0 = %2, %3, %1" + [(set_attr "itanium_class" "fmisc")]) + +(define_expand "vec_initv2sf" + [(match_operand:V2SF 0 "fr_register_operand" "") + (match_operand 1 "" "")] + "" +{ + rtx op1 = XVECEXP (operands[1], 0, 0); + rtx op2 = XVECEXP (operands[1], 0, 1); + rtx x; + + if (GET_CODE (op1) == CONST_DOUBLE && GET_CODE (op2) == CONST_DOUBLE) + { + x = gen_rtx_CONST_VECTOR (V2SFmode, XVEC (operands[1], 0)); + emit_move_insn (operands[0], x); + DONE; + } + + if (!fr_reg_or_fp01_operand (op1, SFmode)) + op1 = force_reg (SFmode, op1); + if (!fr_reg_or_fp01_operand (op2, SFmode)) + op2 = force_reg (SFmode, op2); + + x = gen_rtx_VEC_CONCAT (V2SFmode, op1, op2); + emit_insn (gen_rtx_SET (VOIDmode, operands[0], x)); + DONE; +}) + +(define_insn "*fpack_sfsf" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (vec_concat:V2SF + (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG") + (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))] + "" + "fpack %0 = %F2, %F1" + [(set_attr "itanium_class" "fmisc")]) + +(define_insn "*fpack_sfxf" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (vec_concat:V2SF + (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG") + (float_truncate:SF + (match_operand 2 "fr_register_operand" "f"))))] + "GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode" + "fpack %0 = %2, %F1" + [(set_attr "itanium_class" "fmisc")]) + +(define_insn "*fpack_xfsf" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (vec_concat:V2SF + (float_truncate:SF + (match_operand 1 "fr_register_operand" "f")) + (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))] + "GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode" + "fpack %0 = %F2, %1" + [(set_attr "itanium_class" "fmisc")]) + +(define_insn "*fpack_xfxf" + [(set (match_operand:V2SF 0 "fr_register_operand" "=f") + (vec_concat:V2SF + (float_truncate:SF + (match_operand 1 "fr_register_operand" "f")) + (float_truncate:SF + (match_operand 2 "fr_register_operand" "f"))))] + "(GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode) + && (GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode)" + "fpack %0 = %2, %1" + [(set_attr "itanium_class" "fmisc")]) + +;; Missing operations +;; fprcpa +;; fpsqrta +;; vec_setv2sf +;; vec_extractv2sf diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8c4ea6a..56e4aed 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,4 +1,14 @@ +2005-01-03 Richard Henderson <rth@redhat.com> + + * gcc.dg/vect/vect.exp: Enable for ia64. + * lib/target-supports.exp (check_effective_target_vect_int): Likewise. + (check_effective_target_vect_float): Likewise. + (check_effective_target_vect_no_align): Likewise. + * gcc.dg/vect/vect-30.c: XFAIL for vect_no_align. + * gcc.dg/vect/vect-8.c: Likewise. + 2005-01-03 Uros Bizjak <uros@kss-loka.si> + PR target/19235 * gcc.dg/pr19236-1.c: New test case. 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-30.c b/gcc/testsuite/gcc.dg/vect/vect-30.c index 056d689..c6f03ed 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-30.c +++ b/gcc/testsuite/gcc.dg/vect/vect-30.c @@ -59,4 +59,6 @@ int main (void) return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* Need misalignment support, or cgraph to delay emitting the arrays until + after vectorization can force-align them. */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-8.c b/gcc/testsuite/gcc.dg/vect/vect-8.c index 960eb34..7712a02 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-8.c +++ b/gcc/testsuite/gcc.dg/vect/vect-8.c @@ -34,4 +34,6 @@ int main (void) return main1 (N); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* Need misalignment support, or cgraph to delay emitting the arrays until + after vectorization can force-align them. */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp b/gcc/testsuite/gcc.dg/vect/vect.exp index 14e4597..94fd56c 100644 --- a/gcc/testsuite/gcc.dg/vect/vect.exp +++ b/gcc/testsuite/gcc.dg/vect/vect.exp @@ -65,6 +65,8 @@ if [istarget "powerpc*-*-*"] { } else { set dg-do-what-default compile } +} elseif [istarget "ia64-*-*"] { + set dg-do-what-default run } else { return } diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 19d95af..defb4d4 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -455,7 +455,8 @@ proc check_effective_target_vect_int { } { || [istarget powerpc*-*-*] || [istarget x86_64-*-*] || [istarget sparc*-*-*] - || [istarget alpha*-*-*] } { + || [istarget alpha*-*-*] + || [istarget ia64-*-*] } { set et_vect_int_saved 1 } } @@ -496,7 +497,8 @@ proc check_effective_target_vect_float { } { if { [istarget i?86-*-*] || [istarget powerpc*-*-*] || [istarget mipsisa64*-*-*] - || [istarget x86_64-*-*] } { + || [istarget x86_64-*-*] + || [istarget ia64-*-*] } { set et_vect_float_saved 1 } } @@ -583,7 +585,8 @@ proc check_effective_target_vect_no_align { } { } else { set et_vect_no_align_saved 0 if { [istarget mipsisa64*-*-*] - || [istarget sparc*-*-*] } { + || [istarget sparc*-*-*] + || [istarget ia64-*-*] } { set et_vect_no_align_saved 1 } } |
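
For reference, the conditional-move lowering that ia64_expand_vecint_cmov emits in the diff above is a bitwise select: the parallel compare (pcmp) produces an all-ones or all-zeros mask per element, so the IF_THEN_ELSE reduces to AND/ANDCM/IOR. A minimal scalar sketch of that per-element selection (illustrative only, not code from the patch):

/* Sketch, not part of the patch: the per-element select generated by
   ia64_expand_vecint_cmov -- (cmp AND t) IOR ((NOT cmp) AND f), where
   cmp is the all-ones/all-zeros result of the vector compare.  */
static inline unsigned long
vec_select (unsigned long cmp, unsigned long t, unsigned long f)
{
  return (cmp & t) | (~cmp & f);
}

This is also how the unsupported min/max cases reach hardware: ia64_expand_vecint_minmax rewrites them as a compare plus this select when no direct pmin/pmax form exists.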