48 files changed, 4215 insertions, 164 deletions
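This patch wires the mfloat8_t (mf8) Advanced SIMD intrinsics into the pragma-based builtin framework: new load, store and lane signatures, immediate-lane checking, RTL expansion and gimple folding, plus the aarch64-simd-pragma-builtins.def entries and testsuite support. As a rough illustration of what the new entries enable, the sketch below uses only intrinsics that appear in the .def changes and assumes a compiler carrying this patch together with the usual ACLE type names (mfloat8_t, mfloat8x16_t); the function name is just for illustration:

  #include <arm_neon.h>

  /* Exercise a few of the new mfloat8 intrinsics (vld1q_mf8, vdupq_laneq_mf8,
     vextq_mf8, vbslq_mf8, vst1q_mf8).  Lane and ext indices must be
     compile-time constants, as enforced by the new immediate checks.  */
  void
  mf8_example (const mfloat8_t *in, mfloat8_t *out, uint8x16_t mask)
  {
    mfloat8x16_t a = vld1q_mf8 (in);            /* UNSPEC_LD1 */
    mfloat8x16_t b = vdupq_laneq_mf8 (a, 3);    /* UNSPEC_DUP_LANE */
    mfloat8x16_t c = vextq_mf8 (a, b, 5);       /* UNSPEC_EXT */
    mfloat8x16_t d = vbslq_mf8 (mask, a, c);    /* UNSPEC_BSL */
    vst1q_mf8 (out, d);                         /* UNSPEC_ST1 */
  }
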
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 9d1d026..6b3e220 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -702,6 +702,7 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { VREINTERPRETQ_BUILTINS #define AARCH64_SIMD_VGET_LOW_BUILTINS \ + VGET_LOW_BUILTIN(mf8) \ VGET_LOW_BUILTIN(f16) \ VGET_LOW_BUILTIN(f32) \ VGET_LOW_BUILTIN(f64) \ @@ -719,6 +720,7 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = { VGET_LOW_BUILTIN(bf16) #define AARCH64_SIMD_VGET_HIGH_BUILTINS \ + VGET_HIGH_BUILTIN(mf8) \ VGET_HIGH_BUILTIN(f16) \ VGET_HIGH_BUILTIN(f32) \ VGET_HIGH_BUILTIN(f64) \ @@ -1096,6 +1098,8 @@ aarch64_int_or_fp_type (machine_mode mode, switch (mode) { case E_QImode: + if (qualifiers & qualifier_modal_float) + return aarch64_mfp8_type_node; return QUAL_TYPE (QI); case E_HImode: return QUAL_TYPE (HI); @@ -1333,6 +1337,16 @@ aarch64_init_simd_builtin_scalar_types (void) "__builtin_aarch64_simd_udi"); } +/* If MODE is a single Advanced SIMD vector, return the number of lanes in the + vector. If MODE is an Advanced SIMD structure/tuple mode, return the number + of lanes in a single vector. */ +static unsigned int +aarch64_num_lanes (machine_mode mode) +{ + unsigned int nregs = targetm.hard_regno_nregs (V0_REGNUM, mode); + return exact_div (GET_MODE_NUNITS (mode), nregs).to_constant (); +} + /* Return a set of FLAG_* flags derived from FLAGS that describe what a function with result MODE could do, taking the command-line flags into account. */ @@ -1620,9 +1634,15 @@ enum class aarch64_builtin_signatures { binary, binary_lane, + binary_two_lanes, + load, + load_lane, + store, + store_lane, ternary, ternary_lane, unary, + unary_lane, }; namespace { @@ -1631,22 +1651,27 @@ namespace { function argument type or return type. 
*/ struct simd_type { tree type () const { return aarch64_simd_builtin_type (mode, qualifiers); } + unsigned nunits () const { return GET_MODE_NUNITS (mode).to_constant (); } machine_mode mode; aarch64_type_qualifiers qualifiers; }; namespace simd_types { -#define VARIANTS(BASE, D, Q, MODE, QUALIFIERS) \ - constexpr simd_type BASE { V##D##MODE, QUALIFIERS }; \ - constexpr simd_type BASE##x2 { V2x##D##MODE, QUALIFIERS }; \ - constexpr simd_type BASE##x3 { V3x##D##MODE, QUALIFIERS }; \ - constexpr simd_type BASE##x4 { V4x##D##MODE, QUALIFIERS }; \ - constexpr simd_type BASE##q { V##Q##MODE, QUALIFIERS }; \ - constexpr simd_type BASE##qx2 { V2x##Q##MODE, QUALIFIERS }; \ - constexpr simd_type BASE##qx3 { V3x##Q##MODE, QUALIFIERS }; \ - constexpr simd_type BASE##qx4 { V4x##Q##MODE, QUALIFIERS }; \ - constexpr simd_type BASE##_scalar { MODE, QUALIFIERS }; +#define VARIANTS(BASE, D, Q, MODE, QUALIFIERS) \ + constexpr simd_type BASE { V##D##MODE, QUALIFIERS }; \ + constexpr simd_type BASE##x2 { V2x##D##MODE, QUALIFIERS }; \ + constexpr simd_type BASE##x3 { V3x##D##MODE, QUALIFIERS }; \ + constexpr simd_type BASE##x4 { V4x##D##MODE, QUALIFIERS }; \ + constexpr simd_type BASE##q { V##Q##MODE, QUALIFIERS }; \ + constexpr simd_type BASE##qx2 { V2x##Q##MODE, QUALIFIERS }; \ + constexpr simd_type BASE##qx3 { V3x##Q##MODE, QUALIFIERS }; \ + constexpr simd_type BASE##qx4 { V4x##Q##MODE, QUALIFIERS }; \ + constexpr simd_type BASE##_scalar { MODE, QUALIFIERS }; \ + constexpr simd_type BASE##_scalar_ptr \ + { MODE, aarch64_type_qualifiers (QUALIFIERS | qualifier_pointer) }; \ + constexpr simd_type BASE##_scalar_const_ptr \ + { MODE, aarch64_type_qualifiers (QUALIFIERS | qualifier_const_pointer) }; VARIANTS (mf8, 8, 16, QImode, qualifier_modal_float) VARIANTS (p8, 8, 16, QImode, qualifier_poly) @@ -1707,27 +1732,50 @@ aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data) { case aarch64_builtin_signatures::binary: case aarch64_builtin_signatures::binary_lane: + case aarch64_builtin_signatures::load_lane: return_type = builtin_data.types[0].type (); for (int i = 1; i <= 2; ++i) arg_types.quick_push (builtin_data.types[i].type ()); break; - case aarch64_builtin_signatures::ternary: - case aarch64_builtin_signatures::ternary_lane: + case aarch64_builtin_signatures::binary_two_lanes: + /* binary_two_lanes has to be handled as a special case because indices + interleave vectors. 
*/ return_type = builtin_data.types[0].type (); - for (int i = 1; i <= 3; ++i) - arg_types.quick_push (builtin_data.types[i].type ()); + arg_types.quick_push (builtin_data.types[1].type ()); + arg_types.quick_push (integer_type_node); + arg_types.quick_push (builtin_data.types[2].type ()); + arg_types.quick_push (integer_type_node); break; + case aarch64_builtin_signatures::load: case aarch64_builtin_signatures::unary: + case aarch64_builtin_signatures::unary_lane: return_type = builtin_data.types[0].type (); arg_types.quick_push (builtin_data.types[1].type ()); break; + + case aarch64_builtin_signatures::store: + case aarch64_builtin_signatures::store_lane: + return_type = void_type_node; + for (int i = 0; i <= 1; ++i) + arg_types.quick_push (builtin_data.types[i].type ()); + break; + + case aarch64_builtin_signatures::ternary: + case aarch64_builtin_signatures::ternary_lane: + return_type = builtin_data.types[0].type (); + for (int i = 1; i <= 3; ++i) + arg_types.quick_push (builtin_data.types[i].type ()); + break; } switch (builtin_data.signature) { case aarch64_builtin_signatures::binary_lane: + case aarch64_builtin_signatures::load_lane: + case aarch64_builtin_signatures::store_lane: case aarch64_builtin_signatures::ternary_lane: + case aarch64_builtin_signatures::unary_lane: arg_types.quick_push (integer_type_node); break; @@ -2654,8 +2702,9 @@ require_immediate_lane_index (unsigned int lane_argno, unsigned vec_argno, { auto vec_mode = TYPE_MODE (TREE_TYPE (args[vec_argno])); auto elt_mode = TYPE_MODE (TREE_TYPE (args[elt_argno])); - auto nunits = exact_div (GET_MODE_SIZE (vec_mode), - GET_MODE_UNIT_SIZE (elt_mode)).to_constant (); + auto nunits = (aarch64_num_lanes (vec_mode) + * GET_MODE_UNIT_SIZE (vec_mode) + / GET_MODE_UNIT_SIZE (elt_mode)); return require_immediate_range (lane_argno, 0, nunits - 1); } @@ -2674,8 +2723,25 @@ require_immediate_lane_index (unsigned int lane_argno, unsigned int vec_argno) bool aarch64_pragma_builtins_checker::check () { + auto &types = builtin_data.types; + switch (builtin_data.unspec) { + case UNSPEC_DUP_LANE: + case UNSPEC_GET_LANE: + case UNSPEC_LD2_LANE: + case UNSPEC_LD3_LANE: + case UNSPEC_LD4_LANE: + case UNSPEC_SET_LANE: + case UNSPEC_ST1_LANE: + case UNSPEC_ST2_LANE: + case UNSPEC_ST3_LANE: + case UNSPEC_ST4_LANE: + return require_immediate_lane_index (nargs - 1, nargs - 2); + + case UNSPEC_EXT: + return require_immediate_range (2, 0, types[2].nunits () - 1); + case UNSPEC_FDOT_LANE_FP8: return require_immediate_lane_index (nargs - 2, nargs - 3, 0); @@ -2695,11 +2761,8 @@ aarch64_pragma_builtins_checker::check () case UNSPEC_LUTI2: case UNSPEC_LUTI4: { - auto vector_to_index_mode = builtin_data.types[nargs - 1].mode; - int vector_to_index_nunits - = GET_MODE_NUNITS (vector_to_index_mode).to_constant (); - int output_mode_nunits - = GET_MODE_NUNITS (builtin_data.types[0].mode).to_constant (); + auto vector_to_index_nunits = types[nargs - 1].nunits (); + int output_mode_nunits = types[0].nunits (); int high; if (builtin_data.unspec == UNSPEC_LUTI2) @@ -2710,6 +2773,11 @@ aarch64_pragma_builtins_checker::check () return require_immediate_range (nargs - 1, 0, high); } + case UNSPEC_VEC_COPY: + /* & rather than && so that we report errors against both indices. 
*/ + return (require_immediate_lane_index (1, 0) + & require_immediate_lane_index (3, 2)); + default: return true; } @@ -3622,6 +3690,52 @@ aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target) return ops[0].value; } +/* Convert ptr_mode value OP to a Pmode value (for ILP32). */ +static void +aarch64_convert_address (expand_operand *op) +{ + op->value = convert_memory_address (Pmode, op->value); + op->mode = Pmode; +} + +/* Dereference the pointer in OP, turning it into a memory reference to + NELTS instances of MEM_MODE. */ +static void +aarch64_dereference_pointer (expand_operand *op, machine_mode mem_mode, + unsigned int nelts = 1) +{ + if (nelts == 1) + { + op->value = gen_rtx_MEM (mem_mode, op->value); + op->mode = mem_mode; + } + else + { + op->value = gen_rtx_MEM (BLKmode, op->value); + op->mode = BLKmode; + set_mem_size (op->value, GET_MODE_SIZE (mem_mode) * nelts); + } +} + +/* OP contains an integer index into a vector or tuple of mode VEC_MODE. + Convert OP from an architectural lane number to a GCC lane number. */ +static void +aarch64_canonicalize_lane (expand_operand *op, machine_mode vec_mode) +{ + auto nunits = aarch64_num_lanes (vec_mode); + op->value = gen_int_mode (ENDIAN_LANE_N (nunits, UINTVAL (op->value)), + SImode); +} + +/* OP contains an integer index into a vector or tuple of mode VEC_MODE. + Convert OP from an architectural lane number to a vec_merge mask. */ +static void +aarch64_convert_to_lane_mask (expand_operand *op, machine_mode vec_mode) +{ + auto nunits = aarch64_num_lanes (vec_mode); + create_integer_operand (op, 1 << ENDIAN_LANE_N (nunits, INTVAL (op->value))); +} + /* If OP is a 128-bit vector, convert it to the equivalent 64-bit vector. Do nothing otherwise. */ static void @@ -3634,6 +3748,56 @@ aarch64_convert_to_v64 (expand_operand *op) } } +/* If OP is a 64-bit (half-register) vector or a structure of 64-bit vectors, + pack its contents into the smallest associated full-register mode, + padding with zeros if necessary. Return true if padding was used. */ +static bool +aarch64_pack_into_v128s (expand_operand *op) +{ + bool padded = false; + unsigned int nregs = targetm.hard_regno_nregs (V0_REGNUM, op->mode); + + /* Do nothing if the operand is already a full-register mode. */ + if (known_eq (nregs * UNITS_PER_VREG, GET_MODE_SIZE (op->mode))) + return padded; + + auto elt_mode = GET_MODE_INNER (op->mode); + auto v64_mode = aarch64_v64_mode (elt_mode).require (); + auto v128_mode = aarch64_v128_mode (elt_mode).require (); + + auto new_mode = v128_mode; + if (nregs > 2) + new_mode = aarch64_advsimd_vector_array_mode (v128_mode, CEIL (nregs, 2)) + .require (); + + /* Get enough V64_MODE inputs to fill NEW_MDOE, which is made up of a + whole number of V128_MODEs. */ + auto_vec<rtx, 4> inputs; + for (unsigned int i = 0; i < nregs; ++i) + { + rtx input = simplify_gen_subreg (v64_mode, op->value, op->mode, + i * GET_MODE_SIZE (v64_mode)); + inputs.quick_push (input); + } + if (nregs & 1) + { + inputs.quick_push (CONST0_RTX (v64_mode)); + padded = true; + } + + /* Create a NEW_MODE register and build it up from individual V128_MODEs. 
*/ + op->mode = new_mode; + op->value = gen_reg_rtx (new_mode); + for (unsigned int i = 0; i < inputs.length (); i += 2) + { + rtx result = gen_rtx_SUBREG (v128_mode, op->value, + i * GET_MODE_SIZE (v64_mode)); + emit_insn (gen_aarch64_combine (v64_mode, result, + inputs[i], inputs[i + 1])); + } + return padded; +} + /* UNSPEC is a high unspec, indicated by "2" in mnemonics and "_high" in intrinsic names. Return the equivalent low unspec. */ static int @@ -3652,6 +3816,88 @@ aarch64_get_low_unspec (int unspec) } } +/* OPS contains the operands for one of the permute pair functions vtrn, + vuzp or vzip. Expand the call, given that PERMUTE1 is the unspec for + the first permute and PERMUTE2 is the unspec for the second permute. */ +static rtx +aarch64_expand_permute_pair (vec<expand_operand> &ops, int permute1, + int permute2) +{ + rtx op0 = force_reg (ops[1].mode, ops[1].value); + rtx op1 = force_reg (ops[2].mode, ops[2].value); + rtx target = gen_reg_rtx (ops[0].mode); + rtx target0 = gen_rtx_SUBREG (ops[1].mode, target, 0); + rtx target1 = gen_rtx_SUBREG (ops[1].mode, target, + GET_MODE_SIZE (ops[1].mode)); + emit_insn (gen_aarch64 (permute1, ops[1].mode, target0, op0, op1)); + emit_insn (gen_aarch64 (permute2, ops[1].mode, target1, op0, op1)); + return target; +} + +/* Emit a TBL or TBX instruction with inputs INPUTS and a result of mode + MODE. Return the result of the instruction. + + UNSPEC is either UNSPEC_TBL or UNSPEC_TBX. The inputs must already be in + registers. */ +static rtx +aarch64_expand_tbl_tbx (vec<rtx> &inputs, int unspec, machine_mode mode) +{ + rtx result = gen_reg_rtx (mode); + rtvec vec = gen_rtvec_v (inputs.length (), inputs.address ()); + emit_insn (gen_rtx_SET (result, gen_rtx_UNSPEC (mode, vec, unspec))); + return result; +} + +/* Emit a TBL or TBX intrinsic with the operands given by OPS. Return the + result of the intrinsic. + + UNSPEC is either UNSPEC_TBL or UNSPEC_TBX. */ +static rtx +aarch64_expand_tbl_tbx (vec<expand_operand> &ops, int unspec) +{ + for (unsigned int i = 1; i < ops.length (); ++i) + ops[i].value = force_reg (ops[i].mode, ops[i].value); + + /* Handle the legacy forms for which the table is composed of 64-bit + rather than 128-bit vectors. */ + auto &table = ops[ops.length () - 2]; + auto table_nelts = GET_MODE_NUNITS (table.mode); + bool padded = aarch64_pack_into_v128s (&table); + + /* Packing to 128-bit vectors is enough for everything except the 64-bit + forms of vtbx1 and vtbx3, where we need to handle the zero padding. */ + if (unspec == UNSPEC_TBL || !padded) + { + auto_vec<rtx, 3> inputs; + for (unsigned int i = 1; i < ops.length (); ++i) + inputs.quick_push (ops[i].value); + return aarch64_expand_tbl_tbx (inputs, unspec, ops[0].mode); + } + + /* Generate a TBL, which will give the right results for indices that + are less than TABLE_NELTS. */ + auto_vec<rtx, 2> inputs; + for (unsigned int i = 2; i < ops.length (); ++i) + inputs.quick_push (ops[i].value); + rtx tbl_result = aarch64_expand_tbl_tbx (inputs, UNSPEC_TBL, ops[0].mode); + + /* Get a mask of the indices that are less than TABLE_NELTS. 
*/ + auto &indices = ops.last (); + rtx cmp_result = gen_reg_rtx (indices.mode); + rtx bound_rtx = gen_int_mode (table_nelts, GET_MODE_INNER (indices.mode)); + rtx bound_vec_rtx = gen_const_vec_duplicate (indices.mode, bound_rtx); + emit_insn (gen_aarch64_cm (GTU, indices.mode, cmp_result, + force_reg (indices.mode, bound_vec_rtx), + indices.value)); + + /* Select from the TBL result if the index is less than TABLE_NELTS + and from OPS[1] otherwise. */ + rtx result = gen_reg_rtx (ops[0].mode); + auto icode = get_vcond_mask_icode (ops[0].mode, indices.mode); + emit_insn (GEN_FCN (icode) (result, tbl_result, ops[1].value, cmp_result)); + return result; +} + /* Expand CALL_EXPR EXP, given that it is a call to the function described by BUILTIN_DATA, and return the function's return value. Put the result in TARGET if convenient. */ @@ -3660,15 +3906,19 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, const aarch64_pragma_builtins_data &builtin_data) { unsigned int nargs = call_expr_nargs (exp); + bool returns_void = VOID_TYPE_P (TREE_TYPE (exp)); auto_vec<expand_operand, 8> ops; - ops.safe_grow (nargs + 1); - create_output_operand (&ops[0], target, TYPE_MODE (TREE_TYPE (exp))); - for (unsigned int i = 1; i <= nargs; ++i) + if (!returns_void) + create_output_operand (ops.safe_push ({}), target, + TYPE_MODE (TREE_TYPE (exp))); + for (unsigned int i = 0; i < nargs; ++i) { - tree arg = CALL_EXPR_ARG (exp, i - 1); - create_input_operand (&ops[i], expand_normal (arg), + tree arg = CALL_EXPR_ARG (exp, i); + create_input_operand (ops.safe_push ({}), expand_normal (arg), TYPE_MODE (TREE_TYPE (arg))); + if (POINTER_TYPE_P (TREE_TYPE (arg))) + aarch64_convert_address (&ops.last ()); } if (builtin_data.flags & FLAG_USES_FPMR) @@ -3698,12 +3948,43 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, insn_code icode; switch (builtin_data.unspec) { + case UNSPEC_BSL: + icode = code_for_aarch64_simd_bsl (ops[0].mode); + break; + + case UNSPEC_COMBINE: + icode = code_for_aarch64_combine (ops[1].mode); + break; + + case UNSPEC_DUP: + if (builtin_data.signature == aarch64_builtin_signatures::load) + aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode)); + return expand_vector_broadcast (ops[0].mode, ops[1].value); + + case UNSPEC_DUP_LANE: + aarch64_canonicalize_lane (&ops[2], ops[1].mode); + if (ops[0].mode == ops[1].mode) + icode = code_for_aarch64_dup_lane (ops[0].mode); + else + icode = code_for_aarch64_dup_lane (ops[0].mode, ops[0].mode); + break; + + case UNSPEC_EXT: + icode = code_for_aarch64_ext (ops[0].mode); + break; + case UNSPEC_FAMAX: case UNSPEC_FAMIN: case UNSPEC_F1CVTL_FP8: case UNSPEC_F2CVTL_FP8: case UNSPEC_FDOT_FP8: case UNSPEC_FSCALE: + case UNSPEC_TRN1: + case UNSPEC_TRN2: + case UNSPEC_UZP1: + case UNSPEC_UZP2: + case UNSPEC_ZIP1: + case UNSPEC_ZIP2: icode = code_for_aarch64 (builtin_data.unspec, ops[0].mode); break; @@ -3737,6 +4018,7 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, } case UNSPEC_FDOT_LANE_FP8: + /* This pattern does not canonicalize the lane number. 
*/ icode = code_for_aarch64_lane (builtin_data.unspec, ops[0].mode, ops[3].mode); break; @@ -3749,8 +4031,7 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, case UNSPEC_FMLALLTT_FP8: if (builtin_data.signature == aarch64_builtin_signatures::ternary_lane) { - ops[4].value = aarch64_endian_lane_rtx (ops[3].mode, - INTVAL (ops[4].value)); + aarch64_canonicalize_lane (&ops[4], ops[3].mode); icode = code_for_aarch64_lane (builtin_data.unspec, ops[0].mode, ops[3].mode); } @@ -3760,6 +4041,55 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, gcc_unreachable (); break; + case UNSPEC_GET_LANE: + aarch64_canonicalize_lane (&ops[2], ops[1].mode); + icode = code_for_aarch64_get_lane (ops[1].mode); + break; + + case UNSPEC_LD1: + icode = code_for_aarch64_ld1 (ops[0].mode); + break; + + case UNSPEC_LD1x2: + icode = code_for_aarch64_ld1x2 (ops[0].mode); + break; + + case UNSPEC_LD1x3: + icode = code_for_aarch64_ld1x3 (ops[0].mode); + break; + + case UNSPEC_LD1x4: + icode = code_for_aarch64_ld1x4 (ops[0].mode); + break; + + case UNSPEC_LD2: + case UNSPEC_LD3: + case UNSPEC_LD4: + icode = code_for_aarch64_ld (ops[0].mode, ops[0].mode); + break; + + case UNSPEC_LD2_DUP: + aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode), 2); + icode = code_for_aarch64_simd_ld2r (ops[0].mode); + break; + + case UNSPEC_LD3_DUP: + aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode), 3); + icode = code_for_aarch64_simd_ld3r (ops[0].mode); + break; + + case UNSPEC_LD4_DUP: + aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode), 4); + icode = code_for_aarch64_simd_ld4r (ops[0].mode); + break; + + case UNSPEC_LD2_LANE: + case UNSPEC_LD3_LANE: + case UNSPEC_LD4_LANE: + aarch64_canonicalize_lane (&ops[3], ops[2].mode); + icode = code_for_aarch64_ld_lane (ops[0].mode, ops[0].mode); + break; + case UNSPEC_LUTI2: case UNSPEC_LUTI4: create_integer_operand (ops.safe_push ({}), @@ -3767,6 +4097,86 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, icode = code_for_aarch64_lut (ops[1].mode, ops[2].mode); break; + case UNSPEC_REV16: + case UNSPEC_REV32: + case UNSPEC_REV64: + icode = code_for_aarch64_rev (builtin_data.unspec, ops[0].mode); + break; + + case UNSPEC_SET_LANE: + if (builtin_data.signature == aarch64_builtin_signatures::load_lane) + aarch64_dereference_pointer (&ops[1], GET_MODE_INNER (ops[0].mode)); + /* The vec_set operand order is: dest, scalar, mask, vector. */ + std::swap (ops[2], ops[3]); + aarch64_convert_to_lane_mask (&ops[2], ops[3].mode); + icode = code_for_aarch64_simd_vec_set (ops[0].mode); + break; + + case UNSPEC_ST1: + icode = code_for_aarch64_st1 (ops[1].mode); + break; + + case UNSPEC_ST1_LANE: + aarch64_dereference_pointer (&ops[0], GET_MODE_INNER (ops[1].mode)); + /* Reinterpret ops[0] as an output. 
*/ + create_fixed_operand (&ops[0], ops[0].value); + aarch64_canonicalize_lane (&ops[2], ops[1].mode); + icode = code_for_aarch64_get_lane (ops[1].mode); + break; + + case UNSPEC_ST1x2: + icode = code_for_aarch64_st1x2 (ops[1].mode); + break; + + case UNSPEC_ST1x3: + icode = code_for_aarch64_st1x3 (ops[1].mode); + break; + + case UNSPEC_ST1x4: + icode = code_for_aarch64_st1x4 (ops[1].mode); + break; + + case UNSPEC_ST2: + case UNSPEC_ST3: + case UNSPEC_ST4: + icode = code_for_aarch64_st (ops[1].mode, ops[1].mode); + break; + + case UNSPEC_ST2_LANE: + case UNSPEC_ST3_LANE: + case UNSPEC_ST4_LANE: + aarch64_canonicalize_lane (&ops[2], ops[1].mode); + icode = code_for_aarch64_st_lane (ops[1].mode, ops[1].mode); + break; + + case UNSPEC_TBL: + case UNSPEC_TBX: + return aarch64_expand_tbl_tbx (ops, builtin_data.unspec); + + case UNSPEC_TRN: + return aarch64_expand_permute_pair (ops, UNSPEC_TRN1, UNSPEC_TRN2); + + case UNSPEC_UZP: + return aarch64_expand_permute_pair (ops, UNSPEC_UZP1, UNSPEC_UZP2); + + case UNSPEC_VCREATE: + return force_lowpart_subreg (ops[0].mode, ops[1].value, ops[1].mode); + + case UNSPEC_VEC_COPY: + { + aarch64_convert_to_lane_mask (&ops[2], ops[1].mode); + aarch64_canonicalize_lane (&ops[4], ops[3].mode); + if (ops[1].mode == ops[3].mode) + icode = code_for_aarch64_simd_vec_copy_lane (ops[1].mode); + else + icode = code_for_aarch64_simd_vec_copy_lane (ops[1].mode, + ops[1].mode); + break; + } + + case UNSPEC_ZIP: + return aarch64_expand_permute_pair (ops, UNSPEC_ZIP1, UNSPEC_ZIP2); + default: gcc_unreachable (); } @@ -4214,12 +4624,346 @@ aarch64_record_vector_load_arg (tree addr) cfun->machine->vector_load_decls->add (decl); } +/* Force VAL into a valid gimple value, creating a new SSA_NAME if + necessary. Insert any new statements before GSI. */ +static tree +aarch64_force_gimple_val (gimple_stmt_iterator *gsi, tree val) +{ + if (is_gimple_val (val)) + return val; + + tree tmp = make_ssa_name (TREE_TYPE (val)); + gsi_insert_before_without_update (gsi, gimple_build_assign (tmp, val), + GSI_SAME_STMT); + return tmp; +} + +/* Copy vops from FROM to TO and return TO. */ +static gimple * +aarch64_copy_vops (gimple *to, gimple *from) +{ + gimple_set_vuse (to, gimple_vuse (from)); + gimple_set_vdef (to, gimple_vdef (from)); + return to; +} + +/* Fold STMT (at GSI) to VAL, with SEQ setting up the value of VAL. + Return the replacement statement. */ +static gimple * +aarch64_fold_to_val (gcall *stmt, gimple_stmt_iterator *gsi, + gimple *seq, tree val) +{ + auto *assign = gimple_build_assign (gimple_call_lhs (stmt), val); + gimple_seq_add_stmt_without_update (&seq, assign); + gsi_replace_with_seq_vops (gsi, seq); + return assign; +} + +/* Dereference pointer ADDR, giving a memory reference of type TYPE. */ +static tree +aarch64_dereference (tree addr, tree type) +{ + tree elt_type = (VECTOR_TYPE_P (type) ? TREE_TYPE (type) : type); + tree elt_ptr_type = build_pointer_type_for_mode (elt_type, VOIDmode, true); + tree zero = build_zero_cst (elt_ptr_type); + /* Use element type alignment. */ + tree access_type = build_aligned_type (type, TYPE_ALIGN (elt_type)); + return fold_build2 (MEM_REF, access_type, addr, zero); +} + +/* LANE is a lane index into VEC. Return the associated bit index + (counting from the first byte in memory order). 
*/ +static tree +aarch64_get_lane_bit_index (tree vec, tree lane) +{ + auto vec_mode = TYPE_MODE (TREE_TYPE (vec)); + auto nunits = aarch64_num_lanes (vec_mode); + auto idx = ENDIAN_LANE_N (nunits, tree_to_uhwi (lane)); + return bitsize_int (idx * GET_MODE_UNIT_BITSIZE (vec_mode)); +} + +/* LANE is a lane index into VEC. Return a BIT_FIELD_REF for the + selected element. */ +static tree +aarch64_get_lane (tree vec, tree lane) +{ + auto elt_type = TREE_TYPE (TREE_TYPE (vec)); + return fold_build3 (BIT_FIELD_REF, elt_type, vec, TYPE_SIZE (elt_type), + aarch64_get_lane_bit_index (vec, lane)); +} + +/* LANE is a lane index into VEC. Return a BIT_INSERT_EXPR that replaces + that index with ELT and stores the result in LHS. */ +static gimple * +aarch64_set_lane (tree lhs, tree elt, tree vec, tree lane) +{ + tree bit = aarch64_get_lane_bit_index (vec, lane); + return gimple_build_assign (lhs, BIT_INSERT_EXPR, vec, elt, bit); +} + +/* Fold a call to vcombine. */ +static gimple * +aarch64_fold_combine (gcall *stmt) +{ + tree first_part, second_part; + if (BYTES_BIG_ENDIAN) + { + second_part = gimple_call_arg (stmt, 0); + first_part = gimple_call_arg (stmt, 1); + } + else + { + first_part = gimple_call_arg (stmt, 0); + second_part = gimple_call_arg (stmt, 1); + } + tree ret_type = gimple_call_return_type (stmt); + tree ctor = build_constructor_va (ret_type, 2, NULL_TREE, first_part, + NULL_TREE, second_part); + return gimple_build_assign (gimple_call_lhs (stmt), ctor); +} + +/* Fold a call to vld1, given that it loads something of type TYPE. */ +static gimple * +aarch64_fold_load (gcall *stmt, tree type) +{ + /* Punt until after inlining, so that we stand more chance of + recording something meaningful in vector_load_decls. */ + if (!cfun->after_inlining) + return nullptr; + tree addr = gimple_call_arg (stmt, 0); + aarch64_record_vector_load_arg (addr); + if (!BYTES_BIG_ENDIAN) + { + tree mem = aarch64_dereference (addr, type); + auto *new_stmt = gimple_build_assign (gimple_get_lhs (stmt), mem); + return aarch64_copy_vops (new_stmt, stmt); + } + return nullptr; +} + +/* Fold a call to vst1, given that it loads something of type TYPE. */ +static gimple * +aarch64_fold_store (gcall *stmt, tree type) +{ + tree addr = gimple_call_arg (stmt, 0); + tree data = gimple_call_arg (stmt, 1); + if (!BYTES_BIG_ENDIAN) + { + tree mem = aarch64_dereference (addr, type); + auto *new_stmt = gimple_build_assign (mem, data); + return aarch64_copy_vops (new_stmt, stmt); + } + return nullptr; +} + +/* An aarch64_fold_permute callback for vext. SELECTOR is the value of + the final argument. */ +static unsigned int +aarch64_ext_index (unsigned int, unsigned int selector, unsigned int i) +{ + return selector + i; +} + +/* An aarch64_fold_permute callback for vrev. SELECTOR is the number + of elements in each reversal group. */ +static unsigned int +aarch64_rev_index (unsigned int, unsigned int selector, unsigned int i) +{ + return ROUND_DOWN (i, selector) + (selector - 1) - (i % selector); +} + +/* An aarch64_fold_permute callback for vtrn. SELECTOR is 0 for TRN1 + and 1 for TRN2. */ +static unsigned int +aarch64_trn_index (unsigned int nelts, unsigned int selector, unsigned int i) +{ + return (i % 2) * nelts + ROUND_DOWN (i, 2) + selector; +} + +/* An aarch64_fold_permute callback for vuzp. SELECTOR is 0 for UZP1 + and 1 for UZP2. */ +static unsigned int +aarch64_uzp_index (unsigned int, unsigned int selector, unsigned int i) +{ + return i * 2 + selector; +} + +/* An aarch64_fold_permute callback for vzip. 
SELECTOR is 0 for ZIP1 + and 1 for ZIP2. */ +static unsigned int +aarch64_zip_index (unsigned int nelts, unsigned int selector, unsigned int i) +{ + return (i % 2) * nelts + (i / 2) + selector * (nelts / 2); +} + +/* Fold STMT to a VEC_PERM_EXPR on the first NINPUTS arguments. + Make the VEC_PERM_EXPR emulate an NINPUTS-input TBL in which + architectural lane I of the result selects architectural lane: + + GET_INDEX (NELTS, SELECTOR, I) + + of the input table. NELTS is the number of elements in one vector. */ +static gimple * +aarch64_fold_permute (gcall *stmt, unsigned int ninputs, + unsigned int (*get_index) (unsigned int, unsigned int, + unsigned int), + unsigned int selector) +{ + tree op0 = gimple_call_arg (stmt, 0); + tree op1 = ninputs == 2 ? gimple_call_arg (stmt, 1) : op0; + auto nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)).to_constant (); + vec_perm_builder sel (nelts, nelts, 1); + for (unsigned int i = 0; i < nelts; ++i) + { + unsigned int index = get_index (nelts, selector, + ENDIAN_LANE_N (nelts, i)); + unsigned int vec = index / nelts; + unsigned int elt = ENDIAN_LANE_N (nelts, index % nelts); + sel.quick_push (vec * nelts + elt); + } + + vec_perm_indices indices (sel, ninputs, nelts); + tree mask_type = build_vector_type (ssizetype, nelts); + tree mask = vec_perm_indices_to_tree (mask_type, indices); + return gimple_build_assign (gimple_call_lhs (stmt), VEC_PERM_EXPR, + op0, op1, mask); +} + +/* Try to fold STMT (at GSI), given that it is a call to the builtin + described by BUILTIN_DATA. Return the new statement on success, + otherwise return null. */ +static gimple * +aarch64_gimple_fold_pragma_builtin + (gcall *stmt, gimple_stmt_iterator *gsi, + const aarch64_pragma_builtins_data &builtin_data) +{ + auto &types = builtin_data.types; + + switch (builtin_data.unspec) + { + case UNSPEC_COMBINE: + return aarch64_fold_combine (stmt); + + case UNSPEC_DUP: + case UNSPEC_DUP_LANE: + { + tree arg = gimple_call_arg (stmt, 0); + tree type = types[0].type (); + if (builtin_data.signature == aarch64_builtin_signatures::load) + arg = aarch64_dereference (arg, TREE_TYPE (type)); + else if (builtin_data.unspec == UNSPEC_DUP_LANE) + arg = aarch64_get_lane (arg, gimple_call_arg (stmt, 1)); + arg = aarch64_force_gimple_val (gsi, arg); + + tree dup = build_vector_from_val (type, arg); + return aarch64_fold_to_val (stmt, gsi, nullptr, dup); + } + + case UNSPEC_EXT: + { + auto index = tree_to_uhwi (gimple_call_arg (stmt, 2)); + return aarch64_fold_permute (stmt, 2, aarch64_ext_index, index); + } + + case UNSPEC_GET_LANE: + { + tree val = aarch64_get_lane (gimple_call_arg (stmt, 0), + gimple_call_arg (stmt, 1)); + return gimple_build_assign (gimple_call_lhs (stmt), val); + } + + case UNSPEC_LD1: + return aarch64_fold_load (stmt, types[0].type ()); + + case UNSPEC_REV16: + { + auto selector = 16 / GET_MODE_UNIT_BITSIZE (types[0].mode); + return aarch64_fold_permute (stmt, 1, aarch64_rev_index, selector); + } + + case UNSPEC_REV32: + { + auto selector = 32 / GET_MODE_UNIT_BITSIZE (types[0].mode); + return aarch64_fold_permute (stmt, 1, aarch64_rev_index, selector); + } + + case UNSPEC_REV64: + { + auto selector = 64 / GET_MODE_UNIT_BITSIZE (types[0].mode); + return aarch64_fold_permute (stmt, 1, aarch64_rev_index, selector); + } + + case UNSPEC_SET_LANE: + { + tree elt = gimple_call_arg (stmt, 0); + if (builtin_data.signature == aarch64_builtin_signatures::load_lane) + { + elt = aarch64_dereference (elt, TREE_TYPE (types[0].type ())); + elt = aarch64_force_gimple_val (gsi, elt); + } + return 
aarch64_set_lane (gimple_call_lhs (stmt), elt, + gimple_call_arg (stmt, 1), + gimple_call_arg (stmt, 2)); + } + + case UNSPEC_ST1: + return aarch64_fold_store (stmt, types[1].type ()); + + case UNSPEC_ST1_LANE: + { + tree val = aarch64_get_lane (gimple_call_arg (stmt, 1), + gimple_call_arg (stmt, 2)); + tree mem = aarch64_dereference (gimple_call_arg (stmt, 0), + TREE_TYPE (types[0].type ())); + val = aarch64_force_gimple_val (gsi, val); + return aarch64_copy_vops (gimple_build_assign (mem, val), stmt); + } + + case UNSPEC_TRN1: + return aarch64_fold_permute (stmt, 2, aarch64_trn_index, 0); + + case UNSPEC_TRN2: + return aarch64_fold_permute (stmt, 2, aarch64_trn_index, 1); + + case UNSPEC_UZP1: + return aarch64_fold_permute (stmt, 2, aarch64_uzp_index, 0); + + case UNSPEC_UZP2: + return aarch64_fold_permute (stmt, 2, aarch64_uzp_index, 1); + + case UNSPEC_VCREATE: + return gimple_build_assign (gimple_call_lhs (stmt), + fold_build1 (VIEW_CONVERT_EXPR, + types[0].type (), + gimple_call_arg (stmt, 0))); + + case UNSPEC_VEC_COPY: + { + tree elt = aarch64_get_lane (gimple_call_arg (stmt, 2), + gimple_call_arg (stmt, 3)); + elt = aarch64_force_gimple_val (gsi, elt); + return aarch64_set_lane (gimple_call_lhs (stmt), elt, + gimple_call_arg (stmt, 0), + gimple_call_arg (stmt, 1)); + } + + case UNSPEC_ZIP1: + return aarch64_fold_permute (stmt, 2, aarch64_zip_index, 0); + + case UNSPEC_ZIP2: + return aarch64_fold_permute (stmt, 2, aarch64_zip_index, 1); + + default: + return nullptr; + } +} + /* Try to fold STMT, given that it's a call to the built-in function with subcode FCODE. Return the new statement on success and null on failure. */ gimple * aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt, - gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED) + gimple_stmt_iterator *gsi) { gimple *new_stmt = NULL; unsigned nargs = gimple_call_num_args (stmt); @@ -4249,81 +4993,33 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt, BUILTIN_VDC (BINOP, combine, 0, QUIET) BUILTIN_VD_I (BINOPU, combine, 0, DEFAULT) BUILTIN_VDC_P (BINOPP, combine, 0, DEFAULT) - { - tree first_part, second_part; - if (BYTES_BIG_ENDIAN) - { - second_part = args[0]; - first_part = args[1]; - } - else - { - first_part = args[0]; - second_part = args[1]; - } - tree ret_type = gimple_call_return_type (stmt); - tree ctor = build_constructor_va (ret_type, 2, NULL_TREE, first_part, - NULL_TREE, second_part); - new_stmt = gimple_build_assign (gimple_call_lhs (stmt), ctor); - } - break; + new_stmt = aarch64_fold_combine (stmt); + break; /*lower store and load neon builtins to gimple. */ BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD) BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD) BUILTIN_VALLP_NO_DI (LOAD1_P, ld1, 0, LOAD) - /* Punt until after inlining, so that we stand more chance of - recording something meaningful in vector_load_decls. */ - if (!cfun->after_inlining) - break; - aarch64_record_vector_load_arg (args[0]); - if (!BYTES_BIG_ENDIAN) - { - enum aarch64_simd_type mem_type - = get_mem_type_for_load_store(fcode); - aarch64_simd_type_info_trees simd_type - = aarch64_simd_types_trees[mem_type]; - tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype, - VOIDmode, true); - tree zero = build_zero_cst (elt_ptr_type); - /* Use element type alignment. 
*/ - tree access_type - = build_aligned_type (simd_type.itype, - TYPE_ALIGN (simd_type.eltype)); - new_stmt - = gimple_build_assign (gimple_get_lhs (stmt), - fold_build2 (MEM_REF, - access_type, - args[0], zero)); - gimple_set_vuse (new_stmt, gimple_vuse (stmt)); - gimple_set_vdef (new_stmt, gimple_vdef (stmt)); - } - break; + { + enum aarch64_simd_type mem_type + = get_mem_type_for_load_store (fcode); + aarch64_simd_type_info_trees simd_type + = aarch64_simd_types_trees[mem_type]; + new_stmt = aarch64_fold_load (stmt, simd_type.itype); + break; + } BUILTIN_VALL_F16 (STORE1, st1, 0, STORE) BUILTIN_VDQ_I (STORE1_U, st1, 0, STORE) BUILTIN_VALLP_NO_DI (STORE1_P, st1, 0, STORE) - if (!BYTES_BIG_ENDIAN) - { - enum aarch64_simd_type mem_type - = get_mem_type_for_load_store(fcode); - aarch64_simd_type_info_trees simd_type - = aarch64_simd_types_trees[mem_type]; - tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype, - VOIDmode, true); - tree zero = build_zero_cst (elt_ptr_type); - /* Use element type alignment. */ - tree access_type - = build_aligned_type (simd_type.itype, - TYPE_ALIGN (simd_type.eltype)); - new_stmt - = gimple_build_assign (fold_build2 (MEM_REF, access_type, - args[0], zero), - args[1]); - gimple_set_vuse (new_stmt, gimple_vuse (stmt)); - gimple_set_vdef (new_stmt, gimple_vdef (stmt)); - } - break; + { + enum aarch64_simd_type mem_type + = get_mem_type_for_load_store (fcode); + aarch64_simd_type_info_trees simd_type + = aarch64_simd_types_trees[mem_type]; + new_stmt = aarch64_fold_store (stmt, simd_type.itype); + break; + } BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10, ALL) BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, ALL) @@ -4440,6 +5136,9 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt, } break; default: + if (auto builtin_data = aarch64_get_pragma_builtin (fcode)) + new_stmt = aarch64_gimple_fold_pragma_builtin (stmt, gsi, + *builtin_data); break; } diff --git a/gcc/config/aarch64/aarch64-builtins.h b/gcc/config/aarch64/aarch64-builtins.h index f4d54de..d998370 100644 --- a/gcc/config/aarch64/aarch64-builtins.h +++ b/gcc/config/aarch64/aarch64-builtins.h @@ -28,6 +28,8 @@ enum aarch64_type_qualifiers qualifier_const = 0x2, /* 1 << 1 */ /* T *foo. */ qualifier_pointer = 0x4, /* 1 << 2 */ + /* const T *foo. */ + qualifier_const_pointer = 0x6, /* Used when expanding arguments if an operand could be an immediate. 
*/ qualifier_immediate = 0x8, /* 1 << 3 */ diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 18764e4..21c7e67 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -896,6 +896,8 @@ bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode); machine_mode aarch64_sve_int_mode (machine_mode); opt_machine_mode aarch64_sve_pred_mode (unsigned int); machine_mode aarch64_sve_pred_mode (machine_mode); +opt_machine_mode aarch64_advsimd_vector_array_mode (machine_mode, + unsigned HOST_WIDE_INT); opt_machine_mode aarch64_sve_data_mode (scalar_mode, poly_uint64); bool aarch64_sve_mode_p (machine_mode); HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int); diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def index 8924262..e725b52 100644 --- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def @@ -26,6 +26,26 @@ #define ENTRY_BINARY_LANE(N, T0, T1, T2, U, F) \ ENTRY (N, binary_lane, T0, T1, T2, none, U, F) +#undef ENTRY_BINARY_TWO_LANES +#define ENTRY_BINARY_TWO_LANES(N, T0, T1, T2, U, F) \ + ENTRY (N, binary_two_lanes, T0, T1, T2, none, U, F) + +#undef ENTRY_LOAD +#define ENTRY_LOAD(N, T0, T1, U) \ + ENTRY (N, load, T0, T1, none, none, U, LOAD) + +#undef ENTRY_LOAD_LANE +#define ENTRY_LOAD_LANE(N, T0, T1, T2, U) \ + ENTRY (N, load_lane, T0, T1, T2, none, U, LOAD) + +#undef ENTRY_STORE +#define ENTRY_STORE(N, T0, T1, U) \ + ENTRY (N, store, T0, T1, none, none, U, STORE) + +#undef ENTRY_STORE_LANE +#define ENTRY_STORE_LANE(N, T0, T1, U) \ + ENTRY (N, store_lane, T0, T1, none, none, U, STORE) + #undef ENTRY_TERNARY #define ENTRY_TERNARY(N, T0, T1, T2, T3, U, F) \ ENTRY (N, ternary, T0, T1, T2, T3, U, F) @@ -38,6 +58,10 @@ #define ENTRY_UNARY(N, T0, T1, U, F) \ ENTRY (N, unary, T0, T1, none, none, U, F) +#undef ENTRY_UNARY_LANE +#define ENTRY_UNARY_LANE(N, T0, T1, U, F) \ + ENTRY (N, unary_lane, T0, T1, none, none, U, F) + #undef ENTRY_BINARY_VHSDF #define ENTRY_BINARY_VHSDF(NAME, UNSPEC, FLAGS) \ ENTRY_BINARY (NAME##_f16, f16, f16, f16, UNSPEC, FLAGS) \ @@ -121,6 +145,7 @@ ENTRY_BINARY_VHSDF (vamin, UNSPEC_FAMIN, FP) ENTRY_TERNARY_VLUT8 (p) ENTRY_TERNARY_VLUT8 (s) ENTRY_TERNARY_VLUT8 (u) +ENTRY_TERNARY_VLUT8 (mf) ENTRY_TERNARY_VLUT16 (bf) ENTRY_TERNARY_VLUT16 (f) @@ -170,3 +195,224 @@ ENTRY_FMA_FPM (vmlallbt, f32, UNSPEC_FMLALLBT_FP8) ENTRY_FMA_FPM (vmlalltb, f32, UNSPEC_FMLALLTB_FP8) ENTRY_FMA_FPM (vmlalltt, f32, UNSPEC_FMLALLTT_FP8) #undef REQUIRED_EXTENSIONS + +// bsl +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_TERNARY (vbsl_mf8, mf8, u8, mf8, mf8, UNSPEC_BSL, QUIET) +ENTRY_TERNARY (vbslq_mf8, mf8q, u8q, mf8q, mf8q, UNSPEC_BSL, QUIET) +#undef REQUIRED_EXTENSIONS + +// combine +#define REQUIRED_EXTENSIONS nonstreaming_only (NONE) +ENTRY_BINARY (vcombine_mf8, mf8q, mf8, mf8, UNSPEC_COMBINE, QUIET) +#undef REQUIRED_EXTENSIONS + +// copy_lane +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_BINARY_TWO_LANES (vcopy_lane_mf8, mf8, mf8, mf8, + UNSPEC_VEC_COPY, QUIET) +ENTRY_BINARY_TWO_LANES (vcopyq_lane_mf8, mf8q, mf8q, mf8, + UNSPEC_VEC_COPY, QUIET) +ENTRY_BINARY_TWO_LANES (vcopy_laneq_mf8, mf8, mf8, mf8q, + UNSPEC_VEC_COPY, QUIET) +ENTRY_BINARY_TWO_LANES (vcopyq_laneq_mf8, mf8q, mf8q, mf8q, + UNSPEC_VEC_COPY, QUIET) +#undef REQUIRED_EXTENSIONS + +// create +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_UNARY 
(vcreate_mf8, mf8, u64_scalar, UNSPEC_VCREATE, QUIET) +#undef REQUIRED_EXTENSIONS + +// dup +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_UNARY (vdup_n_mf8, mf8, mf8_scalar, UNSPEC_DUP, QUIET) +ENTRY_UNARY (vdupq_n_mf8, mf8q, mf8_scalar, UNSPEC_DUP, QUIET) + +ENTRY_UNARY_LANE (vdup_lane_mf8, mf8, mf8, UNSPEC_DUP_LANE, QUIET) +ENTRY_UNARY_LANE (vdupq_lane_mf8, mf8q, mf8, UNSPEC_DUP_LANE, QUIET) +ENTRY_UNARY_LANE (vdup_laneq_mf8, mf8, mf8q, UNSPEC_DUP_LANE, QUIET) +ENTRY_UNARY_LANE (vdupq_laneq_mf8, mf8q, mf8q, UNSPEC_DUP_LANE, QUIET) +#undef REQUIRED_EXTENSIONS + +// dupb_lane +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_UNARY_LANE (vdupb_lane_mf8, mf8_scalar, mf8, UNSPEC_GET_LANE, QUIET) +ENTRY_UNARY_LANE (vdupb_laneq_mf8, mf8_scalar, mf8q, UNSPEC_GET_LANE, QUIET) +#undef REQUIRED_EXTENSIONS + +// ext +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_BINARY_LANE (vext_mf8, mf8, mf8, mf8, UNSPEC_EXT, QUIET) +ENTRY_BINARY_LANE (vextq_mf8, mf8q, mf8q, mf8q, UNSPEC_EXT, QUIET) +#undef REQUIRED_EXTENSIONS + +// ld1 +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_LOAD (vld1_mf8, mf8, mf8_scalar_const_ptr, UNSPEC_LD1) +ENTRY_LOAD (vld1q_mf8, mf8q, mf8_scalar_const_ptr, UNSPEC_LD1) +ENTRY_LOAD (vld1_dup_mf8, mf8, mf8_scalar_const_ptr, UNSPEC_DUP) +ENTRY_LOAD (vld1q_dup_mf8, mf8q, mf8_scalar_const_ptr, UNSPEC_DUP) + +ENTRY_LOAD_LANE (vld1_lane_mf8, mf8, mf8_scalar_const_ptr, mf8, + UNSPEC_SET_LANE) +ENTRY_LOAD_LANE (vld1q_lane_mf8, mf8q, mf8_scalar_const_ptr, mf8q, + UNSPEC_SET_LANE) +#undef REQUIRED_EXTENSIONS + +// ld<n> +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_LOAD (vld1_mf8_x2, mf8x2, mf8_scalar_const_ptr, UNSPEC_LD1x2) +ENTRY_LOAD (vld1q_mf8_x2, mf8qx2, mf8_scalar_const_ptr, UNSPEC_LD1x2) +ENTRY_LOAD (vld2_mf8, mf8x2, mf8_scalar_const_ptr, UNSPEC_LD2) +ENTRY_LOAD (vld2q_mf8, mf8qx2, mf8_scalar_const_ptr, UNSPEC_LD2) +ENTRY_LOAD (vld2_dup_mf8, mf8x2, mf8_scalar_const_ptr, UNSPEC_LD2_DUP) +ENTRY_LOAD (vld2q_dup_mf8, mf8qx2, mf8_scalar_const_ptr, UNSPEC_LD2_DUP) +ENTRY_LOAD_LANE (vld2_lane_mf8, mf8x2, mf8_scalar_const_ptr, mf8x2, + UNSPEC_LD2_LANE) +ENTRY_LOAD_LANE (vld2q_lane_mf8, mf8qx2, mf8_scalar_const_ptr, mf8qx2, + UNSPEC_LD2_LANE) + +ENTRY_LOAD (vld1_mf8_x3, mf8x3, mf8_scalar_const_ptr, UNSPEC_LD1x3) +ENTRY_LOAD (vld1q_mf8_x3, mf8qx3, mf8_scalar_const_ptr, UNSPEC_LD1x3) +ENTRY_LOAD (vld3_mf8, mf8x3, mf8_scalar_const_ptr, UNSPEC_LD3) +ENTRY_LOAD (vld3q_mf8, mf8qx3, mf8_scalar_const_ptr, UNSPEC_LD3) +ENTRY_LOAD (vld3_dup_mf8, mf8x3, mf8_scalar_const_ptr, UNSPEC_LD3_DUP) +ENTRY_LOAD (vld3q_dup_mf8, mf8qx3, mf8_scalar_const_ptr, UNSPEC_LD3_DUP) +ENTRY_LOAD_LANE (vld3_lane_mf8, mf8x3, mf8_scalar_const_ptr, mf8x3, + UNSPEC_LD3_LANE) +ENTRY_LOAD_LANE (vld3q_lane_mf8, mf8qx3, mf8_scalar_const_ptr, mf8qx3, + UNSPEC_LD3_LANE) + +ENTRY_LOAD (vld1_mf8_x4, mf8x4, mf8_scalar_const_ptr, UNSPEC_LD1x4) +ENTRY_LOAD (vld1q_mf8_x4, mf8qx4, mf8_scalar_const_ptr, UNSPEC_LD1x4) +ENTRY_LOAD (vld4_mf8, mf8x4, mf8_scalar_const_ptr, UNSPEC_LD4) +ENTRY_LOAD (vld4q_mf8, mf8qx4, mf8_scalar_const_ptr, UNSPEC_LD4) +ENTRY_LOAD (vld4_dup_mf8, mf8x4, mf8_scalar_const_ptr, UNSPEC_LD4_DUP) +ENTRY_LOAD (vld4q_dup_mf8, mf8qx4, mf8_scalar_const_ptr, UNSPEC_LD4_DUP) +ENTRY_LOAD_LANE (vld4_lane_mf8, mf8x4, mf8_scalar_const_ptr, mf8x4, + UNSPEC_LD4_LANE) +ENTRY_LOAD_LANE (vld4q_lane_mf8, mf8qx4, mf8_scalar_const_ptr, mf8qx4, + UNSPEC_LD4_LANE) +#undef REQUIRED_EXTENSIONS + +// mov +#define REQUIRED_EXTENSIONS 
nonstreaming_only (TARGET_SIMD) +ENTRY_UNARY (vmov_n_mf8, mf8, mf8_scalar, UNSPEC_DUP, QUIET) +ENTRY_UNARY (vmovq_n_mf8, mf8q, mf8_scalar, UNSPEC_DUP, QUIET) +#undef REQUIRED_EXTENSIONS + +// rev +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_UNARY (vrev64_mf8, mf8, mf8, UNSPEC_REV64, QUIET) +ENTRY_UNARY (vrev64q_mf8, mf8q, mf8q, UNSPEC_REV64, QUIET) + +ENTRY_UNARY (vrev32_mf8, mf8, mf8, UNSPEC_REV32, QUIET) +ENTRY_UNARY (vrev32q_mf8, mf8q, mf8q, UNSPEC_REV32, QUIET) + +ENTRY_UNARY (vrev16_mf8, mf8, mf8, UNSPEC_REV16, QUIET) +ENTRY_UNARY (vrev16q_mf8, mf8q, mf8q, UNSPEC_REV16, QUIET) +#undef REQUIRED_EXTENSIONS + +// set_lane +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_BINARY_LANE (vset_lane_mf8, mf8, mf8_scalar, mf8, UNSPEC_SET_LANE, QUIET) +ENTRY_BINARY_LANE (vsetq_lane_mf8, mf8q, mf8_scalar, mf8q, UNSPEC_SET_LANE, QUIET) +#undef REQUIRED_EXTENSIONS + +// st1 +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_STORE (vst1_mf8, mf8_scalar_ptr, mf8, UNSPEC_ST1) +ENTRY_STORE (vst1q_mf8, mf8_scalar_ptr, mf8q, UNSPEC_ST1) + +ENTRY_STORE_LANE (vst1_lane_mf8, mf8_scalar_ptr, mf8, UNSPEC_ST1_LANE) +ENTRY_STORE_LANE (vst1q_lane_mf8, mf8_scalar_ptr, mf8q, UNSPEC_ST1_LANE) +#undef REQUIRED_EXTENSIONS + +// st<n> +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_STORE (vst2_mf8, mf8_scalar_ptr, mf8x2, UNSPEC_ST2) +ENTRY_STORE (vst2q_mf8, mf8_scalar_ptr, mf8qx2, UNSPEC_ST2) +ENTRY_STORE (vst1_mf8_x2, mf8_scalar_ptr, mf8x2, UNSPEC_ST1x2) +ENTRY_STORE (vst1q_mf8_x2, mf8_scalar_ptr, mf8qx2, UNSPEC_ST1x2) +ENTRY_STORE_LANE (vst2_lane_mf8, mf8_scalar_ptr, mf8x2, UNSPEC_ST2_LANE) +ENTRY_STORE_LANE (vst2q_lane_mf8, mf8_scalar_ptr, mf8qx2, UNSPEC_ST2_LANE) + +ENTRY_STORE (vst3_mf8, mf8_scalar_ptr, mf8x3, UNSPEC_ST3) +ENTRY_STORE (vst3q_mf8, mf8_scalar_ptr, mf8qx3, UNSPEC_ST3) +ENTRY_STORE (vst1_mf8_x3, mf8_scalar_ptr, mf8x3, UNSPEC_ST1x3) +ENTRY_STORE (vst1q_mf8_x3, mf8_scalar_ptr, mf8qx3, UNSPEC_ST1x3) +ENTRY_STORE_LANE (vst3_lane_mf8, mf8_scalar_ptr, mf8x3, UNSPEC_ST3_LANE) +ENTRY_STORE_LANE (vst3q_lane_mf8, mf8_scalar_ptr, mf8qx3, UNSPEC_ST3_LANE) + +ENTRY_STORE (vst4_mf8, mf8_scalar_ptr, mf8x4, UNSPEC_ST4) +ENTRY_STORE (vst4q_mf8, mf8_scalar_ptr, mf8qx4, UNSPEC_ST4) +ENTRY_STORE (vst1_mf8_x4, mf8_scalar_ptr, mf8x4, UNSPEC_ST1x4) +ENTRY_STORE (vst1q_mf8_x4, mf8_scalar_ptr, mf8qx4, UNSPEC_ST1x4) +ENTRY_STORE_LANE (vst4_lane_mf8, mf8_scalar_ptr, mf8x4, UNSPEC_ST4_LANE) +ENTRY_STORE_LANE (vst4q_lane_mf8, mf8_scalar_ptr, mf8qx4, UNSPEC_ST4_LANE) +#undef REQUIRED_EXTENSIONS + +// tbl<n> +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_BINARY (vtbl1_mf8, mf8, mf8, u8, UNSPEC_TBL, QUIET) +ENTRY_BINARY (vtbl2_mf8, mf8, mf8x2, u8, UNSPEC_TBL, QUIET) +ENTRY_BINARY (vtbl3_mf8, mf8, mf8x3, u8, UNSPEC_TBL, QUIET) +ENTRY_BINARY (vtbl4_mf8, mf8, mf8x4, u8, UNSPEC_TBL, QUIET) + +ENTRY_BINARY (vqtbl1_mf8, mf8, mf8q, u8, UNSPEC_TBL, QUIET) +ENTRY_BINARY (vqtbl1q_mf8, mf8q, mf8q, u8q, UNSPEC_TBL, QUIET) +ENTRY_BINARY (vqtbl2_mf8, mf8, mf8qx2, u8, UNSPEC_TBL, QUIET) +ENTRY_BINARY (vqtbl2q_mf8, mf8q, mf8qx2, u8q, UNSPEC_TBL, QUIET) +ENTRY_BINARY (vqtbl3_mf8, mf8, mf8qx3, u8, UNSPEC_TBL, QUIET) +ENTRY_BINARY (vqtbl3q_mf8, mf8q, mf8qx3, u8q, UNSPEC_TBL, QUIET) +ENTRY_BINARY (vqtbl4_mf8, mf8, mf8qx4, u8, UNSPEC_TBL, QUIET) +ENTRY_BINARY (vqtbl4q_mf8, mf8q, mf8qx4, u8q, UNSPEC_TBL, QUIET) +#undef REQUIRED_EXTENSIONS + +// tbx<n> +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_TERNARY (vtbx1_mf8, mf8, mf8, mf8, u8, 
UNSPEC_TBX, QUIET) +ENTRY_TERNARY (vtbx2_mf8, mf8, mf8, mf8x2, u8, UNSPEC_TBX, QUIET) +ENTRY_TERNARY (vtbx3_mf8, mf8, mf8, mf8x3, u8, UNSPEC_TBX, QUIET) +ENTRY_TERNARY (vtbx4_mf8, mf8, mf8, mf8x4, u8, UNSPEC_TBX, QUIET) + +ENTRY_TERNARY (vqtbx1_mf8, mf8, mf8, mf8q, u8, UNSPEC_TBX, QUIET) +ENTRY_TERNARY (vqtbx1q_mf8, mf8q, mf8q, mf8q, u8q, UNSPEC_TBX, QUIET) +ENTRY_TERNARY (vqtbx2_mf8, mf8, mf8, mf8qx2, u8, UNSPEC_TBX, QUIET) +ENTRY_TERNARY (vqtbx2q_mf8, mf8q, mf8q, mf8qx2, u8q, UNSPEC_TBX, QUIET) +ENTRY_TERNARY (vqtbx3_mf8, mf8, mf8, mf8qx3, u8, UNSPEC_TBX, QUIET) +ENTRY_TERNARY (vqtbx3q_mf8, mf8q, mf8q, mf8qx3, u8q, UNSPEC_TBX, QUIET) +ENTRY_TERNARY (vqtbx4_mf8, mf8, mf8, mf8qx4, u8, UNSPEC_TBX, QUIET) +ENTRY_TERNARY (vqtbx4q_mf8, mf8q, mf8q, mf8qx4, u8q, UNSPEC_TBX, QUIET) +#undef REQUIRED_EXTENSIONS + +// trn<n> +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_BINARY (vtrn1_mf8, mf8, mf8, mf8, UNSPEC_TRN1, QUIET) +ENTRY_BINARY (vtrn1q_mf8, mf8q, mf8q, mf8q, UNSPEC_TRN1, QUIET) +ENTRY_BINARY (vtrn2_mf8, mf8, mf8, mf8, UNSPEC_TRN2, QUIET) +ENTRY_BINARY (vtrn2q_mf8, mf8q, mf8q, mf8q, UNSPEC_TRN2, QUIET) +ENTRY_BINARY (vtrn_mf8, mf8x2, mf8, mf8, UNSPEC_TRN, QUIET) +ENTRY_BINARY (vtrnq_mf8, mf8qx2, mf8q, mf8q, UNSPEC_TRN, QUIET) +#undef REQUIRED_EXTENSIONS + +// uzp<n> +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_BINARY (vuzp1_mf8, mf8, mf8, mf8, UNSPEC_UZP1, QUIET) +ENTRY_BINARY (vuzp1q_mf8, mf8q, mf8q, mf8q, UNSPEC_UZP1, QUIET) +ENTRY_BINARY (vuzp2_mf8, mf8, mf8, mf8, UNSPEC_UZP2, QUIET) +ENTRY_BINARY (vuzp2q_mf8, mf8q, mf8q, mf8q, UNSPEC_UZP2, QUIET) +ENTRY_BINARY (vuzp_mf8, mf8x2, mf8, mf8, UNSPEC_UZP, QUIET) +ENTRY_BINARY (vuzpq_mf8, mf8qx2, mf8q, mf8q, UNSPEC_UZP, QUIET) +#undef REQUIRED_EXTENSIONS + +// zip<n> +#define REQUIRED_EXTENSIONS nonstreaming_only (TARGET_SIMD) +ENTRY_BINARY (vzip1_mf8, mf8, mf8, mf8, UNSPEC_ZIP1, QUIET) +ENTRY_BINARY (vzip1q_mf8, mf8q, mf8q, mf8q, UNSPEC_ZIP1, QUIET) +ENTRY_BINARY (vzip2_mf8, mf8, mf8, mf8, UNSPEC_ZIP2, QUIET) +ENTRY_BINARY (vzip2q_mf8, mf8q, mf8q, mf8q, UNSPEC_ZIP2, QUIET) +ENTRY_BINARY (vzip_mf8, mf8x2, mf8, mf8, UNSPEC_ZIP, QUIET) +ENTRY_BINARY (vzipq_mf8, mf8qx2, mf8q, mf8q, UNSPEC_ZIP, QUIET) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 7959cca..237de1b 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -112,7 +112,7 @@ } ) -(define_insn "aarch64_dup_lane<mode>" +(define_insn "@aarch64_dup_lane<mode>" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (vec_duplicate:VALL_F16 (vec_select:<VEL> @@ -127,7 +127,7 @@ [(set_attr "type" "neon_dup<q>")] ) -(define_insn "aarch64_dup_lane_<vswap_width_name><mode>" +(define_insn "@aarch64_dup_lane_<vswap_width_name><mode>" [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w") (vec_duplicate:VALL_F16_NO_V2Q (vec_select:<VEL> @@ -1164,7 +1164,7 @@ [(set_attr "type" "neon_logic<q>")] ) -(define_insn "aarch64_simd_vec_set<mode>" +(define_insn "@aarch64_simd_vec_set<mode>" [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w") (vec_merge:VALL_F16 (vec_duplicate:VALL_F16 @@ -1225,7 +1225,7 @@ [(set_attr "type" "neon_ins<q>")] ) -(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>" +(define_insn "@aarch64_simd_vec_copy_lane_<vswap_width_name><mode>" [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w") (vec_merge:VALL_F16_NO_V2Q (vec_duplicate:VALL_F16_NO_V2Q @@ -3837,7 +3837,7 @@ } ) -(define_expand 
"aarch64_simd_bsl<mode>" +(define_expand "@aarch64_simd_bsl<mode>" [(match_operand:VALLDIF 0 "register_operand") (match_operand:<V_INT_EQUIV> 1 "register_operand") (match_operand:VALLDIF 2 "register_operand") @@ -4438,7 +4438,7 @@ ;; Form a vector whose least significant half comes from operand 1 and whose ;; most significant half comes from operand 2. This operand order follows ;; arm_neon.h vcombine* intrinsics. -(define_expand "aarch64_combine<mode>" +(define_expand "@aarch64_combine<mode>" [(match_operand:<VDBL> 0 "register_operand") (match_operand:VDC 1 "general_operand") (match_operand:VDC 2 "general_operand")] @@ -6971,7 +6971,7 @@ ;; Note, we have constraints for Dz and Z as different expanders ;; have different ideas of what should be passed to this pattern. -(define_insn "aarch64_cm<optab><mode><vczle><vczbe>" +(define_insn "@aarch64_cm<optab><mode><vczle><vczbe>" [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") (neg:<V_INT_EQUIV> (COMPARISONS:<V_INT_EQUIV> @@ -7036,7 +7036,7 @@ ;; cm(hs|hi) -(define_insn "aarch64_cm<optab><mode><vczle><vczbe>" +(define_insn "@aarch64_cm<optab><mode><vczle><vczbe>" [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w") (neg:<V_INT_EQUIV> (UCOMPARISONS:<V_INT_EQUIV> @@ -7188,7 +7188,7 @@ ;; fcm(eq|ge|gt|le|lt) -(define_insn "aarch64_cm<optab><mode><vczle><vczbe>" +(define_insn "@aarch64_cm<optab><mode><vczle><vczbe>" [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") (neg:<V_INT_EQUIV> (COMPARISONS:<V_INT_EQUIV> @@ -7349,7 +7349,7 @@ [(set_attr "type" "neon_load2_2reg<q>")] ) -(define_insn "aarch64_simd_ld2r<vstruct_elt>" +(define_insn "@aarch64_simd_ld2r<vstruct_elt>" [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w") (unspec:VSTRUCT_2QD [ (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] @@ -7359,7 +7359,7 @@ [(set_attr "type" "neon_load2_all_lanes<q>")] ) -(define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>" +(define_insn "@aarch64_vec_load_lanes<mode>_lane<vstruct_elt>" [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w") (unspec:VSTRUCT_2QD [ (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") @@ -7449,7 +7449,7 @@ [(set_attr "type" "neon_load3_3reg<q>")] ) -(define_insn "aarch64_simd_ld3r<vstruct_elt>" +(define_insn "@aarch64_simd_ld3r<vstruct_elt>" [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w") (unspec:VSTRUCT_3QD [ (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] @@ -7549,7 +7549,7 @@ [(set_attr "type" "neon_load4_4reg<q>")] ) -(define_insn "aarch64_simd_ld4r<vstruct_elt>" +(define_insn "@aarch64_simd_ld4r<vstruct_elt>" [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w") (unspec:VSTRUCT_4QD [ (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] @@ -7773,7 +7773,7 @@ operands[1] = force_reg (V8DImode, operands[1]); }) -(define_expand "aarch64_ld1x3<vstruct_elt>" +(define_expand "@aarch64_ld1x3<vstruct_elt>" [(match_operand:VSTRUCT_3QD 0 "register_operand") (match_operand:DI 1 "register_operand")] "TARGET_SIMD" @@ -7793,7 +7793,7 @@ [(set_attr "type" "neon_load1_3reg<q>")] ) -(define_expand "aarch64_ld1x4<vstruct_elt>" +(define_expand "@aarch64_ld1x4<vstruct_elt>" [(match_operand:VSTRUCT_4QD 0 "register_operand" "=w") (match_operand:DI 1 "register_operand" "r")] "TARGET_SIMD" @@ -7813,7 +7813,7 @@ [(set_attr "type" "neon_load1_4reg<q>")] ) -(define_expand "aarch64_st1x2<vstruct_elt>" +(define_expand "@aarch64_st1x2<vstruct_elt>" [(match_operand:DI 0 "register_operand") (match_operand:VSTRUCT_2QD 1 "register_operand")] "TARGET_SIMD" @@ -7833,7 
+7833,7 @@ [(set_attr "type" "neon_store1_2reg<q>")] ) -(define_expand "aarch64_st1x3<vstruct_elt>" +(define_expand "@aarch64_st1x3<vstruct_elt>" [(match_operand:DI 0 "register_operand") (match_operand:VSTRUCT_3QD 1 "register_operand")] "TARGET_SIMD" @@ -7853,7 +7853,7 @@ [(set_attr "type" "neon_store1_3reg<q>")] ) -(define_expand "aarch64_st1x4<vstruct_elt>" +(define_expand "@aarch64_st1x4<vstruct_elt>" [(match_operand:DI 0 "register_operand" "") (match_operand:VSTRUCT_4QD 1 "register_operand" "")] "TARGET_SIMD" @@ -8220,7 +8220,7 @@ [(set_attr "type" "neon_load1_4reg<q>")] ) -(define_expand "aarch64_ld<nregs><vstruct_elt>" +(define_expand "@aarch64_ld<nregs><vstruct_elt>" [(match_operand:VSTRUCT_D 0 "register_operand") (match_operand:DI 1 "register_operand")] "TARGET_SIMD" @@ -8230,7 +8230,7 @@ DONE; }) -(define_expand "aarch64_ld1<VALL_F16:mode>" +(define_expand "@aarch64_ld1<VALL_F16:mode>" [(match_operand:VALL_F16 0 "register_operand") (match_operand:DI 1 "register_operand")] "TARGET_SIMD" @@ -8245,7 +8245,7 @@ DONE; }) -(define_expand "aarch64_ld<nregs><vstruct_elt>" +(define_expand "@aarch64_ld<nregs><vstruct_elt>" [(match_operand:VSTRUCT_Q 0 "register_operand") (match_operand:DI 1 "register_operand")] "TARGET_SIMD" @@ -8255,7 +8255,7 @@ DONE; }) -(define_expand "aarch64_ld1x2<vstruct_elt>" +(define_expand "@aarch64_ld1x2<vstruct_elt>" [(match_operand:VSTRUCT_2QD 0 "register_operand") (match_operand:DI 1 "register_operand")] "TARGET_SIMD" @@ -8267,7 +8267,7 @@ DONE; }) -(define_expand "aarch64_ld<nregs>_lane<vstruct_elt>" +(define_expand "@aarch64_ld<nregs>_lane<vstruct_elt>" [(match_operand:VSTRUCT_QD 0 "register_operand") (match_operand:DI 1 "register_operand") (match_operand:VSTRUCT_QD 2 "register_operand") @@ -8411,7 +8411,7 @@ ;; This instruction's pattern is generated directly by ;; aarch64_expand_vec_perm_const, so any changes to the pattern would ;; need corresponding changes there. -(define_insn "aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>" +(define_insn "@aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") (match_operand:VALL_F16 2 "register_operand" "w")] @@ -8437,7 +8437,7 @@ ;; aarch64_expand_vec_perm_const, so any changes to the pattern would ;; need corresponding changes there. Note that the immediate (third) ;; operand is a lane index not a byte index. -(define_insn "aarch64_ext<mode>" +(define_insn "@aarch64_ext<mode>" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") (match_operand:VALL_F16 2 "register_operand" "w") @@ -8455,7 +8455,7 @@ ;; This instruction's pattern is generated directly by ;; aarch64_expand_vec_perm_const, so any changes to the pattern would ;; need corresponding changes there. 
-(define_insn "aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>" +(define_insn "@aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")] REVERSE))] @@ -8524,7 +8524,7 @@ [(set_attr "type" "neon_store1_4reg")] ) -(define_expand "aarch64_st<nregs><vstruct_elt>" +(define_expand "@aarch64_st<nregs><vstruct_elt>" [(match_operand:DI 0 "register_operand") (match_operand:VSTRUCT_D 1 "register_operand")] "TARGET_SIMD" @@ -8534,7 +8534,7 @@ DONE; }) -(define_expand "aarch64_st<nregs><vstruct_elt>" +(define_expand "@aarch64_st<nregs><vstruct_elt>" [(match_operand:DI 0 "register_operand") (match_operand:VSTRUCT_Q 1 "register_operand")] "TARGET_SIMD" @@ -8544,7 +8544,7 @@ DONE; }) -(define_expand "aarch64_st<nregs>_lane<vstruct_elt>" +(define_expand "@aarch64_st<nregs>_lane<vstruct_elt>" [(match_operand:DI 0 "register_operand") (match_operand:VSTRUCT_QD 1 "register_operand") (match_operand:SI 2 "immediate_operand")] @@ -8560,7 +8560,7 @@ DONE; }) -(define_expand "aarch64_st1<VALL_F16:mode>" +(define_expand "@aarch64_st1<VALL_F16:mode>" [(match_operand:DI 0 "register_operand") (match_operand:VALL_F16 1 "register_operand")] "TARGET_SIMD" diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 41cc2ee..6bb4bdf 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -1802,7 +1802,7 @@ aarch64_ldn_stn_vectors (machine_mode mode) /* Given an Advanced SIMD vector mode MODE and a tuple size NELEMS, return the corresponding vector structure mode. */ -static opt_machine_mode +opt_machine_mode aarch64_advsimd_vector_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems) { diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index edac1ae..c62de38 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -198,8 +198,10 @@ UNSPEC_AUTIB1716 UNSPEC_AUTIASP UNSPEC_AUTIBSP + UNSPEC_BSL UNSPEC_CALLEE_ABI UNSPEC_CASESI + UNSPEC_COMBINE UNSPEC_CPYMEM UNSPEC_CRC32B UNSPEC_CRC32CB @@ -209,6 +211,8 @@ UNSPEC_CRC32H UNSPEC_CRC32W UNSPEC_CRC32X + UNSPEC_DUP + UNSPEC_DUP_LANE UNSPEC_FCVTZS UNSPEC_FCVTZU UNSPEC_FJCVTZS @@ -227,6 +231,7 @@ UNSPEC_FRINTP UNSPEC_FRINTX UNSPEC_FRINTZ + UNSPEC_GET_LANE UNSPEC_GOTSMALLPIC UNSPEC_GOTSMALLPIC28K UNSPEC_GOTSMALLTLS @@ -236,6 +241,10 @@ UNSPEC_LDP_FST UNSPEC_LDP_SND UNSPEC_LD1 + UNSPEC_LD1_DUP + UNSPEC_LD1x2 + UNSPEC_LD1x3 + UNSPEC_LD1x4 UNSPEC_LD2 UNSPEC_LD2_DREG UNSPEC_LD2_DUP @@ -265,12 +274,17 @@ UNSPEC_REV UNSPEC_SADALP UNSPEC_SCVTF + UNSPEC_SET_LANE UNSPEC_SETMEM UNSPEC_SISD_NEG UNSPEC_SISD_SSHL UNSPEC_SISD_USHL UNSPEC_SSHL_2S UNSPEC_ST1 + UNSPEC_ST1_LANE + UNSPEC_ST1x2 + UNSPEC_ST1x3 + UNSPEC_ST1x4 UNSPEC_ST2 UNSPEC_ST3 UNSPEC_ST4 @@ -314,6 +328,8 @@ UNSPEC_UNPACKSLO UNSPEC_UNPACKULO UNSPEC_PACK + UNSPEC_VCREATE + UNSPEC_VEC_COPY UNSPEC_WHILEGE UNSPEC_WHILEGT UNSPEC_WHILEHI diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 34200b0..07b9754 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -1095,6 +1095,7 @@ UNSPEC_SUBHNB ; Used in aarch64-sve2.md. UNSPEC_SUBHNT ; Used in aarch64-sve2.md. UNSPEC_TBL2 ; Used in aarch64-sve2.md. + UNSPEC_TRN ; Used in aarch64-builtins.cc UNSPEC_UABDLB ; Used in aarch64-sve2.md. UNSPEC_UABDLT ; Used in aarch64-sve2.md. UNSPEC_UADDLB ; Used in aarch64-sve2.md. 
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h index 6f4d62b..567ca2a8 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h @@ -7,6 +7,7 @@ #include <inttypes.h> /* helper type, to help write floating point results in integer form. */ +typedef uint8_t hmfloat8_t; typedef uint16_t hfloat16_t; typedef uint32_t hfloat32_t; typedef uint64_t hfloat64_t; @@ -38,10 +39,24 @@ extern size_t strlen(const char *); Use this macro to guard against them. */ #ifdef __aarch64__ #define AARCH64_ONLY(X) X +#define MFLOAT8_SUPPORTED 1 #else #define AARCH64_ONLY(X) +#define MFLOAT8_SUPPORTED 0 #endif +#if MFLOAT8_SUPPORTED +#define MFLOAT8_ONLY(X) X +#define MFLOAT8(X) (((union { uint8_t x; mfloat8_t y; }) { X }).y) +#define CONVERT(T, X) \ + ((T) _Generic ((T){}, mfloat8_t: MFLOAT8(X), default: X)) +#else +#define MFLOAT8_ONLY(X) +#define CONVERT(T, X) ((T) X) +#endif + +#define BITEQUAL(X, Y) (__builtin_memcmp (&X, &Y, sizeof(X)) == 0) + #define xSTR(X) #X #define STR(X) xSTR(X) @@ -182,6 +197,9 @@ static ARRAY(result, poly, 16, 4); #if defined (__ARM_FEATURE_CRYPTO) static ARRAY(result, poly, 64, 1); #endif +#if MFLOAT8_SUPPORTED +static ARRAY(result, mfloat, 8, 8); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) static ARRAY(result, float, 16, 4); #endif @@ -202,6 +220,9 @@ static ARRAY(result, poly, 16, 8); #if defined (__ARM_FEATURE_CRYPTO) static ARRAY(result, poly, 64, 2); #endif +#if MFLOAT8_SUPPORTED +static ARRAY(result, mfloat, 8, 16); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) static ARRAY(result, float, 16, 8); #endif @@ -222,6 +243,9 @@ extern ARRAY(expected, uint, 32, 2); extern ARRAY(expected, uint, 64, 1); extern ARRAY(expected, poly, 8, 8); extern ARRAY(expected, poly, 16, 4); +#if MFLOAT8_SUPPORTED +extern ARRAY(expected, hmfloat, 8, 8); +#endif extern ARRAY(expected, hfloat, 16, 4); extern ARRAY(expected, hfloat, 32, 2); extern ARRAY(expected, hfloat, 64, 1); @@ -235,6 +259,9 @@ extern ARRAY(expected, uint, 32, 4); extern ARRAY(expected, uint, 64, 2); extern ARRAY(expected, poly, 8, 16); extern ARRAY(expected, poly, 16, 8); +#if MFLOAT8_SUPPORTED +extern ARRAY(expected, hmfloat, 8, 16); +#endif extern ARRAY(expected, hfloat, 16, 8); extern ARRAY(expected, hfloat, 32, 4); extern ARRAY(expected, hfloat, 64, 2); @@ -251,6 +278,8 @@ extern ARRAY(expected, hfloat, 64, 2); CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ + MFLOAT8_ONLY(CHECK_FP(test_name, mfloat, 8, 8, PRIx8, \ + EXPECTED, comment);) \ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ \ CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \ @@ -263,6 +292,8 @@ extern ARRAY(expected, hfloat, 64, 2); CHECK(test_name, uint, 64, 2, PRIx64, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ + MFLOAT8_ONLY(CHECK_FP(test_name, mfloat, 8, 16, PRIx8, \ + EXPECTED, comment);) \ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ } \ @@ -372,6 +403,9 @@ static void clean_results (void) #if defined (__ARM_FEATURE_CRYPTO) CLEAN(result, poly, 64, 1); #endif +#if MFLOAT8_SUPPORTED + CLEAN(result, 
mfloat, 8, 8); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CLEAN(result, float, 16, 4); #endif @@ -390,6 +424,9 @@ static void clean_results (void) #if defined (__ARM_FEATURE_CRYPTO) CLEAN(result, poly, 64, 2); #endif +#if MFLOAT8_SUPPORTED + CLEAN(result, mfloat, 8, 16); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CLEAN(result, float, 16, 8); #endif @@ -460,6 +497,7 @@ static void clean_results (void) DECL_VARIABLE(VAR, poly, 8, 8); \ DECL_VARIABLE(VAR, poly, 16, 4); \ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \ + MFLOAT8_ONLY(DECL_VARIABLE(VAR, mfloat, 8, 8);) \ DECL_VARIABLE(VAR, float, 16, 4); \ DECL_VARIABLE(VAR, float, 32, 2) #else @@ -480,6 +518,7 @@ static void clean_results (void) DECL_VARIABLE(VAR, poly, 8, 16); \ DECL_VARIABLE(VAR, poly, 16, 8); \ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \ + MFLOAT8_ONLY(DECL_VARIABLE(VAR, mfloat, 8, 16);) \ DECL_VARIABLE(VAR, float, 16, 8); \ DECL_VARIABLE(VAR, float, 32, 4); \ AARCH64_ONLY(DECL_VARIABLE(VAR, float, 64, 2)) @@ -490,6 +529,7 @@ static void clean_results (void) DECL_VARIABLE(VAR, poly, 8, 16); \ DECL_VARIABLE(VAR, poly, 16, 8); \ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \ + MFLOAT8_ONLY(DECL_VARIABLE(VAR, mfloat, 8, 16);) \ DECL_VARIABLE(VAR, float, 32, 4); \ AARCH64_ONLY(DECL_VARIABLE(VAR, float, 64, 2)) #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h index f8c4aef..7666ae0 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h @@ -122,6 +122,10 @@ PAD(buffer_pad, uint, 64, 1); VECT_VAR_DECL_INIT(buffer, poly, 64, 1); PAD(buffer_pad, poly, 64, 1); #endif +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(buffer, mfloat, 8, 8)[8]; +PAD(buffer_pad, mfloat, 8, 8); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer, float, 16, 4); PAD(buffer_pad, float, 16, 4); @@ -152,6 +156,10 @@ PAD(buffer_pad, poly, 16, 8); VECT_VAR_DECL_INIT(buffer, poly, 64, 2); PAD(buffer_pad, poly, 64, 2); #endif +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(buffer, mfloat, 8, 16)[16]; +PAD(buffer_pad, mfloat, 8, 16); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer, float, 16, 8); PAD(buffer_pad, float, 16, 8); @@ -190,6 +198,10 @@ VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4); VECT_VAR_DECL_INIT4(buffer_dup, poly, 64, 1); VECT_VAR_DECL(buffer_dup_pad, poly, 64, 1); #endif +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(buffer_dup, mfloat, 8, 8)[8]; +PAD(buffer_dup_pad, mfloat, 8, 8); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT4(buffer_dup, float, 16, 4); VECT_VAR_DECL(buffer_dup_pad, float, 16, 4); @@ -221,9 +233,26 @@ VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8); VECT_VAR_DECL_INIT4(buffer_dup, poly, 64, 2); VECT_VAR_DECL(buffer_dup_pad, poly, 64, 2); #endif +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(buffer_dup, mfloat, 8, 16)[16]; +PAD(buffer_dup_pad, mfloat, 8, 16); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer_dup, float, 16, 8); VECT_VAR_DECL(buffer_dup_pad, float, 16, 8); #endif VECT_VAR_DECL_INIT(buffer_dup, float, 32, 4); VECT_VAR_DECL(buffer_dup_pad, float, 32, 4); + +#if MFLOAT8_SUPPORTED +static void __attribute__((constructor)) 
+copy_mfloat8 () +{ + memcpy (VECT_VAR(buffer, mfloat, 8, 8), VECT_VAR(buffer, uint, 8, 8), 8); + memcpy (VECT_VAR(buffer, mfloat, 8, 16), VECT_VAR(buffer, uint, 8, 16), 16); + memcpy (VECT_VAR(buffer_dup, mfloat, 8, 8), + VECT_VAR(buffer_dup, uint, 8, 8), 8); + memcpy (VECT_VAR(buffer_dup, mfloat, 8, 16), + VECT_VAR(buffer_dup, uint, 8, 16), 16); +} +#endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c index e9b3dfd..4c50acc 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xfa, 0xfa, 0xfa, 0xfa, + 0xfe, 0xfe, 0xfe, 0xfe }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc09, 0xcb89, 0xcb09, 0xca89 }; @@ -47,6 +51,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2, 0xfff4, 0xfff4, 0xfff6, 0xfff6 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, + 0xf5, 0xf5, 0xf5, 0xf5, + 0xf1, 0xf1, 0xf1, 0xf1, + 0xf5, 0xf5, 0xf5, 0xf5 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc09, 0xcb89, 0xcb09, 0xca89, @@ -76,6 +86,10 @@ void exec_vbsl (void) clean_results (); TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if MFLOAT8_SUPPORTED + VLOAD(vector, buffer, , mfloat, mf, 8, 8); + VLOAD(vector, buffer, q, mfloat, mf, 8, 16); +#endif #if defined (FP16_SUPPORTED) VLOAD(vector, buffer, , float, f, 16, 4); VLOAD(vector, buffer, q, float, f, 16, 8); @@ -94,6 +108,7 @@ void exec_vbsl (void) VDUP(vector2, , uint, u, 16, 4, 0xFFF2); VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0); VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3); + MFLOAT8_ONLY(VDUP(vector2, , mfloat, mf, 8, 8, MFLOAT8(0xca));) #if defined (FP16_SUPPORTED) VDUP(vector2, , float, f, 16, 4, -2.4f); /* -2.4f is 0xC0CD. 
*/ #endif @@ -111,6 +126,7 @@ void exec_vbsl (void) VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3); VDUP(vector2, q, poly, p, 8, 16, 0xF3); VDUP(vector2, q, poly, p, 16, 8, 0xFFF2); + MFLOAT8_ONLY(VDUP(vector2, q, mfloat, mf, 8, 16, MFLOAT8(0x55));) #if defined (FP16_SUPPORTED) VDUP(vector2, q, float, f, 16, 8, -2.4f); #endif @@ -131,6 +147,10 @@ void exec_vbsl (void) TEST_VBSL(uint, , poly, p, 16, 4); TEST_VBSL(uint, q, poly, p, 8, 16); TEST_VBSL(uint, q, poly, p, 16, 8); +#if MFLOAT8_SUPPORTED + TEST_VBSL(uint, , mfloat, mf, 8, 8); + TEST_VBSL(uint, q, mfloat, mf, 8, 16); +#endif #if defined (FP16_SUPPORTED) TEST_VBSL(uint, , float, f, 16, 4); TEST_VBSL(uint, q, float, f, 16, 8); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c index e9d31d6..05933f9 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c @@ -25,6 +25,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0x66, 0x66, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xcc, 0xcc, 0xcc, 0xcc, + 0xcc, 0xcc, 0xcc, 0xcc }; +#endif VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, 0x40533333, 0x40533333 }; VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, @@ -46,6 +52,7 @@ void exec_vcombine (void) /* Initialize input "vector64_a" from "buffer". */ TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector64_a, buffer); + MFLOAT8_ONLY(VLOAD(vector64_a, buffer, , mfloat, mf, 8, 8);) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VLOAD(vector64_a, buffer, , float, f, 16, 4); #endif @@ -62,6 +69,7 @@ void exec_vcombine (void) VDUP(vector64_b, , uint, u, 64, 1, 0x88); VDUP(vector64_b, , poly, p, 8, 8, 0x55); VDUP(vector64_b, , poly, p, 16, 4, 0x66); + MFLOAT8_ONLY(VDUP(vector64_b, , mfloat, mf, 8, 8, MFLOAT8(0xcc));) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VDUP(vector64_b, , float, f, 16, 4, 2.25); #endif @@ -80,6 +88,7 @@ void exec_vcombine (void) TEST_VCOMBINE(uint, u, 64, 1, 2); TEST_VCOMBINE(poly, p, 8, 8, 16); TEST_VCOMBINE(poly, p, 16, 4, 8); + MFLOAT8_ONLY(TEST_VCOMBINE(mfloat, mf, 8, 8, 16);) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) TEST_VCOMBINE(float, f, 16, 4, 8); #endif @@ -95,6 +104,7 @@ void exec_vcombine (void) CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 16, PRIx16, expected, "");) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c index c0b9c7a..77d9be2 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x123456789abcdef0 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a, 0x78, 0x56, 0x34, 0x12 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xdef0, 0x9abc, 
0x5678, 0x1234 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xf0, 0xde, 0xbc, 0x9a, + 0x78, 0x56, 0x34, 0x12 }; +#endif VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xdef0, 0x9abc, 0x5678, 0x1234 }; VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x9abcdef0, 0x12345678 }; @@ -39,6 +43,7 @@ FNNAME (INSN_NAME) DECL_VAL(val, int, 16, 4); DECL_VAL(val, int, 32, 2); DECL_VAL(val, int, 64, 1); + MFLOAT8_ONLY(DECL_VAL(val, mfloat, 8, 8);) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) DECL_VAL(val, float, 16, 4); #endif @@ -54,6 +59,7 @@ FNNAME (INSN_NAME) DECL_VARIABLE(vector_res, int, 16, 4); DECL_VARIABLE(vector_res, int, 32, 2); DECL_VARIABLE(vector_res, int, 64, 1); + MFLOAT8_ONLY(DECL_VARIABLE(vector_res, mfloat, 8, 8);) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) DECL_VARIABLE(vector_res, float, 16, 4); #endif @@ -72,6 +78,7 @@ FNNAME (INSN_NAME) VECT_VAR(val, int, 16, 4) = 0x123456789abcdef0LL; VECT_VAR(val, int, 32, 2) = 0x123456789abcdef0LL; VECT_VAR(val, int, 64, 1) = 0x123456789abcdef0LL; + MFLOAT8_ONLY(VECT_VAR(val, mfloat, 8, 8) = 0x123456789abcdef0LL;) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR(val, float, 16, 4) = 0x123456789abcdef0LL; #endif @@ -86,6 +93,7 @@ FNNAME (INSN_NAME) TEST_VCREATE(int, s, 8, 8); TEST_VCREATE(int, s, 16, 4); TEST_VCREATE(int, s, 32, 2); + MFLOAT8_ONLY(TEST_VCREATE(mfloat, mf, 8, 8);) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) TEST_VCREATE(float, f, 16, 4); #endif @@ -108,6 +116,7 @@ FNNAME (INSN_NAME) CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx16, expected, "");) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c index aef4173..26c5489 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c @@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected0,hmfloat,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00 }; @@ -50,6 +54,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected0,hmfloat,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00, @@ -73,6 +83,10 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 
0xfff1, 0xfff1 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected1,hmfloat,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, + 0xf1, 0xf1, 0xf1, 0xf1 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80 }; @@ -104,6 +118,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected1,hmfloat,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, + 0xf1, 0xf1, 0xf1, 0xf1, + 0xf1, 0xf1, 0xf1, 0xf1, + 0xf1, 0xf1, 0xf1, 0xf1 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80, @@ -127,6 +147,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 }; VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, + 0xf2, 0xf2, 0xf2, 0xf2 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00 }; @@ -158,6 +182,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, + 0xf2, 0xf2, 0xf2, 0xf2, + 0xf2, 0xf2, 0xf2, 0xf2, + 0xf2, 0xf2, 0xf2, 0xf2 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00, @@ -201,6 +231,7 @@ void exec_vdup_vmov (void) TEST_VDUP(, uint, u, 64, 1); TEST_VDUP(, poly, p, 8, 8); TEST_VDUP(, poly, p, 16, 4); + MFLOAT8_ONLY(TEST_VDUP(, mfloat, mf, 8, 8)); #if defined (FP16_SUPPORTED) TEST_VDUP(, float, f, 16, 4); #endif @@ -216,6 +247,7 @@ void exec_vdup_vmov (void) TEST_VDUP(q, uint, u, 64, 2); TEST_VDUP(q, poly, p, 8, 16); TEST_VDUP(q, poly, p, 16, 8); + MFLOAT8_ONLY(TEST_VDUP(q, mfloat, mf, 8, 16)); #if defined (FP16_SUPPORTED) TEST_VDUP(q, float, f, 16, 8); #endif @@ -268,6 +300,7 @@ void exec_vdup_vmov (void) TEST_VMOV(, uint, u, 64, 1); TEST_VMOV(, poly, p, 8, 8); TEST_VMOV(, poly, p, 16, 4); + MFLOAT8_ONLY(TEST_VMOV(, mfloat, mf, 8, 8)); #if defined (FP16_SUPPORTED) TEST_VMOV(, float, f, 16, 4); #endif @@ -283,6 +316,7 @@ void exec_vdup_vmov (void) TEST_VMOV(q, uint, u, 64, 2); TEST_VMOV(q, poly, p, 8, 16); TEST_VMOV(q, poly, p, 16, 8); + MFLOAT8_ONLY(TEST_VMOV(q, mfloat, mf, 8, 16)); #if defined (FP16_SUPPORTED) TEST_VMOV(q, float, f, 16, 8); #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c index 5d0dba3..e0f6a86 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xf6, 0xf6, 0xf6, 0xf6, + 0xf6, 0xf6, 0xf6, 0xf6 }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; #if defined (FP16_SUPPORTED) 
VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xca80, 0xca80, @@ -47,6 +51,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,16) [] = { 0xf7, 0xf7, 0xf7, 0xf7, + 0xf7, 0xf7, 0xf7, 0xf7, + 0xf7, 0xf7, 0xf7, 0xf7, + 0xf7, 0xf7, 0xf7, 0xf7 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 0xca80, 0xca80, 0xca80, @@ -73,6 +83,7 @@ void exec_vdup_lane (void) clean_results (); TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer); + MFLOAT8_ONLY(VLOAD(vector, buffer, , mfloat, mf, 8, 8);) #if defined (FP16_SUPPORTED) VLOAD(vector, buffer, , float, f, 16, 4); #endif @@ -89,6 +100,7 @@ void exec_vdup_lane (void) TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0); TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7); TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3); + MFLOAT8_ONLY(TEST_VDUP_LANE(, mfloat, mf, 8, 8, 8, 6);) #if defined (FP16_SUPPORTED) TEST_VDUP_LANE(, float, f, 16, 4, 4, 3); #endif @@ -104,6 +116,7 @@ void exec_vdup_lane (void) TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0); TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5); TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1); + MFLOAT8_ONLY(TEST_VDUP_LANE(q, mfloat, mf, 8, 16, 8, 7);) #if defined (FP16_SUPPORTED) TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3); #endif @@ -134,6 +147,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7, 0xf7 }; VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,8) [] = { 0xfb, 0xfb, 0xfb, 0xfb, + 0xfb, 0xfb, 0xfb, 0xfb }; +#endif VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xca80, 0xca80, @@ -165,6 +182,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5, 0xf5 }; VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,16) [] = { 0xfc, 0xfc, 0xfc, 0xfc, + 0xfc, 0xfc, 0xfc, 0xfc, + 0xfc, 0xfc, 0xfc, 0xfc, + 0xfc, 0xfc, 0xfc, 0xfc }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xc880, 0xc880, 0xc880, 0xc880, @@ -188,6 +211,7 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, clean_results (); TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector, buffer); + MFLOAT8_ONLY(VLOAD(vector, buffer, q, mfloat, mf, 8, 16);) #if defined (FP16_SUPPORTED) VLOAD(vector, buffer, q, float, f, 16, 8); #endif @@ -204,6 +228,7 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, TEST_VDUP_LANEQ(, uint, u, 64, 1, 2, 0); TEST_VDUP_LANEQ(, poly, p, 8, 8, 16, 7); TEST_VDUP_LANEQ(, poly, p, 16, 4, 8, 3); + MFLOAT8_ONLY(TEST_VDUP_LANEQ(, mfloat, mf, 8, 8, 16, 11);) #if defined (FP16_SUPPORTED) TEST_VDUP_LANEQ(, float, f, 16, 4, 8, 3); #endif @@ -219,6 +244,7 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, TEST_VDUP_LANEQ(q, uint, u, 64, 2, 2, 0); TEST_VDUP_LANEQ(q, poly, p, 8, 16, 16, 5); TEST_VDUP_LANEQ(q, poly, p, 16, 8, 8, 1); + MFLOAT8_ONLY(TEST_VDUP_LANEQ(q, mfloat, mf, 8, 16, 16, 12);) #if defined (FP16_SUPPORTED) TEST_VDUP_LANEQ(q, float, f, 16, 8, 8, 7); #endif diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c index 908294a..f7da4ee 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xf5, 0xf6, 0xf7, 0x77, + 0x77, 0x77, 0x77, 0x77 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcb00, 0xca80, 0x4b4d, 0x4b4d }; @@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,16) [] = { 0xf9, 0xfa, 0xfb, 0xfc, + 0xfd, 0xfe, 0xff, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xc880, 0x4b4d, 0x4b4d, 0x4b4d, @@ -70,6 +80,10 @@ void exec_vext (void) clean_results (); TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); +#if MFLOAT8_SUPPORTED + VLOAD(vector1, buffer, , mfloat, mf, 8, 8); + VLOAD(vector1, buffer, q, mfloat, mf, 8, 16); +#endif #ifdef FP16_SUPPORTED VLOAD(vector1, buffer, , float, f, 16, 4); VLOAD(vector1, buffer, q, float, f, 16, 8); @@ -88,6 +102,7 @@ void exec_vext (void) VDUP(vector2, , uint, u, 64, 1, 0x88); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); + MFLOAT8_ONLY(VDUP(vector2, , mfloat, mf, 8, 8, MFLOAT8(0x77))); #if defined (FP16_SUPPORTED) VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. 
*/ #endif @@ -103,6 +118,7 @@ void exec_vext (void) VDUP(vector2, q, uint, u, 64, 2, 0x88); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); + MFLOAT8_ONLY(VDUP(vector2, q, mfloat, mf, 8, 16, MFLOAT8(0xaa))); #if defined (FP16_SUPPORTED) VDUP (vector2, q, float, f, 16, 8, 14.6f); #endif @@ -119,6 +135,7 @@ void exec_vext (void) TEST_VEXT(, uint, u, 64, 1, 0); TEST_VEXT(, poly, p, 8, 8, 6); TEST_VEXT(, poly, p, 16, 4, 2); + MFLOAT8_ONLY(TEST_VEXT(, mfloat, mf, 8, 8, 5)); #if defined (FP16_SUPPORTED) TEST_VEXT(, float, f, 16, 4, 2); #endif @@ -134,6 +151,7 @@ void exec_vext (void) TEST_VEXT(q, uint, u, 64, 2, 1); TEST_VEXT(q, poly, p, 8, 16, 12); TEST_VEXT(q, poly, p, 16, 8, 6); + MFLOAT8_ONLY(TEST_VEXT(q, mfloat, mf, 8, 16, 9)); #if defined (FP16_SUPPORTED) TEST_VEXT(q, float, f, 16, 8, 7); #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c index f3b14ce..e2c9273 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +#endif VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 }; VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; @@ -32,6 +36,7 @@ void exec_vget_high (void) DECL_VARIABLE_128BITS_VARIANTS(vector128); TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector128, buffer); + MFLOAT8_ONLY(VLOAD(vector128, buffer, q, mfloat, mf, 8, 16);) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VLOAD(vector128, buffer, q, float, f, 16, 8); #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c index 4ed0e46..7044b3c 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; +#endif VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, @@ -45,6 +49,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xfc, 0xfd, 0xfe, 0xff }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +#endif VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0xc980, 0xc900, 0xc880 }; VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, @@ -65,6 +75,10 @@ void exec_vld1 (void) TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, 
buffer); +#if MFLOAT8_SUPPORTED + TEST_VLD1(vector, buffer, , mfloat, mf, 8, 8); + TEST_VLD1(vector, buffer, q, mfloat, mf, 8, 16); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) TEST_VLD1(vector, buffer, , float, f, 16, 4); TEST_VLD1(vector, buffer, q, float, f, 16, 8); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c index 34be214..275cfee 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_dup.c @@ -17,6 +17,10 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected0,hmfloat,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0 }; +#endif VECT_VAR_DECL(expected0,hfloat,16,4) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00 }; VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, @@ -45,6 +49,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected0,hmfloat,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0, + 0xf0, 0xf0, 0xf0, 0xf0 }; +#endif VECT_VAR_DECL(expected0,hfloat,16,8) [] = { 0xcc00, 0xcc00, 0xcc00, 0xcc00, 0xcc00, 0xcc00, 0xcc00, 0xcc00 }; VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000, @@ -64,6 +74,10 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected1,hmfloat,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, + 0xf1, 0xf1, 0xf1, 0xf1 }; +#endif VECT_VAR_DECL(expected1,hfloat,16,4) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80 }; VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, @@ -92,6 +106,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected1,hmfloat,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, + 0xf1, 0xf1, 0xf1, 0xf1, + 0xf1, 0xf1, 0xf1, 0xf1, + 0xf1, 0xf1, 0xf1, 0xf1 }; +#endif VECT_VAR_DECL(expected1,hfloat,16,8) [] = { 0xcb80, 0xcb80, 0xcb80, 0xcb80, 0xcb80, 0xcb80, 0xcb80, 0xcb80 }; VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, @@ -111,6 +131,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 }; VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, + 0xf2, 0xf2, 0xf2, 0xf2 }; +#endif VECT_VAR_DECL(expected2,hfloat,16,4) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00 }; VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 }; VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 
@@ -139,6 +163,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2, 0xf2 }; VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, + 0xf2, 0xf2, 0xf2, 0xf2, + 0xf2, 0xf2, 0xf2, 0xf2, + 0xf2, 0xf2, 0xf2, 0xf2 }; +#endif VECT_VAR_DECL(expected2,hfloat,16,8) [] = { 0xcb00, 0xcb00, 0xcb00, 0xcb00, 0xcb00, 0xcb00, 0xcb00, 0xcb00 }; VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000, @@ -163,6 +193,10 @@ void exec_vld1_dup (void) TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1_DUP, vector, buffer_dup); +#if MFLOAT8_SUPPORTED + TEST_VLD1_DUP(vector, buffer_dup, , mfloat, mf, 8, 8); + TEST_VLD1_DUP(vector, buffer_dup, q, mfloat, mf, 8, 16); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) TEST_VLD1_DUP(vector, buffer_dup, , float, f, 16, 4); TEST_VLD1_DUP(vector, buffer_dup, q, float, f, 16, 8); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c index 1f39006..d6f3ce7 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1_lane.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xf0 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xf0, 0xaa, 0xaa }; +#endif VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xaaaa }; VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xaaaaaaaa, 0xc1800000 }; VECT_VAR_DECL(expected,int,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, @@ -44,6 +48,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xaa, 0xaa, 0xaa }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xfff0, 0xaaaa }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,16) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xf0, + 0xaa, 0xaa, 0xaa, 0xaa }; +#endif VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xcc00, 0xaaaa, 0xaaaa }; VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xaaaaaaaa, 0xaaaaaaaa, @@ -75,6 +85,7 @@ void exec_vld1_lane (void) ARRAY(buffer_src, uint, 64, 1); ARRAY(buffer_src, poly, 8, 8); ARRAY(buffer_src, poly, 16, 4); + MFLOAT8_ONLY(ARRAY(buffer_src, mfloat, 8, 8)); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) ARRAY(buffer_src, float, 16, 4); #endif @@ -90,6 +101,7 @@ void exec_vld1_lane (void) ARRAY(buffer_src, uint, 64, 2); ARRAY(buffer_src, poly, 8, 16); ARRAY(buffer_src, poly, 16, 8); + MFLOAT8_ONLY(ARRAY(buffer_src, mfloat, 8, 16)); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) ARRAY(buffer_src, float, 16, 8); #endif @@ -108,6 +120,7 @@ void exec_vld1_lane (void) TEST_VLD1_LANE(, uint, u, 64, 1, 0); TEST_VLD1_LANE(, poly, p, 8, 8, 7); TEST_VLD1_LANE(, poly, p, 16, 4, 3); + MFLOAT8_ONLY(TEST_VLD1_LANE(, mfloat, mf, 8, 8, 5)); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) TEST_VLD1_LANE(, float, f, 16, 4, 2); #endif @@ -123,6 +136,7 @@ void exec_vld1_lane (void) TEST_VLD1_LANE(q, uint, u, 64, 2, 0); 
TEST_VLD1_LANE(q, poly, p, 8, 16, 12); TEST_VLD1_LANE(q, poly, p, 16, 8, 6); + MFLOAT8_ONLY(TEST_VLD1_LANE(q, mfloat, mf, 8, 16, 11)); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) TEST_VLD1_LANE(q, float, f, 16, 8, 5); #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c index 0c45a2b..6e56ff1 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x2.c @@ -4,6 +4,7 @@ /* { dg-options "-O3" } */ #include <arm_neon.h> +#include "arm-neon-ref.h" extern void abort (void); @@ -16,14 +17,14 @@ test_vld##SUFFIX##_x2 () \ BASE##x##ELTS##x##2##_t vectors; \ int i,j; \ for (i = 0; i < ELTS * 2; i++) \ - data [i] = (BASE##_t) 2*i + 1; \ + data [i] = CONVERT (BASE##_t, 2*i + 1); \ asm volatile ("" : : : "memory"); \ vectors = vld1##SUFFIX##_x2 (data); \ vst1##SUFFIX (temp, vectors.val[0]); \ vst1##SUFFIX (&temp[ELTS], vectors.val[1]); \ asm volatile ("" : : : "memory"); \ for (j = 0; j < ELTS * 2; j++) \ - if (temp[j] != data[j]) \ + if (!BITEQUAL (temp[j], data[j])) \ return 1; \ return 0; \ } @@ -56,6 +57,8 @@ VARIANT (float32, 4, q_f32) #ifdef __aarch64__ #define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \ +VARIANT (mfloat8, 8, _mf8) \ +VARIANT (mfloat8, 16, q_mf8) \ VARIANT (float64, 1, _f64) \ VARIANT (float64, 2, q_f64) #else @@ -65,14 +68,14 @@ VARIANT (float64, 2, q_f64) /* Tests of vld1_x2 and vld1q_x2. */ VARIANTS (TESTMETH) -#define CHECK(BASE, ELTS, SUFFIX) \ +#define CHECKS(BASE, ELTS, SUFFIX) \ if (test_vld##SUFFIX##_x2 () != 0) \ abort (); int main (int argc, char **argv) { - VARIANTS (CHECK) + VARIANTS (CHECKS) return 0; } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x3.c index 4174dcd..42aeadf 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x3.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x3.c @@ -17,7 +17,7 @@ test_vld##SUFFIX##_x3 () \ BASE##x##ELTS##x##3##_t vectors; \ int i,j; \ for (i = 0; i < ELTS * 3; i++) \ - data [i] = (BASE##_t) 3*i; \ + data [i] = CONVERT (BASE##_t, 3*i); \ asm volatile ("" : : : "memory"); \ vectors = vld1##SUFFIX##_x3 (data); \ vst1##SUFFIX (temp, vectors.val[0]); \ @@ -25,7 +25,7 @@ test_vld##SUFFIX##_x3 () \ vst1##SUFFIX (&temp[ELTS * 2], vectors.val[2]); \ asm volatile ("" : : : "memory"); \ for (j = 0; j < ELTS * 3; j++) \ - if (temp[j] != data[j]) \ + if (!BITEQUAL (temp[j], data[j])) \ return 1; \ return 0; \ } @@ -58,6 +58,8 @@ VARIANT (float32, 4, q_f32) #ifdef __aarch64__ #define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \ +VARIANT (mfloat8, 8, _mf8) \ +VARIANT (mfloat8, 16, q_mf8) \ VARIANT (float64, 1, _f64) \ VARIANT (float64, 2, q_f64) #else @@ -70,7 +72,7 @@ VARIANTS (TESTMETH) #define CHECKS(BASE, ELTS, SUFFIX) \ if (test_vld##SUFFIX##_x3 () != 0) \ - fprintf (stderr, "test_vld1##SUFFIX##_x3"); + fprintf (stderr, "test_vld1##SUFFIX##_x3"), abort (); int main (int argc, char **argv) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c index 17db262..694fda8 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c @@ -18,7 +18,7 @@ test_vld1##SUFFIX##_x4 () \ BASE##x##ELTS##x##4##_t vectors; \ int i,j; \ for (i = 0; i < ELTS * 4; i++) \ - data [i] = 
(BASE##_t) 4*i; \ + data [i] = CONVERT (BASE##_t, 4*i); \ asm volatile ("" : : : "memory"); \ vectors = vld1##SUFFIX##_x4 (data); \ vst1##SUFFIX (temp, vectors.val[0]); \ @@ -27,7 +27,7 @@ test_vld1##SUFFIX##_x4 () \ vst1##SUFFIX (&temp[ELTS * 3], vectors.val[3]); \ asm volatile ("" : : : "memory"); \ for (j = 0; j < ELTS * 4; j++) \ - if (temp[j] != data[j]) \ + if (!BITEQUAL (temp[j], data[j])) \ return 1; \ return 0; \ } @@ -62,6 +62,8 @@ VARIANT (float32, 4, q_f32) #ifdef __aarch64__ #define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \ +VARIANT (mfloat8, 8, _mf8) \ +VARIANT (mfloat8, 16, q_mf8) \ VARIANT (float64, 1, _f64) \ VARIANT (float64, 2, q_f64) #else diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c index 8a5fc22..81d7669 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c @@ -18,6 +18,10 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld2_0,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; +#endif VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected_vld2_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, @@ -42,6 +46,12 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xfc, 0xfd, 0xfe, 0xff }; VECT_VAR_DECL(expected_vld2_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld2_0,hmfloat,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +#endif VECT_VAR_DECL(expected_vld2_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0xc980, 0xc900, 0xc880 }; VECT_VAR_DECL(expected_vld2_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, @@ -61,6 +71,10 @@ VECT_VAR_DECL(expected_vld2_1,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld2_1,hmfloat,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +#endif VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 }; VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; VECT_VAR_DECL(expected_vld2_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3, @@ -85,6 +99,12 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf }; VECT_VAR_DECL(expected_vld2_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld2_1,hmfloat,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xe, 0xf }; +#endif VECT_VAR_DECL(expected_vld2_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500, 0xc400, 0xc200, 0xc000, 0xbc00 }; VECT_VAR_DECL(expected_vld2_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000, @@ -104,6 +124,10 @@ VECT_VAR_DECL(expected_vld3_0,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_0,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; +#endif VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected_vld3_0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, @@ -128,6 +152,12 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xfc, 0xfd, 0xfe, 0xff }; VECT_VAR_DECL(expected_vld3_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_0,hmfloat,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +#endif VECT_VAR_DECL(expected_vld3_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0xc980, 0xc900, 0xc880 }; VECT_VAR_DECL(expected_vld3_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, @@ -147,6 +177,10 @@ VECT_VAR_DECL(expected_vld3_1,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_1,hmfloat,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +#endif VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 }; VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; VECT_VAR_DECL(expected_vld3_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3, @@ -171,6 +205,12 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf }; VECT_VAR_DECL(expected_vld3_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_1,hmfloat,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xe, 0xf }; +#endif VECT_VAR_DECL(expected_vld3_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500, 0xc400, 0xc200, 0xc000, 0xbc00 }; VECT_VAR_DECL(expected_vld3_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000, @@ -193,6 +233,10 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_2,hmfloat,8,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7 }; +#endif VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 }; VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 }; VECT_VAR_DECL(expected_vld3_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13, @@ -217,6 +261,12 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x1c, 0x1d, 0x1e, 0x1f }; VECT_VAR_DECL(expected_vld3_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_2,hmfloat,8,16) [] = { 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, + 0x1c, 0x1d, 0x1e, 0x1f }; +#endif VECT_VAR_DECL(expected_vld3_2,hfloat,16,8) [] = { 0x0000, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700 }; VECT_VAR_DECL(expected_vld3_2,hfloat,32,4) [] = { 0xc1000000, 0xc0e00000, @@ -237,6 +287,10 @@ VECT_VAR_DECL(expected_vld4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; 
VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_0,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; +#endif VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; @@ -262,6 +316,12 @@ VECT_VAR_DECL(expected_vld4_0,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xfc, 0xfd, 0xfe, 0xff }; VECT_VAR_DECL(expected_vld4_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_0,hmfloat,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +#endif VECT_VAR_DECL(expected_vld4_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0xc980, 0xc900, 0xc880 }; VECT_VAR_DECL(expected_vld4_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, @@ -281,6 +341,10 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff }; VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_1,hmfloat,8,8) [] = { 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +#endif VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xca00, 0xc980, 0xc900, 0xc880 }; VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; VECT_VAR_DECL(expected_vld4_1,int,8,16) [] = { 0x0, 0x1, 0x2, 0x3, @@ -305,6 +369,12 @@ VECT_VAR_DECL(expected_vld4_1,poly,8,16) [] = { 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf }; VECT_VAR_DECL(expected_vld4_1,poly,16,8) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb, 0xfffc, 0xfffd, 0xfffe, 0xffff }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_1,hmfloat,8,16) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xe, 0xf }; +#endif VECT_VAR_DECL(expected_vld4_1,hfloat,16,8) [] = { 0xc800, 0xc700, 0xc600, 0xc500, 0xc400, 0xc200, 0xc000, 0xbc00 }; VECT_VAR_DECL(expected_vld4_1,hfloat,32,4) [] = { 0xc1400000, 0xc1300000, @@ -324,6 +394,10 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 }; VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff8, 0xfff9, 0xfffa, 0xfffb }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_2,hmfloat,8,8) [] = { 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7 }; +#endif VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xc800, 0xc700, 0xc600, 0xc500 }; VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1400000, 0xc1300000 }; VECT_VAR_DECL(expected_vld4_2,int,8,16) [] = { 0x10, 0x11, 0x12, 0x13, @@ -348,6 +422,12 @@ VECT_VAR_DECL(expected_vld4_2,poly,8,16) [] = { 0x10, 0x11, 0x12, 0x13, 0x1c, 0x1d, 0x1e, 0x1f }; VECT_VAR_DECL(expected_vld4_2,poly,16,8) [] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_2,hmfloat,8,16) [] = { 0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, + 0x1c, 0x1d, 0x1e, 0x1f }; +#endif VECT_VAR_DECL(expected_vld4_2,hfloat,16,8) [] = { 0x0000, 0x3c00, 0x4000, 0x4200, 0x4400, 0x4500, 0x4600, 0x4700 }; VECT_VAR_DECL(expected_vld4_2,hfloat,32,4) [] = { 
0xc1000000, 0xc0e00000, @@ -367,6 +447,10 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 }; VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfffc, 0xfffd, 0xfffe, 0xffff }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_3,hmfloat,8,8) [] = { 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xe, 0xf }; +#endif VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xc400, 0xc200, 0xc000, 0xbc00 }; VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1200000, 0xc1100000 }; VECT_VAR_DECL(expected_vld4_3,int,8,16) [] = { 0x20, 0x21, 0x22, 0x23, @@ -391,6 +475,12 @@ VECT_VAR_DECL(expected_vld4_3,poly,8,16) [] = { 0x20, 0x21, 0x22, 0x23, 0x2c, 0x2d, 0x2e, 0x2f }; VECT_VAR_DECL(expected_vld4_3,poly,16,8) [] = { 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_3,hmfloat,8,16) [] = { 0x20, 0x21, 0x22, 0x23, + 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, + 0x2c, 0x2d, 0x2e, 0x2f }; +#endif VECT_VAR_DECL(expected_vld4_3,hfloat,16,8) [] = { 0x4800, 0x4880, 0x4900, 0x4980, 0x4a00, 0x4a80, 0x4b00, 0x4b80 }; VECT_VAR_DECL(expected_vld4_3,hfloat,32,4) [] = { 0xc0800000, 0xc0400000, @@ -436,6 +526,7 @@ void exec_vldX (void) DECL_VLDX(uint, 64, 1, X); \ DECL_VLDX(poly, 8, 8, X); \ DECL_VLDX(poly, 16, 4, X); \ + MFLOAT8_ONLY(DECL_VLDX(mfloat, 8, 8, X)); \ DECL_VLDX(float, 32, 2, X); \ DECL_VLDX(int, 8, 16, X); \ DECL_VLDX(int, 16, 8, X); \ @@ -445,6 +536,7 @@ void exec_vldX (void) DECL_VLDX(uint, 32, 4, X); \ DECL_VLDX(poly, 8, 16, X); \ DECL_VLDX(poly, 16, 8, X); \ + MFLOAT8_ONLY(DECL_VLDX(mfloat, 8, 16, X)); \ DECL_VLDX(float, 32, 4, X) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) @@ -467,6 +559,7 @@ void exec_vldX (void) TEST_VLDX(, uint, u, 64, 1, X); \ TEST_VLDX(, poly, p, 8, 8, X); \ TEST_VLDX(, poly, p, 16, 4, X); \ + MFLOAT8_ONLY(TEST_VLDX(, mfloat, mf, 8, 8, X)); \ TEST_VLDX(, float, f, 32, 2, X); \ TEST_VLDX(q, int, s, 8, 16, X); \ TEST_VLDX(q, int, s, 16, 8, X); \ @@ -476,6 +569,7 @@ void exec_vldX (void) TEST_VLDX(q, uint, u, 32, 4, X); \ TEST_VLDX(q, poly, p, 8, 16, X); \ TEST_VLDX(q, poly, p, 16, 8, X); \ + MFLOAT8_ONLY(TEST_VLDX(q, mfloat, mf, 8, 16, X)); \ TEST_VLDX(q, float, f, 32, 4, X) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) @@ -498,6 +592,7 @@ void exec_vldX (void) TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \ TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \ TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ + MFLOAT8_ONLY(TEST_EXTRA_CHUNK(mfloat, 8, 8, X, Y)); \ TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ TEST_EXTRA_CHUNK(int, 8, 16, X, Y); \ TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ @@ -507,6 +602,7 @@ void exec_vldX (void) TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ TEST_EXTRA_CHUNK(poly, 8, 16, X, Y); \ TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ + MFLOAT8_ONLY(TEST_EXTRA_CHUNK(mfloat, 8, 16, X, Y)); \ TEST_EXTRA_CHUNK(float, 32, 4, X, Y) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) @@ -530,6 +626,7 @@ void exec_vldX (void) CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ + MFLOAT8_ONLY(CHECK_FP(test_name, mfloat, 8, 8, PRIx8, EXPECTED, comment)); \ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ \ CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \ @@ -540,6 +637,7 @@ void exec_vldX (void) CHECK(test_name, uint, 32, 4, 
PRIx32, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ + MFLOAT8_ONLY(CHECK_FP(test_name, mfloat, 8, 16, PRIx8, EXPECTED, comment)); \ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) @@ -580,6 +678,12 @@ void exec_vldX (void) PAD(buffer_vld2_pad, poly, 8, 8); VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4); PAD(buffer_vld2_pad, poly, 16, 4); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld2, mfloat, 8, 8, 2); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld2, mfloat, 8, 8, 2), + VECT_ARRAY_VAR(buffer_vld2, int, 8, 8, 2), 8 * 2); + PAD(buffer_vld2_pad, mfloat, 8, 8); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4); PAD(buffer_vld2_pad, float, 16, 4); @@ -607,6 +711,12 @@ void exec_vldX (void) PAD(buffer_vld2_pad, poly, 8, 16); VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8); PAD(buffer_vld2_pad, poly, 16, 8); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld2, mfloat, 8, 16, 2); + PAD(buffer_vld2_pad, mfloat, 8, 16); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld2, mfloat, 8, 16, 2), + VECT_ARRAY_VAR(buffer_vld2, int, 8, 16, 2), 16 * 2); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8); PAD(buffer_vld2_pad, float, 16, 8); @@ -635,6 +745,12 @@ void exec_vldX (void) PAD(buffer_vld3_pad, poly, 8, 8); VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4); PAD(buffer_vld3_pad, poly, 16, 4); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld3, mfloat, 8, 8, 3); + PAD(buffer_vld3_pad, mfloat, 8, 8); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld3, mfloat, 8, 8, 3), + VECT_ARRAY_VAR(buffer_vld3, int, 8, 8, 3), 8 * 3); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4); PAD(buffer_vld3_pad, float, 16, 4); @@ -662,6 +778,12 @@ void exec_vldX (void) PAD(buffer_vld3_pad, poly, 8, 16); VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8); PAD(buffer_vld3_pad, poly, 16, 8); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld3, mfloat, 8, 16, 3); + PAD(buffer_vld3_pad, mfloat, 8, 16); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld3, mfloat, 8, 16, 3), + VECT_ARRAY_VAR(buffer_vld3, int, 8, 16, 3), 16 * 3); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8); PAD(buffer_vld3_pad, float, 16, 8); @@ -690,6 +812,12 @@ void exec_vldX (void) PAD(buffer_vld4_pad, poly, 8, 8); VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4); PAD(buffer_vld4_pad, poly, 16, 4); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld4, mfloat, 8, 8, 4); + PAD(buffer_vld4_pad, mfloat, 8, 8); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld4, mfloat, 8, 8, 4), + VECT_ARRAY_VAR(buffer_vld4, int, 8, 8, 4), 8 * 4); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4); PAD(buffer_vld4_pad, float, 16, 4); @@ -717,6 +845,12 @@ void exec_vldX (void) PAD(buffer_vld4_pad, poly, 8, 16); VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8); PAD(buffer_vld4_pad, poly, 16, 8); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld4, mfloat, 8, 16, 4); + PAD(buffer_vld4_pad, mfloat, 8, 16); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld4, mfloat, 8, 16, 4), + VECT_ARRAY_VAR(buffer_vld4, int, 8, 16, 4), 16 * 4); +#endif #if defined 
(__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8); PAD(buffer_vld4_pad, float, 16, 8); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c index 903d306..76b720ee 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c @@ -18,6 +18,10 @@ VECT_VAR_DECL(expected_vld2_0,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld2_0,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + 0xf0, 0xf1, 0xf0, 0xf1 }; +#endif VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = {0xcc00, 0xcb80, 0xcc00, 0xcb80 }; VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; @@ -36,6 +40,10 @@ VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 }; VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff0, 0xfff1 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld2_1,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf0, 0xf1, + 0xf0, 0xf1, 0xf0, 0xf1 }; +#endif VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcc00, 0xcb80 }; VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; @@ -56,6 +64,10 @@ VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1 }; VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_0,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf0, + 0xf1, 0xf2, 0xf0, 0xf1 }; +#endif VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xcc00 }; VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; @@ -76,6 +88,10 @@ VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0 }; VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xfff1, 0xfff2, 0xfff0, 0xfff1 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_1,hmfloat,8,8) [] = { 0xf2, 0xf0, 0xf1, 0xf2, + 0xf0, 0xf1, 0xf2, 0xf0 }; +#endif VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xcb80, 0xcb00, 0xcc00, 0xcb80 }; VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xc1800000 }; @@ -96,6 +112,10 @@ VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, 0xf2, 0xf0, 0xf1, 0xf2 }; VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xfff2, 0xfff0, 0xfff1, 0xfff2 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_2,hmfloat,8,8) [] = { 0xf1, 0xf2, 0xf0, 0xf1, + 0xf2, 0xf0, 0xf1, 0xf2 }; +#endif VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xcc00, 0xcb80, 0xcb00 }; VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xc1700000, 0xc1600000 }; @@ -114,6 +134,10 @@ VECT_VAR_DECL(expected_vld4_0,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_0,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; +#endif VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; 
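/* Illustrative sketch, not part of the patch: every mfloat8 expected-value
   array and every mfloat8 test line in these files is wrapped in
   "#if MFLOAT8_SUPPORTED" or "MFLOAT8_ONLY(...)", so the tests still build on
   configurations that lack the FP8 modal-float type.  The guards themselves
   come from the shared harness headers; the definitions below are only a
   plausible shape, shown to make the pattern concrete, and are an assumption
   rather than the harness's actual code.  */
#if defined (__ARM_FEATURE_FP8)          /* assumed feature test */
#define MFLOAT8_SUPPORTED 1
#define MFLOAT8_ONLY(X) X                /* keep the mfloat8 test line */
#else
#define MFLOAT8_SUPPORTED 0
#define MFLOAT8_ONLY(X)                  /* drop it entirely */
#endif

/* Usage then mirrors the hunks above: data and checks are both guarded.  */
#if MFLOAT8_SUPPORTED
static const unsigned char expected_mf8_bytes[8]
  = { 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1, 0xf0, 0xf1 };
#endif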
@@ -131,6 +155,10 @@ VECT_VAR_DECL(expected_vld4_1,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_1,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; +#endif VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; @@ -148,6 +176,10 @@ VECT_VAR_DECL(expected_vld4_2,uint,64,1) [] = { 0xfffffffffffffff2 }; VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_2,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; +#endif VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; @@ -165,6 +197,10 @@ VECT_VAR_DECL(expected_vld4_3,uint,64,1) [] = { 0xfffffffffffffff3 }; VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf0, 0xf1, 0xf2, 0xf3 }; VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_3,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf0, 0xf1, 0xf2, 0xf3 }; +#endif VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; @@ -208,6 +244,7 @@ void exec_vldX_dup (void) DECL_VLDX_DUP(uint, 64, 1, X); \ DECL_VLDX_DUP(poly, 8, 8, X); \ DECL_VLDX_DUP(poly, 16, 4, X); \ + MFLOAT8_ONLY(DECL_VLDX_DUP(mfloat, 8, 8, X)); \ DECL_VLDX_DUP(float, 32, 2, X) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) @@ -229,6 +266,7 @@ void exec_vldX_dup (void) TEST_VLDX_DUP(, uint, u, 64, 1, X); \ TEST_VLDX_DUP(, poly, p, 8, 8, X); \ TEST_VLDX_DUP(, poly, p, 16, 4, X); \ + MFLOAT8_ONLY(TEST_VLDX_DUP(, mfloat, mf, 8, 8, X)); \ TEST_VLDX_DUP(, float, f, 32, 2, X) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) @@ -250,6 +288,7 @@ void exec_vldX_dup (void) TEST_EXTRA_CHUNK(uint, 64, 1, X, Y); \ TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \ TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ + MFLOAT8_ONLY(TEST_EXTRA_CHUNK(mfloat, 8, 8, X, Y)); \ TEST_EXTRA_CHUNK(float, 32, 2, X, Y) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) @@ -272,6 +311,7 @@ void exec_vldX_dup (void) CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ + MFLOAT8_ONLY(CHECK_FP(test_name, mfloat, 8, 8, PRIx8, EXPECTED, comment)); \ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) @@ -313,6 +353,12 @@ void exec_vldX_dup (void) PAD(buffer_vld2_pad, poly, 8, 8); VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 4); PAD(buffer_vld2_pad, poly, 16, 4); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld2, mfloat, 8, 8, 2); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld2, mfloat, 8, 8, 2), + VECT_ARRAY_VAR(buffer_vld2, int, 8, 8, 2), 8 * 2); + PAD(buffer_vld2_pad, mfloat, 8, 8); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || 
defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT2(buffer_vld2, float, 16, 4); PAD(buffer_vld2_pad, float, 16, 4); @@ -340,6 +386,12 @@ void exec_vldX_dup (void) PAD(buffer_vld2_pad, poly, 8, 16); VECT_ARRAY_INIT2(buffer_vld2, poly, 16, 8); PAD(buffer_vld2_pad, poly, 16, 8); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld2, mfloat, 8, 16, 2); + PAD(buffer_vld2_pad, mfloat, 8, 16); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld2, mfloat, 8, 16, 2), + VECT_ARRAY_VAR(buffer_vld2, int, 8, 16, 2), 16 * 2); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT2(buffer_vld2, float, 16, 8); PAD(buffer_vld2_pad, float, 16, 8); @@ -368,6 +420,12 @@ void exec_vldX_dup (void) PAD(buffer_vld3_pad, poly, 8, 8); VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 4); PAD(buffer_vld3_pad, poly, 16, 4); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld3, mfloat, 8, 8, 3); + PAD(buffer_vld3_pad, mfloat, 8, 8); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld3, mfloat, 8, 8, 3), + VECT_ARRAY_VAR(buffer_vld3, int, 8, 8, 3), 8 * 3); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT3(buffer_vld3, float, 16, 4); PAD(buffer_vld3_pad, float, 16, 4); @@ -395,6 +453,12 @@ void exec_vldX_dup (void) PAD(buffer_vld3_pad, poly, 8, 16); VECT_ARRAY_INIT3(buffer_vld3, poly, 16, 8); PAD(buffer_vld3_pad, poly, 16, 8); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld3, mfloat, 8, 16, 3); + PAD(buffer_vld3_pad, mfloat, 8, 16); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld3, mfloat, 8, 16, 3), + VECT_ARRAY_VAR(buffer_vld3, int, 8, 16, 3), 16 * 3); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT3(buffer_vld3, float, 16, 8); PAD(buffer_vld3_pad, float, 16, 8); @@ -423,6 +487,12 @@ void exec_vldX_dup (void) PAD(buffer_vld4_pad, poly, 8, 8); VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 4); PAD(buffer_vld4_pad, poly, 16, 4); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld4, mfloat, 8, 8, 4); + PAD(buffer_vld4_pad, mfloat, 8, 8); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld4, mfloat, 8, 8, 4), + VECT_ARRAY_VAR(buffer_vld4, int, 8, 8, 4), 8 * 4); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT4(buffer_vld4, float, 16, 4); PAD(buffer_vld4_pad, float, 16, 4); @@ -450,6 +520,12 @@ void exec_vldX_dup (void) PAD(buffer_vld4_pad, poly, 8, 16); VECT_ARRAY_INIT4(buffer_vld4, poly, 16, 8); PAD(buffer_vld4_pad, poly, 16, 8); +#if MFLOAT8_SUPPORTED + VECT_ARRAY(buffer_vld4, mfloat, 8, 16, 4); + PAD(buffer_vld4_pad, mfloat, 8, 16); + __builtin_memcpy (VECT_ARRAY_VAR(buffer_vld4, mfloat, 8, 16, 4), + VECT_ARRAY_VAR(buffer_vld4, int, 8, 16, 4), 16 * 4); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_ARRAY_INIT4(buffer_vld4, float, 16, 8); PAD(buffer_vld4_pad, float, 16, 8); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c index 9651b70..dfda634 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c @@ -18,6 +18,10 @@ VECT_VAR_DECL(expected_vld2_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; VECT_VAR_DECL(expected_vld2_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld2_0,hmfloat,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; +#endif 
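/* Illustrative sketch, not part of the patch: the mfloat8 hunks around here
   declare the vld2/vld3/vld4 source arrays without an initializer and then
   __builtin_memcpy the bytes of the matching int8 arrays into them, because
   mfloat8_t has no conversions from integer types and so cannot go through
   VECT_ARRAY_INIT*.  The standalone example below shows the same seeding
   pattern feeding a structure load, assuming a toolchain whose <arm_neon.h>
   provides mfloat8_t and vld2_mf8; the array and function names are invented
   for the example.  */
#include <arm_neon.h>
#include <string.h>

/* Two 8-lane vectors' worth of source data, as signed bytes (0xf0, 0xf1, ...).  */
static int8_t src_s8[8 * 2] = { -16, -15, -14, -13, -12, -11, -10, -9,
                                -8, -7, -6, -5, -4, -3, -2, -1 };

/* The mfloat8 copy is left uninitialized and filled by a bit copy.  */
static mfloat8_t src_mf8[8 * 2];

mfloat8x8x2_t
load_mf8_pair (void)
{
  /* Reinterpret the int8 bit patterns as mfloat8 data...  */
  memcpy (src_mf8, src_s8, sizeof src_mf8);
  /* ...then run the deinterleaving load under test.  */
  return vld2_mf8 (src_mf8);
}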
VECT_VAR_DECL(expected_vld2_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; VECT_VAR_DECL(expected_vld2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected_vld2_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, @@ -47,6 +51,10 @@ VECT_VAR_DECL(expected_vld2_1,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; VECT_VAR_DECL(expected_vld2_1,poly,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; VECT_VAR_DECL(expected_vld2_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld2_1,hmfloat,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xf0, 0xf1 }; +#endif VECT_VAR_DECL(expected_vld2_1,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xaaaa, 0xaaaa }; VECT_VAR_DECL(expected_vld2_1,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; VECT_VAR_DECL(expected_vld2_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, @@ -76,6 +84,10 @@ VECT_VAR_DECL(expected_vld3_0,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; VECT_VAR_DECL(expected_vld3_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; VECT_VAR_DECL(expected_vld3_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_0,hmfloat,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; +#endif VECT_VAR_DECL(expected_vld3_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; VECT_VAR_DECL(expected_vld3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected_vld3_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, @@ -105,6 +117,10 @@ VECT_VAR_DECL(expected_vld3_1,uint,32,2) [] = { 0xaaaaaaaa, 0xfffffff0 }; VECT_VAR_DECL(expected_vld3_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xf0, 0xf1, 0xf2, 0xaa }; VECT_VAR_DECL(expected_vld3_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_1,hmfloat,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; +#endif VECT_VAR_DECL(expected_vld3_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xcc00, 0xcb80 }; VECT_VAR_DECL(expected_vld3_1,hfloat,32,2) [] = { 0xc1600000, 0xaaaaaaaa }; VECT_VAR_DECL(expected_vld3_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, @@ -134,6 +150,10 @@ VECT_VAR_DECL(expected_vld3_2,uint,32,2) [] = { 0xfffffff1, 0xfffffff2 }; VECT_VAR_DECL(expected_vld3_2,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; VECT_VAR_DECL(expected_vld3_2,poly,16,4) [] = { 0xaaaa, 0xfff0, 0xfff1, 0xfff2 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld3_2,hmfloat,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xf0, 0xf1, 0xf2 }; +#endif VECT_VAR_DECL(expected_vld3_2,hfloat,16,4) [] = { 0xcb00, 0xaaaa, 0xaaaa, 0xaaaa }; VECT_VAR_DECL(expected_vld3_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; VECT_VAR_DECL(expected_vld3_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xfff0, 0xfff1, @@ -164,6 +184,10 @@ VECT_VAR_DECL(expected_vld4_0,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; VECT_VAR_DECL(expected_vld4_0,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; VECT_VAR_DECL(expected_vld4_0,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_0,hmfloat,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; +#endif VECT_VAR_DECL(expected_vld4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected_vld4_0,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; @@ -192,6 +216,10 @@ VECT_VAR_DECL(expected_vld4_1,uint,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; 
VECT_VAR_DECL(expected_vld4_1,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; VECT_VAR_DECL(expected_vld4_1,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_1,hmfloat,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; +#endif VECT_VAR_DECL(expected_vld4_1,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; VECT_VAR_DECL(expected_vld4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; VECT_VAR_DECL(expected_vld4_1,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, @@ -221,6 +249,10 @@ VECT_VAR_DECL(expected_vld4_2,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; VECT_VAR_DECL(expected_vld4_2,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xaa, 0xaa, 0xaa, 0xaa }; VECT_VAR_DECL(expected_vld4_2,poly,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_2,hmfloat,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; +#endif VECT_VAR_DECL(expected_vld4_2,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; VECT_VAR_DECL(expected_vld4_2,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; VECT_VAR_DECL(expected_vld4_2,int,16,8) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, @@ -250,6 +282,10 @@ VECT_VAR_DECL(expected_vld4_3,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; VECT_VAR_DECL(expected_vld4_3,poly,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa }; VECT_VAR_DECL(expected_vld4_3,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vld4_3,hmfloat,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xf0, 0xf1, 0xf2, 0xf3 }; +#endif VECT_VAR_DECL(expected_vld4_3,hfloat,16,4) [] = { 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; VECT_VAR_DECL(expected_vld4_3,hfloat,32,2) [] = { 0xaaaaaaaa, 0xaaaaaaaa }; VECT_VAR_DECL(expected_vld4_3,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, @@ -279,6 +315,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2); VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2); VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2); VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2); +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(buffer_vld2_lane, mfloat, 8, 2)[2]; +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2); #endif @@ -295,6 +334,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3); VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3); VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3); VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3); +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(buffer_vld3_lane, mfloat, 8, 3)[3]; +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3); #endif @@ -311,6 +353,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4); VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4); VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4); VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4); +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(buffer_vld4_lane, mfloat, 8, 4)[4]; +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4); #endif @@ -371,6 +416,7 @@ void exec_vldX_lane (void) DECL_VLDX_LANE(uint, 16, 8, X); \ DECL_VLDX_LANE(uint, 32, 4, X); \ DECL_VLDX_LANE(poly, 16, 8, X); \ + MFLOAT8_ONLY(DECL_VLDX_LANE(mfloat, 8, 8, X)); \ DECL_VLDX_LANE(float, 32, 2, X); \ DECL_VLDX_LANE(float, 32, 4, X) @@ -384,9 +430,9 @@ void exec_vldX_lane (void) #endif /* Add some padding to try to catch out of bound 
accesses. */ -#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42} +#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={CONVERT(T##W##_t,42)} #define DUMMY_ARRAY(V, T, W, N, L) \ - VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \ + VECT_VAR_DECL(V,T,W,N)[N*L]={}; \ ARRAY1(V##_pad,T,W,N) /* Use the same lanes regardless of the size of the array (X), for @@ -405,6 +451,7 @@ void exec_vldX_lane (void) TEST_VLDX_LANE(q, uint, u, 16, 8, X, 5); \ TEST_VLDX_LANE(q, uint, u, 32, 4, X, 0); \ TEST_VLDX_LANE(q, poly, p, 16, 8, X, 5); \ + MFLOAT8_ONLY(TEST_VLDX_LANE(, mfloat, mf, 8, 8, X, 7)); \ TEST_VLDX_LANE(, float, f, 32, 2, X, 0); \ TEST_VLDX_LANE(q, float, f, 32, 4, X, 2) @@ -431,6 +478,7 @@ void exec_vldX_lane (void) TEST_EXTRA_CHUNK(uint, 16, 8, X, Y); \ TEST_EXTRA_CHUNK(uint, 32, 4, X, Y); \ TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ + MFLOAT8_ONLY(TEST_EXTRA_CHUNK(mfloat, 8, 8, X, Y)); \ TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ TEST_EXTRA_CHUNK(float, 32, 4, X, Y) @@ -453,6 +501,7 @@ void exec_vldX_lane (void) CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ + MFLOAT8_ONLY(CHECK_FP(test_name, mfloat, 8, 8, PRIx8, EXPECTED, comment)); \ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \ CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \ @@ -475,6 +524,15 @@ void exec_vldX_lane (void) } #endif +#if MFLOAT8_SUPPORTED + __builtin_memcpy (VECT_VAR(buffer_vld2_lane, mfloat, 8, 2), + VECT_VAR(buffer_vld2_lane, int, 8, 2), 2); + __builtin_memcpy (VECT_VAR(buffer_vld3_lane, mfloat, 8, 3), + VECT_VAR(buffer_vld3_lane, int, 8, 3), 3); + __builtin_memcpy (VECT_VAR(buffer_vld4_lane, mfloat, 8, 4), + VECT_VAR(buffer_vld4_lane, int, 8, 4), 4); +#endif + /* Declare the temporary buffers / variables. */ DECL_ALL_VLDX_LANE(2); DECL_ALL_VLDX_LANE(3); @@ -494,6 +552,9 @@ void exec_vldX_lane (void) DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); DUMMY_ARRAY(buffer_src, poly, 16, 8, 4); +#if MFLOAT8_SUPPORTED + DUMMY_ARRAY(buffer_src, mfloat, 8, 8, 4); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) DUMMY_ARRAY(buffer_src, float, 16, 4, 4); DUMMY_ARRAY(buffer_src, float, 16, 8, 4); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c index 5215538..b1c57cf 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c @@ -21,6 +21,10 @@ VECT_VAR_DECL(expected_vrev16,poly,8,16) [] = { 0xf1, 0xf0, 0xf3, 0xf2, 0xf5, 0xf4, 0xf7, 0xf6, 0xf9, 0xf8, 0xfb, 0xfa, 0xfd, 0xfc, 0xff, 0xfe }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vrev16,hmfloat,8,8) [] = { 0xf1, 0xf0, 0xf3, 0xf2, + 0xf5, 0xf4, 0xf7, 0xf6 }; +#endif /* Expected results for vrev32. 
*/ VECT_VAR_DECL(expected_vrev32,int,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, @@ -32,6 +36,10 @@ VECT_VAR_DECL(expected_vrev32,uint,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 } VECT_VAR_DECL(expected_vrev32,poly,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4 }; VECT_VAR_DECL(expected_vrev32,poly,16,4) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vrev32,hmfloat,8,8) [] = { 0xf3, 0xf2, 0xf1, 0xf0, + 0xf7, 0xf6, 0xf5, 0xf4 }; +#endif VECT_VAR_DECL(expected_vrev32,int,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xf7, 0xf6, 0xf5, 0xf4, 0xfb, 0xfa, 0xf9, 0xf8, @@ -50,6 +58,12 @@ VECT_VAR_DECL(expected_vrev32,poly,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, 0xff, 0xfe, 0xfd, 0xfc }; VECT_VAR_DECL(expected_vrev32,poly,16,8) [] = { 0xfff1, 0xfff0, 0xfff3, 0xfff2, 0xfff5, 0xfff4, 0xfff7, 0xfff6 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vrev32,hmfloat,8,16) [] = { 0xf3, 0xf2, 0xf1, 0xf0, + 0xf7, 0xf6, 0xf5, 0xf4, + 0xfb, 0xfa, 0xf9, 0xf8, + 0xff, 0xfe, 0xfd, 0xfc }; +#endif /* Expected results for vrev64. */ VECT_VAR_DECL(expected_vrev64,int,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, @@ -63,6 +77,10 @@ VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 }; VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xf3, 0xf2, 0xf1, 0xf0 }; VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vrev64,hmfloat,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, + 0xf3, 0xf2, 0xf1, 0xf0 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected_vrev64, hfloat, 16, 4) [] = { 0xca80, 0xcb00, 0xcb80, 0xcc00 }; @@ -90,6 +108,12 @@ VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, 0xfb, 0xfa, 0xf9, 0xf8 }; VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vrev64,hmfloat,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, + 0xf3, 0xf2, 0xf1, 0xf0, + 0xff, 0xfe, 0xfd, 0xfc, + 0xfb, 0xfa, 0xf9, 0xf8 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected_vrev64, hfloat, 16, 8) [] = { 0xca80, 0xcb00, 0xcb80, 0xcc00, @@ -114,6 +138,10 @@ void exec_vrev (void) /* Initialize input "vector" from "buffer". 
*/ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if MFLOAT8_SUPPORTED + VLOAD (vector, buffer, , mfloat, mf, 8, 8); + VLOAD (vector, buffer, q, mfloat, mf, 8, 16); +#endif #if defined (FP16_SUPPORTED) VLOAD (vector, buffer, , float, f, 16, 4); VLOAD (vector, buffer, q, float, f, 16, 8); @@ -129,6 +157,7 @@ void exec_vrev (void) TEST_VREV(q, int, s, 8, 16, 16); TEST_VREV(q, uint, u, 8, 16, 16); TEST_VREV(q, poly, p, 8, 16, 16); + MFLOAT8_ONLY(TEST_VREV(, mfloat, mf, 8, 8, 16)); CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vrev16, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vrev16, ""); @@ -136,6 +165,7 @@ void exec_vrev (void) CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev16, ""); CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vrev16, ""); CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev16, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_vrev16, "")); #undef TEST_MSG #define TEST_MSG "VREV32" @@ -145,12 +175,14 @@ void exec_vrev (void) TEST_VREV(, uint, u, 16, 4, 32); TEST_VREV(, poly, p, 8, 8, 32); TEST_VREV(, poly, p, 16, 4, 32); + MFLOAT8_ONLY(TEST_VREV(, mfloat, mf, 8, 8, 32)); TEST_VREV(q, int, s, 8, 16, 32); TEST_VREV(q, int, s, 16, 8, 32); TEST_VREV(q, uint, u, 8, 16, 32); TEST_VREV(q, uint, u, 16, 8, 32); TEST_VREV(q, poly, p, 8, 16, 32); TEST_VREV(q, poly, p, 16, 8, 32); + MFLOAT8_ONLY(TEST_VREV(q, mfloat, mf, 8, 16, 32)); CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vrev32, ""); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_vrev32, ""); @@ -158,12 +190,14 @@ void exec_vrev (void) CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_vrev32, ""); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev32, ""); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_vrev32, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_vrev32, "")); CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev32, ""); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_vrev32, ""); CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vrev32, ""); CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_vrev32, ""); CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev32, ""); CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev32, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 16, PRIx8, expected_vrev32, "")); #undef TEST_MSG #define TEST_MSG "VREV64" @@ -175,6 +209,7 @@ void exec_vrev (void) TEST_VREV(, uint, u, 32, 2, 64); TEST_VREV(, poly, p, 8, 8, 64); TEST_VREV(, poly, p, 16, 4, 64); + MFLOAT8_ONLY(TEST_VREV(, mfloat, mf, 8, 8, 64)); TEST_VREV(q, int, s, 8, 16, 64); TEST_VREV(q, int, s, 16, 8, 64); TEST_VREV(q, int, s, 32, 4, 64); @@ -183,6 +218,7 @@ void exec_vrev (void) TEST_VREV(q, uint, u, 32, 4, 64); TEST_VREV(q, poly, p, 8, 16, 64); TEST_VREV(q, poly, p, 16, 8, 64); + MFLOAT8_ONLY(TEST_VREV(q, mfloat, mf, 8, 16, 64)); CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vrev64, ""); CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_vrev64, ""); @@ -192,6 +228,7 @@ void exec_vrev (void) CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_vrev64, ""); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev64, ""); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_vrev64, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_vrev64, "")); CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev64, ""); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_vrev64, ""); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_vrev64, ""); @@ -200,6 +237,7 @@ void exec_vrev (void) CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_vrev64, ""); CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, ""); 
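/* Illustrative sketch, not part of the patch: mfloat8 results are always
   validated by byte pattern.  The CHECK_FP lines here print the lanes with
   PRIx8 and compare them against the hmfloat expected arrays, and the
   vst1x2/vst1x3/vst1x4 tests later in the patch replace "temp[j] != data[j]"
   with "!BITEQUAL (temp[j], data[j])" (and plain casts with CONVERT) because
   mfloat8_t has neither == nor integer conversions.  The helpers below show
   one minimal way to express such bit-level checks; they are a sketch of the
   idea, assuming mfloat8_t from <arm_neon.h>, not the harness's real
   BITEQUAL/CHECK_FP macros.  */
#include <arm_neon.h>
#include <string.h>

/* Compare two mfloat8_t lanes by their object representation.  */
static inline int
mf8_biteq (mfloat8_t a, mfloat8_t b)
{
  return memcmp (&a, &b, sizeof a) == 0;
}

/* Check a whole 8-lane mfloat8 vector against an expected byte pattern.  */
static inline int
mf8x8_matches (mfloat8x8_t v, const unsigned char expected[8])
{
  unsigned char bytes[sizeof v];
  memcpy (bytes, &v, sizeof v);        /* copy out the raw lane bytes */
  return memcmp (bytes, expected, sizeof bytes) == 0;
}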
CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 16, PRIx8, expected_vrev64, "")); #if defined (FP16_SUPPORTED) TEST_VREV (, float, f, 16, 4, 64); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c index e0499df..dc7d6ec 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vset_lane.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0x88 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0x55, 0xf7 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xbb, 0xf5, 0xf6, 0xf7 }; +#endif VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0x4840, 0xca80 }; VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x4204cccd }; VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, @@ -42,6 +46,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xfc, 0xfd, 0xdd, 0xff }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xee, 0xfff7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xa0, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +#endif VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, 0xca00, 0x4480, 0xc900, 0xc880 }; VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, @@ -64,6 +74,10 @@ void exec_vset_lane (void) /* Initialize input "vector" from "buffer". */ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if MFLOAT8_SUPPORTED + VLOAD (vector, buffer, , mfloat, mf, 8, 8); + VLOAD (vector, buffer, q, mfloat, mf, 8, 16); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VLOAD(vector, buffer, , float, f, 16, 4); VLOAD(vector, buffer, q, float, f, 16, 8); @@ -82,6 +96,7 @@ void exec_vset_lane (void) TEST_VSET_LANE(, uint, u, 64, 1, 0x88, 0); TEST_VSET_LANE(, poly, p, 8, 8, 0x55, 6); TEST_VSET_LANE(, poly, p, 16, 4, 0x66, 2); + MFLOAT8_ONLY(TEST_VSET_LANE(, mfloat, mf, 8, 8, MFLOAT8(0xbb), 4)); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) TEST_VSET_LANE(, float, f, 16, 4, 8.5f, 2); #endif @@ -97,6 +112,7 @@ void exec_vset_lane (void) TEST_VSET_LANE(q, uint, u, 64, 2, 0x11, 1); TEST_VSET_LANE(q, poly, p, 8, 16, 0xDD, 14); TEST_VSET_LANE(q, poly, p, 16, 8, 0xEE, 6); + MFLOAT8_ONLY(TEST_VSET_LANE(q, mfloat, mf, 8, 16, MFLOAT8(0xa0), 10)); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) TEST_VSET_LANE(q, float, f, 16, 8, 4.5f, 5); #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc index 9976488..42922b6 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc @@ -41,6 +41,7 @@ void FNNAME (INSN_NAME) (void) DECL_VSHUFFLE(uint, 32, 2); \ DECL_VSHUFFLE(poly, 8, 8); \ DECL_VSHUFFLE(poly, 16, 4); \ + MFLOAT8_ONLY(DECL_VSHUFFLE(mfloat, 8, 8)); \ DECL_VSHUFFLE(float, 32, 2); \ DECL_VSHUFFLE(int, 8, 16); \ DECL_VSHUFFLE(int, 16, 8); \ @@ -50,6 +51,7 @@ void FNNAME (INSN_NAME) (void) DECL_VSHUFFLE(uint, 32, 4); \ DECL_VSHUFFLE(poly, 8, 16); \ 
DECL_VSHUFFLE(poly, 16, 8); \ + MFLOAT8_ONLY(DECL_VSHUFFLE(mfloat, 8, 16)); \ DECL_VSHUFFLE(float, 32, 4) DECL_ALL_VSHUFFLE(); @@ -60,6 +62,10 @@ void FNNAME (INSN_NAME) (void) /* Initialize input "vector" from "buffer". */ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); +#if MFLOAT8_SUPPORTED + VLOAD (vector1, buffer, , mfloat, mf, 8, 8); + VLOAD (vector1, buffer, q, mfloat, mf, 8, 16); +#endif #if defined (FP16_SUPPORTED) VLOAD (vector1, buffer, , float, f, 16, 4); VLOAD (vector1, buffer, q, float, f, 16, 8); @@ -76,6 +82,7 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, , uint, u, 32, 2, 0x77); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); + MFLOAT8_ONLY(VDUP(vector2, , mfloat, mf, 8, 8, MFLOAT8(0xaa))); #if defined (FP16_SUPPORTED) VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ #endif @@ -89,6 +96,7 @@ void FNNAME (INSN_NAME) (void) VDUP(vector2, q, uint, u, 32, 4, 0x77); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); + MFLOAT8_ONLY(VDUP(vector2, q, mfloat, mf, 8, 16, MFLOAT8(0xbc))); #if defined (FP16_SUPPORTED) VDUP (vector2, q, float, f, 16, 8, 14.6f); #endif @@ -103,6 +111,7 @@ void FNNAME (INSN_NAME) (void) TEST_VSHUFFLE(INSN, , uint, u, 32, 2); \ TEST_VSHUFFLE(INSN, , poly, p, 8, 8); \ TEST_VSHUFFLE(INSN, , poly, p, 16, 4); \ + MFLOAT8_ONLY(TEST_VSHUFFLE(INSN, , mfloat, mf, 8, 8)); \ TEST_VSHUFFLE(INSN, , float, f, 32, 2); \ TEST_VSHUFFLE(INSN, q, int, s, 8, 16); \ TEST_VSHUFFLE(INSN, q, int, s, 16, 8); \ @@ -112,6 +121,7 @@ void FNNAME (INSN_NAME) (void) TEST_VSHUFFLE(INSN, q, uint, u, 32, 4); \ TEST_VSHUFFLE(INSN, q, poly, p, 8, 16); \ TEST_VSHUFFLE(INSN, q, poly, p, 16, 8); \ + MFLOAT8_ONLY(TEST_VSHUFFLE(INSN, q, mfloat, mf, 8, 16)); \ TEST_VSHUFFLE(INSN, q, float, f, 32, 4) #define TEST_VSHUFFLE_FP16(INSN) \ @@ -127,6 +137,7 @@ void FNNAME (INSN_NAME) (void) TEST_EXTRA_CHUNK(uint, 32, 2, 1); \ TEST_EXTRA_CHUNK(poly, 8, 8, 1); \ TEST_EXTRA_CHUNK(poly, 16, 4, 1); \ + MFLOAT8_ONLY(TEST_EXTRA_CHUNK(mfloat, 8, 8, 1)); \ TEST_EXTRA_CHUNK(float, 32, 2, 1); \ TEST_EXTRA_CHUNK(int, 8, 16, 1); \ TEST_EXTRA_CHUNK(int, 16, 8, 1); \ @@ -136,6 +147,7 @@ void FNNAME (INSN_NAME) (void) TEST_EXTRA_CHUNK(uint, 32, 4, 1); \ TEST_EXTRA_CHUNK(poly, 8, 16, 1); \ TEST_EXTRA_CHUNK(poly, 16, 8, 1); \ + MFLOAT8_ONLY(TEST_EXTRA_CHUNK(mfloat, 8, 16, 1)); \ TEST_EXTRA_CHUNK(float, 32, 4, 1) /* vshuffle support all vector types except [u]int64x1 and @@ -150,6 +162,7 @@ void FNNAME (INSN_NAME) (void) CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ + MFLOAT8_ONLY(CHECK_FP(test_name, mfloat, 8, 8, PRIx8, EXPECTED, comment)); \ CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ \ CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \ @@ -160,6 +173,7 @@ void FNNAME (INSN_NAME) (void) CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ CHECK_POLY(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ + MFLOAT8_ONLY(CHECK_FP(test_name, mfloat, 8, 16, PRIx8, EXPECTED, comment)); \ CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ } diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c index 825d07d..f26c467 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c +++ 
b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1_lane.c @@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0x3333, 0x3333, 0x3333 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xf2, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +#endif VECT_VAR_DECL(expected,hfloat,16,4) [] = { 0xcb80, 0x3333, 0x3333, 0x3333 }; VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x33333333 }; VECT_VAR_DECL(expected,int,8,16) [] = { 0xff, 0x33, 0x33, 0x33, @@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfa, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33, 0x33 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff4, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,16) [] = { 0xfe, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33, + 0x33, 0x33, 0x33, 0x33 }; +#endif VECT_VAR_DECL(expected,hfloat,16,8) [] = { 0xc900, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333, 0x3333 }; VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0x33333333, @@ -72,6 +82,7 @@ void exec_vst1_lane (void) TEST_VST1_LANE(, uint, u, 64, 1, 0); TEST_VST1_LANE(, poly, p, 8, 8, 6); TEST_VST1_LANE(, poly, p, 16, 4, 2); + MFLOAT8_ONLY(TEST_VST1_LANE(, mfloat, mf, 8, 8, 2)); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) TEST_VST1_LANE(, float, f, 16, 4, 1); #endif @@ -87,6 +98,7 @@ void exec_vst1_lane (void) TEST_VST1_LANE(q, uint, u, 64, 2, 0); TEST_VST1_LANE(q, poly, p, 8, 16, 10); TEST_VST1_LANE(q, poly, p, 16, 8, 4); + MFLOAT8_ONLY(TEST_VST1_LANE(q, mfloat, mf, 8, 16, 14)); #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) TEST_VST1_LANE(q, float, f, 16, 8, 6); #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x2.c index 6d20a46..69be40a 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x2.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x2.c @@ -17,14 +17,14 @@ test_vst1##SUFFIX##_x2 () \ BASE##x##ELTS##x##2##_t vectors; \ int i,j; \ for (i = 0; i < ELTS * 2; i++) \ - data [i] = (BASE##_t) 2*i; \ + data [i] = CONVERT (BASE##_t, 2*i); \ asm volatile ("" : : : "memory"); \ vectors.val[0] = vld1##SUFFIX (data); \ vectors.val[1] = vld1##SUFFIX (&data[ELTS]); \ vst1##SUFFIX##_x2 (temp, vectors); \ asm volatile ("" : : : "memory"); \ for (j = 0; j < ELTS * 2; j++) \ - if (temp[j] != data[j]) \ + if (!BITEQUAL (temp[j], data[j])) \ return 1; \ return 0; \ } @@ -57,6 +57,8 @@ VARIANT (float32, 4, q_f32) #ifdef __aarch64__ #define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \ +VARIANT (mfloat8, 8, _mf8) \ +VARIANT (mfloat8, 16, q_mf8) \ VARIANT (float64, 1, _f64) \ VARIANT (float64, 2, q_f64) #else @@ -68,7 +70,7 @@ VARIANTS (TESTMETH) #define CHECKS(BASE, ELTS, SUFFIX) \ if (test_vst1##SUFFIX##_x2 () != 0) \ - fprintf (stderr, "test_vst1##SUFFIX##_x2"); + fprintf (stderr, "test_vst1##SUFFIX##_x2"), __builtin_abort (); int main (int argc, char **argv) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x3.c index 87eae4d..4d42bcc 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x3.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x3.c @@ -17,7 +17,7 @@ 
test_vst1##SUFFIX##_x3 () \ BASE##x##ELTS##x##3##_t vectors; \ int i,j; \ for (i = 0; i < ELTS * 3; i++) \ - data [i] = (BASE##_t) 3*i; \ + data [i] = CONVERT (BASE##_t, 3*i); \ asm volatile ("" : : : "memory"); \ vectors.val[0] = vld1##SUFFIX (data); \ vectors.val[1] = vld1##SUFFIX (&data[ELTS]); \ @@ -25,7 +25,7 @@ test_vst1##SUFFIX##_x3 () \ vst1##SUFFIX##_x3 (temp, vectors); \ asm volatile ("" : : : "memory"); \ for (j = 0; j < ELTS * 3; j++) \ - if (temp[j] != data[j]) \ + if (!BITEQUAL (temp[j], data[j])) \ return 1; \ return 0; \ } @@ -58,6 +58,8 @@ VARIANT (float32, 4, q_f32) #ifdef __aarch64__ #define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \ +VARIANT (mfloat8, 8, _mf8) \ +VARIANT (mfloat8, 16, q_mf8) \ VARIANT (float64, 1, _f64) \ VARIANT (float64, 2, q_f64) #else @@ -69,7 +71,7 @@ VARIANTS (TESTMETH) #define CHECKS(BASE, ELTS, SUFFIX) \ if (test_vst1##SUFFIX##_x3 () != 0) \ - fprintf (stderr, "test_vst1##SUFFIX##_x3"); + fprintf (stderr, "test_vst1##SUFFIX##_x3"), __builtin_abort (); int main (int argc, char **argv) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c index 829a18d..ddc7fa5 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c @@ -17,7 +17,7 @@ test_vst1##SUFFIX##_x4 () \ BASE##x##ELTS##x##4##_t vectors; \ int i,j; \ for (i = 0; i < ELTS * 4; i++) \ - data [i] = (BASE##_t) 4*i; \ + data [i] = CONVERT (BASE##_t, 4*i); \ asm volatile ("" : : : "memory"); \ vectors.val[0] = vld1##SUFFIX (data); \ vectors.val[1] = vld1##SUFFIX (&data[ELTS]); \ @@ -26,7 +26,7 @@ test_vst1##SUFFIX##_x4 () \ vst1##SUFFIX##_x4 (temp, vectors); \ asm volatile ("" : : : "memory"); \ for (j = 0; j < ELTS * 4; j++) \ - if (temp[j] != data[j]) \ + if (!BITEQUAL (temp[j], data[j])) \ return 1; \ return 0; \ } @@ -61,6 +61,8 @@ VARIANT (float32, 4, q_f32) #ifdef __aarch64__ #define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \ +VARIANT (mfloat8, 8, _mf8) \ +VARIANT (mfloat8, 16, q_mf8) \ VARIANT (float64, 1, _f64) \ VARIANT (float64, 2, q_f64) #else @@ -72,7 +74,7 @@ VARIANTS (TESTMETH) #define CHECKS(BASE, ELTS, SUFFIX) \ if (test_vst1##SUFFIX##_x4 () != 0) \ - fprintf (stderr, "test_vst1##SUFFIX##_x4"); + fprintf (stderr, "test_vst1##SUFFIX##_x4"), __builtin_abort (); int main (int argc, char **argv) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c index 45062d9..4ca5a4bd 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c @@ -14,6 +14,10 @@ VECT_VAR_DECL(expected_st2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; VECT_VAR_DECL(expected_st2_0,poly,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_st2_0,hmfloat,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_st2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0x0, 0x0 }; VECT_VAR_DECL(expected_st2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected_st2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, @@ -42,6 +46,10 @@ VECT_VAR_DECL(expected_st2_1,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_st2_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st2_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 
}; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_st2_1,hmfloat,8,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_st2_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st2_1,hfloat,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_st2_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, @@ -68,6 +76,10 @@ VECT_VAR_DECL(expected_st3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; VECT_VAR_DECL(expected_st3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_st3_0,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_st3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0x0 }; VECT_VAR_DECL(expected_st3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected_st3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, @@ -97,6 +109,10 @@ VECT_VAR_DECL(expected_st3_1,uint,32,2) [] = { 0xfffffff2, 0x0 }; VECT_VAR_DECL(expected_st3_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st3_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_st3_1,hmfloat,8,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_st3_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st3_1,hfloat,32,2) [] = { 0xc1600000, 0x0 }; VECT_VAR_DECL(expected_st3_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, @@ -123,6 +139,10 @@ VECT_VAR_DECL(expected_st3_2,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_st3_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st3_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_st3_2,hmfloat,8,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_st3_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st3_2,hfloat,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_st3_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, @@ -149,6 +169,10 @@ VECT_VAR_DECL(expected_st4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; VECT_VAR_DECL(expected_st4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_st4_0,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0x0, 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_st4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; VECT_VAR_DECL(expected_st4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; VECT_VAR_DECL(expected_st4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, @@ -178,6 +202,10 @@ VECT_VAR_DECL(expected_st4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; VECT_VAR_DECL(expected_st4_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st4_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_st4_1,hmfloat,8,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_st4_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; VECT_VAR_DECL(expected_st4_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, @@ -204,6 +232,10 @@ VECT_VAR_DECL(expected_st4_2,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_st4_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st4_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +#if 
MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_st4_2,hmfloat,8,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_st4_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st4_2,hfloat,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_st4_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, @@ -230,6 +262,10 @@ VECT_VAR_DECL(expected_st4_3,uint,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_st4_3,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st4_3,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_st4_3,hmfloat,8,8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +#endif VECT_VAR_DECL(expected_st4_3,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; VECT_VAR_DECL(expected_st4_3,hfloat,32,2) [] = { 0x0, 0x0 }; VECT_VAR_DECL(expected_st4_3,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, @@ -256,6 +292,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2); VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2); VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2); VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2); +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(buffer_vld2_lane, mfloat, 8, 2)[2]; +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2); #endif @@ -272,6 +311,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3); VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3); VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3); VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3); +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(buffer_vld3_lane, mfloat, 8, 3)[3]; +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3); #endif @@ -288,6 +330,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4); VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4); VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4); VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4); +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(buffer_vld4_lane, mfloat, 8, 4)[4]; +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4); #endif @@ -347,6 +392,7 @@ void exec_vstX_lane (void) DECL_VSTX_LANE(uint, 32, 2, X); \ DECL_VSTX_LANE(poly, 8, 8, X); \ DECL_VSTX_LANE(poly, 16, 4, X); \ + MFLOAT8_ONLY(DECL_VSTX_LANE(mfloat, 8, 8, X);) \ DECL_VSTX_LANE(float, 32, 2, X); \ DECL_VSTX_LANE(int, 16, 8, X); \ DECL_VSTX_LANE(int, 32, 4, X); \ @@ -378,6 +424,7 @@ void exec_vstX_lane (void) TEST_VSTX_LANE(, uint, u, 32, 2, X, 1); \ TEST_VSTX_LANE(, poly, p, 8, 8, X, 4); \ TEST_VSTX_LANE(, poly, p, 16, 4, X, 3); \ + MFLOAT8_ONLY(TEST_VSTX_LANE(, mfloat, mf, 8, 8, X, 5)); \ TEST_VSTX_LANE(q, int, s, 16, 8, X, 6); \ TEST_VSTX_LANE(q, int, s, 32, 4, X, 2); \ TEST_VSTX_LANE(q, uint, u, 16, 8, X, 5); \ @@ -403,6 +450,7 @@ void exec_vstX_lane (void) TEST_EXTRA_CHUNK(uint, 32, 2, X, Y); \ TEST_EXTRA_CHUNK(poly, 8, 8, X, Y); \ TEST_EXTRA_CHUNK(poly, 16, 4, X, Y); \ + MFLOAT8_ONLY(TEST_EXTRA_CHUNK(mfloat, 8, 8, X, Y)); \ TEST_EXTRA_CHUNK(float, 32, 2, X, Y); \ TEST_EXTRA_CHUNK(int, 16, 8, X, Y); \ TEST_EXTRA_CHUNK(int, 32, 4, X, Y); \ @@ -420,6 +468,15 @@ void exec_vstX_lane (void) #define TEST_ALL_EXTRA_CHUNKS(X,Y) TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) #endif +#if MFLOAT8_SUPPORTED + __builtin_memcpy (VECT_VAR(buffer_vld2_lane, mfloat, 8, 2), + VECT_VAR(buffer_vld2_lane, int, 8, 2), 2); + __builtin_memcpy (VECT_VAR(buffer_vld3_lane, mfloat, 8, 3), + VECT_VAR(buffer_vld3_lane, 
int, 8, 3), 3); + __builtin_memcpy (VECT_VAR(buffer_vld4_lane, mfloat, 8, 4), + VECT_VAR(buffer_vld4_lane, int, 8, 4), 4); +#endif + /* Declare the temporary buffers / variables. */ DECL_ALL_VSTX_LANE(2); DECL_ALL_VSTX_LANE(3); @@ -434,6 +491,9 @@ void exec_vstX_lane (void) DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); DUMMY_ARRAY(buffer_src, poly, 8, 8, 4); DUMMY_ARRAY(buffer_src, poly, 16, 4, 4); +#if MFLOAT8_SUPPORTED + DUMMY_ARRAY(buffer_src, mfloat, 8, 8, 4); +#endif #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) DUMMY_ARRAY(buffer_src, float, 16, 4, 4); #endif @@ -462,6 +522,7 @@ void exec_vstX_lane (void) CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_0, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_0, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_0, CMT); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_st2_0, CMT)); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_0, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_0, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_0, CMT); @@ -485,6 +546,7 @@ void exec_vstX_lane (void) CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_1, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_1, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_1, CMT); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_st2_1, CMT)); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_1, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_1, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_1, CMT); @@ -514,6 +576,7 @@ void exec_vstX_lane (void) CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_0, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_0, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_0, CMT); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_st3_0, CMT)); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_0, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_0, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_0, CMT); @@ -538,6 +601,7 @@ void exec_vstX_lane (void) CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_1, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_1, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_1, CMT); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_st3_1, CMT)); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_1, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_1, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_1, CMT); @@ -562,6 +626,7 @@ void exec_vstX_lane (void) CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_2, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_2, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_2, CMT); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_st3_2, CMT)); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_2, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_2, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_2, CMT); @@ -591,6 +656,7 @@ void exec_vstX_lane (void) CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_0, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_0, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_0, CMT); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_st4_0, CMT)); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_0, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_0, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_0, CMT); @@ -615,6 +681,7 @@ 
void exec_vstX_lane (void) CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_1, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_1, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_1, CMT); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_st4_1, CMT)); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_1, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_1, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_1, CMT); @@ -639,6 +706,7 @@ void exec_vstX_lane (void) CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_2, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_2, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_2, CMT); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_st4_2, CMT)); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_2, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_2, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_2, CMT); @@ -663,6 +731,7 @@ void exec_vstX_lane (void) CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_3, CMT); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_3, CMT); CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_3, CMT); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_st4_3, CMT)); CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_3, CMT); CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_3, CMT); CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_3, CMT); diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtbX.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtbX.c index c3e1d9b..7d6e0a6 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtbX.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtbX.c @@ -9,6 +9,10 @@ VECT_VAR_DECL(expected_vtbl1,uint,8,8) [] = { 0x0, 0xf3, 0xf3, 0xf3, 0x0, 0x0, 0xf3, 0xf3 }; VECT_VAR_DECL(expected_vtbl1,poly,8,8) [] = { 0x0, 0xf3, 0xf3, 0xf3, 0x0, 0x0, 0xf3, 0xf3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vtbl1,hmfloat,8,8) [] = { 0x0, 0xf3, 0xf3, 0xf3, + 0x0, 0x0, 0xf3, 0xf3 }; +#endif /* Expected results for vtbl2. */ VECT_VAR_DECL(expected_vtbl2,int,8,8) [] = { 0xf6, 0xf3, 0xf3, 0xf3, @@ -17,6 +21,10 @@ VECT_VAR_DECL(expected_vtbl2,uint,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0x0, 0x0, 0xf5, 0xf5 }; VECT_VAR_DECL(expected_vtbl2,poly,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0x0, 0x0, 0xf5, 0xf5 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vtbl2,hmfloat,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, + 0x0, 0x0, 0xf5, 0xf5 }; +#endif /* Expected results for vtbl3. */ VECT_VAR_DECL(expected_vtbl3,int,8,8) [] = { 0xf8, 0xf4, 0xf4, 0xf4, @@ -25,6 +33,10 @@ VECT_VAR_DECL(expected_vtbl3,uint,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, 0xff, 0x0, 0xf7, 0xf7 }; VECT_VAR_DECL(expected_vtbl3,poly,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, 0xff, 0x0, 0xf7, 0xf7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vtbl3,hmfloat,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, + 0xff, 0x0, 0xf7, 0xf7 }; +#endif /* Expected results for vtbl4. */ VECT_VAR_DECL(expected_vtbl4,int,8,8) [] = { 0xfa, 0xf5, 0xf5, 0xf5, @@ -33,6 +45,10 @@ VECT_VAR_DECL(expected_vtbl4,uint,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0x0, 0xf9, 0xf9 }; VECT_VAR_DECL(expected_vtbl4,poly,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0x0, 0xf9, 0xf9 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vtbl4,hmfloat,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, + 0x3, 0x0, 0xf9, 0xf9 }; +#endif /* Expected results for vtbx1. 
*/ VECT_VAR_DECL(expected_vtbx1,int,8,8) [] = { 0x33, 0xf2, 0xf2, 0xf2, @@ -41,6 +57,10 @@ VECT_VAR_DECL(expected_vtbx1,uint,8,8) [] = { 0xcc, 0xf3, 0xf3, 0xf3, 0xcc, 0xcc, 0xf3, 0xf3 }; VECT_VAR_DECL(expected_vtbx1,poly,8,8) [] = { 0xcc, 0xf3, 0xf3, 0xf3, 0xcc, 0xcc, 0xf3, 0xf3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vtbx1,hmfloat,8,8) [] = { 0x55, 0xf3, 0xf3, 0xf3, + 0x55, 0x55, 0xf3, 0xf3 }; +#endif /* Expected results for vtbx2. */ VECT_VAR_DECL(expected_vtbx2,int,8,8) [] = { 0xf6, 0xf3, 0xf3, 0xf3, @@ -49,6 +69,10 @@ VECT_VAR_DECL(expected_vtbx2,uint,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0xcc, 0xcc, 0xf5, 0xf5 }; VECT_VAR_DECL(expected_vtbx2,poly,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, 0xcc, 0xcc, 0xf5, 0xf5 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vtbx2,hmfloat,8,8) [] = { 0xf6, 0xf5, 0xf5, 0xf5, + 0x55, 0x55, 0xf5, 0xf5 }; +#endif /* Expected results for vtbx3. */ VECT_VAR_DECL(expected_vtbx3,int,8,8) [] = { 0xf8, 0xf4, 0xf4, 0xf4, @@ -57,6 +81,10 @@ VECT_VAR_DECL(expected_vtbx3,uint,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, 0xff, 0xcc, 0xf7, 0xf7 }; VECT_VAR_DECL(expected_vtbx3,poly,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, 0xff, 0xcc, 0xf7, 0xf7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vtbx3,hmfloat,8,8) [] = { 0xf8, 0xf7, 0xf7, 0xf7, + 0xff, 0x55, 0xf7, 0xf7 }; +#endif /* Expected results for vtbx4. */ VECT_VAR_DECL(expected_vtbx4,int,8,8) [] = { 0xfa, 0xf5, 0xf5, 0xf5, @@ -65,6 +93,10 @@ VECT_VAR_DECL(expected_vtbx4,uint,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0xcc, 0xf9, 0xf9 }; VECT_VAR_DECL(expected_vtbx4,poly,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, 0x3, 0xcc, 0xf9, 0xf9 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected_vtbx4,hmfloat,8,8) [] = { 0xfa, 0xf9, 0xf9, 0xf9, + 0x3, 0x55, 0xf9, 0xf9 }; +#endif void exec_vtbX (void) { @@ -105,32 +137,38 @@ void exec_vtbX (void) DECL_VARIABLE(vector_res, int, 8, 8); DECL_VARIABLE(vector_res, uint, 8, 8); DECL_VARIABLE(vector_res, poly, 8, 8); + MFLOAT8_ONLY(DECL_VARIABLE(vector_res, mfloat, 8, 8)); /* For vtbl1. */ DECL_VARIABLE(table_vector, int, 8, 8); DECL_VARIABLE(table_vector, uint, 8, 8); DECL_VARIABLE(table_vector, poly, 8, 8); + MFLOAT8_ONLY(DECL_VARIABLE(table_vector, mfloat, 8, 8)); /* For vtbx*. */ DECL_VARIABLE(default_vector, int, 8, 8); DECL_VARIABLE(default_vector, uint, 8, 8); DECL_VARIABLE(default_vector, poly, 8, 8); + MFLOAT8_ONLY(DECL_VARIABLE(default_vector, mfloat, 8, 8)); /* We need only 8 bits variants. */ #define DECL_ALL_VTBLX(X) \ DECL_VTBX(int, 8, 8, X); \ DECL_VTBX(uint, 8, 8, X); \ - DECL_VTBX(poly, 8, 8, X) + DECL_VTBX(poly, 8, 8, X); \ + MFLOAT8_ONLY(DECL_VTBX(mfloat, 8, 8, X)) #define TEST_ALL_VTBL1() \ TEST_VTBL1(int, s, int, 8, 8); \ TEST_VTBL1(uint, u, uint, 8, 8); \ - TEST_VTBL1(poly, p, uint, 8, 8) + TEST_VTBL1(poly, p, uint, 8, 8); \ + MFLOAT8_ONLY(TEST_VTBL1(mfloat, mf, uint, 8, 8)) #define TEST_ALL_VTBLX(X) \ TEST_VTBLX(int, s, int, 8, 8, X); \ TEST_VTBLX(uint, u, uint, 8, 8, X); \ - TEST_VTBLX(poly, p, uint, 8, 8, X) + TEST_VTBLX(poly, p, uint, 8, 8, X); \ + MFLOAT8_ONLY(TEST_VTBLX(mfloat, mf, uint, 8, 8, X)) /* Declare the temporary buffers / variables. */ DECL_ALL_VTBLX(2); @@ -168,6 +206,7 @@ void exec_vtbX (void) CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl1, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl1, ""); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl1, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_vtbl1, "")); /* Check vtbl2. 
*/ clean_results (); @@ -178,6 +217,7 @@ void exec_vtbX (void) CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl2, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl2, ""); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl2, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_vtbl2, "")); /* Check vtbl3. */ clean_results (); @@ -188,6 +228,7 @@ void exec_vtbX (void) CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl3, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl3, ""); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl3, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_vtbl3, "")); /* Check vtbl4. */ clean_results (); @@ -198,6 +239,7 @@ void exec_vtbX (void) CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl4, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl4, ""); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl4, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_vtbl4, "")); /* Now test VTBX. */ @@ -229,17 +271,20 @@ void exec_vtbX (void) #define TEST_ALL_VTBX1() \ TEST_VTBX1(int, s, int, 8, 8); \ TEST_VTBX1(uint, u, uint, 8, 8); \ - TEST_VTBX1(poly, p, uint, 8, 8) + TEST_VTBX1(poly, p, uint, 8, 8); \ + MFLOAT8_ONLY(TEST_VTBX1(mfloat, mf, uint, 8, 8)) #define TEST_ALL_VTBXX(X) \ TEST_VTBXX(int, s, int, 8, 8, X); \ TEST_VTBXX(uint, u, uint, 8, 8, X); \ - TEST_VTBXX(poly, p, uint, 8, 8, X) + TEST_VTBXX(poly, p, uint, 8, 8, X); \ + MFLOAT8_ONLY(TEST_VTBXX(mfloat, mf, uint, 8, 8, X)) /* Choose init value arbitrarily, will be used as default value. */ VDUP(default_vector, , int, s, 8, 8, 0x33); VDUP(default_vector, , uint, u, 8, 8, 0xCC); VDUP(default_vector, , poly, p, 8, 8, 0xCC); + MFLOAT8_ONLY(VDUP(default_vector, , mfloat, mf, 8, 8, MFLOAT8(0x55))); /* Check vtbx1. */ clean_results (); @@ -250,6 +295,7 @@ void exec_vtbX (void) CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx1, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx1, ""); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx1, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_vtbx1, "")); /* Check vtbx2. */ clean_results (); @@ -260,6 +306,7 @@ void exec_vtbX (void) CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx2, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx2, ""); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx2, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_vtbx2, "")); /* Check vtbx3. */ clean_results (); @@ -270,6 +317,7 @@ void exec_vtbX (void) CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx3, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx3, ""); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx3, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_vtbx3, "")); /* Check vtbx4. 
*/ clean_results (); @@ -280,6 +328,7 @@ void exec_vtbX (void) CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx4, ""); CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx4, ""); CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx4, ""); + MFLOAT8_ONLY(CHECK_FP(TEST_MSG, mfloat, 8, 8, PRIx8, expected_vtbx4, "")); } int main (void) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c index ea2d8d8..9e13bc1 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c @@ -15,6 +15,10 @@ VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf2, 0xf3, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected0,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xaa, 0xaa, + 0xf2, 0xf3, 0xaa, 0xaa }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80, 0x4b4d, 0x4b4d }; @@ -40,6 +44,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66, 0xfff2, 0xfff3, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected0,hmfloat,8,16) [] = { 0xf0, 0xf1, 0xbc, 0xbc, + 0xf2, 0xf3, 0xbc, 0xbc, + 0xf4, 0xf5, 0xbc, 0xbc, + 0xf6, 0xf7, 0xbc, 0xbc }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, 0x4b4d, 0x4b4d, @@ -61,6 +71,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, 0xf6, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected1,hmfloat,8,8) [] = { 0xf4, 0xf5, 0xaa, 0xaa, + 0xf6, 0xf7, 0xaa, 0xaa }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb00, 0xca80, 0x4b4d, 0x4b4d }; @@ -86,6 +100,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, 0xfe, 0xff, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66, 0xfff6, 0xfff7, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected1,hmfloat,8,16) [] = { 0xf8, 0xf9, 0xbc, 0xbc, + 0xfa, 0xfb, 0xbc, 0xbc, + 0xfc, 0xfd, 0xbc, 0xbc, + 0xfe, 0xff, 0xbc, 0xbc }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xca00, 0xc980, 0x4b4d, 0x4b4d, diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c index 25a0f19..6debfe5 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c @@ -20,6 +20,10 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0x55, 0xf2, 0x55, 0xf4, 0x55, 0xf6, 0x55 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0x66, 0xfff2, 0x66 }; VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xf0, 0x29, 0xf2, 0x29, + 0xf4, 0x29, 0xf6, 0x29 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0x4b4d, 0xcb00, 0x4b4d }; @@ -50,6 +54,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0x55, 0xf2, 0x55, 0xfc, 0x55, 0xfe, 0x55 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 
0x66, 0xfff2, 0x66, 0xfff4, 0x66, 0xfff6, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,16) [] = { 0xf0, 0xea, 0xf2, 0xea, + 0xf4, 0xea, 0xf6, 0xea, + 0xf8, 0xea, 0xfa, 0xea, + 0xfc, 0xea, 0xfe, 0xea }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0x4b4d, 0xcb00, 0x4b4d, @@ -82,6 +92,10 @@ void exec_vtrn_half (void) CLEAN(expected, uint, 64, 1); TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if MFLOAT8_SUPPORTED + VLOAD(vector, buffer, , mfloat, mf, 8, 8); + VLOAD(vector, buffer, q, mfloat, mf, 8, 16); +#endif #if defined (FP16_SUPPORTED) VLOAD(vector, buffer, , float, f, 16, 4); VLOAD(vector, buffer, q, float, f, 16, 8); @@ -99,6 +113,7 @@ void exec_vtrn_half (void) VDUP(vector2, , uint, u, 32, 2, 0x77); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); + MFLOAT8_ONLY(VDUP(vector2, , mfloat, mf, 8, 8, MFLOAT8(0x29))); #if defined (FP16_SUPPORTED) VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ #endif @@ -114,6 +129,7 @@ void exec_vtrn_half (void) VDUP(vector2, q, uint, u, 64, 2, 0x88); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); + MFLOAT8_ONLY(VDUP(vector2, q, mfloat, mf, 8, 16, MFLOAT8(0xea))); #if defined (FP16_SUPPORTED) VDUP (vector2, q, float, f, 16, 8, 14.6f); #endif @@ -128,6 +144,7 @@ void exec_vtrn_half (void) TEST_VTRN1(, uint, u, 32, 2); TEST_VTRN1(, poly, p, 8, 8); TEST_VTRN1(, poly, p, 16, 4); + MFLOAT8_ONLY(TEST_VTRN1(, mfloat, mf, 8, 8)); #if defined (FP16_SUPPORTED) TEST_VTRN1(, float, f, 16, 4); #endif @@ -143,6 +160,7 @@ void exec_vtrn_half (void) TEST_VTRN1(q, uint, u, 64, 2); TEST_VTRN1(q, poly, p, 8, 16); TEST_VTRN1(q, poly, p, 16, 8); + MFLOAT8_ONLY(TEST_VTRN1(q, mfloat, mf, 8, 16)); #if defined (FP16_SUPPORTED) TEST_VTRN1(q, float, f, 16, 8); #endif @@ -174,6 +192,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf1, 0x55, 0xf3, 0x55, 0xf5, 0x55, 0xf7, 0x55 }; VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff1, 0x66, 0xfff3, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,8) [] = { 0xf1, 0x29, 0xf3, 0x29, + 0xf5, 0x29, 0xf7, 0x29 }; +#endif VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb80, 0x4b4d, @@ -205,6 +227,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf1, 0x55, 0xf3, 0x55, 0xfd, 0x55, 0xff, 0x55 }; VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0x66, 0xfff3, 0x66, 0xfff5, 0x66, 0xfff7, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,16) [] = { 0xf1, 0xea, 0xf3, 0xea, + 0xf5, 0xea, 0xf7, 0xea, + 0xf9, 0xea, 0xfb, 0xea, + 0xfd, 0xea, 0xff, 0xea }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb80, 0x4b4d, 0xca80, 0x4b4d, @@ -225,6 +253,7 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0x42073333, TEST_VTRN2(, uint, u, 32, 2); TEST_VTRN2(, poly, p, 8, 8); TEST_VTRN2(, poly, p, 16, 4); + MFLOAT8_ONLY(TEST_VTRN2(, mfloat, mf, 8, 8)); #if defined (FP16_SUPPORTED) TEST_VTRN2(, float, f, 16, 4); #endif @@ -240,6 +269,7 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0x42073333, TEST_VTRN2(q, uint, u, 64, 2); TEST_VTRN2(q, poly, p, 8, 16); TEST_VTRN2(q, poly, p, 16, 8); + MFLOAT8_ONLY(TEST_VTRN2(q, mfloat, mf, 8, 16)); #if defined (FP16_SUPPORTED) TEST_VTRN2(q, float, f, 16, 8); #endif diff --git 
a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c index 43b49ca..6b105ab 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c @@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected0,hmfloat,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; @@ -52,6 +56,12 @@ VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected0,hmfloat,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, + 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, + 0xfc, 0xfd, 0xfe, 0xff }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, @@ -73,6 +83,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected1,hmfloat,8,8) [] = { 0xaa, 0xaa, 0xaa, 0xaa, + 0xaa, 0xaa, 0xaa, 0xaa }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0x4b4d, 0x4b4d, 0x4b4d, 0x4b4d }; @@ -98,6 +112,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected1,hmfloat,8,16) [] = { 0xbc, 0xbc, 0xbc, 0xbc, + 0xbc, 0xbc, 0xbc, 0xbc, + 0xbc, 0xbc, 0xbc, 0xbc, + 0xbc, 0xbc, 0xbc, 0xbc }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0x4b4d, 0x4b4d, 0x4b4d, 0x4b4d, diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c index 2e6b666..fe35e15 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c @@ -19,6 +19,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0x7b, 0x7b, 0x7b, 0x7b }; +#endif VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb00, @@ -49,6 +53,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf2, 0xf4, 0xf6, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff2, 0xfff4, 0xfff6, 0x66, 0x66, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,16) [] = { 0xf0, 0xf2, 0xf4, 0xf6, + 0xf8, 0xfa, 0xfc, 0xfe, + 0x92, 0x92, 0x92, 0x92, + 0x92, 0x92, 0x92, 0x92 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb00, 0xca00, 0xc900, 0x4b4d, 0x4b4d, 0x4b4d, 0x4b4d }; @@ -79,6 +89,10 @@ void exec_vuzp_half (void) CLEAN(expected, uint, 64, 1); 
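Note: the new mfloat8 cases in these harness tests follow one pattern throughout — expected-value arrays and per-type checks are wrapped in `#if MFLOAT8_SUPPORTED` / `MFLOAT8_ONLY(...)` so the files still build on targets without FP8, and vector constants are produced with the `MFLOAT8()` helper because `mfloat8_t` has no literal form. A minimal sketch of what such a guard could look like is below; the real definitions are assumed to live in the shared advsimd-intrinsics headers, so treat this purely as an illustration of intent.

```c
/* Illustrative only: assumed shape of the guard used by these tests.
   The actual macros come from the shared advsimd-intrinsics headers.  */
#if MFLOAT8_SUPPORTED
/* Expand the guarded test code when the target has FP8 support.  */
#define MFLOAT8_ONLY(...) __VA_ARGS__
#else
/* Drop the mfloat8-specific declarations, VDUPs and CHECKs otherwise.  */
#define MFLOAT8_ONLY(...)
#endif
```

A variadic macro is the natural shape here because the guarded calls, such as `MFLOAT8_ONLY(VDUP(vector2, , mfloat, mf, 8, 8, MFLOAT8(0x29)))`, contain commas of their own.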
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if MFLOAT8_SUPPORTED + VLOAD(vector, buffer, , mfloat, mf, 8, 8); + VLOAD(vector, buffer, q, mfloat, mf, 8, 16); +#endif #if defined (FP16_SUPPORTED) VLOAD(vector, buffer, , float, f, 16, 4); VLOAD(vector, buffer, q, float, f, 16, 8); @@ -96,6 +110,7 @@ void exec_vuzp_half (void) VDUP(vector2, , uint, u, 32, 2, 0x77); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); + MFLOAT8_ONLY(VDUP(vector2, , mfloat, mf, 8, 8, MFLOAT8(0x7b))); #if defined (FP16_SUPPORTED) VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ #endif @@ -111,6 +126,7 @@ void exec_vuzp_half (void) VDUP(vector2, q, uint, u, 64, 2, 0x88); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); + MFLOAT8_ONLY(VDUP(vector2, q, mfloat, mf, 8, 16, MFLOAT8(0x92))); #if defined (FP16_SUPPORTED) VDUP (vector2, q, float, f, 16, 8, 14.6f); #endif @@ -125,6 +141,7 @@ void exec_vuzp_half (void) TEST_VUZP1(, uint, u, 32, 2); TEST_VUZP1(, poly, p, 8, 8); TEST_VUZP1(, poly, p, 16, 4); + MFLOAT8_ONLY(TEST_VUZP1(, mfloat, mf, 8, 8)); #if defined (FP16_SUPPORTED) TEST_VUZP1(, float, f, 16, 4); #endif @@ -140,6 +157,7 @@ void exec_vuzp_half (void) TEST_VUZP1(q, uint, u, 64, 2); TEST_VUZP1(q, poly, p, 8, 16); TEST_VUZP1(q, poly, p, 16, 8); + MFLOAT8_ONLY(TEST_VUZP1(q, mfloat, mf, 8, 16)); #if defined (FP16_SUPPORTED) TEST_VUZP1(q, float, f, 16, 8); #endif @@ -171,6 +189,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff1 }; VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0x7b, 0x7b, 0x7b, 0x7b }; +#endif VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb80, 0xca80, @@ -201,6 +223,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf1, 0xf3, 0xf5, 0xf7, 0x55, 0x55, 0x55, 0x55 }; VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0xfff3, 0xfff5, 0xfff7, 0x66, 0x66, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,16) [] = { 0xf1, 0xf3, 0xf5, 0xf7, + 0xf9, 0xfb, 0xfd, 0xff, + 0x92, 0x92, 0x92, 0x92, + 0x92, 0x92, 0x92, 0x92 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb80, 0xca80, 0xc980, 0xc880, 0x4b4d, 0x4b4d, 0x4b4d, 0x4b4d @@ -221,6 +249,7 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1500000, TEST_VUZP2(, uint, u, 32, 2); TEST_VUZP2(, poly, p, 8, 8); TEST_VUZP2(, poly, p, 16, 4); + MFLOAT8_ONLY(TEST_VUZP2(, mfloat, mf, 8, 8)); #if defined (FP16_SUPPORTED) TEST_VUZP2(, float, f, 16, 4); #endif @@ -236,6 +265,7 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1500000, TEST_VUZP2(q, uint, u, 64, 2); TEST_VUZP2(q, poly, p, 8, 16); TEST_VUZP2(q, poly, p, 16, 8); + MFLOAT8_ONLY(TEST_VUZP2(q, mfloat, mf, 8, 16)); #if defined (FP16_SUPPORTED) TEST_VUZP2(q, float, f, 16, 8); #endif diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c index 20f4f5d..766da27 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c @@ -18,6 +18,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, 0xf1, 0xf5, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,4) [] = { 
0xfff0, 0xfff2, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected0,hmfloat,8,8) [] = { 0xf0, 0xf4, 0xaa, 0xaa, + 0xf1, 0xf5, 0xaa, 0xaa }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb00, 0x4b4d, 0x4b4d }; @@ -45,6 +49,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, 0xf3, 0xfb, 0x55, 0x55 }; VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66, 0xfff1, 0xfff5, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected0,hmfloat,8,16) [] = { 0xf0, 0xf8, 0xbc, 0xbc, + 0xf1, 0xf9, 0xbc, 0xbc, + 0xf2, 0xfa, 0xbc, 0xbc, + 0xf3, 0xfb, 0xbc, 0xbc }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xca00, 0x4b4d, 0x4b4d, @@ -69,6 +79,10 @@ VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, 0xf3, 0xf7, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected1,hmfloat,8,8) [] = { 0xf2, 0xf6, 0xaa, 0xaa, + 0xf3, 0xf7, 0xaa, 0xaa }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xca80, 0x4b4d, 0x4b4d }; @@ -96,6 +110,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, 0xf7, 0xff, 0x55, 0x55 }; VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66, 0xfff3, 0xfff7, 0x66, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected1,hmfloat,8,16) [] = { 0xf4, 0xfc, 0xbc, 0xbc, + 0xf5, 0xfd, 0xbc, 0xbc, + 0xf6, 0xfe, 0xbc, 0xbc, + 0xf7, 0xff, 0xbc, 0xbc }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb00, 0xc900, 0x4b4d, 0x4b4d, diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c index ef42451..5914192 100644 --- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c @@ -20,6 +20,10 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0x55, 0xf1, 0x55, 0xf2, 0x55, 0xf3, 0x55 }; VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0x66, 0xfff1, 0x66 }; VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,8) [] = { 0xf0, 0xf9, 0xf1, 0xf9, + 0xf2, 0xf9, 0xf3, 0xf9 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0x4b4d, 0xcb80, 0x4b4d }; @@ -50,6 +54,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0x55, 0xf1, 0x55, 0xf6, 0x55, 0xf7, 0x55 }; VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0x66, 0xfff1, 0x66, 0xfff2, 0x66, 0xfff3, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected,hmfloat,8,16) [] = { 0xf0, 0xd6, 0xf1, 0xd6, + 0xf2, 0xd6, 0xf3, 0xd6, + 0xf4, 0xd6, 0xf5, 0xd6, + 0xf6, 0xd6, 0xf7, 0xd6 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0x4b4d, 0xcb80, 0x4b4d, @@ -82,6 +92,10 @@ void exec_vzip_half (void) CLEAN(expected, uint, 64, 1); TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); +#if MFLOAT8_SUPPORTED + VLOAD(vector, buffer, , mfloat, mf, 8, 8); + VLOAD(vector, buffer, q, mfloat, mf, 8, 16); +#endif #if defined (FP16_SUPPORTED) VLOAD(vector, buffer, , float, f, 16, 4); VLOAD(vector, buffer, q, float, f, 16, 8); @@ -99,6 +113,7 @@ void exec_vzip_half (void) VDUP(vector2, , uint, u, 32, 2, 0x77); VDUP(vector2, , poly, p, 8, 8, 0x55); VDUP(vector2, , poly, p, 16, 4, 0x66); + MFLOAT8_ONLY(VDUP(vector2, , mfloat, 
mf, 8, 8, MFLOAT8(0xf9))); #if defined (FP16_SUPPORTED) VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ #endif @@ -114,6 +129,7 @@ void exec_vzip_half (void) VDUP(vector2, q, uint, u, 64, 2, 0x88); VDUP(vector2, q, poly, p, 8, 16, 0x55); VDUP(vector2, q, poly, p, 16, 8, 0x66); + MFLOAT8_ONLY(VDUP(vector2, q, mfloat, mf, 8, 16, MFLOAT8(0xd6))); #if defined (FP16_SUPPORTED) VDUP (vector2, q, float, f, 16, 8, 14.6f); #endif @@ -128,6 +144,7 @@ void exec_vzip_half (void) TEST_VZIP1(, uint, u, 32, 2); TEST_VZIP1(, poly, p, 8, 8); TEST_VZIP1(, poly, p, 16, 4); + MFLOAT8_ONLY(TEST_VZIP1(, mfloat, mf, 8, 8)); #if defined (FP16_SUPPORTED) TEST_VZIP1(, float, f, 16, 4); #endif @@ -143,6 +160,7 @@ void exec_vzip_half (void) TEST_VZIP1(q, uint, u, 64, 2); TEST_VZIP1(q, poly, p, 8, 16); TEST_VZIP1(q, poly, p, 16, 8); + MFLOAT8_ONLY(TEST_VZIP1(q, mfloat, mf, 8, 16)); #if defined (FP16_SUPPORTED) TEST_VZIP1(q, float, f, 16, 8); #endif @@ -175,6 +193,10 @@ VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf4, 0x55, 0xf5, 0x55, 0xf6, 0x55, 0xf7, 0x55 }; VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0x66, 0xfff3, 0x66 }; VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,8) [] = { 0xf4, 0xf9, 0xf5, 0xf9, + 0xf6, 0xf9, 0xf7, 0xf9 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0x4b4d, 0xca80, 0x4b4d }; @@ -205,6 +227,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf8, 0x55, 0xf9, 0x55, 0xfe, 0x55, 0xff, 0x55 }; VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff4, 0x66, 0xfff5, 0x66, 0xfff6, 0x66, 0xfff7, 0x66 }; +#if MFLOAT8_SUPPORTED +VECT_VAR_DECL(expected2,hmfloat,8,16) [] = { 0xf8, 0xd6, 0xf9, 0xd6, + 0xfa, 0xd6, 0xfb, 0xd6, + 0xfc, 0xd6, 0xfd, 0xd6, + 0xfe, 0xd6, 0xff, 0xd6 }; +#endif #if defined (FP16_SUPPORTED) VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xca00, 0x4b4d, 0xc980, 0x4b4d, @@ -225,6 +253,7 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0x42073333, TEST_VZIP2(, uint, u, 32, 2); TEST_VZIP2(, poly, p, 8, 8); TEST_VZIP2(, poly, p, 16, 4); + MFLOAT8_ONLY(TEST_VZIP2(, mfloat, mf, 8, 8)); #if defined (FP16_SUPPORTED) TEST_VZIP2(, float, f, 16, 4); #endif @@ -240,6 +269,7 @@ VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0x42073333, TEST_VZIP2(q, uint, u, 64, 2); TEST_VZIP2(q, poly, p, 8, 16); TEST_VZIP2(q, poly, p, 16, 8); + MFLOAT8_ONLY(TEST_VZIP2(q, mfloat, mf, 8, 16)); #if defined (FP16_SUPPORTED) TEST_VZIP2(q, float, f, 16, 8); #endif diff --git a/gcc/testsuite/gcc.target/aarch64/simd/lut.c b/gcc/testsuite/gcc.target/aarch64/simd/lut.c index fc89b21..51b7b9c 100644 --- a/gcc/testsuite/gcc.target/aarch64/simd/lut.c +++ b/gcc/testsuite/gcc.target/aarch64/simd/lut.c @@ -197,6 +197,70 @@ test_vluti2q_laneqp8(poly8x16_t a, uint8x16_t b, poly8x16_t results[]) } /* +** test_vluti2_lanemf8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** ... +** ret +*/ +void +test_vluti2_lanemf8(mfloat8x8_t a, uint8x8_t b, mfloat8x16_t results[]) +{ + results[0] = vluti2_lane_mf8(a, b, 0); + results[1] = vluti2_lane_mf8(a, b, 1); +} + +/* +** test_vluti2_laneqmf8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[2\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[3\] +** ... 
+** ret +*/ +void +test_vluti2_laneqmf8(mfloat8x8_t a, uint8x16_t b, mfloat8x16_t results[]) +{ + results[0] = vluti2_laneq_mf8(a, b, 0); + results[1] = vluti2_laneq_mf8(a, b, 1); + results[2] = vluti2_laneq_mf8(a, b, 2); + results[3] = vluti2_laneq_mf8(a, b, 3); +} + +/* +** test_vluti2q_lanemf8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** ... +** ret +*/ +void +test_vluti2q_lanemf8(mfloat8x16_t a, uint8x8_t b, mfloat8x16_t results[]) +{ + results[0] = vluti2q_lane_mf8(a, b, 0); + results[1] = vluti2q_lane_mf8(a, b, 1); +} + +/* +** test_vluti2q_laneqmf8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[2\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti2q_laneqmf8(mfloat8x16_t a, uint8x16_t b, mfloat8x16_t results[]) +{ + results[0] = vluti2q_laneq_mf8(a, b, 0); + results[1] = vluti2q_laneq_mf8(a, b, 1); + results[2] = vluti2q_laneq_mf8(a, b, 2); + results[3] = vluti2q_laneq_mf8(a, b, 3); +} + +/* ** test_vluti2_laneu16: ** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] ** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] @@ -689,6 +753,32 @@ test_vluti4q_laneqp8(poly8x16_t a, uint8x16_t b, poly8x16_t results[]) } /* +** test_vluti4q_lanemf8: +** luti4 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** ... +** ret +*/ +void +test_vluti4q_lanemf8(mfloat8x16_t a, uint8x8_t b, mfloat8x16_t results[]) +{ + results[0] = vluti4q_lane_mf8(a, b, 0); +} + +/* +** test_vluti4q_laneqmf8: +** luti4 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti4 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** ... +** ret +*/ +void +test_vluti4q_laneqmf8(mfloat8x16_t a, uint8x16_t b, mfloat8x16_t results[]) +{ + results[0] = vluti4q_laneq_mf8(a, b, 0); + results[1] = vluti4q_laneq_mf8(a, b, 1); +} + +/* ** test_vluti4q_laneu16_x2: ** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[0\] ** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[1\] diff --git a/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_1.c b/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_1.c new file mode 100644 index 0000000..a3fd9b8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_1.c @@ -0,0 +1,1822 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-O -std=gnu23 --save-temps" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include <arm_neon.h> + +/* +** test_bsl1: +** bsl v0.8b, v1.8b, v2.8b +** ret +*/ +mfloat8x8_t test_bsl1(uint8x8_t a, mfloat8x8_t b, mfloat8x8_t c) +{ + return vbsl_mf8(a, b, c); +} + +/* +** test_bsl2: +** bif v0.8b, v2.8b, v1.8b +** ret +*/ +mfloat8x8_t test_bsl2(mfloat8x8_t a, uint8x8_t b, mfloat8x8_t c) +{ + return vbsl_mf8(b, a, c); +} + +/* +** test_bsl3: +** bit v0.8b, v2.8b, v1.8b +** ret +*/ +mfloat8x8_t test_bsl3(mfloat8x8_t a, uint8x8_t b, mfloat8x8_t c) +{ + return vbsl_mf8(b, c, a); +} + +/* +** test_bslq1: +** bsl v0.16b, v1.16b, v2.16b +** ret +*/ +mfloat8x16_t test_bslq1(uint8x16_t a, mfloat8x16_t b, mfloat8x16_t c) +{ + return vbslq_mf8(a, b, c); +} + +/* +** test_bslq2: +** bif v0.16b, v2.16b, v1.16b +** ret +*/ +mfloat8x16_t test_bslq2(mfloat8x16_t a, uint8x16_t b, mfloat8x16_t c) +{ + return vbslq_mf8(b, a, c); +} + +/* +** test_bslq3: +** bit v0.16b, v2.16b, v1.16b +** ret +*/ +mfloat8x16_t test_bslq3(mfloat8x16_t a, uint8x16_t b, mfloat8x16_t c) +{ + return vbslq_mf8(b, c, a); +} + +/* +** test_combine1: +** uzp1 v0.2d, v1.2d, v2.2d +** ret +*/ 
+mfloat8x16_t test_combine1(mfloat8_t a, mfloat8x8_t b, mfloat8x8_t c) +{ + return vcombine_mf8(b, c); +} + +/* +** test_copy_lane1: +** ins v0.b\[0\], v1.b\[0\] +** ret +*/ +mfloat8x8_t test_copy_lane1(mfloat8x8_t a, mfloat8x8_t b) +{ + return vcopy_lane_mf8(a, 0, b, 0); +} + +/* +** test_copy_lane2: +** ins v0.b\[0\], v1.b\[7\] +** ret +*/ +mfloat8x8_t test_copy_lane2(mfloat8x8_t a, mfloat8x8_t b) +{ + return vcopy_lane_mf8(a, 0, b, 7); +} + +/* +** test_copy_lane3: +** ins v0.b\[7\], v1.b\[0\] +** ret +*/ +mfloat8x8_t test_copy_lane3(mfloat8x8_t a, mfloat8x8_t b) +{ + return vcopy_lane_mf8(a, 7, b, 0); +} + +/* +** test_copy_lane4: +** ins v0.b\[5\], v1.b\[2\] +** ret +*/ +mfloat8x8_t test_copy_lane4(mfloat8x8_t a, mfloat8x8_t b) +{ + return vcopy_lane_mf8(a, 5, b, 2); +} + +/* +** test_copy_laneq1: +** ins v0.b\[0\], v1.b\[0\] +** ret +*/ +mfloat8x8_t test_copy_laneq1(mfloat8x8_t a, mfloat8x16_t b) +{ + return vcopy_laneq_mf8(a, 0, b, 0); +} + +/* +** test_copy_laneq2: +** ins v0.b\[0\], v1.b\[15\] +** ret +*/ +mfloat8x8_t test_copy_laneq2(mfloat8x8_t a, mfloat8x16_t b) +{ + return vcopy_laneq_mf8(a, 0, b, 15); +} + +/* +** test_copy_laneq3: +** ins v0.b\[7\], v1.b\[0\] +** ret +*/ +mfloat8x8_t test_copy_laneq3(mfloat8x8_t a, mfloat8x16_t b) +{ + return vcopy_laneq_mf8(a, 7, b, 0); +} + +/* +** test_copy_laneq4: +** ins v0.b\[6\], v1.b\[13\] +** ret +*/ +mfloat8x8_t test_copy_laneq4(mfloat8x8_t a, mfloat8x16_t b) +{ + return vcopy_laneq_mf8(a, 6, b, 13); +} + +/* +** test_copyq_lane1: +** ins v0.b\[0\], v1.b\[0\] +** ret +*/ +mfloat8x16_t test_copyq_lane1(mfloat8x16_t a, mfloat8x8_t b) +{ + return vcopyq_lane_mf8(a, 0, b, 0); +} + +/* +** test_copyq_lane2: +** ins v0.b\[0\], v1.b\[7\] +** ret +*/ +mfloat8x16_t test_copyq_lane2(mfloat8x16_t a, mfloat8x8_t b) +{ + return vcopyq_lane_mf8(a, 0, b, 7); +} + +/* +** test_copyq_lane3: +** ins v0.b\[15\], v1.b\[0\] +** ret +*/ +mfloat8x16_t test_copyq_lane3(mfloat8x16_t a, mfloat8x8_t b) +{ + return vcopyq_lane_mf8(a, 15, b, 0); +} + +/* +** test_copyq_lane4: +** ins v0.b\[11\], v1.b\[2\] +** ret +*/ +mfloat8x16_t test_copyq_lane4(mfloat8x16_t a, mfloat8x8_t b) +{ + return vcopyq_lane_mf8(a, 11, b, 2); +} + +/* +** test_copyq_laneq1: +** ins v0.b\[0\], v1.b\[0\] +** ret +*/ +mfloat8x16_t test_copyq_laneq1(mfloat8x16_t a, mfloat8x16_t b) +{ + return vcopyq_laneq_mf8(a, 0, b, 0); +} + +/* +** test_copyq_laneq2: +** ins v0.b\[0\], v1.b\[15\] +** ret +*/ +mfloat8x16_t test_copyq_laneq2(mfloat8x16_t a, mfloat8x16_t b) +{ + return vcopyq_laneq_mf8(a, 0, b, 15); +} + +/* +** test_copyq_laneq3: +** ins v0.b\[15\], v1.b\[0\] +** ret +*/ +mfloat8x16_t test_copyq_laneq3(mfloat8x16_t a, mfloat8x16_t b) +{ + return vcopyq_laneq_mf8(a, 15, b, 0); +} + +/* +** test_copyq_laneq4: +** ins v0.b\[9\], v1.b\[13\] +** ret +*/ +mfloat8x16_t test_copyq_laneq4(mfloat8x16_t a, mfloat8x16_t b) +{ + return vcopyq_laneq_mf8(a, 9, b, 13); +} + +/* +** test_create1: +** fmov d0, x0 +** ret +*/ +mfloat8x8_t test_create1(uint64_t a) +{ + return vcreate_mf8(a); +} + +/* +** test_create2: +** movi d0, #?0xffff +** ret +*/ +mfloat8x8_t test_create2() +{ + return vcreate_mf8(0xffff); +} + +/* +** test_dup1: +** dup v0.8b, v1.b\[0\] +** ret +*/ +mfloat8x8_t test_dup1(mfloat8_t a, mfloat8_t b) +{ + return vdup_n_mf8(b); +} + +/* +** test_dup2: +** movi v0.2s, #?0 +** ret +*/ +mfloat8x8_t test_dup2() +{ + return vdup_n_mf8(((union { uint8_t x; mfloat8_t y; }) { 0 }).y); +} + +/* +** test_dup3: +** movi v0.8b, #?0xf +** ret +*/ +mfloat8x8_t test_dup3() +{ + return vdup_n_mf8(((union 
{ uint8_t x; mfloat8_t y; }) { 0x0f }).y); +} + +/* +** test_dupq1: +** dup v0.16b, v1.b\[0\] +** ret +*/ +mfloat8x16_t test_dupq1(mfloat8_t a, mfloat8_t b) +{ + return vdupq_n_mf8(b); +} + +/* +** test_dupq2: +** movi v0.4s, #?0 +** ret +*/ +mfloat8x16_t test_dupq2() +{ + return vdupq_n_mf8(((union { uint8_t x; mfloat8_t y; }) { 0 }).y); +} + +/* +** test_dupq3: +** movi v0.16b, #?0xf +** ret +*/ +mfloat8x16_t test_dupq3() +{ + return vdupq_n_mf8(((union { uint8_t x; mfloat8_t y; }) { 0x0f }).y); +} + +/* +** test_dup_lane1: +** dup v0.8b, v1.b\[0\] +** ret +*/ +mfloat8x8_t test_dup_lane1(mfloat8_t a, mfloat8x8_t b) +{ + return vdup_lane_mf8(b, 0); +} + +/* +** test_dup_lane2: +** dup v0.8b, v1.b\[7\] +** ret +*/ +mfloat8x8_t test_dup_lane2(mfloat8_t a, mfloat8x8_t b) +{ + return vdup_lane_mf8(b, 7); +} + +/* +** test_dup_laneq1: +** dup v0.8b, v1.b\[0\] +** ret +*/ +mfloat8x8_t test_dup_laneq1(mfloat8_t a, mfloat8x16_t b) +{ + return vdup_laneq_mf8(b, 0); +} + +/* +** test_dup_laneq2: +** dup v0.8b, v1.b\[15\] +** ret +*/ +mfloat8x8_t test_dup_laneq2(mfloat8_t a, mfloat8x16_t b) +{ + return vdup_laneq_mf8(b, 15); +} + +/* +** test_dupq_lane1: +** dup v0.16b, v1.b\[0\] +** ret +*/ +mfloat8x16_t test_dupq_lane1(mfloat8_t a, mfloat8x8_t b) +{ + return vdupq_lane_mf8(b, 0); +} + +/* +** test_dupq_lane2: +** dup v0.16b, v1.b\[7\] +** ret +*/ +mfloat8x16_t test_dupq_lane2(mfloat8_t a, mfloat8x8_t b) +{ + return vdupq_lane_mf8(b, 7); +} + +/* +** test_dupq_laneq1: +** dup v0.16b, v1.b\[0\] +** ret +*/ +mfloat8x16_t test_dupq_laneq1(mfloat8_t a, mfloat8x16_t b) +{ + return vdupq_laneq_mf8(b, 0); +} + +/* +** test_dupq_laneq2: +** dup v0.16b, v1.b\[15\] +** ret +*/ +mfloat8x16_t test_dupq_laneq2(mfloat8_t a, mfloat8x16_t b) +{ + return vdupq_laneq_mf8(b, 15); +} + +/* +** test_dupb_lane1: +** dup b0, v1.b\[0\] +** ret +*/ +mfloat8_t test_dupb_lane1(mfloat8_t a, mfloat8x8_t b) +{ + return vdupb_lane_mf8(b, 0); +} + +/* +** test_dupb_lane2: +** dup b0, v1.b\[7\] +** ret +*/ +mfloat8_t test_dupb_lane2(mfloat8_t a, mfloat8x8_t b) +{ + return vdupb_lane_mf8(b, 7); +} + +/* +** test_dupb_laneq1: +** dup b0, v1.b\[0\] +** ret +*/ +mfloat8_t test_dupb_laneq1(mfloat8_t a, mfloat8x16_t b) +{ + return vdupb_laneq_mf8(b, 0); +} + +/* +** test_dupb_laneq2: +** dup b0, v1.b\[15\] +** ret +*/ +mfloat8_t test_dupb_laneq2(mfloat8_t a, mfloat8x16_t b) +{ + return vdupb_laneq_mf8(b, 15); +} + +/* +** test_ext1: +** ext v0.8b, v0.8b, v1.8b, #1 +** ret +*/ +mfloat8x8_t test_ext1(mfloat8x8_t a, mfloat8x8_t b) +{ + return vext_mf8(a, b, 1); +} + +/* +** test_ext2: +** ext v0.8b, v1.8b, v2.8b, #7 +** ret +*/ +mfloat8x8_t test_ext2(mfloat8x8_t a, mfloat8x8_t b, mfloat8x8_t c) +{ + return vext_mf8(b, c, 7); +} + +/* +** test_extq1: +** ext v0.16b, v0.16b, v1.16b, #1 +** ret +*/ +mfloat8x16_t test_extq1(mfloat8x16_t a, mfloat8x16_t b) +{ + return vextq_mf8(a, b, 1); +} + +/* +** test_extq2: +** ext v0.16b, v1.16b, v2.16b, #15 +** ret +*/ +mfloat8x16_t test_extq2(mfloat8x16_t a, mfloat8x16_t b, mfloat8x16_t c) +{ + return vextq_mf8(b, c, 15); +} + +/* +** test_ld1: { target { le && lp64 } } +** ldr d0, \[x0\] +** ret +*/ +/* +** test_ld1: { target { be && lp64 } } +** ld1 {v0.8b}, \[x0\] +** ret +*/ +mfloat8x8_t test_ld1(const mfloat8_t *ptr) +{ + return vld1_mf8(ptr); +} + +/* +** test_ld1q: { target { le && lp64 } } +** ldr q0, \[x0\] +** ret +*/ +/* +** test_ld1q: { target { be && lp64 } } +** ld1 {v0.16b}, \[x0\] +** ret +*/ +mfloat8x16_t test_ld1q(const mfloat8_t *ptr) +{ + return vld1q_mf8(ptr); +} + +/* +** 
test_ld1_dup: { target lp64 } +** ld1r {v0.8b}, \[x0\] +** ret +*/ +mfloat8x8_t test_ld1_dup(const mfloat8_t *ptr) +{ + return vld1_dup_mf8(ptr); +} + +/* +** test_ld1q_dup: { target lp64 } +** ld1r {v0.16b}, \[x0\] +** ret +*/ +mfloat8x16_t test_ld1q_dup(const mfloat8_t *ptr) +{ + return vld1q_dup_mf8(ptr); +} + +/* +** test_ld1_lane1: { target lp64 } +** ld1 {v0.b}\[0\], \[x0\] +** ret +*/ +mfloat8x8_t test_ld1_lane1(const mfloat8_t *ptr, mfloat8x8_t a) +{ + return vld1_lane_mf8(ptr, a, 0); +} + +/* +** test_ld1_lane2: { target lp64 } +** ld1 {v0.b}\[7\], \[x0\] +** ret +*/ +mfloat8x8_t test_ld1_lane2(const mfloat8_t *ptr, mfloat8x8_t a) +{ + return vld1_lane_mf8(ptr, a, 7); +} + +/* +** test_ld1q_lane1: { target lp64 } +** ld1 {v0.b}\[0\], \[x0\] +** ret +*/ +mfloat8x16_t test_ld1q_lane1(const mfloat8_t *ptr, mfloat8x16_t a) +{ + return vld1q_lane_mf8(ptr, a, 0); +} + +/* +** test_ld1q_lane2: { target lp64 } +** ld1 {v0.b}\[15\], \[x0\] +** ret +*/ +mfloat8x16_t test_ld1q_lane2(const mfloat8_t *ptr, mfloat8x16_t a) +{ + return vld1q_lane_mf8(ptr, a, 15); +} + +/* +** test_ld1_x2: { target lp64 } +** ld1 {v0.8b( - |, )v1.8b}, \[x0\] +** ret +*/ +mfloat8x8x2_t test_ld1_x2(const mfloat8_t *ptr) +{ + return vld1_mf8_x2(ptr); +} + +/* +** test_ld1q_x2: { target lp64 } +** ld1 {v0.16b( - |, )v1.16b}, \[x0\] +** ret +*/ +mfloat8x16x2_t test_ld1q_x2(const mfloat8_t *ptr) +{ + return vld1q_mf8_x2(ptr); +} + +/* +** test_ld1_x3: { target lp64 } +** ld1 {v0.8b - v2.8b}, \[x0\] +** ret +*/ +mfloat8x8x3_t test_ld1_x3(const mfloat8_t *ptr) +{ + return vld1_mf8_x3(ptr); +} + +/* +** test_ld1q_x3: { target lp64 } +** ld1 {v0.16b - v2.16b}, \[x0\] +** ret +*/ +mfloat8x16x3_t test_ld1q_x3(const mfloat8_t *ptr) +{ + return vld1q_mf8_x3(ptr); +} + +/* +** test_ld1_x4: { target lp64 } +** ld1 {v0.8b - v3.8b}, \[x0\] +** ret +*/ +mfloat8x8x4_t test_ld1_x4(const mfloat8_t *ptr) +{ + return vld1_mf8_x4(ptr); +} + +/* +** test_ld1q_x4: { target lp64 } +** ld1 {v0.16b - v3.16b}, \[x0\] +** ret +*/ +mfloat8x16x4_t test_ld1q_x4(const mfloat8_t *ptr) +{ + return vld1q_mf8_x4(ptr); +} + +/* +** test_ld2: { target lp64 } +** ld2 {v0.8b( - |, )v1.8b}, \[x0\] +** ret +*/ +mfloat8x8x2_t test_ld2(const mfloat8_t *ptr) +{ + return vld2_mf8(ptr); +} + +/* +** test_ld2q: { target lp64 } +** ld2 {v0.16b( - |, )v1.16b}, \[x0\] +** ret +*/ +mfloat8x16x2_t test_ld2q(const mfloat8_t *ptr) +{ + return vld2q_mf8(ptr); +} + +/* +** test_ld2_dup: { target lp64 } +** ld2r {v0.8b( - |, )v1.8b}, \[x0\] +** ret +*/ +mfloat8x8x2_t test_ld2_dup(const mfloat8_t *ptr) +{ + return vld2_dup_mf8(ptr); +} + +/* +** test_ld2q_dup: { target lp64 } +** ld2r {v0.16b( - |, )v1.16b}, \[x0\] +** ret +*/ +mfloat8x16x2_t test_ld2q_dup(const mfloat8_t *ptr) +{ + return vld2q_dup_mf8(ptr); +} + +/* +** test_ld2_lane1: { target lp64 } +** ld2 {v0.b( - |, )v1.b}\[0\], \[x0\] +** ret +*/ +mfloat8x8x2_t test_ld2_lane1(const mfloat8_t *ptr, mfloat8x8x2_t a) +{ + return vld2_lane_mf8(ptr, a, 0); +} + +/* +** test_ld2_lane2: { target lp64 } +** ld2 {v0.b( - |, )v1.b}\[7\], \[x0\] +** ret +*/ +mfloat8x8x2_t test_ld2_lane2(const mfloat8_t *ptr, mfloat8x8x2_t a) +{ + return vld2_lane_mf8(ptr, a, 7); +} + +/* +** test_ld2q_lane1: { target lp64 } +** ld2 {v0.b( - |, )v1.b}\[0\], \[x0\] +** ret +*/ +mfloat8x16x2_t test_ld2q_lane1(const mfloat8_t *ptr, mfloat8x16x2_t a) +{ + return vld2q_lane_mf8(ptr, a, 0); +} + +/* +** test_ld2q_lane2: { target lp64 } +** ld2 {v0.b( - |, )v1.b}\[15\], \[x0\] +** ret +*/ +mfloat8x16x2_t test_ld2q_lane2(const mfloat8_t *ptr, 
mfloat8x16x2_t a) +{ + return vld2q_lane_mf8(ptr, a, 15); +} + +/* +** test_ld3: { target lp64 } +** ld3 {v0.8b - v2.8b}, \[x0\] +** ret +*/ +mfloat8x8x3_t test_ld3(const mfloat8_t *ptr) +{ + return vld3_mf8(ptr); +} + +/* +** test_ld3q: { target lp64 } +** ld3 {v0.16b - v2.16b}, \[x0\] +** ret +*/ +mfloat8x16x3_t test_ld3q(const mfloat8_t *ptr) +{ + return vld3q_mf8(ptr); +} + +/* +** test_ld3_dup: { target lp64 } +** ld3r {v0.8b - v2.8b}, \[x0\] +** ret +*/ +mfloat8x8x3_t test_ld3_dup(const mfloat8_t *ptr) +{ + return vld3_dup_mf8(ptr); +} + +/* +** test_ld3q_dup: { target lp64 } +** ld3r {v0.16b - v2.16b}, \[x0\] +** ret +*/ +mfloat8x16x3_t test_ld3q_dup(const mfloat8_t *ptr) +{ + return vld3q_dup_mf8(ptr); +} + +/* +** test_ld3_lane1: { target lp64 } +** ld3 {v0.b - v2.b}\[0\], \[x0\] +** ret +*/ +mfloat8x8x3_t test_ld3_lane1(const mfloat8_t *ptr, mfloat8x8x3_t a) +{ + return vld3_lane_mf8(ptr, a, 0); +} + +/* +** test_ld3_lane2: { target lp64 } +** ld3 {v0.b - v2.b}\[7\], \[x0\] +** ret +*/ +mfloat8x8x3_t test_ld3_lane2(const mfloat8_t *ptr, mfloat8x8x3_t a) +{ + return vld3_lane_mf8(ptr, a, 7); +} + +/* +** test_ld3q_lane1: { target lp64 } +** ld3 {v0.b - v2.b}\[0\], \[x0\] +** ret +*/ +mfloat8x16x3_t test_ld3q_lane1(const mfloat8_t *ptr, mfloat8x16x3_t a) +{ + return vld3q_lane_mf8(ptr, a, 0); +} + +/* +** test_ld3q_lane2: { target lp64 } +** ld3 {v0.b - v2.b}\[15\], \[x0\] +** ret +*/ +mfloat8x16x3_t test_ld3q_lane2(const mfloat8_t *ptr, mfloat8x16x3_t a) +{ + return vld3q_lane_mf8(ptr, a, 15); +} + +/* +** test_ld4: { target lp64 } +** ld4 {v0.8b - v3.8b}, \[x0\] +** ret +*/ +mfloat8x8x4_t test_ld4(const mfloat8_t *ptr) +{ + return vld4_mf8(ptr); +} + +/* +** test_ld4q: { target lp64 } +** ld4 {v0.16b - v3.16b}, \[x0\] +** ret +*/ +mfloat8x16x4_t test_ld4q(const mfloat8_t *ptr) +{ + return vld4q_mf8(ptr); +} + +/* +** test_ld4_dup: { target lp64 } +** ld4r {v0.8b - v3.8b}, \[x0\] +** ret +*/ +mfloat8x8x4_t test_ld4_dup(const mfloat8_t *ptr) +{ + return vld4_dup_mf8(ptr); +} + +/* +** test_ld4q_dup: { target lp64 } +** ld4r {v0.16b - v3.16b}, \[x0\] +** ret +*/ +mfloat8x16x4_t test_ld4q_dup(const mfloat8_t *ptr) +{ + return vld4q_dup_mf8(ptr); +} + +/* +** test_ld4_lane1: { target lp64 } +** ld4 {v0.b - v3.b}\[0\], \[x0\] +** ret +*/ +mfloat8x8x4_t test_ld4_lane1(const mfloat8_t *ptr, mfloat8x8x4_t a) +{ + return vld4_lane_mf8(ptr, a, 0); +} + +/* +** test_ld4_lane2: { target lp64 } +** ld4 {v0.b - v3.b}\[7\], \[x0\] +** ret +*/ +mfloat8x8x4_t test_ld4_lane2(const mfloat8_t *ptr, mfloat8x8x4_t a) +{ + return vld4_lane_mf8(ptr, a, 7); +} + +/* +** test_ld4q_lane1: { target lp64 } +** ld4 {v0.b - v3.b}\[0\], \[x0\] +** ret +*/ +mfloat8x16x4_t test_ld4q_lane1(const mfloat8_t *ptr, mfloat8x16x4_t a) +{ + return vld4q_lane_mf8(ptr, a, 0); +} + +/* +** test_ld4q_lane2: { target lp64 } +** ld4 {v0.b - v3.b}\[15\], \[x0\] +** ret +*/ +mfloat8x16x4_t test_ld4q_lane2(const mfloat8_t *ptr, mfloat8x16x4_t a) +{ + return vld4q_lane_mf8(ptr, a, 15); +} + +/* +** test_mov1: +** dup v0.8b, v1.b\[0\] +** ret +*/ +mfloat8x8_t test_mov1(mfloat8_t a, mfloat8_t b) +{ + return vmov_n_mf8(b); +} + +/* +** test_mov2: +** movi v0.2s, #?0 +** ret +*/ +mfloat8x8_t test_mov2() +{ + return vmov_n_mf8(((union { uint8_t x; mfloat8_t y; }) { 0 }).y); +} + +/* +** test_mov3: +** movi v0.8b, #?0xf +** ret +*/ +mfloat8x8_t test_mov3() +{ + return vmov_n_mf8(((union { uint8_t x; mfloat8_t y; }) { 0x0f }).y); +} + +/* +** test_movq1: +** dup v0.16b, v1.b\[0\] +** ret +*/ +mfloat8x16_t test_movq1(mfloat8_t a, mfloat8_t 
b) +{ + return vmovq_n_mf8(b); +} + +/* +** test_movq2: +** movi v0.4s, #?0 +** ret +*/ +mfloat8x16_t test_movq2() +{ + return vmovq_n_mf8(((union { uint8_t x; mfloat8_t y; }) { 0 }).y); +} + +/* +** test_movq3: +** movi v0.16b, #?0xf +** ret +*/ +mfloat8x16_t test_movq3() +{ + return vmovq_n_mf8(((union { uint8_t x; mfloat8_t y; }) { 0x0f }).y); +} + +/* +** test_rev16: +** rev16 v0.8b, v1.8b +** ret +*/ +mfloat8x8_t test_rev16(mfloat8_t a, mfloat8x8_t b) +{ + return vrev16_mf8(b); +} + +/* +** test_rev16q: +** rev16 v0.16b, v1.16b +** ret +*/ +mfloat8x16_t test_rev16q(mfloat8_t a, mfloat8x16_t b) +{ + return vrev16q_mf8(b); +} + +/* +** test_rev32: +** rev32 v0.8b, v1.8b +** ret +*/ +mfloat8x8_t test_rev32(mfloat8_t a, mfloat8x8_t b) +{ + return vrev32_mf8(b); +} + +/* +** test_rev32q: +** rev32 v0.16b, v1.16b +** ret +*/ +mfloat8x16_t test_rev32q(mfloat8_t a, mfloat8x16_t b) +{ + return vrev32q_mf8(b); +} + +/* +** test_rev64: +** rev64 v0.8b, v1.8b +** ret +*/ +mfloat8x8_t test_rev64(mfloat8_t a, mfloat8x8_t b) +{ + return vrev64_mf8(b); +} + +/* +** test_rev64q: +** rev64 v0.16b, v1.16b +** ret +*/ +mfloat8x16_t test_rev64q(mfloat8_t a, mfloat8x16_t b) +{ + return vrev64q_mf8(b); +} + +/* +** test_set_lane1: +** ins v0.b\[0\], v1.b\[0\] +** ret +*/ +mfloat8x8_t test_set_lane1(mfloat8x8_t a, mfloat8_t b) +{ + return vset_lane_mf8(b, a, 0); +} + +/* +** test_set_lane2: +** ins v0.b\[7\], v1.b\[0\] +** ret +*/ +mfloat8x8_t test_set_lane2(mfloat8x8_t a, mfloat8_t b) +{ + return vset_lane_mf8(b, a, 7); +} + +/* +** test_set_lane3: { target lp64 } +** ld1 {v0.b}\[3\], \[x0\] +** ret +*/ +mfloat8x8_t test_set_lane3(mfloat8x8_t a, const mfloat8_t *ptr) +{ + return vset_lane_mf8(*ptr, a, 3); +} + +/* +** test_set_lane4: +** ins v0.b\[6\], wzr +** ret +*/ +mfloat8x8_t test_set_lane4(mfloat8x8_t a) +{ + return vset_lane_mf8(((union { uint8_t x; mfloat8_t y; }) { 0 }).y, a, 6); +} + +/* +** test_setq_lane1: +** ins v0.b\[0\], v1.b\[0\] +** ret +*/ +mfloat8x16_t test_setq_lane1(mfloat8x16_t a, mfloat8_t b) +{ + return vsetq_lane_mf8(b, a, 0); +} + +/* +** test_setq_lane2: +** ins v0.b\[15\], v1.b\[0\] +** ret +*/ +mfloat8x16_t test_setq_lane2(mfloat8x16_t a, mfloat8_t b) +{ + return vsetq_lane_mf8(b, a, 15); +} + +/* +** test_setq_lane3: { target lp64 } +** ld1 {v0.b}\[9\], \[x0\] +** ret +*/ +mfloat8x16_t test_setq_lane3(mfloat8x16_t a, const mfloat8_t *ptr) +{ + return vsetq_lane_mf8(*ptr, a, 9); +} + +/* +** test_setq_lane4: +** ins v0.b\[14\], wzr +** ret +*/ +mfloat8x16_t test_setq_lane4(mfloat8x16_t a) +{ + return vsetq_lane_mf8(((union { uint8_t x; mfloat8_t y; }) { 0 }).y, a, 14); +} + +/* +** test_st1: { target { le && lp64 } } +** str d0, \[x0\] +** ret +*/ +/* +** test_st1: { target { be && lp64 } } +** st1 {v0.8b}, \[x0\] +** ret +*/ +void test_st1(mfloat8_t *ptr, mfloat8x8_t a) +{ + vst1_mf8(ptr, a); +} + +/* +** test_st1q: { target { le && lp64 } } +** str q0, \[x0\] +** ret +*/ +/* +** test_st1q: { target { be && lp64 } } +** st1 {v0.16b}, \[x0\] +** ret +*/ +void test_st1q(mfloat8_t *ptr, mfloat8x16_t a) +{ + vst1q_mf8(ptr, a); +} + +/* +** test_st1_lane1: { target lp64 } +** str b0, \[x0\] +** ret +*/ +void test_st1_lane1(mfloat8_t *ptr, mfloat8x8_t a) +{ + vst1_lane_mf8(ptr, a, 0); +} + +/* +** test_st1_lane2: { target lp64 } +** st1 {v0.b}\[7\], \[x0\] +** ret +*/ +void test_st1_lane2(mfloat8_t *ptr, mfloat8x8_t a) +{ + vst1_lane_mf8(ptr, a, 7); +} + +/* +** test_st1q_lane1: { target lp64 } +** str b0, \[x0\] +** ret +*/ +void test_st1q_lane1(mfloat8_t *ptr, mfloat8x16_t a) +{ + 
vst1q_lane_mf8(ptr, a, 0); +} + +/* +** test_st1q_lane2: { target lp64 } +** st1 {v0.b}\[15\], \[x0\] +** ret +*/ +void test_st1q_lane2(mfloat8_t *ptr, mfloat8x16_t a) +{ + vst1q_lane_mf8(ptr, a, 15); +} + +/* +** test_st1_x2: { target lp64 } +** st1 {v0.8b( - |, )v1.8b}, \[x0\] +** ret +*/ +void test_st1_x2(mfloat8_t *ptr, mfloat8x8x2_t a) +{ + vst1_mf8_x2(ptr, a); +} + +/* +** test_st1q_x2: { target lp64 } +** st1 {v0.16b( - |, )v1.16b}, \[x0\] +** ret +*/ +void test_st1q_x2(mfloat8_t *ptr, mfloat8x16x2_t a) +{ + vst1q_mf8_x2(ptr, a); +} + +/* +** test_st1_x3: { target lp64 } +** st1 {v0.8b - v2.8b}, \[x0\] +** ret +*/ +void test_st1_x3(mfloat8_t *ptr, mfloat8x8x3_t a) +{ + vst1_mf8_x3(ptr, a); +} + +/* +** test_st1q_x3: { target lp64 } +** st1 {v0.16b - v2.16b}, \[x0\] +** ret +*/ +void test_st1q_x3(mfloat8_t *ptr, mfloat8x16x3_t a) +{ + vst1q_mf8_x3(ptr, a); +} + +/* +** test_st1_x4: { target lp64 } +** st1 {v0.8b - v3.8b}, \[x0\] +** ret +*/ +void test_st1_x4(mfloat8_t *ptr, mfloat8x8x4_t a) +{ + vst1_mf8_x4(ptr, a); +} + +/* +** test_st1q_x4: { target lp64 } +** st1 {v0.16b - v3.16b}, \[x0\] +** ret +*/ +void test_st1q_x4(mfloat8_t *ptr, mfloat8x16x4_t a) +{ + vst1q_mf8_x4(ptr, a); +} + +/* +** test_st2: { target lp64 } +** st2 {v0.8b( - |, )v1.8b}, \[x0\] +** ret +*/ +void test_st2(mfloat8_t *ptr, mfloat8x8x2_t a) +{ + vst2_mf8(ptr, a); +} + +/* +** test_st2q: { target lp64 } +** st2 {v0.16b( - |, )v1.16b}, \[x0\] +** ret +*/ +void test_st2q(mfloat8_t *ptr, mfloat8x16x2_t a) +{ + vst2q_mf8(ptr, a); +} + +/* +** test_st2_lane1: { target lp64 } +** st2 {v0.b( - |, )v1.b}\[0\], \[x0\] +** ret +*/ +void test_st2_lane1(mfloat8_t *ptr, mfloat8x8x2_t a) +{ + vst2_lane_mf8(ptr, a, 0); +} + +/* +** test_st2_lane2: { target lp64 } +** st2 {v0.b( - |, )v1.b}\[7\], \[x0\] +** ret +*/ +void test_st2_lane2(mfloat8_t *ptr, mfloat8x8x2_t a) +{ + vst2_lane_mf8(ptr, a, 7); +} + +/* +** test_st2q_lane1: { target lp64 } +** st2 {v0.b( - |, )v1.b}\[0\], \[x0\] +** ret +*/ +void test_st2q_lane1(mfloat8_t *ptr, mfloat8x16x2_t a) +{ + vst2q_lane_mf8(ptr, a, 0); +} + +/* +** test_st2q_lane2: { target lp64 } +** st2 {v0.b( - |, )v1.b}\[15\], \[x0\] +** ret +*/ +void test_st2q_lane2(mfloat8_t *ptr, mfloat8x16x2_t a) +{ + vst2q_lane_mf8(ptr, a, 15); +} + +/* +** test_st3: { target lp64 } +** st3 {v0.8b - v2.8b}, \[x0\] +** ret +*/ +void test_st3(mfloat8_t *ptr, mfloat8x8x3_t a) +{ + vst3_mf8(ptr, a); +} + +/* +** test_st3q: { target lp64 } +** st3 {v0.16b - v2.16b}, \[x0\] +** ret +*/ +void test_st3q(mfloat8_t *ptr, mfloat8x16x3_t a) +{ + vst3q_mf8(ptr, a); +} + +/* +** test_st3_lane1: { target lp64 } +** st3 {v0.b - v2.b}\[0\], \[x0\] +** ret +*/ +void test_st3_lane1(mfloat8_t *ptr, mfloat8x8x3_t a) +{ + vst3_lane_mf8(ptr, a, 0); +} + +/* +** test_st3_lane2: { target lp64 } +** st3 {v0.b - v2.b}\[7\], \[x0\] +** ret +*/ +void test_st3_lane2(mfloat8_t *ptr, mfloat8x8x3_t a) +{ + vst3_lane_mf8(ptr, a, 7); +} + +/* +** test_st3q_lane1: { target lp64 } +** st3 {v0.b - v2.b}\[0\], \[x0\] +** ret +*/ +void test_st3q_lane1(mfloat8_t *ptr, mfloat8x16x3_t a) +{ + vst3q_lane_mf8(ptr, a, 0); +} + +/* +** test_st3q_lane2: { target lp64 } +** st3 {v0.b - v2.b}\[15\], \[x0\] +** ret +*/ +void test_st3q_lane2(mfloat8_t *ptr, mfloat8x16x3_t a) +{ + vst3q_lane_mf8(ptr, a, 15); +} + +/* +** test_st4: { target lp64 } +** st4 {v0.8b - v3.8b}, \[x0\] +** ret +*/ +void test_st4(mfloat8_t *ptr, mfloat8x8x4_t a) +{ + vst4_mf8(ptr, a); +} + +/* +** test_st4q: { target lp64 } +** st4 {v0.16b - v3.16b}, \[x0\] +** ret +*/ +void 
test_st4q(mfloat8_t *ptr, mfloat8x16x4_t a) +{ + vst4q_mf8(ptr, a); +} + +/* +** test_st4_lane1: { target lp64 } +** st4 {v0.b - v3.b}\[0\], \[x0\] +** ret +*/ +void test_st4_lane1(mfloat8_t *ptr, mfloat8x8x4_t a) +{ + vst4_lane_mf8(ptr, a, 0); +} + +/* +** test_st4_lane2: { target lp64 } +** st4 {v0.b - v3.b}\[7\], \[x0\] +** ret +*/ +void test_st4_lane2(mfloat8_t *ptr, mfloat8x8x4_t a) +{ + vst4_lane_mf8(ptr, a, 7); +} + +/* +** test_st4q_lane1: { target lp64 } +** st4 {v0.b - v3.b}\[0\], \[x0\] +** ret +*/ +void test_st4q_lane1(mfloat8_t *ptr, mfloat8x16x4_t a) +{ + vst4q_lane_mf8(ptr, a, 0); +} + +/* +** test_st4q_lane2: { target lp64 } +** st4 {v0.b - v3.b}\[15\], \[x0\] +** ret +*/ +void test_st4q_lane2(mfloat8_t *ptr, mfloat8x16x4_t a) +{ + vst4q_lane_mf8(ptr, a, 15); +} + +/* +** test_tbl1: +** fmov d([0-9]+), d0 +** tbl v0.8b, {v\1.16b}, v1.8b +** ret +*/ +mfloat8x8_t test_tbl1(mfloat8x8_t a, uint8x8_t b) +{ + return vtbl1_mf8(a, b); +} + +/* +** test_tbl2: +** uzp1 v([0-9]+).2d, v0.2d, v1.2d +** tbl v0.8b, {v\1.16b}, v2.8b +** ret +*/ +mfloat8x8_t test_tbl2(mfloat8x8x2_t a, uint8x8_t b) +{ + return vtbl2_mf8(a, b); +} + +/* +** test_tbl3: +** uzp1 v([0-9]+).2d, v0.2d, v1.2d +** fmov d([0-9]+), d2 +** tbl v0.8b, {v\1.16b( - |, )v\2.16b}, v3.8b +** ret +*/ +mfloat8x8_t test_tbl3(mfloat8x8x3_t a, uint8x8_t b) +{ + return vtbl3_mf8(a, b); +} + +/* +** test_tbl4: +** uzp1 v([0-9]+).2d, v0.2d, v1.2d +** uzp1 v([0-9]+).2d, v2.2d, v3.2d +** tbl v0.8b, {v\1.16b( - |, )v\2.16b}, v4.8b +** ret +*/ +mfloat8x8_t test_tbl4(mfloat8x8x4_t a, uint8x8_t b) +{ + return vtbl4_mf8(a, b); +} + +/* +** test_qtbl1: +** tbl v0.8b, {v0.16b}, v1.8b +** ret +*/ +mfloat8x8_t test_qtbl1(mfloat8x16_t a, uint8x8_t b) +{ + return vqtbl1_mf8(a, b); +} + +/* +** test_qtbl1q: +** tbl v0.16b, {v0.16b}, v1.16b +** ret +*/ +mfloat8x16_t test_qtbl1q(mfloat8x16_t a, uint8x16_t b) +{ + return vqtbl1q_mf8(a, b); +} + +/* +** test_qtbl2: +** tbl v0.8b, {v0.16b( - |, )v1.16b}, v2.8b +** ret +*/ +mfloat8x8_t test_qtbl2(mfloat8x16x2_t a, uint8x8_t b) +{ + return vqtbl2_mf8(a, b); +} + +/* +** test_qtbl2q: +** tbl v0.16b, {v0.16b( - |, )v1.16b}, v2.16b +** ret +*/ +mfloat8x16_t test_qtbl2q(mfloat8x16x2_t a, uint8x16_t b) +{ + return vqtbl2q_mf8(a, b); +} + +/* +** test_qtbl3: +** tbl v0.8b, {v0.16b - v2.16b}, v3.8b +** ret +*/ +mfloat8x8_t test_qtbl3(mfloat8x16x3_t a, uint8x8_t b) +{ + return vqtbl3_mf8(a, b); +} + +/* +** test_qtbl3q: +** tbl v0.16b, {v0.16b - v2.16b}, v3.16b +** ret +*/ +mfloat8x16_t test_qtbl3q(mfloat8x16x3_t a, uint8x16_t b) +{ + return vqtbl3q_mf8(a, b); +} + +/* +** test_qtbl4: +** tbl v0.8b, {v0.16b - v3.16b}, v4.8b +** ret +*/ +mfloat8x8_t test_qtbl4(mfloat8x16x4_t a, uint8x8_t b) +{ + return vqtbl4_mf8(a, b); +} + +/* +** test_qtbl4q: +** tbl v0.16b, {v0.16b - v3.16b}, v4.16b +** ret +*/ +mfloat8x16_t test_qtbl4q(mfloat8x16x4_t a, uint8x16_t b) +{ + return vqtbl4q_mf8(a, b); +} + +/* +** test_tbx1: +** fmov d([0-9]+), d1 +** tbl v[0-9]+.8b, {v\1.16b}, v2.8b +** ... +** cmh[is] [^\n]+ +** (bit|bif|bsl) [^\n]+ +** ret +*/ +mfloat8x8_t test_tbx1(mfloat8x8_t a, mfloat8x8_t b, uint8x8_t c) +{ + return vtbx1_mf8(a, b, c); +} + +/* +** test_tbx2: +** uzp1 v([0-9]+).2d, v1.2d, v2.2d +** tbx v[0-9]+.8b, {v\1.16b}, v3.8b +** ret +*/ +mfloat8x8_t test_tbx2(mfloat8x8_t a, mfloat8x8x2_t b, uint8x8_t c) +{ + return vtbx2_mf8(a, b, c); +} + +/* +** test_tbx3: +** uzp1 v([0-9]+).2d, v1.2d, v2.2d +** fmov d([0-9]+), d3 +** tbl v[0-9]+.8b, {v\1.16b( - |, )v\2.16b}, v4.8b +** ... 
+** cmh[is] [^\n]+ +** (bit|bif|bsl) [^\n]+ +** ret +*/ +mfloat8x8_t test_tbx3(mfloat8x8_t a, mfloat8x8x3_t b, uint8x8_t c) +{ + return vtbx3_mf8(a, b, c); +} + +/* +** test_tbx4: +** uzp1 v([0-9]+).2d, v1.2d, v2.2d +** uzp1 v([0-9]+).2d, v3.2d, v4.2d +** tbx v0.8b, {v\1.16b( - |, )v\2.16b}, v5.8b +** ret +*/ +mfloat8x8_t test_tbx4(mfloat8x8_t a, mfloat8x8x4_t b, uint8x8_t c) +{ + return vtbx4_mf8(a, b, c); +} + +/* +** test_qtbx1: +** tbx v0.8b, {v1.16b}, v2.8b +** ret +*/ +mfloat8x8_t test_qtbx1(mfloat8x8_t a, mfloat8x16_t b, uint8x8_t c) +{ + return vqtbx1_mf8(a, b, c); +} + +/* +** test_qtbx1q: +** tbx v0.16b, {v1.16b}, v2.16b +** ret +*/ +mfloat8x16_t test_qtbx1q(mfloat8x16_t a, mfloat8x16_t b, uint8x16_t c) +{ + return vqtbx1q_mf8(a, b, c); +} + +/* +** test_qtbx2: +** tbx v0.8b, {v1.16b( - |, )v2.16b}, v3.8b +** ret +*/ +mfloat8x8_t test_qtbx2(mfloat8x8_t a, mfloat8x16x2_t b, uint8x8_t c) +{ + return vqtbx2_mf8(a, b, c); +} + +/* +** test_qtbx2q: +** tbx v0.16b, {v1.16b( - |, )v2.16b}, v3.16b +** ret +*/ +mfloat8x16_t test_qtbx2q(mfloat8x16_t a, mfloat8x16x2_t b, uint8x16_t c) +{ + return vqtbx2q_mf8(a, b, c); +} + +/* +** test_qtbx3: +** tbx v0.8b, {v1.16b - v3.16b}, v4.8b +** ret +*/ +mfloat8x8_t test_qtbx3(mfloat8x8_t a, mfloat8x16x3_t b, uint8x8_t c) +{ + return vqtbx3_mf8(a, b, c); +} + +/* +** test_qtbx3q: +** tbx v0.16b, {v1.16b - v3.16b}, v4.16b +** ret +*/ +mfloat8x16_t test_qtbx3q(mfloat8x16_t a, mfloat8x16x3_t b, uint8x16_t c) +{ + return vqtbx3q_mf8(a, b, c); +} + +/* +** test_qtbx4: +** tbx v0.8b, {v1.16b - v4.16b}, v5.8b +** ret +*/ +mfloat8x8_t test_qtbx4(mfloat8x8_t a, mfloat8x16x4_t b, uint8x8_t c) +{ + return vqtbx4_mf8(a, b, c); +} + +/* +** test_qtbx4q: +** tbx v0.16b, {v1.16b - v4.16b}, v5.16b +** ret +*/ +mfloat8x16_t test_qtbx4q(mfloat8x16_t a, mfloat8x16x4_t b, uint8x16_t c) +{ + return vqtbx4q_mf8(a, b, c); +} + +/* +** test_trn: +** trn1 v0.8b, v2.8b, v3.8b +** trn2 v1.8b, v2.8b, v3.8b +** ret +*/ +mfloat8x8x2_t test_trn(mfloat8_t a, mfloat8_t b, mfloat8x8_t c, mfloat8x8_t d) +{ + return vtrn_mf8(c, d); +} + +/* +** test_trnq: +** trn1 v0.16b, v2.16b, v3.16b +** trn2 v1.16b, v2.16b, v3.16b +** ret +*/ +mfloat8x16x2_t test_trnq(mfloat8_t a, mfloat8_t b, + mfloat8x16_t c, mfloat8x16_t d) +{ + return vtrnq_mf8(c, d); +} + +/* +** test_trn1: +** trn1 v0.8b, v1.8b, v2.8b +** ret +*/ +mfloat8x8_t test_trn1(mfloat8_t a, mfloat8x8_t b, mfloat8x8_t c) +{ + return vtrn1_mf8(b, c); +} + +/* +** test_trn1q: +** trn1 v0.16b, v1.16b, v2.16b +** ret +*/ +mfloat8x16_t test_trn1q(mfloat8_t a, mfloat8x16_t b, mfloat8x16_t c) +{ + return vtrn1q_mf8(b, c); +} + +/* +** test_trn2: +** trn2 v0.8b, v1.8b, v2.8b +** ret +*/ +mfloat8x8_t test_trn2(mfloat8_t a, mfloat8x8_t b, mfloat8x8_t c) +{ + return vtrn2_mf8(b, c); +} + +/* +** test_trn2q: +** trn2 v0.16b, v1.16b, v2.16b +** ret +*/ +mfloat8x16_t test_trn2q(mfloat8_t a, mfloat8x16_t b, mfloat8x16_t c) +{ + return vtrn2q_mf8(b, c); +} + +/* +** test_uzp: +** uzp1 v0.8b, v2.8b, v3.8b +** uzp2 v1.8b, v2.8b, v3.8b +** ret +*/ +mfloat8x8x2_t test_uzp(mfloat8_t a, mfloat8_t b, mfloat8x8_t c, mfloat8x8_t d) +{ + return vuzp_mf8(c, d); +} + +/* +** test_uzpq: +** uzp1 v0.16b, v2.16b, v3.16b +** uzp2 v1.16b, v2.16b, v3.16b +** ret +*/ +mfloat8x16x2_t test_uzpq(mfloat8_t a, mfloat8_t b, + mfloat8x16_t c, mfloat8x16_t d) +{ + return vuzpq_mf8(c, d); +} + +/* +** test_uzp1: +** uzp1 v0.8b, v1.8b, v2.8b +** ret +*/ +mfloat8x8_t test_uzp1(mfloat8_t a, mfloat8x8_t b, mfloat8x8_t c) +{ + return vuzp1_mf8(b, c); +} + +/* +** test_uzp1q: +** uzp1 
v0.16b, v1.16b, v2.16b +** ret +*/ +mfloat8x16_t test_uzp1q(mfloat8_t a, mfloat8x16_t b, mfloat8x16_t c) +{ + return vuzp1q_mf8(b, c); +} + +/* +** test_uzp2: +** uzp2 v0.8b, v1.8b, v2.8b +** ret +*/ +mfloat8x8_t test_uzp2(mfloat8_t a, mfloat8x8_t b, mfloat8x8_t c) +{ + return vuzp2_mf8(b, c); +} + +/* +** test_uzp2q: +** uzp2 v0.16b, v1.16b, v2.16b +** ret +*/ +mfloat8x16_t test_uzp2q(mfloat8_t a, mfloat8x16_t b, mfloat8x16_t c) +{ + return vuzp2q_mf8(b, c); +} + +/* +** test_zip: +** zip1 v0.8b, v2.8b, v3.8b +** zip2 v1.8b, v2.8b, v3.8b +** ret +*/ +mfloat8x8x2_t test_zip(mfloat8_t a, mfloat8_t b, mfloat8x8_t c, mfloat8x8_t d) +{ + return vzip_mf8(c, d); +} + +/* +** test_zipq: +** zip1 v0.16b, v2.16b, v3.16b +** zip2 v1.16b, v2.16b, v3.16b +** ret +*/ +mfloat8x16x2_t test_zipq(mfloat8_t a, mfloat8_t b, + mfloat8x16_t c, mfloat8x16_t d) +{ + return vzipq_mf8(c, d); +} + +/* +** test_zip1: +** zip1 v0.8b, v1.8b, v2.8b +** ret +*/ +mfloat8x8_t test_zip1(mfloat8_t a, mfloat8x8_t b, mfloat8x8_t c) +{ + return vzip1_mf8(b, c); +} + +/* +** test_zip1q: +** zip1 v0.16b, v1.16b, v2.16b +** ret +*/ +mfloat8x16_t test_zip1q(mfloat8_t a, mfloat8x16_t b, mfloat8x16_t c) +{ + return vzip1q_mf8(b, c); +} + +/* +** test_zip2: +** zip2 v0.8b, v1.8b, v2.8b +** ret +*/ +mfloat8x8_t test_zip2(mfloat8_t a, mfloat8x8_t b, mfloat8x8_t c) +{ + return vzip2_mf8(b, c); +} + +/* +** test_zip2q: +** zip2 v0.16b, v1.16b, v2.16b +** ret +*/ +mfloat8x16_t test_zip2q(mfloat8_t a, mfloat8x16_t b, mfloat8x16_t c) +{ + return vzip2q_mf8(b, c); +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_2.c b/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_2.c new file mode 100644 index 0000000..0f923f2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/mf8_data_2.c @@ -0,0 +1,98 @@ +/* { dg-do assemble } */ +/* { dg-additional-options "-O -std=gnu23 --save-temps" } */ + +#include <arm_neon.h> + +void test(mfloat8x8_t x8, mfloat8x16_t x16, + mfloat8x8x2_t x8x2, mfloat8x16x2_t x16x2, + mfloat8x8x3_t x8x3, mfloat8x16x3_t x16x3, + mfloat8x8x4_t x8x4, mfloat8x16x4_t x16x4, + mfloat8_t *ptr, mfloat8_t scalar) +{ + vcopy_lane_mf8(x8, -1, x8, 0); /* { dg-error {passing -1 to argument 2 of 'vcopy_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vcopy_lane_mf8(x8, 8, x8, 0); /* { dg-error {passing 8 to argument 2 of 'vcopy_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vcopy_lane_mf8(x8, 0, x8, -1); /* { dg-error {passing -1 to argument 4 of 'vcopy_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vcopy_lane_mf8(x8, 0, x8, 8); /* { dg-error {passing 8 to argument 4 of 'vcopy_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vcopy_lane_mf8(x8, 100, x8, 100); /* { dg-error {passing 100 to argument 2 of 'vcopy_lane_mf8', which expects a value in the range \[0, 7\]} } */ + /* { dg-error {passing 100 to argument 4 of 'vcopy_lane_mf8', which expects a value in the range \[0, 7\]} "" { target *-*-* } .-1 } */ + + vcopy_laneq_mf8(x8, -1, x16, 0); /* { dg-error {passing -1 to argument 2 of 'vcopy_laneq_mf8', which expects a value in the range \[0, 7\]} } */ + vcopy_laneq_mf8(x8, 8, x16, 0); /* { dg-error {passing 8 to argument 2 of 'vcopy_laneq_mf8', which expects a value in the range \[0, 7\]} } */ + vcopy_laneq_mf8(x8, 0, x16, -1); /* { dg-error {passing -1 to argument 4 of 'vcopy_laneq_mf8', which expects a value in the range \[0, 15\]} } */ + vcopy_laneq_mf8(x8, 0, x16, 16); /* { dg-error {passing 16 to argument 4 of 'vcopy_laneq_mf8', which expects a value in the range 
\[0, 15\]} } */ + + vcopyq_lane_mf8(x16, -1, x8, 0); /* { dg-error {passing -1 to argument 2 of 'vcopyq_lane_mf8', which expects a value in the range \[0, 15\]} } */ + vcopyq_lane_mf8(x16, 16, x8, 0); /* { dg-error {passing 16 to argument 2 of 'vcopyq_lane_mf8', which expects a value in the range \[0, 15\]} } */ + vcopyq_lane_mf8(x16, 0, x8, -1); /* { dg-error {passing -1 to argument 4 of 'vcopyq_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vcopyq_lane_mf8(x16, 0, x8, 8); /* { dg-error {passing 8 to argument 4 of 'vcopyq_lane_mf8', which expects a value in the range \[0, 7\]} } */ + + vcopyq_laneq_mf8(x16, -1, x16, 0); /* { dg-error {passing -1 to argument 2 of 'vcopyq_laneq_mf8', which expects a value in the range \[0, 15\]} } */ + vcopyq_laneq_mf8(x16, 16, x16, 0); /* { dg-error {passing 16 to argument 2 of 'vcopyq_laneq_mf8', which expects a value in the range \[0, 15\]} } */ + vcopyq_laneq_mf8(x16, 0, x16, -1); /* { dg-error {passing -1 to argument 4 of 'vcopyq_laneq_mf8', which expects a value in the range \[0, 15\]} } */ + vcopyq_laneq_mf8(x16, 0, x16, 16); /* { dg-error {passing 16 to argument 4 of 'vcopyq_laneq_mf8', which expects a value in the range \[0, 15\]} } */ + + vdup_lane_mf8(x8, -1); /* { dg-error {passing -1 to argument 2 of 'vdup_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vdup_lane_mf8(x8, 8); /* { dg-error {passing 8 to argument 2 of 'vdup_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vdup_laneq_mf8(x16, -1); /* { dg-error {passing -1 to argument 2 of 'vdup_laneq_mf8', which expects a value in the range \[0, 15\]} } */ + vdup_laneq_mf8(x16, 16); /* { dg-error {passing 16 to argument 2 of 'vdup_laneq_mf8', which expects a value in the range \[0, 15\]} } */ + + vdupq_lane_mf8(x8, -1); /* { dg-error {passing -1 to argument 2 of 'vdupq_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vdupq_lane_mf8(x8, 8); /* { dg-error {passing 8 to argument 2 of 'vdupq_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vdupq_laneq_mf8(x16, -1); /* { dg-error {passing -1 to argument 2 of 'vdupq_laneq_mf8', which expects a value in the range \[0, 15\]} } */ + vdupq_laneq_mf8(x16, 16); /* { dg-error {passing 16 to argument 2 of 'vdupq_laneq_mf8', which expects a value in the range \[0, 15\]} } */ + + vdupb_lane_mf8(x8, -1); /* { dg-error {passing -1 to argument 2 of 'vdupb_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vdupb_lane_mf8(x8, 8); /* { dg-error {passing 8 to argument 2 of 'vdupb_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vdupb_laneq_mf8(x16, -1); /* { dg-error {passing -1 to argument 2 of 'vdupb_laneq_mf8', which expects a value in the range \[0, 15\]} } */ + vdupb_laneq_mf8(x16, 16); /* { dg-error {passing 16 to argument 2 of 'vdupb_laneq_mf8', which expects a value in the range \[0, 15\]} } */ + + vext_mf8(x8, x8, -1); /* { dg-error {passing -1 to argument 3 of 'vext_mf8', which expects a value in the range \[0, 7\]} } */ + vext_mf8(x8, x8, 8); /* { dg-error {passing 8 to argument 3 of 'vext_mf8', which expects a value in the range \[0, 7\]} } */ + vextq_mf8(x16, x16, -1); /* { dg-error {passing -1 to argument 3 of 'vextq_mf8', which expects a value in the range \[0, 15\]} } */ + vextq_mf8(x16, x16, 16); /* { dg-error {passing 16 to argument 3 of 'vextq_mf8', which expects a value in the range \[0, 15\]} } */ + + vld1_lane_mf8(ptr, x8, -1); /* { dg-error {passing -1 to argument 3 of 'vld1_lane_mf8', which expects a value in the range \[0, 7\]} } */ + 
vld1_lane_mf8(ptr, x8, 8); /* { dg-error {passing 8 to argument 3 of 'vld1_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vld1q_lane_mf8(ptr, x16, -1); /* { dg-error {passing -1 to argument 3 of 'vld1q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + vld1q_lane_mf8(ptr, x16, 16); /* { dg-error {passing 16 to argument 3 of 'vld1q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + + vld2_lane_mf8(ptr, x8x2, -1); /* { dg-error {passing -1 to argument 3 of 'vld2_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vld2_lane_mf8(ptr, x8x2, 8); /* { dg-error {passing 8 to argument 3 of 'vld2_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vld2q_lane_mf8(ptr, x16x2, -1); /* { dg-error {passing -1 to argument 3 of 'vld2q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + vld2q_lane_mf8(ptr, x16x2, 16); /* { dg-error {passing 16 to argument 3 of 'vld2q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + + vld3_lane_mf8(ptr, x8x3, -1); /* { dg-error {passing -1 to argument 3 of 'vld3_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vld3_lane_mf8(ptr, x8x3, 8); /* { dg-error {passing 8 to argument 3 of 'vld3_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vld3q_lane_mf8(ptr, x16x3, -1); /* { dg-error {passing -1 to argument 3 of 'vld3q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + vld3q_lane_mf8(ptr, x16x3, 16); /* { dg-error {passing 16 to argument 3 of 'vld3q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + + vld4_lane_mf8(ptr, x8x4, -1); /* { dg-error {passing -1 to argument 3 of 'vld4_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vld4_lane_mf8(ptr, x8x4, 8); /* { dg-error {passing 8 to argument 3 of 'vld4_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vld4q_lane_mf8(ptr, x16x4, -1); /* { dg-error {passing -1 to argument 3 of 'vld4q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + vld4q_lane_mf8(ptr, x16x4, 16); /* { dg-error {passing 16 to argument 3 of 'vld4q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + + vset_lane_mf8(scalar, x8, -1); /* { dg-error {passing -1 to argument 3 of 'vset_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vset_lane_mf8(scalar, x8, 8); /* { dg-error {passing 8 to argument 3 of 'vset_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vsetq_lane_mf8(scalar, x16, -1); /* { dg-error {passing -1 to argument 3 of 'vsetq_lane_mf8', which expects a value in the range \[0, 15\]} } */ + vsetq_lane_mf8(scalar, x16, 16); /* { dg-error {passing 16 to argument 3 of 'vsetq_lane_mf8', which expects a value in the range \[0, 15\]} } */ + + vst1_lane_mf8(ptr, x8, -1); /* { dg-error {passing -1 to argument 3 of 'vst1_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vst1_lane_mf8(ptr, x8, 8); /* { dg-error {passing 8 to argument 3 of 'vst1_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vst1q_lane_mf8(ptr, x16, -1); /* { dg-error {passing -1 to argument 3 of 'vst1q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + vst1q_lane_mf8(ptr, x16, 16); /* { dg-error {passing 16 to argument 3 of 'vst1q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + + vst2_lane_mf8(ptr, x8x2, -1); /* { dg-error {passing -1 to argument 3 of 'vst2_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vst2_lane_mf8(ptr, x8x2, 8); /* { dg-error {passing 8 to argument 3 of 'vst2_lane_mf8', which expects a 
value in the range \[0, 7\]} } */ + vst2q_lane_mf8(ptr, x16x2, -1); /* { dg-error {passing -1 to argument 3 of 'vst2q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + vst2q_lane_mf8(ptr, x16x2, 16); /* { dg-error {passing 16 to argument 3 of 'vst2q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + + vst3_lane_mf8(ptr, x8x3, -1); /* { dg-error {passing -1 to argument 3 of 'vst3_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vst3_lane_mf8(ptr, x8x3, 8); /* { dg-error {passing 8 to argument 3 of 'vst3_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vst3q_lane_mf8(ptr, x16x3, -1); /* { dg-error {passing -1 to argument 3 of 'vst3q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + vst3q_lane_mf8(ptr, x16x3, 16); /* { dg-error {passing 16 to argument 3 of 'vst3q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + + vst4_lane_mf8(ptr, x8x4, -1); /* { dg-error {passing -1 to argument 3 of 'vst4_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vst4_lane_mf8(ptr, x8x4, 8); /* { dg-error {passing 8 to argument 3 of 'vst4_lane_mf8', which expects a value in the range \[0, 7\]} } */ + vst4q_lane_mf8(ptr, x16x4, -1); /* { dg-error {passing -1 to argument 3 of 'vst4q_lane_mf8', which expects a value in the range \[0, 15\]} } */ + vst4q_lane_mf8(ptr, x16x4, 16); /* { dg-error {passing 16 to argument 3 of 'vst4q_lane_mf8', which expects a value in the range \[0, 15\]} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c index 596b9ee..2db3c3c 100644 --- a/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c +++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c @@ -6,6 +6,92 @@ extern void abort (void); +mfloat8x8_t __attribute__ ((noinline)) +wrap_vdup_lane_mf8_0 (mfloat8x8_t a) +{ + return vdup_lane_mf8 (a, 0); +} + +mfloat8x8_t __attribute__ ((noinline)) +wrap_vdup_lane_mf8_1 (mfloat8x8_t a) +{ + return vdup_lane_mf8 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdup_lane_mf8 () +{ + mfloat8_t m; + uint8_t n = 11; + mfloat8x8_t a; + mfloat8x8_t b; + int i; + /* Only two first cases are interesting. */ + mfloat8_t c[8]; + mfloat8_t d[8]; + + __builtin_memcpy(&m, &n, 1); + b = vdup_n_mf8 (m); + vst1_mf8 (d, b); + + a = vld1_mf8 (c); + b = wrap_vdup_lane_mf8_0 (a); + vst1_mf8 (d, b); + for (i = 0; i < 8; i++) + if (__builtin_memcmp (&c[0], &d[i], 1) != 0) + return 1; + + b = wrap_vdup_lane_mf8_1 (a); + vst1_mf8 (d, b); + for (i = 0; i < 8; i++) + if (__builtin_memcmp (&c[1], &d[i], 1) != 0) + return 1; + return 0; +} + +mfloat8x16_t __attribute__ ((noinline)) +wrap_vdupq_lane_mf8_0 (mfloat8x8_t a) +{ + return vdupq_lane_mf8 (a, 0); +} + +mfloat8x16_t __attribute__ ((noinline)) +wrap_vdupq_lane_mf8_1 (mfloat8x8_t a) +{ + return vdupq_lane_mf8 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdupq_lane_mf8 () +{ + mfloat8_t m; + uint8_t n = 11; + mfloat8x8_t a; + mfloat8x16_t b; + int i; + /* Only two first cases are interesting. 
*/ + mfloat8_t c[8]; + mfloat8_t d[16]; + + __builtin_memcpy(&m, &n, 1); + b = vdupq_n_mf8 (m); + vst1q_mf8 (d, b); + + a = vld1_mf8 (c); + b = wrap_vdupq_lane_mf8_0 (a); + vst1q_mf8 (d, b); + for (i = 0; i < 16; i++) + if (__builtin_memcmp (&c[0], &d[i], 1) != 0) + return 1; + + b = wrap_vdupq_lane_mf8_1 (a); + vst1q_mf8 (d, b); + for (i = 0; i < 16; i++) + if (__builtin_memcmp (&c[1], &d[i], 1) != 0) + return 1; + return 0; +} + float32x2_t __attribute__ ((noinline)) wrap_vdup_lane_f32_0 (float32x2_t a) { @@ -350,7 +436,10 @@ test_vdupq_lane_s64 () int main () { - + if (test_vdup_lane_mf8 ()) + abort (); + if (test_vdupq_lane_mf8 ()) + abort (); if (test_vdup_lane_f32 ()) abort (); if (test_vdup_lane_s8 ()) @@ -376,12 +465,12 @@ main () } /* Asm check for test_vdup_lane_s8. */ -/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ -/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 2 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 2 } } */ /* Asm check for test_vdupq_lane_s8. */ -/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ -/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 2 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 2 } } */ /* Asm check for test_vdup_lane_s16. */ /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c index 16f4808..e9b4cdd 100644 --- a/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c +++ b/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c @@ -11,6 +11,45 @@ extern void abort (void); +mfloat8_t __attribute__ ((noinline)) +wrap_vdupb_lane_mf8_0 (mfloat8x8_t dummy, mfloat8x8_t a) +{ + mfloat8_t result = vdupb_lane_mf8 (a, 0); + force_simd (result); + return result; +} + +mfloat8_t __attribute__ ((noinline)) +wrap_vdupb_lane_mf8_1 (mfloat8x8_t a) +{ + mfloat8_t result = vdupb_lane_mf8 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vdupb_lane_mf8 () +{ + mfloat8_t m; + uint8_t n = 11; + mfloat8x8_t a; + mfloat8_t b; + mfloat8_t c[8]; + + __builtin_memcpy(&m, &n, 1); + a = vdup_n_mf8 (m); + vst1_mf8 (c, a); + + b = wrap_vdupb_lane_mf8_0 (a, a); + if (__builtin_memcmp (&c[0], &b, 1) != 0) + return 1; + b = wrap_vdupb_lane_mf8_1 (a); + if (__builtin_memcmp (&c[1], &b, 1) != 0) + return 1; + + return 0; +} + float32_t __attribute__ ((noinline)) wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a) { @@ -300,6 +339,8 @@ test_vdupd_lane_s64 () int main () { + if (test_vdupb_lane_mf8 ()) + abort (); if (test_vdups_lane_f32 ()) abort (); if (test_vdupd_lane_f64 ()) @@ -323,9 +364,9 @@ main () return 0; } -/* Asm check for vdupb_lane_s8, vdupb_lane_u8. */ +/* Asm check for vdupb_lane_s8, vdupb_lane_u8, and vdupb_lane_mf8. */ /* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */ -/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */ +/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 3 } } */ /* Asm check for vduph_lane_h16, vduph_lane_h16. 
*/ /* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c index 99ac887..bac061b 100644 --- a/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c +++ b/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c @@ -6,6 +6,48 @@ extern void abort (void); +mfloat8x8_t __attribute__ ((noinline)) +wrap_vdup_n_mf8 (mfloat8_t a) +{ + return vdup_n_mf8 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_mf8 (mfloat8_t a) +{ + mfloat8x8_t b; + mfloat8_t c[8]; + int i; + + b = wrap_vdup_n_mf8 (a); + vst1_mf8 (c, b); + for (i = 0; i < 8; i++) + if (__builtin_memcmp (&a, &c[i], 1) != 0) + return 1; + return 0; +} + +mfloat8x16_t __attribute__ ((noinline)) +wrap_vdupq_n_mf8 (mfloat8_t a) +{ + return vdupq_n_mf8 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_mf8 (mfloat8_t a) +{ + mfloat8x16_t b; + mfloat8_t c[16]; + int i; + + b = wrap_vdupq_n_mf8 (a); + vst1q_mf8 (c, b); + for (i = 0; i < 16; i++) + if (__builtin_memcmp (&a, &c[i], 1) != 0) + return 1; + return 0; +} + float32x2_t __attribute__ ((noinline)) wrap_vdup_n_f32 (float32_t a) { @@ -537,6 +579,16 @@ test_vdupq_n_u64 () int main () { + mfloat8_t a, c; + uint8_t b = 11; + uint8_t d = 12; + __builtin_memcpy(&a, &b, 1); + __builtin_memcpy(&c, &d, 1); + + if (test_vdup_n_mf8(a)) + abort (); + if (test_vdupq_n_mf8(c)) + abort (); if (test_vdup_n_f32 ()) abort (); if (test_vdup_n_f64 ()) @@ -591,12 +643,16 @@ main () /* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64. Cannot force floating point value in general purpose regester. */ -/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8. */ -/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */ +/* Asm check for test_vdup_n_mf8, test_vdup_n_p8, test_vdup_n_s8, + test_vdup_n_u8. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 5 } } */ /* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8. */ /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */ +/* Asm check for test_vdupq_n_mf8. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ + /* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16. 
*/ /* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect_copy_lane_1.c b/gcc/testsuite/gcc.target/aarch64/vect_copy_lane_1.c index 811dc67..6053dfa 100644 --- a/gcc/testsuite/gcc.target/aarch64/vect_copy_lane_1.c +++ b/gcc/testsuite/gcc.target/aarch64/vect_copy_lane_1.c @@ -14,7 +14,8 @@ test_copy##Q1##_lane##Q2##_##SUFFIX (TYPE1 a, TYPE2 b) \ BUILD_TEST (poly8x8_t, poly8x8_t, , , p8, 7, 6) BUILD_TEST (int8x8_t, int8x8_t, , , s8, 7, 6) BUILD_TEST (uint8x8_t, uint8x8_t, , , u8, 7, 6) -/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[7\\\], v1.b\\\[6\\\]" 3 } } */ +BUILD_TEST (mfloat8x8_t, mfloat8x8_t, , , mf8, 7, 6) +/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[7\\\], v1.b\\\[6\\\]" 4 } } */ BUILD_TEST (poly16x4_t, poly16x4_t, , , p16, 3, 2) BUILD_TEST (int16x4_t, int16x4_t, , , s16, 3, 2) BUILD_TEST (uint16x4_t, uint16x4_t, , , u16, 3, 2) @@ -33,7 +34,8 @@ BUILD_TEST (float64x1_t, float64x1_t, , , f64, 0, 0) BUILD_TEST (poly8x8_t, poly8x16_t, , q, p8, 7, 15) BUILD_TEST (int8x8_t, int8x16_t, , q, s8, 7, 15) BUILD_TEST (uint8x8_t, uint8x16_t, , q, u8, 7, 15) -/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[7\\\], v1.b\\\[15\\\]" 3 } } */ +BUILD_TEST (mfloat8x8_t, mfloat8x16_t, , q, mf8, 7, 15) +/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[7\\\], v1.b\\\[15\\\]" 4 } } */ BUILD_TEST (poly16x4_t, poly16x8_t, , q, p16, 3, 7) BUILD_TEST (int16x4_t, int16x8_t, , q, s16, 3, 7) BUILD_TEST (uint16x4_t, uint16x8_t, , q, u16, 3, 7) @@ -51,7 +53,8 @@ BUILD_TEST (uint64x1_t, uint64x2_t, , q, u64, 0, 1) BUILD_TEST (poly8x16_t, poly8x8_t, q, , p8, 15, 7) BUILD_TEST (int8x16_t, int8x8_t, q, , s8, 15, 7) BUILD_TEST (uint8x16_t, uint8x8_t, q, , u8, 15, 7) -/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[15\\\], v1.b\\\[7\\\]" 3 } } */ +BUILD_TEST (mfloat8x16_t, mfloat8x8_t, q, , mf8, 15, 7) +/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[15\\\], v1.b\\\[7\\\]" 4 } } */ BUILD_TEST (poly16x8_t, poly16x4_t, q, , p16, 7, 3) BUILD_TEST (int16x8_t, int16x4_t, q, , s16, 7, 3) BUILD_TEST (uint16x8_t, uint16x4_t, q, , u16, 7, 3) @@ -70,7 +73,8 @@ BUILD_TEST (uint64x2_t, uint64x1_t, q, , u64, 1, 0) BUILD_TEST (poly8x16_t, poly8x16_t, q, q, p8, 14, 15) BUILD_TEST (int8x16_t, int8x16_t, q, q, s8, 14, 15) BUILD_TEST (uint8x16_t, uint8x16_t, q, q, u8, 14, 15) -/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[14\\\], v1.b\\\[15\\\]" 3 } } */ +BUILD_TEST (mfloat8x16_t, mfloat8x16_t, q, q, mf8, 14, 15) +/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[14\\\], v1.b\\\[15\\\]" 4 } } */ BUILD_TEST (poly16x8_t, poly16x8_t, q, q, p16, 6, 7) BUILD_TEST (int16x8_t, int16x8_t, q, q, s16, 6, 7) BUILD_TEST (uint16x8_t, uint16x8_t, q, q, u16, 6, 7) |
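As an illustrative aside (not part of the patch): the mf8 data-movement intrinsics exercised by the tests above can only be driven through bit copies, since mfloat8_t has no literals, conversions or arithmetic. A minimal runtime-style check in the same spirit as the vdup tests, assuming only intrinsics added here (vld1_mf8, vst1_mf8, vdup_lane_mf8) and default compile options, might look like:

#include <arm_neon.h>

int
check_vdup_lane_mf8 (void)
{
  uint8_t bytes[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  mfloat8_t lanes[8];
  mfloat8_t out[8];
  int i;

  /* Reinterpret the bytes as mfloat8_t lane values; there is no direct
     way to write an mfloat8_t constant.  */
  __builtin_memcpy (lanes, bytes, 8);

  /* Load a vector, broadcast lane 3, and store the result.  */
  mfloat8x8_t v = vld1_mf8 (lanes);
  mfloat8x8_t dup = vdup_lane_mf8 (v, 3);
  vst1_mf8 (out, dup);

  /* Every output lane should now carry the bit pattern of input lane 3.  */
  for (i = 0; i < 8; i++)
    if (__builtin_memcmp (&lanes[3], &out[i], 1) != 0)
      return 1;
  return 0;
}

The lane argument to vdup_lane_mf8 must be an integer constant expression in [0, 7]; out-of-range values are rejected with the diagnostics checked in mf8_data_2.c above.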