diff options
author | Martin Liska <mliska@suse.cz> | 2021-08-06 12:38:10 +0200 |
---|---|---|
committer | Martin Liska <mliska@suse.cz> | 2021-08-06 12:38:10 +0200 |
commit | 01c909e1a5fca4988431a328454e9d8c0eea9ef6 (patch) | |
tree | 0cb8cd9cc4879749dc330119198ebe0d109767e5 | |
parent | f182597d273fe81ffae6dfece17fecadd01842f7 (diff) | |
parent | 3c94db20be9af3cb0376292e2d4672b515558231 (diff) | |
download | gcc-01c909e1a5fca4988431a328454e9d8c0eea9ef6.zip gcc-01c909e1a5fca4988431a328454e9d8c0eea9ef6.tar.gz gcc-01c909e1a5fca4988431a328454e9d8c0eea9ef6.tar.bz2 |
Merge branch 'master' into devel/sphinx
43 files changed, 2217 insertions, 569 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d888dc5..6b7a77d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,99 @@ +2021-08-05 H.J. Lu <hjl.tools@gmail.com> + + PR target/99744 + * config/i386/i386.c (ix86_can_inline_p): Ignore MASK_80387 if + callee only uses GPRs. + * config/i386/ia32intrin.h: Revert commit 5463cee2770. + * config/i386/serializeintrin.h: Revert commit 71958f740f1. + * config/i386/x86gprintrin.h: Add + #pragma GCC target("general-regs-only") and #pragma GCC pop_options + to disable non-GPR ISAs. + +2021-08-05 Richard Sandiford <richard.sandiford@arm.com> + + PR middle-end/101787 + * doc/md.texi (cond_ashl, cond_ashr, cond_lshr): Document. + +2021-08-05 Richard Sandiford <richard.sandiford@arm.com> + + * tree-vectorizer.h (vect_is_store_elt_extraction, vect_is_reduction) + (vect_reduc_type, vect_embedded_comparison_type, vect_comparison_type) + (vect_is_extending_load, vect_is_integer_truncation): New functions, + moved from aarch64.c but given different names. + * config/aarch64/aarch64.c (aarch64_is_store_elt_extraction) + (aarch64_is_reduction, aarch64_reduc_type) + (aarch64_embedded_comparison_type, aarch64_comparison_type) + (aarch64_extending_load_p, aarch64_integer_truncation_p): Delete + in favor of the above. Update callers accordingly. + +2021-08-05 Richard Earnshaw <rearnsha@arm.com> + + PR target/101723 + * config/arm/arm-cpus.in (generic-armv7-a): Add quirk to suppress + writing .cpu directive in asm output. + * config/arm/arm.c (arm_identify_fpu_from_isa): New variable. + (arm_last_printed_arch_string): Delete. + (arm_last-printed_fpu_string): Delete. + (arm_configure_build_target): If use of floating-point/SIMD is + disabled, remove all fp/simd related features from the target ISA. + (last_arm_targ_options): New variable. + (arm_print_asm_arch_directives): Add new parameters. Change order + of emitted directives and handle all cases here. + (arm_file_start): Always call arm_print_asm_arch_directives, move + all generation of .arch/.arch_extension here. + (arm_file_end): Call arm_print_asm_arch. + (arm_declare_function_name): Call arm_print_asm_arch_directives + instead of printing .arch/.fpu directives directly. + +2021-08-05 Richard Earnshaw <rearnsha@arm.com> + + * config/arm/arm.c (arm_configure_build_target): Don't call + arm_option_reconfigure_globals. + (arm_option_restore): Call arm_option_reconfigure_globals after + reconfiguring the target. + * config/arm/arm-c.c (arm_pragma_target_parse): Likewise. + +2021-08-05 Richard Earnshaw <rearnsha@arm.com> + + * config/arm/arm.c (arm_configure_build_target): Ensure the target's + arch_name is always set. + +2021-08-05 Jonathan Wright <jonathan.wright@arm.com> + + * config/aarch64/aarch64.c: Traverse RTL tree to prevent cost + of vec_select high-half from being added into Neon subtract + cost. + +2021-08-05 Jonathan Wright <jonathan.wright@arm.com> + + * config/aarch64/aarch64.c: Traverse RTL tree to prevent cost + of vec_select high-half from being added into Neon add cost. + +2021-08-05 Kewen Lin <linkw@linux.ibm.com> + + * cfgloop.h (loops_list::loops_list): Add one optional argument + root and adjust accordingly, update loop tree walking and factor + out to ... + * cfgloop.c (loops_list::walk_loop_tree): ... this. New function. + +2021-08-05 Eric Botcazou <ebotcazou@gcc.gnu.org> + + PR tree-optimization/101626 + * tree-sra.c (propagate_subaccesses_from_rhs): Do not set the + reverse scalar storage order on a pointer or vector component. + +2021-08-05 liuhongt <hongtao.liu@intel.com> + + * config/i386/sse.md (cond_<code><mode>): New expander. + +2021-08-05 liuhongt <hongtao.liu@intel.com> + + * config/i386/sse.md (cond_<code><mode>): New expander. + +2021-08-05 liuhongt <hongtao.liu@intel.com> + + * config/i386/sse.md (cond_<code><mode>): New expander. + 2021-08-04 David Malcolm <dmalcolm@redhat.com> PR analyzer/101570 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 6168f46..891ccf6 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20210805 +20210806 diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 92d22d1..d24bfdb 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -1489,7 +1489,8 @@ static tree c_parser_std_attribute_specifier_sequence (c_parser *); static void c_parser_external_declaration (c_parser *); static void c_parser_asm_definition (c_parser *); static void c_parser_declaration_or_fndef (c_parser *, bool, bool, bool, - bool, bool, tree *, vec<c_token>, + bool, bool, tree * = NULL, + vec<c_token> * = NULL, bool have_attrs = false, tree attrs = NULL, struct oacc_routine_data * = NULL, @@ -1774,13 +1775,12 @@ c_parser_external_declaration (c_parser *parser) an @interface or @protocol with prefix attributes). We can only tell which after parsing the declaration specifiers, if any, and the first declarator. */ - c_parser_declaration_or_fndef (parser, true, true, true, false, true, - NULL, vNULL); + c_parser_declaration_or_fndef (parser, true, true, true, false, true); break; } } -static void c_finish_omp_declare_simd (c_parser *, tree, tree, vec<c_token>); +static void c_finish_omp_declare_simd (c_parser *, tree, tree, vec<c_token> *); static void c_finish_oacc_routine (struct oacc_routine_data *, tree, bool); /* Build and add a DEBUG_BEGIN_STMT statement with location LOC. */ @@ -1890,11 +1890,15 @@ static void c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, bool static_assert_ok, bool empty_ok, bool nested, bool start_attr_ok, - tree *objc_foreach_object_declaration, - vec<c_token> omp_declare_simd_clauses, - bool have_attrs, tree attrs, - struct oacc_routine_data *oacc_routine_data, - bool *fallthru_attr_p) + tree *objc_foreach_object_declaration + /* = NULL */, + vec<c_token> *omp_declare_simd_clauses + /* = NULL */, + bool have_attrs /* = false */, + tree attrs /* = NULL_TREE */, + struct oacc_routine_data *oacc_routine_data + /* = NULL */, + bool *fallthru_attr_p /* = NULL */) { struct c_declspecs *specs; tree prefix_attrs; @@ -2150,7 +2154,7 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, C_DTR_NORMAL, &dummy); if (declarator == NULL) { - if (omp_declare_simd_clauses.exists ()) + if (omp_declare_simd_clauses) c_finish_omp_declare_simd (parser, NULL_TREE, NULL_TREE, omp_declare_simd_clauses); if (oacc_routine_data) @@ -2250,7 +2254,7 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, chainon (postfix_attrs, all_prefix_attrs)); if (!d) d = error_mark_node; - if (omp_declare_simd_clauses.exists ()) + if (omp_declare_simd_clauses) c_finish_omp_declare_simd (parser, d, NULL_TREE, omp_declare_simd_clauses); } @@ -2262,7 +2266,7 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, chainon (postfix_attrs, all_prefix_attrs)); if (!d) d = error_mark_node; - if (omp_declare_simd_clauses.exists ()) + if (omp_declare_simd_clauses) c_finish_omp_declare_simd (parser, d, NULL_TREE, omp_declare_simd_clauses); init_loc = c_parser_peek_token (parser)->location; @@ -2342,7 +2346,7 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, warn_parm_array_mismatch (lastloc, d, parms); } } - if (omp_declare_simd_clauses.exists ()) + if (omp_declare_simd_clauses) { tree parms = NULL_TREE; if (d && TREE_CODE (d) == FUNCTION_DECL) @@ -2496,9 +2500,9 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, while (c_parser_next_token_is_not (parser, CPP_EOF) && c_parser_next_token_is_not (parser, CPP_OPEN_BRACE)) c_parser_declaration_or_fndef (parser, false, false, false, - true, false, NULL, vNULL); + true, false); store_parm_decls (); - if (omp_declare_simd_clauses.exists ()) + if (omp_declare_simd_clauses) c_finish_omp_declare_simd (parser, current_function_decl, NULL_TREE, omp_declare_simd_clauses); if (oacc_routine_data) @@ -5699,7 +5703,7 @@ c_parser_compound_statement_nostart (c_parser *parser) bool fallthru_attr_p = false; c_parser_declaration_or_fndef (parser, true, !have_std_attrs, true, true, true, NULL, - vNULL, have_std_attrs, std_attrs, + NULL, have_std_attrs, std_attrs, NULL, &fallthru_attr_p); if (last_stmt && !fallthru_attr_p) @@ -5731,7 +5735,7 @@ c_parser_compound_statement_nostart (c_parser *parser) last_label = false; mark_valid_location_for_stdc_pragma (false); c_parser_declaration_or_fndef (parser, true, true, true, true, - true, NULL, vNULL); + true); /* Following the old parser, __extension__ does not disable this diagnostic. */ restore_extension_diagnostics (ext); @@ -6782,7 +6786,7 @@ c_parser_for_statement (c_parser *parser, bool ivdep, unsigned short unroll, || c_parser_nth_token_starts_std_attributes (parser, 1)) { c_parser_declaration_or_fndef (parser, true, true, true, true, true, - &object_expression, vNULL); + &object_expression); parser->objc_could_be_foreach_context = false; if (c_parser_next_token_is_keyword (parser, RID_IN)) @@ -6813,7 +6817,7 @@ c_parser_for_statement (c_parser *parser, bool ivdep, unsigned short unroll, ext = disable_extension_diagnostics (); c_parser_consume_token (parser); c_parser_declaration_or_fndef (parser, true, true, true, true, - true, &object_expression, vNULL); + true, &object_expression); parser->objc_could_be_foreach_context = false; restore_extension_diagnostics (ext); @@ -11277,7 +11281,7 @@ c_parser_objc_methodprotolist (c_parser *parser) } else c_parser_declaration_or_fndef (parser, false, false, true, - false, true, NULL, vNULL); + false, true); break; } } @@ -17281,12 +17285,12 @@ c_parser_oacc_routine (c_parser *parser, enum pragma_context context) while (c_parser_next_token_is (parser, CPP_KEYWORD) && c_parser_peek_token (parser)->keyword == RID_EXTENSION); c_parser_declaration_or_fndef (parser, true, true, true, false, true, - NULL, vNULL, false, NULL, &data); + NULL, NULL, false, NULL, &data); restore_extension_diagnostics (ext); } else c_parser_declaration_or_fndef (parser, true, true, true, false, true, - NULL, vNULL, false, NULL, &data); + NULL, NULL, false, NULL, &data); } } @@ -18393,8 +18397,7 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code, vec_safe_push (for_block, c_begin_compound_stmt (true)); this_pre_body = push_stmt_list (); c_in_omp_for = true; - c_parser_declaration_or_fndef (parser, true, true, true, true, true, - NULL, vNULL); + c_parser_declaration_or_fndef (parser, true, true, true, true, true); c_in_omp_for = false; if (this_pre_body) { @@ -20335,12 +20338,12 @@ c_parser_omp_declare_simd (c_parser *parser, enum pragma_context context) while (c_parser_next_token_is (parser, CPP_KEYWORD) && c_parser_peek_token (parser)->keyword == RID_EXTENSION); c_parser_declaration_or_fndef (parser, true, true, true, false, true, - NULL, clauses); + NULL, &clauses); restore_extension_diagnostics (ext); } else c_parser_declaration_or_fndef (parser, true, true, true, false, true, - NULL, clauses); + NULL, &clauses); break; case pragma_struct: case pragma_param: @@ -20361,7 +20364,7 @@ c_parser_omp_declare_simd (c_parser *parser, enum pragma_context context) if (c_parser_next_tokens_start_declaration (parser)) { c_parser_declaration_or_fndef (parser, true, true, true, true, - true, NULL, clauses); + true, NULL, &clauses); restore_extension_diagnostics (ext); break; } @@ -20370,7 +20373,7 @@ c_parser_omp_declare_simd (c_parser *parser, enum pragma_context context) else if (c_parser_next_tokens_start_declaration (parser)) { c_parser_declaration_or_fndef (parser, true, true, true, true, true, - NULL, clauses); + NULL, &clauses); break; } error ("%<#pragma omp declare %s%> must be followed by " @@ -20851,8 +20854,10 @@ c_finish_omp_declare_variant (c_parser *parser, tree fndecl, tree parms) static void c_finish_omp_declare_simd (c_parser *parser, tree fndecl, tree parms, - vec<c_token> clauses) + vec<c_token> *pclauses) { + vec<c_token> &clauses = *pclauses; + /* Normally first token is CPP_NAME "simd" or "variant". CPP_EOF there indicates error has been reported and CPP_PRAGMA that c_finish_omp_declare_simd has already processed the tokens. */ diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 313b35f..390cf9a 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -9206,257 +9206,1139 @@ __STRUCTN (float, 64, 4) #undef __STRUCTN -#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, \ - qmode, ptr_mode, funcsuffix, signedtype) \ -__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_oi __o; \ - largetype __temp; \ - __temp.val[0] \ - = vcombine_##funcsuffix (__b.val[0], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[1] \ - = vcombine_##funcsuffix (__b.val[1], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __o = __builtin_aarch64_set_qregoi##qmode (__o, \ - (signedtype) __temp.val[0], 0); \ - __o = __builtin_aarch64_set_qregoi##qmode (__o, \ - (signedtype) __temp.val[1], 1); \ - __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __o, __c); \ -} - -__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16, - float16x8_t) -__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32, - float32x4_t) -__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64, - float64x2_t) -__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16, - int16x8_t) -__ST2_LANE_FUNC (poly64x1x2_t, poly64x2x2_t, poly64_t, di, v2di_ssps, di, p64, - poly64x2_t) -__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64, - int64x2_t) -__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16, - int16x8_t) -__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32, - int32x4_t) -__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64, - int64x2_t) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_f16 (float16_t *__ptr, float16x4x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + float16x8x2_t __temp; + __temp.val[0] = vcombine_f16 (__val.val[0], + vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f16 (__val.val[1], + vcreate_f16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanev4hf ((__builtin_aarch64_simd_hf *) __ptr, __o, + __lane); +} -#define __ST2Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - union { intype __i; \ - __builtin_aarch64_simd_oi __o; } __temp = { __b }; \ - __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __temp.__o, __c); \ -} - -__ST2Q_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16) -__ST2Q_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) -__ST2Q_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) -__ST2Q_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) -__ST2Q_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) -__ST2Q_LANE_FUNC (poly64x2x2_t, poly64_t, v2di, di, p64) -__ST2Q_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) -__ST2Q_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) -__ST2Q_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) -__ST2Q_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) -__ST2Q_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) -__ST2Q_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) -__ST2Q_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) -__ST2Q_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) - -#define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, \ - qmode, ptr_mode, funcsuffix, signedtype) \ -__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_ci __o; \ - largetype __temp; \ - __temp.val[0] \ - = vcombine_##funcsuffix (__b.val[0], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[1] \ - = vcombine_##funcsuffix (__b.val[1], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[2] \ - = vcombine_##funcsuffix (__b.val[2], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __o = __builtin_aarch64_set_qregci##qmode (__o, \ - (signedtype) __temp.val[0], 0); \ - __o = __builtin_aarch64_set_qregci##qmode (__o, \ - (signedtype) __temp.val[1], 1); \ - __o = __builtin_aarch64_set_qregci##qmode (__o, \ - (signedtype) __temp.val[2], 2); \ - __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __o, __c); \ -} - -__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16, - float16x8_t) -__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32, - float32x4_t) -__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64, - float64x2_t) -__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16, - int16x8_t) -__ST3_LANE_FUNC (poly64x1x3_t, poly64x2x3_t, poly64_t, di, v2di_ssps, di, p64, - poly64x2_t) -__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64, - int64x2_t) -__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16, - int16x8_t) -__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32, - int32x4_t) -__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64, - int64x2_t) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_f32 (float32_t *__ptr, float32x2x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + float32x4x2_t __temp; + __temp.val[0] = vcombine_f32 (__val.val[0], + vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f32 (__val.val[1], + vcreate_f32 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanev2sf ((__builtin_aarch64_simd_sf *) __ptr, __o, + __lane); +} -#define __ST3Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - union { intype __i; \ - __builtin_aarch64_simd_ci __o; } __temp = { __b }; \ - __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __temp.__o, __c); \ -} - -__ST3Q_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16) -__ST3Q_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) -__ST3Q_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) -__ST3Q_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) -__ST3Q_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) -__ST3Q_LANE_FUNC (poly64x2x3_t, poly64_t, v2di, di, p64) -__ST3Q_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) -__ST3Q_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) -__ST3Q_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) -__ST3Q_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) -__ST3Q_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) -__ST3Q_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) -__ST3Q_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) -__ST3Q_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) - -#define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, \ - qmode, ptr_mode, funcsuffix, signedtype) \ -__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - __builtin_aarch64_simd_xi __o; \ - largetype __temp; \ - __temp.val[0] \ - = vcombine_##funcsuffix (__b.val[0], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[1] \ - = vcombine_##funcsuffix (__b.val[1], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[2] \ - = vcombine_##funcsuffix (__b.val[2], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __temp.val[3] \ - = vcombine_##funcsuffix (__b.val[3], \ - vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[0], 0); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[1], 1); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[2], 2); \ - __o = __builtin_aarch64_set_qregxi##qmode (__o, \ - (signedtype) __temp.val[3], 3); \ - __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __o, __c); \ -} - -__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16, - float16x8_t) -__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32, - float32x4_t) -__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64, - float64x2_t) -__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, - int8x16_t) -__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16, - int16x8_t) -__ST4_LANE_FUNC (poly64x1x4_t, poly64x2x4_t, poly64_t, di, v2di_ssps, di, p64, - poly64x2_t) -__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, - int8x16_t) -__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, - int16x8_t) -__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, - int32x4_t) -__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64, - int64x2_t) -__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, - int8x16_t) -__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16, - int16x8_t) -__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32, - int32x4_t) -__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64, - int64x2_t) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_f64 (float64_t *__ptr, float64x1x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + float64x2x2_t __temp; + __temp.val[0] = vcombine_f64 (__val.val[0], + vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f64 (__val.val[1], + vcreate_f64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanedf ((__builtin_aarch64_simd_df *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_p8 (poly8_t *__ptr, poly8x8x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + poly8x16x2_t __temp; + __temp.val[0] = vcombine_p8 (__val.val[0], + vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p8 (__val.val[1], + vcreate_p8 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_p16 (poly16_t *__ptr, poly16x4x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + poly16x8x2_t __temp; + __temp.val[0] = vcombine_p16 (__val.val[0], + vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p16 (__val.val[1], + vcreate_p16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_p64 (poly64_t *__ptr, poly64x1x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + poly64x2x2_t __temp; + __temp.val[0] = vcombine_p64 (__val.val[0], + vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p64 (__val.val[1], + vcreate_p64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanedi ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_s8 (int8_t *__ptr, int8x8x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + int8x16x2_t __temp; + __temp.val[0] = vcombine_s8 (__val.val[0], + vcreate_s8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s8 (__val.val[1], + vcreate_s8 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_s16 (int16_t *__ptr, int16x4x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + int16x8x2_t __temp; + __temp.val[0] = vcombine_s16 (__val.val[0], + vcreate_s16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s16 (__val.val[1], + vcreate_s16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_s32 (int32_t *__ptr, int32x2x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + int32x4x2_t __temp; + __temp.val[0] = vcombine_s32 (__val.val[0], + vcreate_s32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s32 (__val.val[1], + vcreate_s32 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_s64 (int64_t *__ptr, int64x1x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + int64x2x2_t __temp; + __temp.val[0] = vcombine_s64 (__val.val[0], + vcreate_s64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s64 (__val.val[1], + vcreate_s64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanedi ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_u8 (uint8_t *__ptr, uint8x8x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + uint8x16x2_t __temp; + __temp.val[0] = vcombine_u8 (__val.val[0], + vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u8 (__val.val[1], + vcreate_u8 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_u16 (uint16_t *__ptr, uint16x4x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + uint16x8x2_t __temp; + __temp.val[0] = vcombine_u16 (__val.val[0], + vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u16 (__val.val[1], + vcreate_u16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_u32 (uint32_t *__ptr, uint32x2x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + uint32x4x2_t __temp; + __temp.val[0] = vcombine_u32 (__val.val[0], + vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u32 (__val.val[1], + vcreate_u32 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_u64 (uint64_t *__ptr, uint64x1x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + uint64x2x2_t __temp; + __temp.val[0] = vcombine_u64 (__val.val[0], + vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u64 (__val.val[1], + vcreate_u64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanedi ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_f16 (float16_t *__ptr, float16x8x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev8hf ((__builtin_aarch64_simd_hf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_f32 (float32_t *__ptr, float32x4x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev4sf ((__builtin_aarch64_simd_sf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_f64 (float64_t *__ptr, float64x2x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev2df ((__builtin_aarch64_simd_df *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_p8 (poly8_t *__ptr, poly8x16x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_p16 (poly16_t *__ptr, poly16x8x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_p64 (poly64_t *__ptr, poly64x2x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_s8 (int8_t *__ptr, int8x16x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_s16 (int16_t *__ptr, int16x8x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_s32 (int32_t *__ptr, int32x4x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_s64 (int64_t *__ptr, int64x2x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_u8 (uint8_t *__ptr, uint8x16x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_u16 (uint16_t *__ptr, uint16x8x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_u32 (uint32_t *__ptr, uint32x4x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_u64 (uint64_t *__ptr, uint64x2x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_f16 (float16_t *__ptr, float16x4x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + float16x8x3_t __temp; + __temp.val[0] = vcombine_f16 (__val.val[0], + vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f16 (__val.val[1], + vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f16 (__val.val[2], + vcreate_f16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanev4hf ((__builtin_aarch64_simd_hf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_f32 (float32_t *__ptr, float32x2x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + float32x4x3_t __temp; + __temp.val[0] = vcombine_f32 (__val.val[0], + vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f32 (__val.val[1], + vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f32 (__val.val[2], + vcreate_f32 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanev2sf ((__builtin_aarch64_simd_sf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_f64 (float64_t *__ptr, float64x1x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + float64x2x3_t __temp; + __temp.val[0] = vcombine_f64 (__val.val[0], + vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f64 (__val.val[1], + vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f64 (__val.val[2], + vcreate_f64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanedf ((__builtin_aarch64_simd_df *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_p8 (poly8_t *__ptr, poly8x8x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + poly8x16x3_t __temp; + __temp.val[0] = vcombine_p8 (__val.val[0], + vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p8 (__val.val[1], + vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p8 (__val.val[2], + vcreate_p8 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_p16 (poly16_t *__ptr, poly16x4x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + poly16x8x3_t __temp; + __temp.val[0] = vcombine_p16 (__val.val[0], + vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p16 (__val.val[1], + vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p16 (__val.val[2], + vcreate_p16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_p64 (poly64_t *__ptr, poly64x1x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + poly64x2x3_t __temp; + __temp.val[0] = vcombine_p64 (__val.val[0], + vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p64 (__val.val[1], + vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p64 (__val.val[2], + vcreate_p64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanedi ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_s8 (int8_t *__ptr, int8x8x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + int8x16x3_t __temp; + __temp.val[0] = vcombine_s8 (__val.val[0], + vcreate_s8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s8 (__val.val[1], + vcreate_s8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_s8 (__val.val[2], + vcreate_s8 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_s16 (int16_t *__ptr, int16x4x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + int16x8x3_t __temp; + __temp.val[0] = vcombine_s16 (__val.val[0], + vcreate_s16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s16 (__val.val[1], + vcreate_s16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_s16 (__val.val[2], + vcreate_s16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_s32 (int32_t *__ptr, int32x2x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + int32x4x3_t __temp; + __temp.val[0] = vcombine_s32 (__val.val[0], + vcreate_s32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s32 (__val.val[1], + vcreate_s32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_s32 (__val.val[2], + vcreate_s32 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_s64 (int64_t *__ptr, int64x1x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + int64x2x3_t __temp; + __temp.val[0] = vcombine_s64 (__val.val[0], + vcreate_s64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s64 (__val.val[1], + vcreate_s64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_s64 (__val.val[2], + vcreate_s64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanedi ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} -#define __ST4Q_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -__extension__ extern __inline void \ -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ - intype __b, const int __c) \ -{ \ - union { intype __i; \ - __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ - __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ - __ptr, __temp.__o, __c); \ -} - -__ST4Q_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16) -__ST4Q_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) -__ST4Q_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) -__ST4Q_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) -__ST4Q_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) -__ST4Q_LANE_FUNC (poly64x2x4_t, poly64_t, v2di, di, p64) -__ST4Q_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) -__ST4Q_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) -__ST4Q_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) -__ST4Q_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) -__ST4Q_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) -__ST4Q_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) -__ST4Q_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) -__ST4Q_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_u8 (uint8_t *__ptr, uint8x8x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + uint8x16x3_t __temp; + __temp.val[0] = vcombine_u8 (__val.val[0], + vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u8 (__val.val[1], + vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u8 (__val.val[2], + vcreate_u8 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_u16 (uint16_t *__ptr, uint16x4x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + uint16x8x3_t __temp; + __temp.val[0] = vcombine_u16 (__val.val[0], + vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u16 (__val.val[1], + vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u16 (__val.val[2], + vcreate_u16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_u32 (uint32_t *__ptr, uint32x2x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + uint32x4x3_t __temp; + __temp.val[0] = vcombine_u32 (__val.val[0], + vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u32 (__val.val[1], + vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u32 (__val.val[2], + vcreate_u32 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_u64 (uint64_t *__ptr, uint64x1x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + uint64x2x3_t __temp; + __temp.val[0] = vcombine_u64 (__val.val[0], + vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u64 (__val.val[1], + vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u64 (__val.val[2], + vcreate_u64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanedi ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_f16 (float16_t *__ptr, float16x8x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev8hf ((__builtin_aarch64_simd_hf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_f32 (float32_t *__ptr, float32x4x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev4sf ((__builtin_aarch64_simd_sf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_f64 (float64_t *__ptr, float64x2x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev2df ((__builtin_aarch64_simd_df *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_p8 (poly8_t *__ptr, poly8x16x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_p16 (poly16_t *__ptr, poly16x8x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_p64 (poly64_t *__ptr, poly64x2x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_s8 (int8_t *__ptr, int8x16x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_s16 (int16_t *__ptr, int16x8x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_s32 (int32_t *__ptr, int32x4x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_s64 (int64_t *__ptr, int64x2x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_u8 (uint8_t *__ptr, uint8x16x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_u16 (uint16_t *__ptr, uint16x8x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_u32 (uint32_t *__ptr, uint32x4x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_u64 (uint64_t *__ptr, uint64x2x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_f16 (float16_t *__ptr, float16x4x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + float16x8x4_t __temp; + __temp.val[0] = vcombine_f16 (__val.val[0], + vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f16 (__val.val[1], + vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f16 (__val.val[2], + vcreate_f16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_f16 (__val.val[3], + vcreate_f16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanev4hf ((__builtin_aarch64_simd_hf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_f32 (float32_t *__ptr, float32x2x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + float32x4x4_t __temp; + __temp.val[0] = vcombine_f32 (__val.val[0], + vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f32 (__val.val[1], + vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f32 (__val.val[2], + vcreate_f32 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_f32 (__val.val[3], + vcreate_f32 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanev2sf ((__builtin_aarch64_simd_sf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_f64 (float64_t *__ptr, float64x1x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + float64x2x4_t __temp; + __temp.val[0] = vcombine_f64 (__val.val[0], + vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_f64 (__val.val[1], + vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_f64 (__val.val[2], + vcreate_f64 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_f64 (__val.val[3], + vcreate_f64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanedf ((__builtin_aarch64_simd_df *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_p8 (poly8_t *__ptr, poly8x8x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + poly8x16x4_t __temp; + __temp.val[0] = vcombine_p8 (__val.val[0], + vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p8 (__val.val[1], + vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p8 (__val.val[2], + vcreate_p8 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_p8 (__val.val[3], + vcreate_p8 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_p16 (poly16_t *__ptr, poly16x4x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + poly16x8x4_t __temp; + __temp.val[0] = vcombine_p16 (__val.val[0], + vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p16 (__val.val[1], + vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p16 (__val.val[2], + vcreate_p16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_p16 (__val.val[3], + vcreate_p16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_p64 (poly64_t *__ptr, poly64x1x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + poly64x2x4_t __temp; + __temp.val[0] = vcombine_p64 (__val.val[0], + vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_p64 (__val.val[1], + vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_p64 (__val.val[2], + vcreate_p64 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_p64 (__val.val[3], + vcreate_p64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanedi ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_s8 (int8_t *__ptr, int8x8x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + int8x16x4_t __temp; + __temp.val[0] = vcombine_s8 (__val.val[0], + vcreate_s8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s8 (__val.val[1], + vcreate_s8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_s8 (__val.val[2], + vcreate_s8 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_s8 (__val.val[3], + vcreate_s8 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_s16 (int16_t *__ptr, int16x4x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + int16x8x4_t __temp; + __temp.val[0] = vcombine_s16 (__val.val[0], + vcreate_s16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s16 (__val.val[1], + vcreate_s16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_s16 (__val.val[2], + vcreate_s16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_s16 (__val.val[3], + vcreate_s16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_s32 (int32_t *__ptr, int32x2x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + int32x4x4_t __temp; + __temp.val[0] = vcombine_s32 (__val.val[0], + vcreate_s32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s32 (__val.val[1], + vcreate_s32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_s32 (__val.val[2], + vcreate_s32 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_s32 (__val.val[3], + vcreate_s32 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_s64 (int64_t *__ptr, int64x1x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + int64x2x4_t __temp; + __temp.val[0] = vcombine_s64 (__val.val[0], + vcreate_s64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_s64 (__val.val[1], + vcreate_s64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_s64 (__val.val[2], + vcreate_s64 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_s64 (__val.val[3], + vcreate_s64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanedi ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_u8 (uint8_t *__ptr, uint8x8x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + uint8x16x4_t __temp; + __temp.val[0] = vcombine_u8 (__val.val[0], + vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u8 (__val.val[1], + vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u8 (__val.val[2], + vcreate_u8 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u8 (__val.val[3], + vcreate_u8 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanev8qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_u16 (uint16_t *__ptr, uint16x4x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + uint16x8x4_t __temp; + __temp.val[0] = vcombine_u16 (__val.val[0], + vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u16 (__val.val[1], + vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u16 (__val.val[2], + vcreate_u16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u16 (__val.val[3], + vcreate_u16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanev4hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_u32 (uint32_t *__ptr, uint32x2x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + uint32x4x4_t __temp; + __temp.val[0] = vcombine_u32 (__val.val[0], + vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u32 (__val.val[1], + vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u32 (__val.val[2], + vcreate_u32 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u32 (__val.val[3], + vcreate_u32 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanev2si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_u64 (uint64_t *__ptr, uint64x1x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + uint64x2x4_t __temp; + __temp.val[0] = vcombine_u64 (__val.val[0], + vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_u64 (__val.val[1], + vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_u64 (__val.val[2], + vcreate_u64 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_u64 (__val.val[3], + vcreate_u64 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanedi ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_f16 (float16_t *__ptr, float16x8x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev8hf ((__builtin_aarch64_simd_hf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_f32 (float32_t *__ptr, float32x4x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev4sf ((__builtin_aarch64_simd_sf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_f64 (float64_t *__ptr, float64x2x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev2df ((__builtin_aarch64_simd_df *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_p8 (poly8_t *__ptr, poly8x16x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_p16 (poly16_t *__ptr, poly16x8x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_p64 (poly64_t *__ptr, poly64x2x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_s8 (int8_t *__ptr, int8x16x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_s16 (int16_t *__ptr, int16x8x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_s32 (int32_t *__ptr, int32x4x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_s64 (int64_t *__ptr, int64x2x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_u8 (uint8_t *__ptr, uint8x16x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev16qi ((__builtin_aarch64_simd_qi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_u16 (uint16_t *__ptr, uint16x8x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev8hi ((__builtin_aarch64_simd_hi *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_u32 (uint32_t *__ptr, uint32x4x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev4si ((__builtin_aarch64_simd_si *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_u64 (uint64_t *__ptr, uint64x2x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev2di ((__builtin_aarch64_simd_di *) __ptr, __o, + __lane); +} __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) @@ -32957,8 +33839,7 @@ vst1_bf16_x2 (bfloat16_t * __a, bfloat16x4x2_t __val) bfloat16x8x2_t __temp; __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x2v4bf (__a, __o); } @@ -32967,8 +33848,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_bf16_x2 (bfloat16_t * __a, bfloat16x8x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x2v8bf (__a, __o); } @@ -32981,9 +33861,7 @@ vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __val) __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st1x3v4bf ((__builtin_aarch64_simd_bf *) __a, __o); } @@ -32992,26 +33870,31 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst1q_bf16_x3 (bfloat16_t * __a, bfloat16x8x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st1x3v8bf ((__builtin_aarch64_simd_bf *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t val) +vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t __val) { - union { bfloat16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v4bf ((__builtin_aarch64_simd_bf *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + bfloat16x8x4_t __temp; + __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_bf16 (__val.val[3], vcreate_bf16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st1x4v4bf ((__builtin_aarch64_simd_bf *) __a, __o); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t val) +vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t __val) { - union { bfloat16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; - __builtin_aarch64_st1x4v8bf ((__builtin_aarch64_simd_bf *) __a, __u.__o); + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st1x4v8bf ((__builtin_aarch64_simd_bf *) __a, __o); } __extension__ extern __inline void @@ -33043,8 +33926,7 @@ vst2_bf16 (bfloat16_t * __a, bfloat16x4x2_t __val) bfloat16x8x2_t __temp; __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st2v4bf (__a, __o); } @@ -33053,8 +33935,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst2q_bf16 (bfloat16_t * __a, bfloat16x8x2_t __val) { __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st2v8bf (__a, __o); } @@ -33067,9 +33948,7 @@ vst3_bf16 (bfloat16_t * __a, bfloat16x4x3_t __val) __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st3v4bf ((__builtin_aarch64_simd_bf *) __a, __o); } @@ -33078,9 +33957,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst3q_bf16 (bfloat16_t * __a, bfloat16x8x3_t __val) { __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st3v8bf ((__builtin_aarch64_simd_bf *) __a, __o); } @@ -33094,10 +33971,7 @@ vst4_bf16 (bfloat16_t * __a, bfloat16x4x4_t __val) __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); __temp.val[3] = vcombine_bf16 (__val.val[3], vcreate_bf16 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[3], 3); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); __builtin_aarch64_st4v4bf ((__builtin_aarch64_simd_bf *) __a, __o); } @@ -33106,10 +33980,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vst4q_bf16 (bfloat16_t * __a, bfloat16x8x4_t __val) { __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[3], 3); + __builtin_memcpy (&__o, &__val, sizeof (__val)); __builtin_aarch64_st4v8bf ((__builtin_aarch64_simd_bf *) __a, __o); } @@ -33723,15 +34594,86 @@ __LD4_LANE_FUNC (bfloat16x4x4_t, bfloat16x4_t, bfloat16x8x4_t, bfloat16_t, v4bf, v8bf, bf, bf16, bfloat16x8_t) __LD4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16x8_t, bfloat16_t, v8bf, bf, bf16) -__ST2_LANE_FUNC (bfloat16x4x2_t, bfloat16x8x2_t, bfloat16_t, v4bf, v8bf, bf, - bf16, bfloat16x8_t) -__ST2Q_LANE_FUNC (bfloat16x8x2_t, bfloat16_t, v8bf, bf, bf16) -__ST3_LANE_FUNC (bfloat16x4x3_t, bfloat16x8x3_t, bfloat16_t, v4bf, v8bf, bf, - bf16, bfloat16x8_t) -__ST3Q_LANE_FUNC (bfloat16x8x3_t, bfloat16_t, v8bf, bf, bf16) -__ST4_LANE_FUNC (bfloat16x4x4_t, bfloat16x8x4_t, bfloat16_t, v4bf, v8bf, bf, - bf16, bfloat16x8_t) -__ST4Q_LANE_FUNC (bfloat16x8x4_t, bfloat16_t, v8bf, bf, bf16) +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2_lane_bf16 (bfloat16_t *__ptr, bfloat16x4x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + bfloat16x8x2_t __temp; + __temp.val[0] = vcombine_bf16 (__val.val[0], + vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_bf16 (__val.val[1], + vcreate_bf16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st2_lanev4bf ((__builtin_aarch64_simd_bf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst2q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x2_t __val, const int __lane) +{ + __builtin_aarch64_simd_oi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st2_lanev8bf ((__builtin_aarch64_simd_bf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3_lane_bf16 (bfloat16_t *__ptr, bfloat16x4x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + bfloat16x8x3_t __temp; + __temp.val[0] = vcombine_bf16 (__val.val[0], + vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_bf16 (__val.val[1], + vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_bf16 (__val.val[2], + vcreate_bf16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st3_lanev4bf ((__builtin_aarch64_simd_bf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst3q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x3_t __val, const int __lane) +{ + __builtin_aarch64_simd_ci __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st3_lanev8bf ((__builtin_aarch64_simd_bf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4_lane_bf16 (bfloat16_t *__ptr, bfloat16x4x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + bfloat16x8x4_t __temp; + __temp.val[0] = vcombine_bf16 (__val.val[0], + vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[1] = vcombine_bf16 (__val.val[1], + vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[2] = vcombine_bf16 (__val.val[2], + vcreate_bf16 (__AARCH64_UINT64_C (0))); + __temp.val[3] = vcombine_bf16 (__val.val[3], + vcreate_bf16 (__AARCH64_UINT64_C (0))); + __builtin_memcpy (&__o, &__temp, sizeof (__temp)); + __builtin_aarch64_st4_lanev4bf ((__builtin_aarch64_simd_bf *) __ptr, __o, + __lane); +} + +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vst4q_lane_bf16 (bfloat16_t *__ptr, bfloat16x8x4_t __val, const int __lane) +{ + __builtin_aarch64_simd_xi __o; + __builtin_memcpy (&__o, &__val, sizeof (__val)); + __builtin_aarch64_st4_lanev8bf ((__builtin_aarch64_simd_bf *) __ptr, __o, + __lane); +} #pragma GCC pop_options @@ -33952,11 +34894,5 @@ vaddq_p128 (poly128_t __a, poly128_t __b) #undef __LD3Q_LANE_FUNC #undef __LD4_LANE_FUNC #undef __LD4Q_LANE_FUNC -#undef __ST2_LANE_FUNC -#undef __ST2Q_LANE_FUNC -#undef __ST3_LANE_FUNC -#undef __ST3Q_LANE_FUNC -#undef __ST4_LANE_FUNC -#undef __ST4Q_LANE_FUNC #endif diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 2de5a96..5b1c06b 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -4189,6 +4189,8 @@ rs6000_option_override_internal (bool global_init_p) else rs6000_long_double_type_size = default_long_double_size; } + else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode) + ; /* The option value can be seen when cl_target_option_restore is called. */ else if (rs6000_long_double_type_size == 128) rs6000_long_double_type_size = FLOAT_PRECISION_TFmode; else if (global_options_set.x_rs6000_ieeequad) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index ff99887..2551832 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -14764,17 +14764,17 @@ To optimize the program based on the collected profile information, use Register the profile information in the specified section instead of using a constructor/destructor. The section name is @var{name} if it is specified, otherwise the section name defaults to @code{.gcov_info}. A pointer to the -profile information generated by @option{-fprofile-arcs} or -@option{-ftest-coverage} is placed in the specified section for each -translation unit. This option disables the profile information registration -through a constructor and it disables the profile information processing -through a destructor. This option is not intended to be used in hosted -environments such as GNU/Linux. It targets systems with limited resources -which do not support constructors and destructors. The linker could collect -the input sections in a continuous memory block and define start and end -symbols. The runtime support could dump the profiling information registered -in this linker set during program termination to a serial line for example. A -GNU linker script example which defines a linker output section follows: +profile information generated by @option{-fprofile-arcs} is placed in the +specified section for each translation unit. This option disables the profile +information registration through a constructor and it disables the profile +information processing through a destructor. This option is not intended to be +used in hosted environments such as GNU/Linux. It targets free-standing +environments (for example embedded systems) with limited resources which do not +support constructors/destructors or the C library file I/O. + +The linker could collect the input sections in a continuous memory block and +define start and end symbols. A GNU linker script example which defines a +linker output section follows: @smallexample .gcov_info : @@ -14785,6 +14785,64 @@ GNU linker script example which defines a linker output section follows: @} @end smallexample +The program could dump the profiling information registered in this linker set +for example like this: + +@smallexample +#include <gcov.h> +#include <stdio.h> +#include <stdlib.h> + +extern const struct gcov_info *__gcov_info_start[]; +extern const struct gcov_info *__gcov_info_end[]; + +static void +filename (const char *f, void *arg) +@{ + puts (f); +@} + +static void +dump (const void *d, unsigned n, void *arg) +@{ + const unsigned char *c = d; + + for (unsigned i = 0; i < n; ++i) + printf ("%02x", c[i]); +@} + +static void * +allocate (unsigned length, void *arg) +@{ + return malloc (length); +@} + +static void +dump_gcov_info (void) +@{ + const struct gcov_info **info = __gcov_info_start; + const struct gcov_info **end = __gcov_info_end; + + /* Obfuscate variable to prevent compiler optimizations. */ + __asm__ ("" : "+r" (info)); + + while (info != end) + @{ + void *arg = NULL; + __gcov_info_to_gcda (*info, filename, dump, allocate, arg); + putchar ('\n'); + ++info; + @} +@} + +int +main() +@{ + dump_gcov_info(); + return 0; +@} +@end smallexample + @item -fprofile-note=@var{path} @opindex fprofile-note diff --git a/gcc/dominance.c b/gcc/dominance.c index 6a262ce..cc63391 100644 --- a/gcc/dominance.c +++ b/gcc/dominance.c @@ -1227,7 +1227,7 @@ recompute_dominator (enum cdi_direction dir, basic_block bb) from BBS. */ static void -prune_bbs_to_update_dominators (vec<basic_block> bbs, +prune_bbs_to_update_dominators (vec<basic_block> &bbs, bool conservative) { unsigned i; @@ -1379,7 +1379,7 @@ determine_dominators_for_sons (struct graph *g, vec<basic_block> bbs, a block of BBS in the current dominance tree dominate it. */ void -iterate_fix_dominators (enum cdi_direction dir, vec<basic_block> bbs, +iterate_fix_dominators (enum cdi_direction dir, vec<basic_block> &bbs, bool conservative) { unsigned i; diff --git a/gcc/dominance.h b/gcc/dominance.h index 1a8c248..970da02 100644 --- a/gcc/dominance.h +++ b/gcc/dominance.h @@ -78,7 +78,7 @@ checking_verify_dominators (cdi_direction dir) basic_block recompute_dominator (enum cdi_direction, basic_block); extern void iterate_fix_dominators (enum cdi_direction, - vec<basic_block> , bool); + vec<basic_block> &, bool); extern void add_to_dominance_info (enum cdi_direction, basic_block); extern void delete_from_dominance_info (enum cdi_direction, basic_block); extern basic_block first_dom_son (enum cdi_direction, basic_block); diff --git a/gcc/gcov-io.c b/gcc/gcov-io.c index 7819593..d3e56af 100644 --- a/gcc/gcov-io.c +++ b/gcc/gcov-io.c @@ -229,30 +229,25 @@ gcov_magic (gcov_unsigned_t magic, gcov_unsigned_t expected) #endif #if !IN_GCOV -/* Write unsigned VALUE to coverage file. */ +/* Write DATA of LENGTH characters to coverage file. */ GCOV_LINKAGE void -gcov_write_unsigned (gcov_unsigned_t value) +gcov_write (const void *data, unsigned length) { - gcov_unsigned_t r = fwrite (&value, sizeof (value), 1, gcov_var.file); + gcov_unsigned_t r = fwrite (data, length, 1, gcov_var.file); if (r != 1) gcov_var.error = 1; } -/* Write counter VALUE to coverage file. Sets error flag - appropriately. */ +/* Write unsigned VALUE to coverage file. */ -#if IN_LIBGCOV GCOV_LINKAGE void -gcov_write_counter (gcov_type value) +gcov_write_unsigned (gcov_unsigned_t value) { - gcov_write_unsigned ((gcov_unsigned_t) value); - if (sizeof (value) > sizeof (gcov_unsigned_t)) - gcov_write_unsigned ((gcov_unsigned_t) (value >> 32)); - else - gcov_write_unsigned (0); + gcov_unsigned_t r = fwrite (&value, sizeof (value), 1, gcov_var.file); + if (r != 1) + gcov_var.error = 1; } -#endif /* IN_LIBGCOV */ #if !IN_LIBGCOV /* Write STRING to coverage file. Sets error flag on file @@ -349,22 +344,13 @@ gcov_write_length (gcov_position_t position) #else /* IN_LIBGCOV */ -/* Write a tag TAG and length LENGTH. */ - -GCOV_LINKAGE void -gcov_write_tag_length (gcov_unsigned_t tag, gcov_unsigned_t length) -{ - gcov_write_unsigned (tag); - gcov_write_unsigned (length); -} - -/* Write a summary structure to the gcov file. Return nonzero on - overflow. */ +/* Write a summary structure to the gcov file. */ GCOV_LINKAGE void gcov_write_summary (gcov_unsigned_t tag, const struct gcov_summary *summary) { - gcov_write_tag_length (tag, GCOV_TAG_SUMMARY_LENGTH); + gcov_write_unsigned (tag); + gcov_write_unsigned (GCOV_TAG_SUMMARY_LENGTH); gcov_write_unsigned (summary->runs); gcov_write_unsigned (summary->sum_max); } diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h index 538bee8..99e1964 100644 --- a/gcc/gcov-io.h +++ b/gcc/gcov-io.h @@ -367,6 +367,7 @@ char *mangle_path (char const *base); #if !IN_GCOV /* Available outside gcov */ +GCOV_LINKAGE void gcov_write (const void *, unsigned) ATTRIBUTE_HIDDEN; GCOV_LINKAGE void gcov_write_unsigned (gcov_unsigned_t) ATTRIBUTE_HIDDEN; #endif diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 394530c..19ab2de 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -b47bcf942daa9a0c252db9b57b8f138adbfcdaa2 +32590102c464679f845667b5554e1dcce2549ad2 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc index 3e433d6..33177a7 100644 --- a/gcc/go/gofrontend/expressions.cc +++ b/gcc/go/gofrontend/expressions.cc @@ -11590,12 +11590,10 @@ Call_expression::intrinsify(Gogo* gogo, // sync/atomic functions and runtime/internal/atomic functions // are very similar. In order not to duplicate code, we just // redirect to the latter and let the code below to handle them. - // In case there is no equivalent functions (slight variance - // in types), we just make an artificial name (begin with '$'). // Note: no StorePointer, SwapPointer, and CompareAndSwapPointer, // as they need write barriers. if (name == "LoadInt32") - name = "$Loadint32"; + name = "Loadint32"; else if (name == "LoadInt64") name = "Loadint64"; else if (name == "LoadUint32") @@ -11607,9 +11605,9 @@ Call_expression::intrinsify(Gogo* gogo, else if (name == "LoadPointer") name = "Loadp"; else if (name == "StoreInt32") - name = "$Storeint32"; + name = "Storeint32"; else if (name == "StoreInt64") - name = "$Storeint64"; + name = "Storeint64"; else if (name == "StoreUint32") name = "Store"; else if (name == "StoreUint64") @@ -11617,7 +11615,7 @@ Call_expression::intrinsify(Gogo* gogo, else if (name == "StoreUintptr") name = "Storeuintptr"; else if (name == "AddInt32") - name = "$Xaddint32"; + name = "Xaddint32"; else if (name == "AddInt64") name = "Xaddint64"; else if (name == "AddUint32") @@ -11627,9 +11625,9 @@ Call_expression::intrinsify(Gogo* gogo, else if (name == "AddUintptr") name = "Xadduintptr"; else if (name == "SwapInt32") - name = "$Xchgint32"; + name = "Xchgint32"; else if (name == "SwapInt64") - name = "$Xchgint64"; + name = "Xchgint64"; else if (name == "SwapUint32") name = "Xchg"; else if (name == "SwapUint64") @@ -11637,9 +11635,9 @@ Call_expression::intrinsify(Gogo* gogo, else if (name == "SwapUintptr") name = "Xchguintptr"; else if (name == "CompareAndSwapInt32") - name = "$Casint32"; + name = "Casint32"; else if (name == "CompareAndSwapInt64") - name = "$Casint64"; + name = "Casint64"; else if (name == "CompareAndSwapUint32") name = "Cas"; else if (name == "CompareAndSwapUint64") @@ -11875,7 +11873,7 @@ Call_expression::intrinsify(Gogo* gogo, if ((name == "Load" || name == "Load64" || name == "Loadint64" || name == "Loadp" || name == "Loaduint" || name == "Loaduintptr" || name == "LoadAcq" - || name == "$Loadint32") + || name == "Loadint32") && this->args_ != NULL && this->args_->size() == 1) { if (int_size < 8 && (name == "Load64" || name == "Loadint64")) @@ -11895,7 +11893,7 @@ Call_expression::intrinsify(Gogo* gogo, code = Runtime::ATOMIC_LOAD_8; res_type = uint64_type; } - else if (name == "$Loadint32") + else if (name == "Loadint32") { code = Runtime::ATOMIC_LOAD_4; res_type = int32_type; @@ -11942,10 +11940,10 @@ Call_expression::intrinsify(Gogo* gogo, if ((name == "Store" || name == "Store64" || name == "StorepNoWB" || name == "Storeuintptr" || name == "StoreRel" - || name == "$Storeint32" || name == "$Storeint64") + || name == "Storeint32" || name == "Storeint64") && this->args_ != NULL && this->args_->size() == 2) { - if (int_size < 8 && (name == "Store64" || name == "$Storeint64")) + if (int_size < 8 && (name == "Store64" || name == "Storeint64")) return NULL; Runtime::Function code; @@ -11955,9 +11953,9 @@ Call_expression::intrinsify(Gogo* gogo, code = Runtime::ATOMIC_STORE_4; else if (name == "Store64") code = Runtime::ATOMIC_STORE_8; - else if (name == "$Storeint32") + else if (name == "Storeint32") code = Runtime::ATOMIC_STORE_4; - else if (name == "$Storeint64") + else if (name == "Storeint64") code = Runtime::ATOMIC_STORE_8; else if (name == "Storeuintptr") code = (ptr_size == 8 ? Runtime::ATOMIC_STORE_8 : Runtime::ATOMIC_STORE_4); @@ -11979,7 +11977,7 @@ Call_expression::intrinsify(Gogo* gogo, } if ((name == "Xchg" || name == "Xchg64" || name == "Xchguintptr" - || name == "$Xchgint32" || name == "$Xchgint64") + || name == "Xchgint32" || name == "Xchgint64") && this->args_ != NULL && this->args_->size() == 2) { if (int_size < 8 && (name == "Xchg64" || name == "Xchgint64")) @@ -11997,12 +11995,12 @@ Call_expression::intrinsify(Gogo* gogo, code = Runtime::ATOMIC_EXCHANGE_8; res_type = uint64_type; } - else if (name == "$Xchgint32") + else if (name == "Xchgint32") { code = Runtime::ATOMIC_EXCHANGE_4; res_type = int32_type; } - else if (name == "$Xchgint64") + else if (name == "Xchgint64") { code = Runtime::ATOMIC_EXCHANGE_8; res_type = int64_type; @@ -12025,10 +12023,10 @@ Call_expression::intrinsify(Gogo* gogo, if ((name == "Cas" || name == "Cas64" || name == "Casuintptr" || name == "Casp1" || name == "CasRel" - || name == "$Casint32" || name == "$Casint64") + || name == "Casint32" || name == "Casint64") && this->args_ != NULL && this->args_->size() == 3) { - if (int_size < 8 && (name == "Cas64" || name == "$Casint64")) + if (int_size < 8 && (name == "Cas64" || name == "Casint64")) return NULL; Runtime::Function code; @@ -12047,9 +12045,9 @@ Call_expression::intrinsify(Gogo* gogo, code = Runtime::ATOMIC_COMPARE_EXCHANGE_4; else if (name == "Cas64") code = Runtime::ATOMIC_COMPARE_EXCHANGE_8; - else if (name == "$Casint32") + else if (name == "Casint32") code = Runtime::ATOMIC_COMPARE_EXCHANGE_4; - else if (name == "$Casint64") + else if (name == "Casint64") code = Runtime::ATOMIC_COMPARE_EXCHANGE_8; else if (name == "Casuintptr") code = (ptr_size == 8 @@ -12077,7 +12075,7 @@ Call_expression::intrinsify(Gogo* gogo, } if ((name == "Xadd" || name == "Xadd64" || name == "Xaddint64" - || name == "Xadduintptr" || name == "$Xaddint32") + || name == "Xadduintptr" || name == "Xaddint32") && this->args_ != NULL && this->args_->size() == 2) { if (int_size < 8 && (name == "Xadd64" || name == "Xaddint64")) @@ -12095,7 +12093,7 @@ Call_expression::intrinsify(Gogo* gogo, code = Runtime::ATOMIC_ADD_FETCH_8; res_type = uint64_type; } - else if (name == "$Xaddint32") + else if (name == "Xaddint32") { code = Runtime::ATOMIC_ADD_FETCH_4; res_type = int32_type; diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h index 19751f10..42842d9 100644 --- a/gcc/ipa-prop.h +++ b/gcc/ipa-prop.h @@ -499,10 +499,10 @@ public: get reallocated, the member vectors and the underlying auto_vecs would get out of sync. */ ipa_call_arg_values (ipa_auto_call_arg_values *aavals) - : m_known_vals (aavals->m_known_vals), - m_known_contexts (aavals->m_known_contexts), - m_known_aggs (aavals->m_known_aggs), - m_known_value_ranges (aavals->m_known_value_ranges) + : m_known_vals (aavals->m_known_vals.to_vec_legacy ()), + m_known_contexts (aavals->m_known_contexts.to_vec_legacy ()), + m_known_aggs (aavals->m_known_aggs.to_vec_legacy ()), + m_known_value_ranges (aavals->m_known_value_ranges.to_vec_legacy ()) {} /* If m_known_vals (vector of known "scalar" values) is sufficiantly long, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 04b011b..d4c0307 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,85 @@ +2021-08-05 Jonathan Wakely <jwakely@redhat.com> + + * g++.old-deja/g++.other/inline7.C: Cast nodiscard call to void. + +2021-08-05 H.J. Lu <hjl.tools@gmail.com> + + PR target/99744 + * gcc.target/i386/pr99744-3.c: New test. + * gcc.target/i386/pr99744-4.c: Likewise. + * gcc.target/i386/pr99744-5.c: Likewise. + * gcc.target/i386/pr99744-6.c: Likewise. + * gcc.target/i386/pr99744-7.c: Likewise. + * gcc.target/i386/pr99744-8.c: Likewise. + +2021-08-05 Richard Earnshaw <rearnsha@arm.com> + + PR target/101723 + * gcc.target/arm/cortex-m55-nofp-flag-hard.c: Update expected output. + * gcc.target/arm/cortex-m55-nofp-flag-softfp.c: Likewise. + * gcc.target/arm/cortex-m55-nofp-nomve-flag-softfp.c: Likewise. + * gcc.target/arm/mve/intrinsics/mve_fpu1.c: Convert to dg-do assemble. + Add a non-no-op function body. + * gcc.target/arm/mve/intrinsics/mve_fpu2.c: Likewise. + * gcc.target/arm/pr98636.c (dg-options): Add -mfloat-abi=softfp. + * gcc.target/arm/attr-neon.c: Tighten scan-assembler tests. + * gcc.target/arm/attr-neon2.c: Use -Ofast, convert test to use + check-function-bodies. + * gcc.target/arm/attr-neon3.c: Likewise. + * gcc.target/arm/pr69245.c: Tighten scan-assembler match, but allow + multiple instances. + * gcc.target/arm/pragma_fpu_attribute.c: Likewise. + * gcc.target/arm/pragma_fpu_attribute_2.c: Likewise. + +2021-08-05 Jonathan Wright <jonathan.wright@arm.com> + + * gcc.target/aarch64/vsubX_high_cost.c: New test. + +2021-08-05 Jonathan Wright <jonathan.wright@arm.com> + + * gcc.target/aarch64/vaddX_high_cost.c: New test. + +2021-08-05 Richard Biener <rguenther@suse.de> + + * gcc.dg/vect/bb-slp-pr101756.c: Add -w. + +2021-08-05 Eric Botcazou <ebotcazou@gcc.gnu.org> + + * gcc.dg/sso-15.c: New test. + +2021-08-05 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/cond_op_anylogic_d-1.c: New test. + * gcc.target/i386/cond_op_anylogic_d-2.c: New test. + * gcc.target/i386/cond_op_anylogic_q-1.c: New test. + * gcc.target/i386/cond_op_anylogic_q-2.c: New test. + +2021-08-05 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/cond_op_maxmin_double-1.c: New test. + * gcc.target/i386/cond_op_maxmin_double-2.c: New test. + * gcc.target/i386/cond_op_maxmin_float-1.c: New test. + * gcc.target/i386/cond_op_maxmin_float-2.c: New test. + +2021-08-05 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/cond_op_maxmin_b-1.c: New test. + * gcc.target/i386/cond_op_maxmin_b-2.c: New test. + * gcc.target/i386/cond_op_maxmin_d-1.c: New test. + * gcc.target/i386/cond_op_maxmin_d-2.c: New test. + * gcc.target/i386/cond_op_maxmin_q-1.c: New test. + * gcc.target/i386/cond_op_maxmin_q-2.c: New test. + * gcc.target/i386/cond_op_maxmin_ub-1.c: New test. + * gcc.target/i386/cond_op_maxmin_ub-2.c: New test. + * gcc.target/i386/cond_op_maxmin_ud-1.c: New test. + * gcc.target/i386/cond_op_maxmin_ud-2.c: New test. + * gcc.target/i386/cond_op_maxmin_uq-1.c: New test. + * gcc.target/i386/cond_op_maxmin_uq-2.c: New test. + * gcc.target/i386/cond_op_maxmin_uw-1.c: New test. + * gcc.target/i386/cond_op_maxmin_uw-2.c: New test. + * gcc.target/i386/cond_op_maxmin_w-1.c: New test. + * gcc.target/i386/cond_op_maxmin_w-2.c: New test. + 2021-08-04 David Malcolm <dmalcolm@redhat.com> PR analyzer/101570 diff --git a/gcc/testsuite/gcc.dg/gcov-info-to-gcda.c b/gcc/testsuite/gcc.dg/gcov-info-to-gcda.c new file mode 100644 index 0000000..a42a768 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gcov-info-to-gcda.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ +/* { dg-skip-if "profile-info-section" { powerpc-ibm-aix* } } */ +/* { dg-options "-fprofile-arcs -fprofile-info-section" } */ + +#define assert(expr) \ + ((expr) \ + ? (void)0 \ + : (__builtin_printf ("%s:%i: Assertion `%s' failed.\n", \ + __FILE__, __LINE__, #expr), \ + __builtin_abort ())) + +struct gcov_info; + +extern void +__gcov_info_to_gcda (const struct gcov_info *__info, + void (*__filename_fn) (const char *, void *), + void (*__dump_fn) (const void *, unsigned, void *), + void *(*__allocate_fn) (unsigned, void *), + void *__arg); + +extern const struct gcov_info *my_info; + +static unsigned counter; + +static void +filename (const char *f, void *arg) +{ + assert (arg == &counter); + assert (__builtin_strstr (f, "gcov-info-to-gcda.c") == 0); +} + +static void +dump (const void *d, unsigned n, void *arg) +{ + unsigned *m = (unsigned *)arg; + assert (arg == &counter); + + if (*m == 0) + { + const unsigned *u = d; + assert (*u == 0x67636461); + } + + *m += n; +} + +static void * +allocate (unsigned length, void *arg) +{ + assert (arg == &counter); + return __builtin_malloc (length); +} + +int main() +{ + __asm__ volatile (".set my_info, .LPBX2"); + __gcov_info_to_gcda (my_info, filename, dump, allocate, &counter); + assert (counter > 4); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c index 60c53bc..3e7e572 100644 --- a/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c +++ b/gcc/testsuite/gcc.target/aarch64/vector_structure_intrinsics.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O3" } */ +/* { dg-options "-O3 -march=armv8.2-a+bf16" } */ #include <arm_neon.h> @@ -95,6 +95,7 @@ TEST_STX (vst4q, int16x8x4_t, int16_t*, s16); TEST_STX (vst4q, uint16x8x4_t, uint16_t*, u16); TEST_STX (vst4q, poly16x8x4_t, poly16_t*, p16); TEST_STX (vst4q, float16x8x4_t, float16_t*, f16); +TEST_STX (vst4q, bfloat16x8x4_t, bfloat16_t*, bf16); TEST_STX (vst4q, int32x4x4_t, int32_t*, s32); TEST_STX (vst4q, uint32x4x4_t, uint32_t*, u32); TEST_STX (vst4q, float32x4x4_t, float32_t*, f32); @@ -110,6 +111,7 @@ TEST_STX (vst2q, int16x8x2_t, int16_t*, s16); TEST_STX (vst2q, uint16x8x2_t, uint16_t*, u16); TEST_STX (vst2q, poly16x8x2_t, poly16_t*, p16); TEST_STX (vst2q, float16x8x2_t, float16_t*, f16); +TEST_STX (vst2q, bfloat16x8x2_t, bfloat16_t*, bf16); TEST_STX (vst2q, int32x4x2_t, int32_t*, s32); TEST_STX (vst2q, uint32x4x2_t, uint32_t*, u32); TEST_STX (vst2q, float32x4x2_t, float32_t*, f32); @@ -131,6 +133,7 @@ TEST_ST3 (vst3q, int16x8x3_t, int16_t*, s16); TEST_ST3 (vst3q, uint16x8x3_t, uint16_t*, u16); TEST_ST3 (vst3q, poly16x8x3_t, poly16_t*, p16); TEST_ST3 (vst3q, float16x8x3_t, float16_t*, f16); +TEST_ST3 (vst3q, bfloat16x8x3_t, bfloat16_t*, bf16); TEST_ST3 (vst3q, int32x4x3_t, int32_t*, s32); TEST_ST3 (vst3q, uint32x4x3_t, uint32_t*, u32); TEST_ST3 (vst3q, float32x4x3_t, float32_t*, f32); @@ -139,6 +142,66 @@ TEST_ST3 (vst3q, uint64x2x3_t, uint64_t*, u64); TEST_ST3 (vst3q, float64x2x3_t, float64_t*, f64); TEST_ST3 (vst3q, poly64x2x3_t, poly64_t*, p64); +#define TEST_STX_LANE(name, tbltype, ptrtype, ts) \ + void test_ ## name ## _ ## ts (ptrtype a, tbltype b) \ + { \ + name ## _ ## ts (a, b, 1); \ + } + +TEST_STX_LANE (vst4q_lane, int8x16x4_t, int8_t*, s8); +TEST_STX_LANE (vst4q_lane, uint8x16x4_t, uint8_t*, u8); +TEST_STX_LANE (vst4q_lane, poly8x16x4_t, poly8_t*, p8); +TEST_STX_LANE (vst4q_lane, int16x8x4_t, int16_t*, s16); +TEST_STX_LANE (vst4q_lane, uint16x8x4_t, uint16_t*, u16); +TEST_STX_LANE (vst4q_lane, poly16x8x4_t, poly16_t*, p16); +TEST_STX_LANE (vst4q_lane, float16x8x4_t, float16_t*, f16); +TEST_STX_LANE (vst4q_lane, bfloat16x8x4_t, bfloat16_t*, bf16); +TEST_STX_LANE (vst4q_lane, int32x4x4_t, int32_t*, s32); +TEST_STX_LANE (vst4q_lane, uint32x4x4_t, uint32_t*, u32); +TEST_STX_LANE (vst4q_lane, float32x4x4_t, float32_t*, f32); +TEST_STX_LANE (vst4q_lane, int64x2x4_t, int64_t*, s64); +TEST_STX_LANE (vst4q_lane, uint64x2x4_t, uint64_t*, u64); +TEST_STX_LANE (vst4q_lane, float64x2x4_t, float64_t*, f64); +TEST_STX_LANE (vst4q_lane, poly64x2x4_t, poly64_t*, p64); + +TEST_STX_LANE (vst2q_lane, int8x16x2_t, int8_t*, s8); +TEST_STX_LANE (vst2q_lane, uint8x16x2_t, uint8_t*, u8); +TEST_STX_LANE (vst2q_lane, poly8x16x2_t, poly8_t*, p8); +TEST_STX_LANE (vst2q_lane, int16x8x2_t, int16_t*, s16); +TEST_STX_LANE (vst2q_lane, uint16x8x2_t, uint16_t*, u16); +TEST_STX_LANE (vst2q_lane, poly16x8x2_t, poly16_t*, p16); +TEST_STX_LANE (vst2q_lane, float16x8x2_t, float16_t*, f16); +TEST_STX_LANE (vst2q_lane, bfloat16x8x2_t, bfloat16_t*, bf16); +TEST_STX_LANE (vst2q_lane, int32x4x2_t, int32_t*, s32); +TEST_STX_LANE (vst2q_lane, uint32x4x2_t, uint32_t*, u32); +TEST_STX_LANE (vst2q_lane, float32x4x2_t, float32_t*, f32); +TEST_STX_LANE (vst2q_lane, int64x2x2_t, int64_t*, s64); +TEST_STX_LANE (vst2q_lane, uint64x2x2_t, uint64_t*, u64); +TEST_STX_LANE (vst2q_lane, float64x2x2_t, float64_t*, f64); +TEST_STX_LANE (vst2q_lane, poly64x2x2_t, poly64_t*, p64); + +#define TEST_ST3_LANE(name, tbltype, ptrtype, ts) \ + void test_ ## name ## _ ## ts (ptrtype a, int8x8_t dummy, tbltype b) \ + { \ + name ## _ ## ts (a, b, 1); \ + } + +TEST_ST3_LANE (vst3q_lane, int8x16x3_t, int8_t*, s8); +TEST_ST3_LANE (vst3q_lane, uint8x16x3_t, uint8_t*, u8); +TEST_ST3_LANE (vst3q_lane, poly8x16x3_t, poly8_t*, p8); +TEST_ST3_LANE (vst3q_lane, int16x8x3_t, int16_t*, s16); +TEST_ST3_LANE (vst3q_lane, uint16x8x3_t, uint16_t*, u16); +TEST_ST3_LANE (vst3q_lane, poly16x8x3_t, poly16_t*, p16); +TEST_ST3_LANE (vst3q_lane, float16x8x3_t, float16_t*, f16); +TEST_ST3_LANE (vst3q_lane, bfloat16x8x3_t, bfloat16_t*, bf16); +TEST_ST3_LANE (vst3q_lane, int32x4x3_t, int32_t*, s32); +TEST_ST3_LANE (vst3q_lane, uint32x4x3_t, uint32_t*, u32); +TEST_ST3_LANE (vst3q_lane, float32x4x3_t, float32_t*, f32); +TEST_ST3_LANE (vst3q_lane, int64x2x3_t, int64_t*, s64); +TEST_ST3_LANE (vst3q_lane, uint64x2x3_t, uint64_t*, u64); +TEST_ST3_LANE (vst3q_lane, float64x2x3_t, float64_t*, f64); +TEST_ST3_LANE (vst3q_lane, poly64x2x3_t, poly64_t*, p64); + #define TEST_ST1xN(name, tbltype, ptrtype, ts, xn) \ void test_ ## name ## _ ## ts ## _ ## xn (ptrtype a, tbltype b) \ { \ @@ -152,6 +215,7 @@ TEST_ST1xN (vst1q, int16x8x4_t, int16_t*, s16, x4); TEST_ST1xN (vst1q, uint16x8x4_t, uint16_t*, u16, x4); TEST_ST1xN (vst1q, poly16x8x4_t, poly16_t*, p16, x4); TEST_ST1xN (vst1q, float16x8x4_t, float16_t*, f16, x4); +TEST_ST1xN (vst1q, bfloat16x8x4_t, bfloat16_t*, bf16, x4); TEST_ST1xN (vst1q, int32x4x4_t, int32_t*, s32, x4); TEST_ST1xN (vst1q, uint32x4x4_t, uint32_t*, u32, x4); TEST_ST1xN (vst1q, float32x4x4_t, float32_t*, f32, x4); @@ -167,6 +231,7 @@ TEST_ST1xN (vst1q, int16x8x2_t, int16_t*, s16, x2); TEST_ST1xN (vst1q, uint16x8x2_t, uint16_t*, u16, x2); TEST_ST1xN (vst1q, poly16x8x2_t, poly16_t*, p16, x2); TEST_ST1xN (vst1q, float16x8x2_t, float16_t*, f16, x2); +TEST_ST1xN (vst1q, bfloat16x8x2_t, bfloat16_t*, bf16, x2); TEST_ST1xN (vst1q, int32x4x2_t, int32_t*, s32, x2); TEST_ST1xN (vst1q, uint32x4x2_t, uint32_t*, u32, x2); TEST_ST1xN (vst1q, float32x4x2_t, float32_t*, f32, x2); @@ -189,6 +254,7 @@ TEST_ST1x3 (vst1q, int16x8x3_t, int16_t*, s16, x3); TEST_ST1x3 (vst1q, uint16x8x3_t, uint16_t*, u16, x3); TEST_ST1x3 (vst1q, poly16x8x3_t, poly16_t*, p16, x3); TEST_ST1x3 (vst1q, float16x8x3_t, float16_t*, f16, x3); +TEST_ST1x3 (vst1q, bfloat16x8x3_t, bfloat16_t*, bf16, x3); TEST_ST1x3 (vst1q, int32x4x3_t, int32_t*, s32, x3); TEST_ST1x3 (vst1q, uint32x4x3_t, uint32_t*, u32, x3); TEST_ST1x3 (vst1q, float32x4x3_t, float32_t*, f32, x3); @@ -201,7 +267,7 @@ TEST_ST1x3 (vst1q, float64x2x3_t, float64_t*, f64, x3); /* { dg-final { scan-assembler-times "tbl\\t" 18} } */ /* { dg-final { scan-assembler-times "tbx\\t" 18} } */ -/* { dg-final { scan-assembler-times "st4\\t" 14} } */ -/* { dg-final { scan-assembler-times "st3\\t" 14} } */ -/* { dg-final { scan-assembler-times "st2\\t" 14} } */ -/* { dg-final { scan-assembler-times "st1\\t" 42} } */ +/* { dg-final { scan-assembler-times "st4\\t" 30} } */ +/* { dg-final { scan-assembler-times "st3\\t" 30} } */ +/* { dg-final { scan-assembler-times "st2\\t" 30} } */ +/* { dg-final { scan-assembler-times "st1\\t" 45} } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pragma-optimize.c b/gcc/testsuite/gcc.target/powerpc/pragma-optimize.c new file mode 100644 index 0000000..e8ba63a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pragma-optimize.c @@ -0,0 +1,13 @@ +/* { dg-options "-O2 -mlong-double-128 -mabi=ibmlongdouble" } */ + +extern unsigned long int x; +extern float f (float); +extern __typeof (f) f_power8; +extern __typeof (f) f_power9; +extern __typeof (f) f __attribute__ ((ifunc ("f_ifunc"))); +static __attribute__ ((optimize (1))) __typeof (f) * +f_ifunc (void) +{ + __typeof (f) *res = x ? f_power9 : f_power8; + return res; +} diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 b/gcc/testsuite/gfortran.dg/vect/vect-8.f90 index cc1aebf..c8a7d89 100644 --- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 +++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90 @@ -704,7 +704,6 @@ CALL track('KERNEL ') RETURN END SUBROUTINE kernel -! { dg-final { scan-tree-dump-times "vectorized 24 loops" 1 "vect" { target aarch64_sve } } } -! { dg-final { scan-tree-dump-times "vectorized 23 loops" 1 "vect" { target { aarch64*-*-* && { ! aarch64_sve } } } } } +! { dg-final { scan-tree-dump-times "vectorized 24 loops" 1 "vect" { target aarch64*-*-* } } } ! { dg-final { scan-tree-dump-times "vectorized 2\[234\] loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } } ! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { { ! vect_intdouble_cvt } && { ! aarch64*-*-* } } } } } diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index b6f7828..e061baa 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -404,8 +404,7 @@ print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects, /* Dump function for a DATA_DEPENDENCE_RELATION structure. */ DEBUG_FUNCTION void -dump_data_dependence_relation (FILE *outf, - struct data_dependence_relation *ddr) +dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr) { struct data_reference *dra, *drb; @@ -479,7 +478,7 @@ dump_data_dependence_relation (FILE *outf, /* Debug version. */ DEBUG_FUNCTION void -debug_data_dependence_relation (struct data_dependence_relation *ddr) +debug_data_dependence_relation (const struct data_dependence_relation *ddr) { dump_data_dependence_relation (stderr, ddr); } @@ -487,10 +486,9 @@ debug_data_dependence_relation (struct data_dependence_relation *ddr) /* Dump into FILE all the dependence relations from DDRS. */ DEBUG_FUNCTION void -dump_data_dependence_relations (FILE *file, - vec<ddr_p> ddrs) +dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs) { - for (data_dependence_relation *ddr : ddrs) + for (auto ddr : ddrs) dump_data_dependence_relation (file, ddr); } diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index de45f25..685f33d 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -528,8 +528,8 @@ extern void debug_data_reference (struct data_reference *); extern void debug_data_references (vec<data_reference_p> ); extern void debug (vec<data_reference_p> &ref); extern void debug (vec<data_reference_p> *ptr); -extern void debug_data_dependence_relation (struct data_dependence_relation *); -extern void dump_data_dependence_relations (FILE *, vec<ddr_p> ); +extern void debug_data_dependence_relation (const data_dependence_relation *); +extern void dump_data_dependence_relations (FILE *, const vec<ddr_p> &); extern void debug (vec<ddr_p> &ref); extern void debug (vec<ddr_p> *ptr); extern void debug_data_dependence_relations (vec<ddr_p> ); diff --git a/gcc/tree-predcom.c b/gcc/tree-predcom.c index bed30d2..6b195d1 100644 --- a/gcc/tree-predcom.c +++ b/gcc/tree-predcom.c @@ -639,9 +639,8 @@ dump_chain (FILE *file, chain_p chain) /* Dumps CHAINS to FILE. */ -extern void dump_chains (FILE *, vec<chain_p> ); void -dump_chains (FILE *file, vec<chain_p> chains) +dump_chains (FILE *file, const vec<chain_p> &chains) { chain_p chain; unsigned i; @@ -2049,7 +2048,7 @@ finalize_eliminated_stores (class loop *loop, chain_p chain) static void initialize_root_vars_lm (class loop *loop, dref root, bool written, - vec<tree> *vars, vec<tree> inits, + vec<tree> *vars, const vec<tree> &inits, bitmap tmp_vars) { unsigned i; @@ -2324,7 +2323,7 @@ pcom_worker::execute_pred_commoning_chain (chain_p chain, optimized. */ static unsigned -determine_unroll_factor (vec<chain_p> chains) +determine_unroll_factor (const vec<chain_p> &chains) { chain_p chain; unsigned factor = 1, af, nfactor, i; @@ -2401,7 +2400,7 @@ pcom_worker::execute_pred_commoning (bitmap tmp_vars) phi node, record the ssa name that is defined by it. */ static void -replace_phis_by_defined_names (vec<chain_p> chains) +replace_phis_by_defined_names (vec<chain_p> &chains) { chain_p chain; dref a; @@ -3276,7 +3275,7 @@ pcom_worker::prepare_finalizers () /* Insert all initializing gimple stmts into LOOP's entry edge. */ static void -insert_init_seqs (class loop *loop, vec<chain_p> chains) +insert_init_seqs (class loop *loop, vec<chain_p> &chains) { unsigned i; edge entry = loop_preheader_edge (loop); @@ -3387,7 +3386,7 @@ pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p) fprintf (dump_file, "Unrolling %u times.\n", unroll_factor); dta.tmp_vars = tmp_vars; - dta.chains = m_chains; + dta.chains = m_chains.to_vec_legacy (); dta.worker = this; /* Cfg manipulations performed in tree_transform_and_unroll_loop before diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c index d2a7395..ebe95cc 100644 --- a/gcc/tree-ssa-pre.c +++ b/gcc/tree-ssa-pre.c @@ -3107,7 +3107,7 @@ create_expression_by_pieces (basic_block block, pre_expr expr, static bool insert_into_preds_of_block (basic_block block, unsigned int exprnum, - vec<pre_expr> avail) + vec<pre_expr> &avail) { pre_expr expr = expression_for_id (exprnum); pre_expr newphi; diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c index 91ce443..e237eb4 100644 --- a/gcc/tree-ssa-threadbackward.c +++ b/gcc/tree-ssa-threadbackward.c @@ -92,7 +92,7 @@ public: private: void maybe_register_path (edge taken_edge); bool find_paths_to_names (basic_block bb, bitmap imports); - bool resolve_def (tree name, bitmap interesting, vec<tree> worklist); + bool resolve_def (tree name, bitmap interesting, vec<tree> &worklist); bool resolve_phi (gphi *phi, bitmap imports); edge find_taken_edge (const vec<basic_block> &path); edge find_taken_edge_cond (const vec<basic_block> &path, gcond *); @@ -240,7 +240,7 @@ back_threader::find_taken_edge_cond (const vec<basic_block> &path, // Populate a vector of trees from a bitmap. static inline void -populate_worklist (vec<tree> worklist, bitmap bits) +populate_worklist (vec<tree> &worklist, bitmap bits) { bitmap_iterator bi; unsigned i; @@ -317,7 +317,7 @@ back_threader::resolve_phi (gphi *phi, bitmap interesting) // current path to be constant, register the path, and return TRUE. bool -back_threader::resolve_def (tree name, bitmap interesting, vec<tree> worklist) +back_threader::resolve_def (tree name, bitmap interesting, vec<tree> &worklist) { gimple *def_stmt = SSA_NAME_DEF_STMT (name); diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 17d24b4..d594c0a 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -212,7 +212,7 @@ vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo) static void vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value) { - vec<tree> checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo); + const vec<tree> &checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo); for (unsigned int i = 0; i < checks.length(); ++i) if (checks[i] == value) return; @@ -2349,8 +2349,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) if (do_versioning) { - vec<stmt_vec_info> may_misalign_stmts - = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); + const vec<stmt_vec_info> &may_misalign_stmts + = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); stmt_vec_info stmt_info; /* It can now be assumed that the data references in the statements @@ -3364,7 +3364,8 @@ static void vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p, poly_uint64 min_value) { - vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo); + vec<vec_lower_bound> &lower_bounds + = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo); for (unsigned int i = 0; i < lower_bounds.length (); ++i) if (operand_equal_p (lower_bounds[i].expr, expr, 0)) { @@ -3466,10 +3467,10 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash; hash_set <tree_pair_hash> compared_objects; - vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); + const vec<ddr_p> &may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); vec<dr_with_seg_len_pair_t> &comp_alias_ddrs = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo); - vec<vec_object_pair> &check_unequal_addrs + const vec<vec_object_pair> &check_unequal_addrs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo); poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo); @@ -5350,7 +5351,7 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count, I4: 6 14 22 30 7 15 23 31. */ void -vect_permute_store_chain (vec_info *vinfo, vec<tree> dr_chain, +vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain, unsigned int length, stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c index ad209de..b8d09b7 100644 --- a/gcc/tree-vect-slp-patterns.c +++ b/gcc/tree-vect-slp-patterns.c @@ -746,7 +746,7 @@ vect_match_call_complex_mla (slp_tree node, unsigned child, of the negate node. */ static inline bool -vect_normalize_conj_loc (vec<slp_tree> args, bool *neg_first_p = NULL) +vect_normalize_conj_loc (vec<slp_tree> &args, bool *neg_first_p = NULL) { gcc_assert (args.length () == 2); bool neg_found = false; diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index e6b81a0..94bdb74 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -4499,7 +4499,7 @@ static void vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, int multi_step_cvt, stmt_vec_info stmt_info, - vec<tree> vec_dsts, + vec<tree> &vec_dsts, gimple_stmt_iterator *gsi, slp_tree slp_node, enum tree_code code) { diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 686644b4..5571b3c 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1990,8 +1990,8 @@ extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); -extern void vect_permute_store_chain (vec_info *, - vec<tree> ,unsigned int, stmt_vec_info, +extern void vect_permute_store_chain (vec_info *, vec<tree> &, + unsigned int, stmt_vec_info, gimple_stmt_iterator *, vec<tree> *); extern tree vect_setup_realignment (vec_info *, stmt_vec_info, gimple_stmt_iterator *, @@ -38,16 +38,6 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-core.h" #endif -/* vNULL is an empty type with a template cast operation that returns - a zero-initialized vec<T, A, L> instance. Use this when you want - to assign nil values to new vec instances or pass a nil vector as - a function call argument. - - We use this technique because vec<T, A, L> must be PODs (they are - stored in unions and passed in vararg functions), this means that - they cannot have ctors/dtors. */ -vnull vNULL; - /* Vector memory usage. */ class vec_usage: public mem_usage { @@ -282,6 +272,42 @@ safe_push_range (vec <int>&v, int start, int limit) v.safe_push (i); } +/* Verify forms of initialization. */ + +static void +test_init () +{ + { + vec<int> v1{ }; + ASSERT_EQ (0, v1.length ()); + + vec<int> v2 (v1); + ASSERT_EQ (0, v2.length ()); + } + + { + vec<int> v1 = vec<int>(); + ASSERT_EQ (0, v1.length ()); + + vec<int> v2 = v1; + ASSERT_EQ (0, v2.length ()); + } + + { + vec<int> v1 (vNULL); + ASSERT_EQ (0, v1.length ()); + v1.safe_push (1); + + vec<int> v2 (v1); + ASSERT_EQ (1, v1.length ()); + v2.safe_push (1); + + ASSERT_EQ (2, v1.length ()); + ASSERT_EQ (2, v2.length ()); + v1.release (); + } +} + /* Verify that vec::quick_push works correctly. */ static void @@ -547,6 +573,7 @@ test_auto_delete_vec () void vec_c_tests () { + test_init (); test_quick_push (); test_safe_push (); test_truncate (); @@ -541,18 +541,16 @@ vec_copy_construct (T *dst, const T *src, unsigned n) ::new (static_cast<void*>(dst)) T (*src); } -/* Type to provide NULL values for vec<T, A, L>. This is used to - provide nil initializers for vec instances. Since vec must be - a POD, we cannot have proper ctor/dtor for it. To initialize - a vec instance, you can assign it the value vNULL. This isn't - needed for file-scope and function-local static vectors, which - are zero-initialized by default. */ -struct vnull -{ - template <typename T, typename A, typename L> - CONSTEXPR operator vec<T, A, L> () const { return vec<T, A, L>(); } -}; -extern vnull vNULL; +/* Type to provide zero-initialized values for vec<T, A, L>. This is + used to provide nil initializers for vec instances. Since vec must + be a trivially copyable type that can be copied by memcpy and zeroed + out by memset, it must have defaulted default and copy ctor and copy + assignment. To initialize a vec either use value initialization + (e.g., vec() or vec v{ };) or assign it the value vNULL. This isn't + needed for file-scope and function-local static vectors, which are + zero-initialized by default. */ +struct vnull { }; +constexpr vnull vNULL{ }; /* Embeddable vector. These vectors are suitable to be embedded @@ -1431,10 +1429,34 @@ gt_pch_nx (vec<T, A, vl_embed> *v, gt_pointer_operator op, void *cookie) As long as we use C++03, we cannot have constructors nor destructors in classes that are stored in unions. */ +template<typename T, size_t N = 0> +class auto_vec; + template<typename T> struct vec<T, va_heap, vl_ptr> { public: + /* Default ctors to ensure triviality. Use value-initialization + (e.g., vec() or vec v{ };) or vNULL to create a zero-initialized + instance. */ + vec () = default; + vec (const vec &) = default; + /* Initialization from the generic vNULL. */ + vec (vnull): m_vec () { } + /* Same as default ctor: vec storage must be released manually. */ + ~vec () = default; + + /* Defaulted same as copy ctor. */ + vec& operator= (const vec &) = default; + + /* Prevent implicit conversion from auto_vec. Use auto_vec::to_vec() + instead. */ + template <size_t N> + vec (auto_vec<T, N> &) = delete; + + template <size_t N> + void operator= (auto_vec<T, N> &) = delete; + /* Memory allocation and deallocation for the embedded vector. Needed because we cannot have proper ctors/dtors defined. */ void create (unsigned nelems CXX_MEM_STAT_INFO); @@ -1522,7 +1544,7 @@ public: want to ask for internal storage for vectors on the stack because if the size of the vector is larger than the internal storage that space is wasted. */ -template<typename T, size_t N = 0> +template<typename T, size_t N /* = 0 */> class auto_vec : public vec<T, va_heap> { public: @@ -1549,6 +1571,14 @@ public: this->release (); } + /* Explicitly convert to the base class. There is no conversion + from a const auto_vec because a copy of the returned vec can + be used to modify *THIS. + This is a legacy function not to be used in new code. */ + vec<T, va_heap> to_vec_legacy () { + return *static_cast<vec<T, va_heap> *>(this); + } + private: vec<T, va_heap, vl_embed> m_auto; T m_data[MAX (N - 1, 1)]; @@ -1602,6 +1632,14 @@ public: return *this; } + /* Explicitly convert to the base class. There is no conversion + from a const auto_vec because a copy of the returned vec can + be used to modify *THIS. + This is a legacy function not to be used in new code. */ + vec<T, va_heap> to_vec_legacy () { + return *static_cast<vec<T, va_heap> *>(this); + } + // You probably don't want to copy a vector, so these are deleted to prevent // unintentional use. If you really need a copy of the vectors contents you // can use copy (). @@ -1781,7 +1819,7 @@ template<typename T> inline vec<T, va_heap, vl_ptr> vec<T, va_heap, vl_ptr>::copy (ALONE_MEM_STAT_DECL) const { - vec<T, va_heap, vl_ptr> new_vec = vNULL; + vec<T, va_heap, vl_ptr> new_vec{ }; if (length ()) new_vec.m_vec = m_vec->copy (ALONE_PASS_MEM_STAT); return new_vec; diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 7b9896f..518dbdc 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,14 @@ +2021-08-05 Jakub Jelinek <jakub@redhat.com> + + PR c++/100977 + * ucnid.h: Regenerated using Unicode 13.0.0 files. + +2021-08-05 Jakub Jelinek <jakub@redhat.com> + + PR c++/100977 + * makeucnid.c (write_table): Fix computation of last_combine. + * ucnid.h: Regenerated using Unicode 6.3.0 files. + 2021-06-16 Jason Merrill <jason@redhat.com> PR c++/100796 diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 6845f39..de145fa 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,10 @@ +2021-08-05 Jakub Jelinek <jakub@redhat.com> + + * config/t-slibgcc (SHLIB_LINK): Add $(LDFLAGS). + * config/t-slibgcc-darwin (SHLIB_LINK): Likewise. + * config/t-slibgcc-vms (SHLIB_LINK): Likewise. + * config/t-slibgcc-fuchsia (SHLIB_LDFLAGS): Remove $(LDFLAGS). + 2021-08-04 Jakub Jelinek <jakub@redhat.com> * config/t-slibgcc-fuchsia: Undo doubly applied patch. diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in index 2c8be56..7ec9758 100644 --- a/libgcc/Makefile.in +++ b/libgcc/Makefile.in @@ -908,7 +908,7 @@ LIBGCOV_INTERFACE = _gcov_dump _gcov_fork \ _gcov_execl _gcov_execlp \ _gcov_execle _gcov_execv _gcov_execvp _gcov_execve _gcov_reset \ _gcov_lock_unlock -LIBGCOV_DRIVER = _gcov +LIBGCOV_DRIVER = _gcov _gcov_info_to_gcda libgcov-merge-objects = $(patsubst %,%$(objext),$(LIBGCOV_MERGE)) libgcov-profiler-objects = $(patsubst %,%$(objext),$(LIBGCOV_PROFILER)) diff --git a/libgcc/gcov.h b/libgcc/gcov.h index e6492cd..66d03bf 100644 --- a/libgcc/gcov.h +++ b/libgcc/gcov.h @@ -25,6 +25,8 @@ #ifndef GCC_GCOV_H #define GCC_GCOV_H +struct gcov_info; + /* Set all counters to zero. */ extern void __gcov_reset (void); @@ -33,4 +35,21 @@ extern void __gcov_reset (void); extern void __gcov_dump (void); +/* Convert the gcov information referenced by INFO to a gcda data stream. + The FILENAME_FN callback is called exactly once with the filename associated + with the gcov information. The filename may be NULL. Afterwards, the + DUMP_FN callback is subsequently called with chunks (the begin and length of + the chunk are passed as the first two callback parameters) of the gcda data + stream. The ALLOCATE_FN callback shall allocate memory with a size in + characters specified by the first callback parameter. The ARG parameter is + a user-provided argument passed as the last argument to the callback + functions. */ + +extern void +__gcov_info_to_gcda (const struct gcov_info *__info, + void (*__filename_fn) (const char *, void *), + void (*__dump_fn) (const void *, unsigned, void *), + void *(*__allocate_fn) (unsigned, void *), + void *__arg); + #endif /* GCC_GCOV_H */ diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c index df7ccb2..087f71e 100644 --- a/libgcc/libgcov-driver.c +++ b/libgcc/libgcov-driver.c @@ -26,6 +26,18 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #include "libgcov.h" #include "gcov-io.h" +/* Return 1, if all counter values are zero, otherwise 0. */ + +static inline int +are_all_counters_zero (const struct gcov_ctr_info *ci_ptr) +{ + for (unsigned i = 0; i < ci_ptr->num; i++) + if (ci_ptr->values[i] != 0) + return 0; + + return 1; +} + #if defined(inhibit_libc) /* If libc and its header files are not available, provide dummy functions. */ @@ -35,8 +47,6 @@ void __gcov_init (struct gcov_info *p __attribute__ ((unused))) {} #else /* inhibit_libc */ -#include <string.h> - #if GCOV_LOCKED #include <fcntl.h> #include <errno.h> @@ -51,8 +61,17 @@ void __gcov_init (struct gcov_info *p __attribute__ ((unused))) {} #include <sys/mman.h> #endif -#ifdef L_gcov +#endif /* inhibit_libc */ + +#if defined(L_gcov) && !defined(inhibit_libc) +#define NEED_L_GCOV +#endif + +#if defined(L_gcov_info_to_gcda) && !IN_GCOV_TOOL +#define NEED_L_GCOV_INFO_TO_GCDA +#endif +#ifdef NEED_L_GCOV /* A utility function for outputting errors. */ static int gcov_error (const char *, ...); @@ -343,6 +362,51 @@ read_error: return -1; } +/* Write the DATA of LENGTH characters to the gcov file. */ + +static void +gcov_dump_handler (const void *data, + unsigned length, + void *arg ATTRIBUTE_UNUSED) +{ + gcov_write (data, length); +} + +/* Allocate SIZE characters and return the address of the allocated memory. */ + +static void * +gcov_allocate_handler (unsigned size, void *arg ATTRIBUTE_UNUSED) +{ + return xmalloc (size); +} +#endif /* NEED_L_GCOV */ + +#if defined(NEED_L_GCOV) || defined(NEED_L_GCOV_INFO_TO_GCDA) +/* Dump the WORD using the DUMP handler called with ARG. */ + +static inline void +dump_unsigned (gcov_unsigned_t word, + void (*dump_fn) (const void *, unsigned, void *), + void *arg) +{ + (*dump_fn) (&word, sizeof (word), arg); +} + +/* Dump the COUNTER using the DUMP handler called with ARG. */ + +static inline void +dump_counter (gcov_type counter, + void (*dump_fn) (const void *, unsigned, void *), + void *arg) +{ + dump_unsigned ((gcov_unsigned_t)counter, dump_fn, arg); + + if (sizeof (counter) > sizeof (gcov_unsigned_t)) + dump_unsigned ((gcov_unsigned_t)(counter >> 32), dump_fn, arg); + else + dump_unsigned (0, dump_fn, arg); +} + #define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) /* Store all TOP N counters where each has a dynamic length. */ @@ -350,7 +414,10 @@ read_error: static void write_topn_counters (const struct gcov_ctr_info *ci_ptr, unsigned t_ix, - gcov_unsigned_t n_counts) + gcov_unsigned_t n_counts, + void (*dump_fn) (const void *, unsigned, void *), + void *(*allocate_fn)(unsigned, void *), + void *arg) { unsigned counters = n_counts / GCOV_TOPN_MEM_COUNTERS; gcc_assert (n_counts % GCOV_TOPN_MEM_COUNTERS == 0); @@ -365,46 +432,49 @@ write_topn_counters (const struct gcov_ctr_info *ci_ptr, if (list_sizes == NULL || counters > list_size_length) { list_size_length = MAX (LIST_SIZE_MIN_LENGTH, 2 * counters); -#if HAVE_SYS_MMAN_H +#if !defined(inhibit_libc) && HAVE_SYS_MMAN_H list_sizes = (unsigned *)malloc_mmap (list_size_length * sizeof (unsigned)); #endif /* Malloc fallback. */ if (list_sizes == NULL) - list_sizes = (unsigned *)xmalloc (list_size_length * sizeof (unsigned)); + list_sizes = + (unsigned *)(*allocate_fn) (list_size_length * sizeof (unsigned), + arg); } - memset (list_sizes, 0, counters * sizeof (unsigned)); unsigned pair_total = 0; for (unsigned i = 0; i < counters; i++) { gcov_type start = ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i + 2]; - for (struct gcov_kvp *node = (struct gcov_kvp *)(intptr_t)start; + unsigned sizes = 0; + + for (struct gcov_kvp *node = (struct gcov_kvp *)(__INTPTR_TYPE__)start; node != NULL; node = node->next) - { - ++pair_total; - ++list_sizes[i]; - } + ++sizes; + + pair_total += sizes; + list_sizes[i] = sizes; } unsigned disk_size = GCOV_TOPN_DISK_COUNTERS * counters + 2 * pair_total; - gcov_write_tag_length (GCOV_TAG_FOR_COUNTER (t_ix), - GCOV_TAG_COUNTER_LENGTH (disk_size)); + dump_unsigned (GCOV_TAG_FOR_COUNTER (t_ix), dump_fn, arg), + dump_unsigned (GCOV_TAG_COUNTER_LENGTH (disk_size), dump_fn, arg); for (unsigned i = 0; i < counters; i++) { - gcov_write_counter (ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i]); - gcov_write_counter (list_sizes[i]); + dump_counter (ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i], dump_fn, arg); + dump_counter (list_sizes[i], dump_fn, arg); gcov_type start = ci_ptr->values[GCOV_TOPN_MEM_COUNTERS * i + 2]; unsigned j = 0; - for (struct gcov_kvp *node = (struct gcov_kvp *)(intptr_t)start; + for (struct gcov_kvp *node = (struct gcov_kvp *)(__INTPTR_TYPE__)start; j < list_sizes[i]; node = node->next, j++) { - gcov_write_counter (node->value); - gcov_write_counter (node->count); + dump_counter (node->value, dump_fn, arg); + dump_counter (node->count, dump_fn, arg); } } } @@ -415,25 +485,34 @@ write_topn_counters (const struct gcov_ctr_info *ci_ptr, static void write_one_data (const struct gcov_info *gi_ptr, - const struct gcov_summary *prg_p) + const struct gcov_summary *prg_p ATTRIBUTE_UNUSED, + void (*dump_fn) (const void *, unsigned, void *), + void *(*allocate_fn) (unsigned, void *), + void *arg) { unsigned f_ix; - gcov_write_tag_length (GCOV_DATA_MAGIC, GCOV_VERSION); - gcov_write_unsigned (gi_ptr->stamp); + dump_unsigned (GCOV_DATA_MAGIC, dump_fn, arg); + dump_unsigned (GCOV_VERSION, dump_fn, arg); + dump_unsigned (gi_ptr->stamp, dump_fn, arg); +#ifdef NEED_L_GCOV /* Generate whole program statistics. */ gcov_write_summary (GCOV_TAG_OBJECT_SUMMARY, prg_p); +#endif /* Write execution counts for each function. */ for (f_ix = 0; f_ix != gi_ptr->n_functions; f_ix++) { +#ifdef NEED_L_GCOV unsigned buffered = 0; +#endif const struct gcov_fn_info *gfi_ptr; const struct gcov_ctr_info *ci_ptr; gcov_unsigned_t length; unsigned t_ix; +#ifdef NEED_L_GCOV if (fn_buffer && fn_buffer->fn_ix == f_ix) { /* Buffered data from another program. */ @@ -442,6 +521,7 @@ write_one_data (const struct gcov_info *gi_ptr, length = GCOV_TAG_FUNCTION_LENGTH; } else +#endif { gfi_ptr = gi_ptr->functions[f_ix]; if (gfi_ptr && gfi_ptr->key == gi_ptr) @@ -450,13 +530,14 @@ write_one_data (const struct gcov_info *gi_ptr, length = 0; } - gcov_write_tag_length (GCOV_TAG_FUNCTION, length); + dump_unsigned (GCOV_TAG_FUNCTION, dump_fn, arg); + dump_unsigned (length, dump_fn, arg); if (!length) continue; - gcov_write_unsigned (gfi_ptr->ident); - gcov_write_unsigned (gfi_ptr->lineno_checksum); - gcov_write_unsigned (gfi_ptr->cfg_checksum); + dump_unsigned (gfi_ptr->ident, dump_fn, arg); + dump_unsigned (gfi_ptr->lineno_checksum, dump_fn, arg); + dump_unsigned (gfi_ptr->cfg_checksum, dump_fn, arg); ci_ptr = gfi_ptr->ctrs; for (t_ix = 0; t_ix < GCOV_COUNTERS; t_ix++) @@ -469,39 +550,37 @@ write_one_data (const struct gcov_info *gi_ptr, n_counts = ci_ptr->num; if (t_ix == GCOV_COUNTER_V_TOPN || t_ix == GCOV_COUNTER_V_INDIR) - write_topn_counters (ci_ptr, t_ix, n_counts); + write_topn_counters (ci_ptr, t_ix, n_counts, dump_fn, allocate_fn, + arg); else { - /* Do not stream when all counters are zero. */ - int all_zeros = 1; - for (unsigned i = 0; i < n_counts; i++) - if (ci_ptr->values[i] != 0) - { - all_zeros = 0; - break; - } - - if (all_zeros) - gcov_write_tag_length (GCOV_TAG_FOR_COUNTER (t_ix), - GCOV_TAG_COUNTER_LENGTH (-n_counts)); + dump_unsigned (GCOV_TAG_FOR_COUNTER (t_ix), dump_fn, arg); + if (are_all_counters_zero (ci_ptr)) + /* Do not stream when all counters are zero. */ + dump_unsigned (GCOV_TAG_COUNTER_LENGTH (-n_counts), + dump_fn, arg); else { - gcov_write_tag_length (GCOV_TAG_FOR_COUNTER (t_ix), - GCOV_TAG_COUNTER_LENGTH (n_counts)); + dump_unsigned (GCOV_TAG_COUNTER_LENGTH (n_counts), + dump_fn, arg); for (unsigned i = 0; i < n_counts; i++) - gcov_write_counter (ci_ptr->values[i]); + dump_counter (ci_ptr->values[i], dump_fn, arg); } } ci_ptr++; } +#ifdef NEED_L_GCOV if (buffered) fn_buffer = free_fn_data (gi_ptr, fn_buffer, GCOV_COUNTERS); +#endif } - gcov_write_unsigned (0); + dump_unsigned (0, dump_fn, arg); } +#endif /* NEED_L_GCOV || NEED_L_GCOV_INFO_TO_GCDA */ +#ifdef NEED_L_GCOV /* Dump the coverage counts for one gcov_info object. We merge with existing counts when possible, to avoid growing the .da files ad infinitum. We use this program's checksum to make sure we only accumulate whole program @@ -550,7 +629,8 @@ dump_one_gcov (struct gcov_info *gi_ptr, struct gcov_filename *gf, summary = gi_ptr->summary; #endif - write_one_data (gi_ptr, &summary); + write_one_data (gi_ptr, &summary, gcov_dump_handler, gcov_allocate_handler, + NULL); /* fall through */ read_fatal:; @@ -680,5 +760,20 @@ __gcov_init (struct gcov_info *info) } } #endif /* !IN_GCOV_TOOL */ -#endif /* L_gcov */ -#endif /* inhibit_libc */ +#endif /* NEED_L_GCOV */ + +#ifdef NEED_L_GCOV_INFO_TO_GCDA +/* Convert the gcov info to a gcda data stream. It is intended for + free-standing environments which do not support the C library file I/O. */ + +void +__gcov_info_to_gcda (const struct gcov_info *gi_ptr, + void (*filename_fn) (const char *, void *), + void (*dump_fn) (const void *, unsigned, void *), + void *(*allocate_fn) (unsigned, void *), + void *arg) +{ + (*filename_fn) (gi_ptr->filename, arg); + write_one_data (gi_ptr, NULL, dump_fn, allocate_fn, arg); +} +#endif /* NEED_L_GCOV_INFO_TO_GCDA */ diff --git a/libgcc/libgcov.h b/libgcc/libgcov.h index 8d323db..9c53725 100644 --- a/libgcc/libgcov.h +++ b/libgcc/libgcov.h @@ -114,13 +114,11 @@ typedef unsigned gcov_type_unsigned __attribute__ ((mode (QI))); #define gcov_var __gcov_var #define gcov_open __gcov_open #define gcov_close __gcov_close -#define gcov_write_tag_length __gcov_write_tag_length #define gcov_position __gcov_position #define gcov_seek __gcov_seek #define gcov_rewrite __gcov_rewrite #define gcov_is_error __gcov_is_error #define gcov_write_unsigned __gcov_write_unsigned -#define gcov_write_counter __gcov_write_counter #define gcov_write_summary __gcov_write_summary #define gcov_read_unsigned __gcov_read_unsigned #define gcov_read_counter __gcov_read_counter @@ -345,9 +343,6 @@ extern int __gcov_execve (const char *, char *const [], char *const []) /* Functions that only available in libgcov. */ GCOV_LINKAGE int gcov_open (const char */*name*/) ATTRIBUTE_HIDDEN; -GCOV_LINKAGE void gcov_write_counter (gcov_type) ATTRIBUTE_HIDDEN; -GCOV_LINKAGE void gcov_write_tag_length (gcov_unsigned_t, gcov_unsigned_t) - ATTRIBUTE_HIDDEN; GCOV_LINKAGE void gcov_write_summary (gcov_unsigned_t /*tag*/, const struct gcov_summary *) ATTRIBUTE_HIDDEN; diff --git a/libgo/go/runtime/internal/atomic/atomic.c b/libgo/go/runtime/internal/atomic/atomic.c index 569e56e..b5a0940 100644 --- a/libgo/go/runtime/internal/atomic/atomic.c +++ b/libgo/go/runtime/internal/atomic/atomic.c @@ -104,6 +104,16 @@ Loaduint (uintgo *ptr) return __atomic_load_n (ptr, __ATOMIC_SEQ_CST); } +int32_t Loadint32 (int32_t *ptr) + __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Loadint32") + __attribute__ ((no_split_stack)); + +int32_t +Loadint32 (int32_t *ptr) +{ + return __atomic_load_n (ptr, __ATOMIC_SEQ_CST); +} + int64_t Loadint64 (int64_t *ptr) __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Loadint64") __attribute__ ((no_split_stack)); @@ -126,6 +136,16 @@ Xadd (uint32_t *ptr, int32_t delta) return __atomic_add_fetch (ptr, (uint32_t) delta, __ATOMIC_SEQ_CST); } +int32_t Xaddint32 (int32_t *ptr, int32_t delta) + __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Xaddint32") + __attribute__ ((no_split_stack)); + +int32_t +Xaddint32 (int32_t *ptr, int32_t delta) +{ + return __atomic_add_fetch (ptr, delta, __ATOMIC_SEQ_CST); +} + uint64_t Xadd64 (uint64_t *ptr, int64_t delta) __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Xadd64") __attribute__ ((no_split_stack)); @@ -170,6 +190,16 @@ Xchg (uint32_t *ptr, uint32_t new) return __atomic_exchange_n (ptr, new, __ATOMIC_SEQ_CST); } +int32_t Xchgint32 (int32_t *ptr, int32_t new) + __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Xchgint32") + __attribute__ ((no_split_stack)); + +int32_t +Xchgint32 (int32_t *ptr, int32_t new) +{ + return __atomic_exchange_n (ptr, new, __ATOMIC_SEQ_CST); +} + uint64_t Xchg64 (uint64_t *ptr, uint64_t new) __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Xchg64") __attribute__ ((no_split_stack)); @@ -182,6 +212,16 @@ Xchg64 (uint64_t *ptr, uint64_t new) return __atomic_exchange_n (ptr, new, __ATOMIC_SEQ_CST); } +int64_t Xchgint64 (int64_t *ptr, int64_t new) + __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Xchgint64") + __attribute__ ((no_split_stack)); + +int64_t +Xchgint64 (int64_t *ptr, int64_t new) +{ + return __atomic_exchange_n (ptr, new, __ATOMIC_SEQ_CST); +} + uintptr_t Xchguintptr (uintptr_t *ptr, uintptr_t new) __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Xchguintptr") __attribute__ ((no_split_stack)); @@ -264,6 +304,26 @@ CasRel (uint32_t *ptr, uint32_t old, uint32_t new) return __atomic_compare_exchange_n (ptr, &old, new, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED); } +_Bool Casint32 (int32_t *ptr, int32_t old, int32_t new) + __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Casint32") + __attribute__ ((no_split_stack)); + +_Bool +Casint32 (int32_t *ptr, int32_t old, int32_t new) +{ + return __atomic_compare_exchange_n (ptr, &old, new, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); +} + +_Bool Casint64 (int64_t *ptr, int64_t old, int64_t new) + __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Casint64") + __attribute__ ((no_split_stack)); + +_Bool +Casint64 (int64_t *ptr, int64_t old, int64_t new) +{ + return __atomic_compare_exchange_n (ptr, &old, new, false, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); +} + _Bool Casp1 (void **ptr, void *old, void *new) __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Casp1") __attribute__ ((no_split_stack)); @@ -304,6 +364,16 @@ Store8 (uint8_t *ptr, uint8_t val) __atomic_store_n (ptr, val, __ATOMIC_SEQ_CST); } +void Storeint32 (int32_t *ptr, int32_t val) + __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Storeint32") + __attribute__ ((no_split_stack)); + +void +Storeint32 (int32_t *ptr, int32_t val) +{ + __atomic_store_n (ptr, val, __ATOMIC_SEQ_CST); +} + void Store64 (uint64_t *ptr, uint64_t val) __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Store64") __attribute__ ((no_split_stack)); @@ -338,6 +408,16 @@ StoreRel64 (uint64_t *ptr, uint64_t val) __atomic_store_n (ptr, val, __ATOMIC_RELEASE); } +void Storeint64 (int64_t *ptr, int64_t val) + __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.Storeint64") + __attribute__ ((no_split_stack)); + +void +Storeint64 (int64_t *ptr, int64_t val) +{ + __atomic_store_n (ptr, val, __ATOMIC_SEQ_CST); +} + void StoreReluintptr (uintptr_t *ptr, uintptr_t val) __asm__ (GOSYM_PREFIX "runtime_1internal_1atomic.StoreReluintptr") __attribute__ ((no_split_stack)); diff --git a/libgo/go/runtime/internal/atomic/stubs.go b/libgo/go/runtime/internal/atomic/stubs.go index 62e30d1..e7544ba 100644 --- a/libgo/go/runtime/internal/atomic/stubs.go +++ b/libgo/go/runtime/internal/atomic/stubs.go @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. +//go:build !wasm // +build !wasm package atomic @@ -15,9 +16,21 @@ func Cas(ptr *uint32, old, new uint32) bool func Casp1(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool //go:noescape +func Casint32(ptr *int32, old, new int32) bool + +//go:noescape +func Casint64(ptr *int64, old, new int64) bool + +//go:noescape func Casuintptr(ptr *uintptr, old, new uintptr) bool //go:noescape +func Storeint32(ptr *int32, new int32) + +//go:noescape +func Storeint64(ptr *int64, new int64) + +//go:noescape func Storeuintptr(ptr *uintptr, new uintptr) //go:noescape @@ -29,7 +42,19 @@ func Loaduint(ptr *uint) uint // TODO(matloob): Should these functions have the go:noescape annotation? //go:noescape +func Loadint32(ptr *int32) int32 + +//go:noescape func Loadint64(ptr *int64) int64 //go:noescape +func Xaddint32(ptr *int32, delta int32) int32 + +//go:noescape func Xaddint64(ptr *int64, delta int64) int64 + +//go:noescape +func Xchgint32(ptr *int32, new int32) int32 + +//go:noescape +func Xchgint64(ptr *int64, new int64) int64 diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 3cf5ef4..274d238 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,34 @@ +2021-08-05 Chung-Lin Tang <cltang@codesourcery.com> + + * icv-device.c (omp_get_device_num): New API function, host side. + * fortran.c (omp_get_device_num_): New interface function. + * libgomp-plugin.h (GOMP_DEVICE_NUM_VAR): Define macro symbol. + * libgomp.map (OMP_5.0.2): New version space with omp_get_device_num, + omp_get_device_num_. + * libgomp.texi (omp_get_device_num): Add documentation for new API + function. + * omp.h.in (omp_get_device_num): Add declaration. + * omp_lib.f90.in (omp_get_device_num): Likewise. + * omp_lib.h.in (omp_get_device_num): Likewise. + * target.c (gomp_load_image_to_device): If additional entry for device + number exists at end of returned entries from 'load_image_func' hook, + copy the assigned device number over to the device variable. + * config/gcn/icv-device.c (GOMP_DEVICE_NUM_VAR): Define static global. + (omp_get_device_num): New API function, device side. + * plugin/plugin-gcn.c ("symcat.h"): Add include. + (GOMP_OFFLOAD_load_image): Add addresses of device GOMP_DEVICE_NUM_VAR + at end of returned 'target_table' entries. + * config/nvptx/icv-device.c (GOMP_DEVICE_NUM_VAR): Define static global. + (omp_get_device_num): New API function, device side. + * plugin/plugin-nvptx.c ("symcat.h"): Add include. + (GOMP_OFFLOAD_load_image): Add addresses of device GOMP_DEVICE_NUM_VAR + at end of returned 'target_table' entries. + * testsuite/lib/libgomp.exp + (check_effective_target_offload_target_intelmic): New function for + testing for intelmic offloading. + * testsuite/libgomp.c-c++-common/target-45.c: New test. + * testsuite/libgomp.fortran/target10.f90: New test. + 2021-07-30 Thomas Schwinge <thomas@codesourcery.com> Ulrich Drepper <drepper@redhat.com> diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index b6b3a0e..be5daf0 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,50 @@ +2021-08-05 Jonathan Wakely <jwakely@redhat.com> + + PR libstdc++/101782 + * include/bits/ranges_base.h (ranges::begin, ranges::end) + (ranges::rbegin, ranges::rend, ranges::size, ranges::ssize) + (ranges::empty, ranges::data): Move attribute after the + declarator-id instead of at the end of the declarator. + * include/bits/stl_iterator.h (__gnu_cxx::__normal_iterator): + Move attributes back to the start of the function declarator, + but move the requires-clause to the end. + (common_iterator): Move attribute after the declarator-id. + * include/bits/stl_queue.h (queue): Remove ill-formed attributes + from friend declaration that are not definitions. + * include/std/ranges (views::all, views::filter) + (views::transform, views::take, views::take_while, + views::drop) (views::drop_while, views::join, + views::lazy_split) (views::split, views::counted, + views::common, views::reverse) (views::elements): Move + attributes after the declarator-id. + +2021-08-05 Jonathan Wakely <jwakely@redhat.com> + + * libsupc++/compare (partial_ordering, weak_ordering) + (strong_ordering, is_eq, is_neq, is_lt, is_lteq, is_gt, is_gteq) + (compare_three_way, strong_order, weak_order, partial_order) + (compare_strong_order_fallback, compare_weak_order_fallback) + (compare_partial_order_fallback, __detail::__synth3way): Add + nodiscard attribute. + * testsuite/18_support/comparisons/categories/zero_neg.cc: Add + -Wno-unused-result to options. + +2021-08-05 Jonathan Wakely <jwakely@redhat.com> + + PR libstdc++/101782 + * include/bits/ranges_base.h (ranges::begin, ranges::end) + (ranges::rbegin, ranges::rend, ranges::size, ranges::ssize) + (ranges::empty, ranges::data): Move attribute to the end of + the declarator. + * include/bits/stl_iterator.h (__gnu_cxx::__normal_iterator) + (common_iterator): Likewise for non-member operator functions. + * include/std/ranges (views::all, views::filter) + (views::transform, views::take, views::take_while, views::drop) + (views::drop_while, views::join, views::lazy_split) + (views::split, views::counted, views::common, views::reverse) + (views::elements): Likewise. + * testsuite/std/ranges/access/101782.cc: New test. + 2021-08-04 Jonathan Wakely <jwakely@redhat.com> * include/bits/forward_list.h: Add [[nodiscard]] to functions diff --git a/libstdc++-v3/include/bits/ranges_base.h b/libstdc++-v3/include/bits/ranges_base.h index 1dac968..49c7d9c 100644 --- a/libstdc++-v3/include/bits/ranges_base.h +++ b/libstdc++-v3/include/bits/ranges_base.h @@ -111,8 +111,7 @@ namespace ranges requires is_array_v<remove_reference_t<_Tp>> || __member_begin<_Tp> || __adl_begin<_Tp> constexpr auto - operator()(_Tp&& __t) const noexcept(_S_noexcept<_Tp&>()) - [[nodiscard]] + operator()[[nodiscard]](_Tp&& __t) const noexcept(_S_noexcept<_Tp&>()) { if constexpr (is_array_v<remove_reference_t<_Tp>>) { @@ -163,8 +162,7 @@ namespace ranges requires is_bounded_array_v<remove_reference_t<_Tp>> || __member_end<_Tp> || __adl_end<_Tp> constexpr auto - operator()(_Tp&& __t) const noexcept(_S_noexcept<_Tp&>()) - [[nodiscard]] + operator()[[nodiscard]](_Tp&& __t) const noexcept(_S_noexcept<_Tp&>()) { if constexpr (is_bounded_array_v<remove_reference_t<_Tp>>) { @@ -268,9 +266,8 @@ namespace ranges template<__maybe_borrowed_range _Tp> requires __member_rbegin<_Tp> || __adl_rbegin<_Tp> || __reversable<_Tp> constexpr auto - operator()(_Tp&& __t) const + operator()[[nodiscard]](_Tp&& __t) const noexcept(_S_noexcept<_Tp&>()) - [[nodiscard]] { if constexpr (__member_rbegin<_Tp>) return __t.rbegin(); @@ -327,9 +324,8 @@ namespace ranges template<__maybe_borrowed_range _Tp> requires __member_rend<_Tp> || __adl_rend<_Tp> || __reversable<_Tp> constexpr auto - operator()(_Tp&& __t) const + operator()[[nodiscard]](_Tp&& __t) const noexcept(_S_noexcept<_Tp&>()) - [[nodiscard]] { if constexpr (__member_rend<_Tp>) return __t.rend(); @@ -417,8 +413,7 @@ namespace ranges requires is_bounded_array_v<remove_reference_t<_Tp>> || __member_size<_Tp> || __adl_size<_Tp> || __sentinel_size<_Tp> constexpr auto - operator()(_Tp&& __t) const noexcept(_S_noexcept<_Tp&>()) - [[nodiscard]] + operator()[[nodiscard]](_Tp&& __t) const noexcept(_S_noexcept<_Tp&>()) { if constexpr (is_bounded_array_v<remove_reference_t<_Tp>>) return extent_v<remove_reference_t<_Tp>>; @@ -438,8 +433,7 @@ namespace ranges template<typename _Tp> requires requires (_Tp& __t) { _Size{}(__t); } constexpr auto - operator()(_Tp&& __t) const noexcept(noexcept(_Size{}(__t))) - [[nodiscard]] + operator()[[nodiscard]](_Tp&& __t) const noexcept(noexcept(_Size{}(__t))) { auto __size = _Size{}(__t); using __size_type = decltype(__size); @@ -498,8 +492,7 @@ namespace ranges requires __member_empty<_Tp> || __size0_empty<_Tp> || __eq_iter_empty<_Tp> constexpr bool - operator()(_Tp&& __t) const noexcept(_S_noexcept<_Tp&>()) - [[nodiscard]] + operator()[[nodiscard]](_Tp&& __t) const noexcept(_S_noexcept<_Tp&>()) { if constexpr (__member_empty<_Tp>) return bool(__t.empty()); @@ -540,8 +533,7 @@ namespace ranges template<__maybe_borrowed_range _Tp> requires __member_data<_Tp> || __begin_data<_Tp> constexpr auto - operator()(_Tp&& __t) const noexcept(_S_noexcept<_Tp>()) - [[nodiscard]] + operator()[[nodiscard]](_Tp&& __t) const noexcept(_S_noexcept<_Tp>()) { if constexpr (__member_data<_Tp>) return __t.data(); diff --git a/libstdc++-v3/include/bits/stl_iterator.h b/libstdc++-v3/include/bits/stl_iterator.h index 053ae41..c5b0240 100644 --- a/libstdc++-v3/include/bits/stl_iterator.h +++ b/libstdc++-v3/include/bits/stl_iterator.h @@ -1118,21 +1118,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #if __cpp_lib_three_way_comparison template<typename _IteratorL, typename _IteratorR, typename _Container> - requires requires (_IteratorL __lhs, _IteratorR __rhs) - { { __lhs == __rhs } -> std::convertible_to<bool>; } + [[nodiscard]] constexpr bool operator==(const __normal_iterator<_IteratorL, _Container>& __lhs, const __normal_iterator<_IteratorR, _Container>& __rhs) noexcept(noexcept(__lhs.base() == __rhs.base())) - [[nodiscard]] + requires requires { + { __lhs.base() == __rhs.base() } -> std::convertible_to<bool>; + } { return __lhs.base() == __rhs.base(); } template<typename _IteratorL, typename _IteratorR, typename _Container> + [[nodiscard]] constexpr std::__detail::__synth3way_t<_IteratorR, _IteratorL> operator<=>(const __normal_iterator<_IteratorL, _Container>& __lhs, const __normal_iterator<_IteratorR, _Container>& __rhs) noexcept(noexcept(std::__detail::__synth3way(__lhs.base(), __rhs.base()))) - [[nodiscard]] { return std::__detail::__synth3way(__lhs.base(), __rhs.base()); } #else // Forward iterator requirements @@ -1985,9 +1986,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename _It2, sentinel_for<_It> _Sent2> requires sentinel_for<_Sent, _It2> friend bool - operator==(const common_iterator& __x, - const common_iterator<_It2, _Sent2>& __y) - [[nodiscard]] + operator== [[nodiscard]] (const common_iterator& __x, + const common_iterator<_It2, _Sent2>& __y) { switch(__x._M_index << 2 | __y._M_index) { @@ -2008,9 +2008,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename _It2, sentinel_for<_It> _Sent2> requires sentinel_for<_Sent, _It2> && equality_comparable_with<_It, _It2> friend bool - operator==(const common_iterator& __x, - const common_iterator<_It2, _Sent2>& __y) - [[nodiscard]] + operator== [[nodiscard]] (const common_iterator& __x, + const common_iterator<_It2, _Sent2>& __y) { switch(__x._M_index << 2 | __y._M_index) { @@ -2032,9 +2031,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<sized_sentinel_for<_It> _It2, sized_sentinel_for<_It> _Sent2> requires sized_sentinel_for<_Sent, _It2> friend iter_difference_t<_It2> - operator-(const common_iterator& __x, - const common_iterator<_It2, _Sent2>& __y) - [[nodiscard]] + operator- [[nodiscard]] (const common_iterator& __x, + const common_iterator<_It2, _Sent2>& __y) { switch(__x._M_index << 2 | __y._M_index) { diff --git a/libstdc++-v3/include/bits/stl_queue.h b/libstdc++-v3/include/bits/stl_queue.h index 363868f..41ffc50 100644 --- a/libstdc++-v3/include/bits/stl_queue.h +++ b/libstdc++-v3/include/bits/stl_queue.h @@ -107,18 +107,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif template<typename _Tp1, typename _Seq1> - _GLIBCXX_NODISCARD friend bool operator==(const queue<_Tp1, _Seq1>&, const queue<_Tp1, _Seq1>&); template<typename _Tp1, typename _Seq1> - _GLIBCXX_NODISCARD friend bool operator<(const queue<_Tp1, _Seq1>&, const queue<_Tp1, _Seq1>&); #if __cpp_lib_three_way_comparison template<typename _Tp1, three_way_comparable _Seq1> - [[nodiscard]] friend compare_three_way_result_t<_Seq1> operator<=>(const queue<_Tp1, _Seq1>&, const queue<_Tp1, _Seq1>&); #endif diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges index 71b7dc7..fb8905f 100644 --- a/libstdc++-v3/include/std/ranges +++ b/libstdc++-v3/include/std/ranges @@ -1127,9 +1127,8 @@ namespace views::__adaptor || __detail::__can_ref_view<_Range> || __detail::__can_subrange<_Range> constexpr auto - operator()(_Range&& __r) const + operator() [[nodiscard]] (_Range&& __r) const noexcept(_S_noexcept<_Range>()) - [[nodiscard]] { if constexpr (view<decay_t<_Range>>) return std::forward<_Range>(__r); @@ -1554,8 +1553,7 @@ namespace views::__adaptor template<viewable_range _Range, typename _Pred> requires __detail::__can_filter_view<_Range, _Pred> constexpr auto - operator()(_Range&& __r, _Pred&& __p) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r, _Pred&& __p) const { return filter_view(std::forward<_Range>(__r), std::forward<_Pred>(__p)); } @@ -1932,8 +1930,7 @@ namespace views::__adaptor template<viewable_range _Range, typename _Fp> requires __detail::__can_transform_view<_Range, _Fp> constexpr auto - operator()(_Range&& __r, _Fp&& __f) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r, _Fp&& __f) const { return transform_view(std::forward<_Range>(__r), std::forward<_Fp>(__f)); } @@ -2114,8 +2111,7 @@ namespace views::__adaptor template<viewable_range _Range, typename _Tp> requires __detail::__can_take_view<_Range, _Tp> constexpr auto - operator()(_Range&& __r, _Tp&& __n) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r, _Tp&& __n) const { return take_view(std::forward<_Range>(__r), std::forward<_Tp>(__n)); } @@ -2242,8 +2238,7 @@ namespace views::__adaptor template<viewable_range _Range, typename _Pred> requires __detail::__can_take_while_view<_Range, _Pred> constexpr auto - operator()(_Range&& __r, _Pred&& __p) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r, _Pred&& __p) const { return take_while_view(std::forward<_Range>(__r), std::forward<_Pred>(__p)); } @@ -2363,8 +2358,7 @@ namespace views::__adaptor template<viewable_range _Range, typename _Tp> requires __detail::__can_drop_view<_Range, _Tp> constexpr auto - operator()(_Range&& __r, _Tp&& __n) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r, _Tp&& __n) const { return drop_view(std::forward<_Range>(__r), std::forward<_Tp>(__n)); } @@ -2452,8 +2446,7 @@ namespace views::__adaptor template<viewable_range _Range, typename _Pred> requires __detail::__can_drop_while_view<_Range, _Pred> constexpr auto - operator()(_Range&& __r, _Pred&& __p) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r, _Pred&& __p) const { return drop_while_view(std::forward<_Range>(__r), std::forward<_Pred>(__p)); @@ -2815,8 +2808,7 @@ namespace views::__adaptor template<viewable_range _Range> requires __detail::__can_join_view<_Range> constexpr auto - operator()(_Range&& __r) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r) const { // _GLIBCXX_RESOLVE_LIB_DEFECTS // 3474. Nesting join_views is broken because of CTAD @@ -3267,8 +3259,7 @@ namespace views::__adaptor template<viewable_range _Range, typename _Pattern> requires __detail::__can_lazy_split_view<_Range, _Pattern> constexpr auto - operator()(_Range&& __r, _Pattern&& __f) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r, _Pattern&& __f) const { return lazy_split_view(std::forward<_Range>(__r), std::forward<_Pattern>(__f)); } @@ -3476,8 +3467,7 @@ namespace views::__adaptor template<viewable_range _Range, typename _Pattern> requires __detail::__can_split_view<_Range, _Pattern> constexpr auto - operator()(_Range&& __r, _Pattern&& __f) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r, _Pattern&& __f) const { return split_view(std::forward<_Range>(__r), std::forward<_Pattern>(__f)); } @@ -3498,8 +3488,7 @@ namespace views::__adaptor { template<input_or_output_iterator _Iter> constexpr auto - operator()(_Iter __i, iter_difference_t<_Iter> __n) const - [[nodiscard]] + operator() [[nodiscard]] (_Iter __i, iter_difference_t<_Iter> __n) const { if constexpr (random_access_iterator<_Iter>) return subrange(__i, __i + __n); @@ -3620,8 +3609,7 @@ namespace views::__adaptor requires __detail::__already_common<_Range> || __detail::__can_common_view<_Range> constexpr auto - operator()(_Range&& __r) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r) const { if constexpr (__detail::__already_common<_Range>) return views::all(std::forward<_Range>(__r)); @@ -3743,8 +3731,7 @@ namespace views::__adaptor || __detail::__is_reversible_subrange<remove_cvref_t<_Range>> || __detail::__can_reverse_view<_Range> constexpr auto - operator()(_Range&& __r) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r) const { using _Tp = remove_cvref_t<_Range>; if constexpr (__detail::__is_reverse_view<_Tp>) @@ -4142,8 +4129,7 @@ namespace views::__adaptor template<viewable_range _Range> requires __detail::__can_elements_view<_Nm, _Range> constexpr auto - operator()(_Range&& __r) const - [[nodiscard]] + operator() [[nodiscard]] (_Range&& __r) const { return elements_view<all_t<_Range>, _Nm>{std::forward<_Range>(__r)}; } |