/* Builtins' description for AArch64 SIMD architecture.

   Copyright (C) 2011-2017 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"
#include "gimple-iterator.h"
#include "case-cfn-macros.h"

/* Map the lower-case mode mnemonics used in the builtin .def files
   (e.g. "v8qi") to their machine_mode enumerators, via UP () below.  */
#define v8qi_UP  E_V8QImode
#define v4hi_UP  E_V4HImode
#define v4hf_UP  E_V4HFmode
#define v2si_UP  E_V2SImode
#define v2sf_UP  E_V2SFmode
#define v1df_UP  E_V1DFmode
#define di_UP    E_DImode
#define df_UP    E_DFmode
#define v16qi_UP E_V16QImode
#define v8hi_UP  E_V8HImode
#define v8hf_UP  E_V8HFmode
#define v4si_UP  E_V4SImode
#define v4sf_UP  E_V4SFmode
#define v2di_UP  E_V2DImode
#define v2df_UP  E_V2DFmode
#define ti_UP    E_TImode
#define oi_UP    E_OImode
#define ci_UP    E_CImode
#define xi_UP    E_XImode
#define si_UP    E_SImode
#define sf_UP    E_SFmode
#define hi_UP    E_HImode
#define hf_UP    E_HFmode
#define qi_UP    E_QImode

#define UP(X) X##_UP

#define SIMD_MAX_BUILTIN_ARGS 5

/* Per-argument type qualifiers, combined as bit flags.  Index 0 of a
   qualifiers array describes the return type; the remaining entries
   describe the arguments.  */
enum aarch64_type_qualifiers
{
  /* T foo.  */
  qualifier_none = 0x0,
  /* unsigned T foo.  */
  qualifier_unsigned = 0x1, /* 1 << 0  */
  /* const T foo.  */
  qualifier_const = 0x2, /* 1 << 1  */
  /* T *foo.  */
  qualifier_pointer = 0x4, /* 1 << 2  */
  /* Used when expanding arguments if an operand could
     be an immediate.  */
  qualifier_immediate = 0x8, /* 1 << 3  */
  qualifier_maybe_immediate = 0x10, /* 1 << 4  */
  /* void foo (...).  */
  qualifier_void = 0x20, /* 1 << 5  */
  /* Some patterns may have internal operands, this qualifier is an
     instruction to the initialisation code to skip this operand.  */
  qualifier_internal = 0x40, /* 1 << 6  */
  /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
     rather than using the type of the operand.  */
  qualifier_map_mode = 0x80, /* 1 << 7  */
  /* qualifier_pointer | qualifier_map_mode  */
  qualifier_pointer_map_mode = 0x84,
  /* qualifier_const | qualifier_pointer | qualifier_map_mode  */
  qualifier_const_pointer_map_mode = 0x86,
  /* Polynomial types.  */
  qualifier_poly = 0x100,
  /* Lane indices - must be in range, and flipped for bigendian.  */
  qualifier_lane_index = 0x200,
  /* Lane indices for single lane structure loads and stores.  */
  qualifier_struct_load_store_lane_index = 0x400
};

/* One record per AdvSIMD builtin, generated from the .def file below.  */
typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code code;
  unsigned int fcode;
  enum aarch64_type_qualifiers *qualifiers;
} aarch64_simd_builtin_datum;

/* Qualifier tables shared by the builtin definitions.  Each TYPES_*
   macro names one signature shape (return type first).  */
static enum aarch64_type_qualifiers
aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none };
#define TYPES_UNOP (aarch64_types_unop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned };
#define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none };
#define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
#define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
#define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
#define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_index };
#define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_lane_index };
#define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_binop_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_none, qualifier_immediate };
#define TYPES_GETREGP (aarch64_types_binop_imm_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
#define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_immediate };
#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned, qualifier_immediate };
#define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate};
#define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
#define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate};
#define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_immediate };
#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none };
#define TYPES_COMBINE (aarch64_types_combine_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_combine_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_COMBINEP (aarch64_types_combine_p_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_unsigned,
      qualifier_poly, qualifier_poly };
#define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_none };
#define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)

/* The first argument (return type) of a store should be void type,
   which we represent with qualifier_void.  Their first operand will be
   a DImode pointer to the location to store to, so we must use
   qualifier_map_mode | qualifier_pointer to build a pointer to the
   element type of the vector.  */
static enum aarch64_type_qualifiers
aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
#define TYPES_STORE1P (aarch64_types_store1_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
#define TYPES_STORE1 (aarch64_types_store1_qualifiers)
#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)

/* CF<MAP> macros build the CODE_FOR_* insn-code name for a builtin; the
   MAP digit selects the naming scheme used by the pattern.  */
#define CF0(N, X) CODE_FOR_aarch64_##N##X
#define CF1(N, X) CODE_FOR_##N##X##1
#define CF2(N, X) CODE_FOR_##N##X##2
#define CF3(N, X) CODE_FOR_##N##X##3
#define CF4(N, X) CODE_FOR_##N##X##4
#define CF10(N, X) CODE_FOR_##N##X

/* VAR<n> expand one .def entry into <n> aarch64_simd_builtin_datum
   initialisers, one per listed mode suffix.  */
#define VAR1(T, N, MAP, A) \
  {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T},
#define VAR2(T, N, MAP, A, B) \
  VAR1 (T, N, MAP, A) \
  VAR1 (T, N, MAP, B)
#define VAR3(T, N, MAP, A, B, C) \
  VAR2 (T, N, MAP, A, B) \
  VAR1 (T, N, MAP, C)
#define VAR4(T, N, MAP, A, B, C, D) \
  VAR3 (T, N, MAP, A, B, C) \
  VAR1 (T, N, MAP, D)
#define VAR5(T, N, MAP, A, B, C, D, E) \
  VAR4 (T, N, MAP, A, B, C, D) \
  VAR1 (T, N, MAP, E)
#define VAR6(T, N, MAP, A, B, C, D, E, F) \
  VAR5 (T, N, MAP, A, B, C, D, E) \
  VAR1 (T, N, MAP, F)
#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \
  VAR6 (T, N, MAP, A, B, C, D, E, F) \
  VAR1 (T, N, MAP, G)
#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, MAP, A, B, C, D, E, F, G) \
  VAR1 (T, N, MAP, H)
#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \
  VAR1 (T, N, MAP, I)
#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \
  VAR1 (T, N, MAP, J)
#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
  VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
  VAR1 (T, N, MAP, K)
#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
  VAR1 (T, N, MAP, L)
#define VAR13(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR12 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR1 (T, N, MAP, M)
#define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
  VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR1 (T, X, MAP, N)

#include "aarch64-builtin-iterators.h"

static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
#include "aarch64-simd-builtins.def"
};

/* There's only 8 CRC32 builtins.  Probably not worth their own .def file.  */
#define AARCH64_CRC32_BUILTINS \
  CRC32_BUILTIN (crc32b, QI) \
  CRC32_BUILTIN (crc32h, HI) \
  CRC32_BUILTIN (crc32w, SI) \
  CRC32_BUILTIN (crc32x, DI) \
  CRC32_BUILTIN (crc32cb, QI) \
  CRC32_BUILTIN (crc32ch, HI) \
  CRC32_BUILTIN (crc32cw, SI) \
  CRC32_BUILTIN (crc32cx, DI)

typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code icode;
  unsigned int fcode;
} aarch64_crc_builtin_datum;

/* First expansion of AARCH64_CRC32_BUILTINS: enumerator names.  */
#define CRC32_BUILTIN(N, M) \
  AARCH64_BUILTIN_##N,

/* Re-purpose VAR1 so the .def file expands to enumerator names too.  */
#undef VAR1
#define VAR1(T, N, MAP, A) \
  AARCH64_SIMD_BUILTIN_##T##_##N##A,

enum aarch64_builtins
{
  AARCH64_BUILTIN_MIN,

  AARCH64_BUILTIN_GET_FPCR,
  AARCH64_BUILTIN_SET_FPCR,
  AARCH64_BUILTIN_GET_FPSR,
  AARCH64_BUILTIN_SET_FPSR,

  AARCH64_BUILTIN_RSQRT_DF,
  AARCH64_BUILTIN_RSQRT_SF,
  AARCH64_BUILTIN_RSQRT_V2DF,
  AARCH64_BUILTIN_RSQRT_V2SF,
  AARCH64_BUILTIN_RSQRT_V4SF,
  AARCH64_SIMD_BUILTIN_BASE,
  AARCH64_SIMD_BUILTIN_LANE_CHECK,
#include "aarch64-simd-builtins.def"
  /* The first enum element which is based on an insn_data pattern.  */
  AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
  AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
			      + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
  AARCH64_CRC32_BUILTIN_BASE,
  AARCH64_CRC32_BUILTINS
  AARCH64_CRC32_BUILTIN_MAX,
  /* ARMv8.3-A Pointer Authentication Builtins.  */
  AARCH64_PAUTH_BUILTIN_AUTIA1716,
  AARCH64_PAUTH_BUILTIN_PACIA1716,
  AARCH64_PAUTH_BUILTIN_XPACLRI,
  AARCH64_BUILTIN_MAX
};

/* Second expansion of AARCH64_CRC32_BUILTINS: data-table initialisers.  */
#undef CRC32_BUILTIN
#define CRC32_BUILTIN(N, M) \
  {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},

static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
  AARCH64_CRC32_BUILTINS
};

#undef CRC32_BUILTIN

/* Function declarations for every registered builtin, indexed by fcode.  */
static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];

#define NUM_DREG_TYPES 6
#define NUM_QREG_TYPES 6

/* Internal scalar builtin types.  These types are used to support
   neon intrinsic builtins.  They are _not_ user-visible types.  Therefore
   the mangling for these types are implementation defined.  */
const char *aarch64_scalar_builtin_types[] = {
  "__builtin_aarch64_simd_qi",
  "__builtin_aarch64_simd_hi",
  "__builtin_aarch64_simd_si",
  "__builtin_aarch64_simd_hf",
  "__builtin_aarch64_simd_sf",
  "__builtin_aarch64_simd_di",
  "__builtin_aarch64_simd_df",
  "__builtin_aarch64_simd_poly8",
  "__builtin_aarch64_simd_poly16",
  "__builtin_aarch64_simd_poly64",
  "__builtin_aarch64_simd_poly128",
  "__builtin_aarch64_simd_ti",
  "__builtin_aarch64_simd_uqi",
  "__builtin_aarch64_simd_uhi",
  "__builtin_aarch64_simd_usi",
  "__builtin_aarch64_simd_udi",
  "__builtin_aarch64_simd_ei",
  "__builtin_aarch64_simd_oi",
  "__builtin_aarch64_simd_ci",
  "__builtin_aarch64_simd_xi",
  NULL
};

#define ENTRY(E, M, Q, G) E,
enum aarch64_simd_type
{
#include "aarch64-simd-builtin-types.def"
  ARM_NEON_H_TYPES_LAST
};
#undef ENTRY

struct aarch64_simd_type_info
{
  enum aarch64_simd_type type;

  /* Internal type name.  */
  const char *name;

  /* Internal type name(mangled).  The mangled names conform to the
     AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
     Appendix A).  To qualify for emission with the mangled names defined in
     that document, a vector type must not only be of the correct mode but
     also be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t);
     these types are registered by aarch64_init_simd_builtin_types ().  In
     other words, vector types defined in other ways e.g. via vector_size
     attribute will get default mangled names.  */
  const char *mangle;

  /* Internal type.  */
  tree itype;

  /* Element type.  */
  tree eltype;

  /* Machine mode the internal type maps to.  */
  enum machine_mode mode;

  /* Qualifiers.  */
  enum aarch64_type_qualifiers q;
};

#define ENTRY(E, M, Q, G)  \
  {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
static struct aarch64_simd_type_info aarch64_simd_types [] = {
#include "aarch64-simd-builtin-types.def"
};
#undef ENTRY

/* Type nodes for the large integer modes used by multi-register
   load/store builtins; built lazily in aarch64_init_simd_builtin_types.  */
static tree aarch64_simd_intOI_type_node = NULL_TREE;
static tree aarch64_simd_intCI_type_node = NULL_TREE;
static tree aarch64_simd_intXI_type_node = NULL_TREE;

/* The user-visible __fp16 type, and a pointer to that type.  Used
   across the back-end.
*/
tree aarch64_fp16_type_node = NULL_TREE;
tree aarch64_fp16_ptr_type_node = NULL_TREE;

/* If TYPE is named after one of the internal scalar builtin types in
   aarch64_scalar_builtin_types, return that name (which doubles as the
   mangling); otherwise return NULL.  */
static const char *
aarch64_mangle_builtin_scalar_type (const_tree type)
{
  int i = 0;

  while (aarch64_scalar_builtin_types[i] != NULL)
    {
      const char *name = aarch64_scalar_builtin_types[i];

      if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
	  && DECL_NAME (TYPE_NAME (type))
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name))
	return aarch64_scalar_builtin_types[i];
      i++;
    }
  return NULL;
}

/* If TYPE matches one of the internal AdvSIMD vector types (by mode AND
   by registered name), return the AAPCS64 mangling recorded for it in
   aarch64_simd_types; otherwise return NULL.  */
static const char *
aarch64_mangle_builtin_vector_type (const_tree type)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);

  for (i = 0; i < nelts; i++)
    if (aarch64_simd_types[i].mode == TYPE_MODE (type)
	&& TYPE_NAME (type)
	&& TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
	&& DECL_NAME (TYPE_NAME (type))
	&& !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))),
		    aarch64_simd_types[i].name))
      return aarch64_simd_types[i].mangle;

  return NULL;
}

/* Return the mangled name for TYPE if it is an AArch64 builtin vector or
   scalar type, otherwise NULL.  Vector types are checked first.  */
const char *
aarch64_mangle_builtin_type (const_tree type)
{
  const char *mangle;
  /* Walk through all the AArch64 builtins types tables to filter out the
     incoming type.  */
  if ((mangle = aarch64_mangle_builtin_vector_type (type))
      || (mangle = aarch64_mangle_builtin_scalar_type (type)))
    return mangle;

  return NULL;
}

/* Return the type node for a standard (non-poly) scalar MODE with
   qualifier set Q.  Q only distinguishes signed from unsigned here;
   large integer modes and FP modes have a single representation.  */
static tree
aarch64_simd_builtin_std_type (machine_mode mode,
			       enum aarch64_type_qualifiers q)
{
#define QUAL_TYPE(M)  \
  ((q == qualifier_none) ? int##M##_type_node : unsigned_int##M##_type_node);
  switch (mode)
    {
    case E_QImode:
      return QUAL_TYPE (QI);
    case E_HImode:
      return QUAL_TYPE (HI);
    case E_SImode:
      return QUAL_TYPE (SI);
    case E_DImode:
      return QUAL_TYPE (DI);
    case E_TImode:
      return QUAL_TYPE (TI);
    case E_OImode:
      return aarch64_simd_intOI_type_node;
    case E_CImode:
      return aarch64_simd_intCI_type_node;
    case E_XImode:
      return aarch64_simd_intXI_type_node;
    case E_HFmode:
      return aarch64_fp16_type_node;
    case E_SFmode:
      return float_type_node;
    case E_DFmode:
      return double_type_node;
    default:
      gcc_unreachable ();
    }
#undef QUAL_TYPE
}

/* Return the builtin type node for (MODE, Q), consulting the
   aarch64_simd_types table for vector and poly types and falling back
   to the standard scalar types otherwise.  NULL_TREE if not found.  */
static tree
aarch64_lookup_simd_builtin_type (machine_mode mode,
				  enum aarch64_type_qualifiers q)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);

  /* Non-poly scalar modes map to standard types not in the table.  */
  if (q != qualifier_poly && !VECTOR_MODE_P (mode))
    return aarch64_simd_builtin_std_type (mode, q);

  for (i = 0; i < nelts; i++)
    if (aarch64_simd_types[i].mode == mode
	&& aarch64_simd_types[i].q == q)
      return aarch64_simd_types[i].itype;

  return NULL_TREE;
}

/* Convenience wrapper: look up the builtin type for MODE given boolean
   unsigned/poly flags instead of a qualifier value.  Poly wins over
   unsigned.  */
static tree
aarch64_simd_builtin_type (machine_mode mode,
			   bool unsigned_p, bool poly_p)
{
  if (poly_p)
    return aarch64_lookup_simd_builtin_type (mode, qualifier_poly);
  else if (unsigned_p)
    return aarch64_lookup_simd_builtin_type (mode, qualifier_unsigned);
  else
    return aarch64_lookup_simd_builtin_type (mode, qualifier_none);
}

/* Fill in the itype/eltype fields of aarch64_simd_types, register every
   internal AdvSIMD vector type with the front end, and build the OI/CI/XI
   integer types used by multi-register load/store builtins.  */
static void
aarch64_init_simd_builtin_types (void)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
  tree tdecl;

  /* Init all the element types built by the front-end.  */
  aarch64_simd_types[Int8x8_t].eltype = intQI_type_node;
  aarch64_simd_types[Int8x16_t].eltype = intQI_type_node;
  aarch64_simd_types[Int16x4_t].eltype = intHI_type_node;
  aarch64_simd_types[Int16x8_t].eltype = intHI_type_node;
  aarch64_simd_types[Int32x2_t].eltype = intSI_type_node;
  aarch64_simd_types[Int32x4_t].eltype = intSI_type_node;
  aarch64_simd_types[Int64x1_t].eltype = intDI_type_node;
  aarch64_simd_types[Int64x2_t].eltype = intDI_type_node;
  aarch64_simd_types[Uint8x8_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint8x16_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint16x4_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint16x8_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint32x2_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint32x4_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint64x1_t].eltype = unsigned_intDI_type_node;
  aarch64_simd_types[Uint64x2_t].eltype = unsigned_intDI_type_node;

  /* Poly types are a world of their own.  */
  aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype
    = build_distinct_type_copy (unsigned_intQI_type_node);
  aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype
    = build_distinct_type_copy (unsigned_intHI_type_node);
  aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype
    = build_distinct_type_copy (unsigned_intDI_type_node);
  aarch64_simd_types[Poly128_t].eltype = aarch64_simd_types[Poly128_t].itype
    = build_distinct_type_copy (unsigned_intTI_type_node);

  /* Init poly vector element types with scalar poly types.  */
  aarch64_simd_types[Poly8x8_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly8x16_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly16x4_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly16x8_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly64x1_t].eltype = aarch64_simd_types[Poly64_t].itype;
  aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;

  /* Continue with standard types.  */
  aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float32x2_t].eltype = float_type_node;
  aarch64_simd_types[Float32x4_t].eltype = float_type_node;
  aarch64_simd_types[Float64x1_t].eltype = double_type_node;
  aarch64_simd_types[Float64x2_t].eltype = double_type_node;

  /* Build the vector itype for every entry that doesn't already have one
     (i.e. everything except the scalar poly types set up above), and
     register each internal type with the front end.  */
  for (i = 0; i < nelts; i++)
    {
      tree eltype = aarch64_simd_types[i].eltype;
      machine_mode mode = aarch64_simd_types[i].mode;

      if (aarch64_simd_types[i].itype == NULL)
	{
	  aarch64_simd_types[i].itype
	    = build_distinct_type_copy
	      (build_vector_type (eltype, GET_MODE_NUNITS (mode)));
	  SET_TYPE_STRUCTURAL_EQUALITY (aarch64_simd_types[i].itype);
	}

      tdecl = add_builtin_type (aarch64_simd_types[i].name,
				aarch64_simd_types[i].itype);
      TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
    }

#define AARCH64_BUILD_SIGNED_TYPE(mode)  \
  make_signed_type (GET_MODE_PRECISION (mode));
  aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
  aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
  aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
#undef AARCH64_BUILD_SIGNED_TYPE

  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node);
  TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node);
  TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
  tdecl = add_builtin_type
	    ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node);
  TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
}

/* Register the legacy (not strongly typed) internal scalar type names
   used by builtins that still bypass the qualifier system.  */
static void
aarch64_init_simd_builtin_scalar_types (void)
{
  /* Define typedefs for all the standard scalar types.  */
  (*lang_hooks.types.register_builtin_type) (intQI_type_node,
					     "__builtin_aarch64_simd_qi");
  (*lang_hooks.types.register_builtin_type) (intHI_type_node,
					     "__builtin_aarch64_simd_hi");
  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
					     "__builtin_aarch64_simd_hf");
  (*lang_hooks.types.register_builtin_type) (intSI_type_node,
					     "__builtin_aarch64_simd_si");
  (*lang_hooks.types.register_builtin_type) (float_type_node,
					     "__builtin_aarch64_simd_sf");
  (*lang_hooks.types.register_builtin_type) (intDI_type_node,
					     "__builtin_aarch64_simd_di");
  (*lang_hooks.types.register_builtin_type) (double_type_node,
					     "__builtin_aarch64_simd_df");
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
					     "__builtin_aarch64_simd_poly8");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
					     "__builtin_aarch64_simd_poly16");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
					     "__builtin_aarch64_simd_poly64");
  (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
					     "__builtin_aarch64_simd_poly128");
  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_aarch64_simd_ti");

  /* Unsigned integer types for various mode sizes.  */
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
					     "__builtin_aarch64_simd_uqi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
					     "__builtin_aarch64_simd_uhi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
					     "__builtin_aarch64_simd_usi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
					     "__builtin_aarch64_simd_udi");
}

/* Guard so the SIMD builtins are only registered once even if this is
   called from more than one place.  */
static bool aarch64_simd_builtins_initialized_p = false;

/* Register every AdvSIMD builtin: the internal types, the lane-check
   pseudo-builtin, and one function decl per aarch64_simd_builtin_data
   entry, with its signature derived from the insn pattern.  */
void
aarch64_init_simd_builtins (void)
{
  unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;

  if (aarch64_simd_builtins_initialized_p)
    return;

  aarch64_simd_builtins_initialized_p = true;

  aarch64_init_simd_builtin_types ();

  /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
     Therefore we need to preserve the old __builtin scalar types.  It can be
     removed once all the intrinsics become strongly typed using the qualifier
     system.  */
  aarch64_init_simd_builtin_scalar_types ();

  tree lane_check_fpr = build_function_type_list (void_type_node,
						  size_type_node,
						  size_type_node,
						  intSI_type_node,
						  NULL);
  aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
    = add_builtin_function ("__builtin_aarch64_im_lane_boundsi",
			    lane_check_fpr,
			    AARCH64_SIMD_BUILTIN_LANE_CHECK, BUILT_IN_MD,
			    NULL, NULL_TREE);

  for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
    {
      bool print_type_signature_p = false;
      char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
      aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
      char namebuf[60];
      tree ftype = NULL;
      tree fndecl = NULL;

      d->fcode = fcode;

      /* We must track two variables here.  op_num is
	 the operand number as in the RTL pattern.  This is
	 required to access the mode (e.g. V4SF mode) of the
	 argument, from which the base type can be derived.
	 arg_num is an index in to the qualifiers data, which
	 gives qualifiers to the type (e.g. const unsigned).
	 The reason these two variables may differ by one is the
	 void return type.  While all return types take the 0th entry
	 in the qualifiers array, there is no operand for them in the
	 RTL pattern.  */
      int op_num = insn_data[d->code].n_operands - 1;
      int arg_num = d->qualifiers[0] & qualifier_void
		      ? op_num + 1
		      : op_num;
      tree return_type = void_type_node, args = void_list_node;
      tree eltype;

      /* Build a function type directly from the insn_data for this
	 builtin.  The build_function_type () function takes care of
	 removing duplicates for us.  */
      for (; op_num >= 0; arg_num--, op_num--)
	{
	  machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
	  enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];

	  if (qualifiers & qualifier_unsigned)
	    {
	      type_signature[op_num] = 'u';
	      print_type_signature_p = true;
	    }
	  else if (qualifiers & qualifier_poly)
	    {
	      type_signature[op_num] = 'p';
	      print_type_signature_p = true;
	    }
	  else
	    type_signature[op_num] = 's';

	  /* Skip an internal operand for vget_{low, high}.  */
	  if (qualifiers & qualifier_internal)
	    continue;

	  /* Some builtins have different user-facing types
	     for certain arguments, encoded in d->mode.  */
	  if (qualifiers & qualifier_map_mode)
	    op_mode = d->mode;

	  /* For pointers, we want a pointer to the basic type
	     of the vector.  */
	  if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
	    op_mode = GET_MODE_INNER (op_mode);

	  eltype = aarch64_simd_builtin_type
		     (op_mode,
		      (qualifiers & qualifier_unsigned) != 0,
		      (qualifiers & qualifier_poly) != 0);
	  gcc_assert (eltype != NULL);

	  /* Add qualifiers.  */
	  if (qualifiers & qualifier_const)
	    eltype = build_qualified_type (eltype, TYPE_QUAL_CONST);

	  if (qualifiers & qualifier_pointer)
	    eltype = build_pointer_type (eltype);

	  /* If we have reached arg_num == 0, we are at a non-void
	     return type.  Otherwise, we are still processing
	     arguments.  */
	  if (arg_num == 0)
	    return_type = eltype;
	  else
	    args = tree_cons (NULL_TREE, eltype, args);
	}

      ftype = build_function_type (return_type, args);
      gcc_assert (ftype != NULL);

      /* Builtins with any unsigned/poly operand carry the per-operand
	 signature (e.g. "_uss") in their name to disambiguate.  */
      if (print_type_signature_p)
	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
		  d->name, type_signature);
      else
	snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
		  d->name);

      fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD,
				     NULL, NULL_TREE);
      aarch64_builtin_decls[fcode] = fndecl;
    }
}

/* Register the eight CRC32 builtins, each mapping
   (uint32_t, <mode> value) -> uint32_t.  */
static void
aarch64_init_crc32_builtins ()
{
  tree usi_type = aarch64_simd_builtin_std_type (SImode, qualifier_unsigned);
  unsigned int i = 0;

  for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
    {
      aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
      tree argtype = aarch64_simd_builtin_std_type (d->mode,
						    qualifier_unsigned);
      tree ftype = build_function_type_list (usi_type, usi_type,
					     argtype, NULL_TREE);
      tree fndecl = add_builtin_function (d->name, ftype, d->fcode,
					  BUILT_IN_MD, NULL, NULL_TREE);

      aarch64_builtin_decls[d->fcode] = fndecl;
    }
}

/* Add builtins for reciprocal square root.
*/

/* Register the scalar and vector reciprocal square root builtins
   (__builtin_aarch64_rsqrt_{df,sf,v2df,v2sf,v4sf}) and record their
   decls in aarch64_builtin_decls.  */

void
aarch64_init_builtin_rsqrt (void)
{
  tree fndecl = NULL;
  tree ftype = NULL;

  tree V2SF_type_node = build_vector_type (float_type_node, 2);
  tree V2DF_type_node = build_vector_type (double_type_node, 2);
  tree V4SF_type_node = build_vector_type (float_type_node, 4);

  struct builtin_decls_data
  {
    tree type_node;
    const char *builtin_name;
    int function_code;
  };

  builtin_decls_data bdda[] =
  {
    { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
    { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
    { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
    { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
    { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
  };

  builtin_decls_data *bdd = bdda;
  builtin_decls_data *bdd_end
    = bdd + (sizeof (bdda) / sizeof (builtin_decls_data));

  for (; bdd < bdd_end; bdd++)
  {
    /* Each rsqrt builtin is unary: T -> T.  */
    ftype = build_function_type_list (bdd->type_node, bdd->type_node,
				      NULL_TREE);
    fndecl = add_builtin_function (bdd->builtin_name, ftype,
				   bdd->function_code, BUILT_IN_MD, NULL,
				   NULL_TREE);
    aarch64_builtin_decls[bdd->function_code] = fndecl;
  }
}

/* Initialize the backend types that support the user-visible __fp16
   type, also initialize a pointer to that type, to be used when
   forming HFAs.  */

static void
aarch64_init_fp16_types (void)
{
  aarch64_fp16_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (aarch64_fp16_type_node) = 16;
  layout_type (aarch64_fp16_type_node);

  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
  aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
}

/* Pointer authentication builtins that will become NOP on legacy platform.
   Currently, these builtins are for internal use only (libgcc EH unwinder).  */

void
aarch64_init_pauth_hint_builtins (void)
{
  /* Pointer Authentication builtins.  */
  tree ftype_pointer_auth
    = build_function_type_list (ptr_type_node, ptr_type_node,
				unsigned_intDI_type_node, NULL_TREE);
  tree ftype_pointer_strip
    = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);

  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
    = add_builtin_function ("__builtin_aarch64_autia1716", ftype_pointer_auth,
			    AARCH64_PAUTH_BUILTIN_AUTIA1716, BUILT_IN_MD, NULL,
			    NULL_TREE);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
    = add_builtin_function ("__builtin_aarch64_pacia1716", ftype_pointer_auth,
			    AARCH64_PAUTH_BUILTIN_PACIA1716, BUILT_IN_MD, NULL,
			    NULL_TREE);
  aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
    = add_builtin_function ("__builtin_aarch64_xpaclri", ftype_pointer_strip,
			    AARCH64_PAUTH_BUILTIN_XPACLRI, BUILT_IN_MD, NULL,
			    NULL_TREE);
}

/* Top-level builtin initialisation entry point: registers the FPCR/FPSR
   accessor builtins, the __fp16 type, the SIMD/CRC32/rsqrt builtins and
   (outside ILP32) the pointer-authentication hint builtins.  */

void
aarch64_init_builtins (void)
{
  tree ftype_set_fpr
    = build_function_type_list (void_type_node, unsigned_type_node, NULL);
  tree ftype_get_fpr
    = build_function_type_list (unsigned_type_node, NULL);

  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
    = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr,
			    AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL,
			    NULL_TREE);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
    = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr,
			    AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL,
			    NULL_TREE);
  aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
    = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr,
			    AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL,
			    NULL_TREE);
  aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
    = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr,
			    AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL,
			    NULL_TREE);

  aarch64_init_fp16_types ();

  if (TARGET_SIMD)
    aarch64_init_simd_builtins ();

  aarch64_init_crc32_builtins ();
  aarch64_init_builtin_rsqrt ();

  /* Initialize pointer authentication builtins which are backed by
     instructions in NOP encoding space.

     NOTE: these builtins are supposed to be used by libgcc unwinder only, as
     there is no support on return address signing under ILP32, we don't
     register them.  */
  if (!TARGET_ILP32)
    aarch64_init_pauth_hint_builtins ();
}

/* Return the builtin decl registered for CODE, or error_mark_node if CODE
   is out of range.  */

tree
aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= AARCH64_BUILTIN_MAX)
    return error_mark_node;

  return aarch64_builtin_decls[code];
}

/* Classification of how each expanded builtin argument must be handled.  */
typedef enum
{
  SIMD_ARG_COPY_TO_REG,
  SIMD_ARG_CONSTANT,
  SIMD_ARG_LANE_INDEX,
  SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
  SIMD_ARG_STOP
} builtin_simd_arg;

/* Expand the arguments of call EXP according to ARGS (terminated by
   SIMD_ARG_STOP), emit the insn for ICODE and return the result rtx
   (TARGET when a return value is produced).  */

static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
			  tree exp, builtin_simd_arg *args,
			  machine_mode builtin_mode)
{
  rtx pat;
  rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand.  */
  int opc = 0;

  if (have_retval)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[opc++] = target;
    }

  for (;;)
    {
      builtin_simd_arg thisarg = args[opc - have_retval];

      if (thisarg == SIMD_ARG_STOP)
	break;
      else
	{
	  tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
	  machine_mode mode = insn_data[icode].operand[opc].mode;
	  op[opc] = expand_normal (arg);

	  switch (thisarg)
	    {
	    case SIMD_ARG_COPY_TO_REG:
	      if (POINTER_TYPE_P (TREE_TYPE (arg)))
		op[opc] = convert_memory_address (Pmode, op[opc]);
	      /*gcc_assert (GET_MODE (op[opc]) == mode); */
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
		op[opc] = copy_to_mode_reg (mode, op[opc]);
	      break;

	    case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
	      gcc_assert (opc > 1);
	      if (CONST_INT_P (op[opc]))
		{
		  aarch64_simd_lane_bounds (op[opc], 0,
					    GET_MODE_NUNITS (builtin_mode),
					    exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = GEN_INT (ENDIAN_LANE_N (builtin_mode,
						    INTVAL (op[opc])));
		}
	      goto constant_arg;

	    case SIMD_ARG_LANE_INDEX:
	      /* Must be a previous operand into which this is an index.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  aarch64_simd_lane_bounds (op[opc],
					    0, GET_MODE_NUNITS (vmode), exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = GEN_INT (ENDIAN_LANE_N (vmode, INTVAL (op[opc])));
		}
	      /* Fall through - if the lane index isn't a constant then
		 the next case will error.  */
	      /* FALLTHRU */
	    case SIMD_ARG_CONSTANT:
constant_arg:
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
		{
		  error ("%Kargument %d must be a constant immediate",
			 exp, opc + 1 - have_retval);
		  return const0_rtx;
		}
	      break;

	    case SIMD_ARG_STOP:
	      gcc_unreachable ();
	    }

	  opc++;
	}
    }

  switch (opc)
    {
    case 1:
      pat = GEN_FCN (icode) (op[0]);
      break;

    case 2:
      pat = GEN_FCN (icode) (op[0], op[1]);
      break;

    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;

    case 4:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;

    case 5:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;

    case 6:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;

    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return target;
}

/* Expand an AArch64 AdvSIMD builtin(intrinsic).  */
rtx
aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
{
  if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
    {
      /* Compile-time lane bound check only; no code is generated.  */
      rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
      rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
      if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
	  && UINTVAL (elementsize) != 0
	  && UINTVAL (totalsize) != 0)
	{
	  rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
	  if (CONST_INT_P (lane_idx))
	    aarch64_simd_lane_bounds (lane_idx, 0,
				      UINTVAL (totalsize)
				      / UINTVAL (elementsize),
				      exp);
	  else
	    error ("%Klane index must be a constant immediate", exp);
	}
      else
	error ("%Ktotal size and element size must be a non-zero constant immediate", exp);
      /* Don't generate any RTL.  */
      return const0_rtx;
    }
  aarch64_simd_builtin_datum *d
    = &aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
  enum insn_code icode = d->code;
  builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
  int num_args = insn_data[d->code].n_operands;
  int is_void = 0;
  int k;

  is_void = !!(d->qualifiers[0] & qualifier_void);

  num_args += is_void;

  for (k = 1; k < num_args; k++)
    {
      /* We have four arrays of data, each indexed in a different fashion.
	 qualifiers - element 0 always describes the function return type.
	 operands - element 0 is either the operand for return value (if
	   the function has a non-void return type) or the operand for the
	   first argument.
	 expr_args - element 0 always holds the first argument.
	 args - element 0 is always used for the return type.  */
      int qualifiers_k = k;
      int operands_k = k - is_void;
      int expr_args_k = k - 1;

      if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
	args[k] = SIMD_ARG_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
	args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
	args[k] = SIMD_ARG_CONSTANT;
      else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
	{
	  rtx arg = expand_normal (CALL_EXPR_ARG (exp, (expr_args_k)));
	  /* Handle constants only if the predicate allows it.  */
	  bool op_const_int_p =
	    (CONST_INT_P (arg)
	     && (*insn_data[icode].operand[operands_k].predicate)
		(arg, insn_data[icode].operand[operands_k].mode));
	  args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
	}
      else
	args[k] = SIMD_ARG_COPY_TO_REG;
    }
  args[k] = SIMD_ARG_STOP;

  /* The interface to aarch64_simd_expand_args expects a 0 if
     the function is void, and a 1 if it is not.  */
  return aarch64_simd_expand_args (target, icode, !is_void, exp,
				   &args[1], d->mode);
}

/* Expand a CRC32 builtin call EXP: load the two operands, emit the insn
   for the selected CRC pattern and return the result register.  */

rtx
aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
{
  rtx pat;
  aarch64_crc_builtin_datum *d
    = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
  enum insn_code icode = d->icode;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}

/* Function to expand reciprocal square root builtins.  */

static rtx
aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
{
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);

  rtx (*gen) (rtx, rtx);

  switch (fcode)
    {
    case AARCH64_BUILTIN_RSQRT_DF:
      gen = gen_rsqrtdf2;
      break;
    case AARCH64_BUILTIN_RSQRT_SF:
      gen = gen_rsqrtsf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2DF:
      gen = gen_rsqrtv2df2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2SF:
      gen = gen_rsqrtv2sf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V4SF:
      gen = gen_rsqrtv4sf2;
      break;
    default:
      gcc_unreachable ();
    }

  if (!target)
    target = gen_reg_rtx (GET_MODE (op0));

  emit_insn (gen (target, op0));

  return target;
}

/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient.  */
rtx
aarch64_expand_builtin (tree exp,
			rtx target,
			rtx subtarget ATTRIBUTE_UNUSED,
			machine_mode mode ATTRIBUTE_UNUSED,
			int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  int fcode = DECL_FUNCTION_CODE (fndecl);
  int icode;
  rtx pat, op0;
  tree arg0;

  switch (fcode)
    {
    case AARCH64_BUILTIN_GET_FPCR:
    case AARCH64_BUILTIN_SET_FPCR:
    case AARCH64_BUILTIN_GET_FPSR:
    case AARCH64_BUILTIN_SET_FPSR:
      if ((fcode == AARCH64_BUILTIN_GET_FPCR)
	  || (fcode == AARCH64_BUILTIN_GET_FPSR))
	{
	  icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ?
	    CODE_FOR_get_fpsr : CODE_FOR_get_fpcr;
	  target = gen_reg_rtx (SImode);
	  pat = GEN_FCN (icode) (target);
	}
      else
	{
	  target = NULL_RTX;
	  icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ?
	    CODE_FOR_set_fpsr : CODE_FOR_set_fpcr;
	  arg0 = CALL_EXPR_ARG (exp, 0);
	  op0 = force_reg (SImode, expand_normal (arg0));
	  pat = GEN_FCN (icode) (op0);
	}
      emit_insn (pat);
      return target;

    case AARCH64_PAUTH_BUILTIN_AUTIA1716:
    case AARCH64_PAUTH_BUILTIN_PACIA1716:
    case AARCH64_PAUTH_BUILTIN_XPACLRI:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = force_reg (Pmode, expand_normal (arg0));

      if (!target)
	target = gen_reg_rtx (Pmode);
      else
	target = force_reg (Pmode, target);

      emit_move_insn (target, op0);

      if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
	{
	  /* XPACLRI operates implicitly on LR (x30).  */
	  rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
	  icode = CODE_FOR_xpaclri;
	  emit_move_insn (lr, op0);
	  emit_insn (GEN_FCN (icode) ());
	  emit_move_insn (target, lr);
	}
      else
	{
	  tree arg1 = CALL_EXPR_ARG (exp, 1);
	  rtx op1 = force_reg (Pmode, expand_normal (arg1));
	  icode = (fcode == AARCH64_PAUTH_BUILTIN_PACIA1716
		   ? CODE_FOR_paci1716 : CODE_FOR_auti1716);

	  /* PACIA1716/AUTIA1716 operate implicitly on x17 (pointer) and
	     x16 (modifier).  */
	  rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
	  rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
	  emit_move_insn (x17_reg, op0);
	  emit_move_insn (x16_reg, op1);
	  emit_insn (GEN_FCN (icode) ());
	  emit_move_insn (target, x17_reg);
	}

      return target;
    }

  if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
    return aarch64_simd_expand_builtin (fcode, exp, target);
  else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX)
    return aarch64_crc32_expand_builtin (fcode, exp, target);

  if (fcode == AARCH64_BUILTIN_RSQRT_DF
      || fcode == AARCH64_BUILTIN_RSQRT_SF
      || fcode == AARCH64_BUILTIN_RSQRT_V2DF
      || fcode == AARCH64_BUILTIN_RSQRT_V2SF
      || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
    return aarch64_expand_builtin_rsqrt (fcode, exp, target);

  gcc_unreachable ();
}

/* Return the decl of a vectorized builtin implementing function FN with
   the given vector in/out types, or NULL_TREE if no suitable variant
   exists.  */

tree
aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
				     tree type_in)
{
  machine_mode in_mode, out_mode;
  int in_n, out_n;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
#define AARCH64_FIND_FRINT_VARIANT(N) \
  (AARCH64_CHECK_BUILTIN_MODE (2, D) \
   ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \
   : (AARCH64_CHECK_BUILTIN_MODE (4, S) \
      ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \
      : (AARCH64_CHECK_BUILTIN_MODE (2, S) \
	 ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \
	 : NULL_TREE)))
  switch (fn)
    {
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == N##Fmode && out_n == C \
   && in_mode == N##Fmode && in_n == C)
    CASE_CFN_FLOOR:
      return AARCH64_FIND_FRINT_VARIANT (floor);
    CASE_CFN_CEIL:
      return AARCH64_FIND_FRINT_VARIANT (ceil);
    CASE_CFN_TRUNC:
      return AARCH64_FIND_FRINT_VARIANT (btrunc);
    CASE_CFN_ROUND:
      return AARCH64_FIND_FRINT_VARIANT (round);
    CASE_CFN_NEARBYINT:
      return AARCH64_FIND_FRINT_VARIANT (nearbyint);
    CASE_CFN_SQRT:
      return AARCH64_FIND_FRINT_VARIANT (sqrt);
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == SImode && out_n == C \
   && in_mode == N##Imode && in_n == C)
    CASE_CFN_CLZ:
      {
	if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
	return NULL_TREE;
      }
    CASE_CFN_CTZ:
      {
	if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
	return NULL_TREE;
      }
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == N##Imode && out_n == C \
   && in_mode == N##Fmode && in_n == C)
    CASE_CFN_IFLOOR:
    CASE_CFN_LFLOOR:
    CASE_CFN_LLFLOOR:
      {
	enum aarch64_builtins builtin;
	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di;
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si;
	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si;
	else
	  return NULL_TREE;

	return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_ICEIL:
    CASE_CFN_LCEIL:
    CASE_CFN_LLCEIL:
      {
	enum aarch64_builtins builtin;
	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di;
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si;
	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si;
	else
	  return NULL_TREE;

	return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_IROUND:
    CASE_CFN_LROUND:
    CASE_CFN_LLROUND:
      {
	enum aarch64_builtins builtin;
	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di;
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si;
	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si;
	else
	  return NULL_TREE;

	return aarch64_builtin_decls[builtin];
      }
    case CFN_BUILT_IN_BSWAP16:
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == N##Imode && out_n == C \
   && in_mode == N##Imode && in_n == C)
      if (AARCH64_CHECK_BUILTIN_MODE (4, H))
	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi];
      else if (AARCH64_CHECK_BUILTIN_MODE (8, H))
	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi];
      else
	return NULL_TREE;
    case CFN_BUILT_IN_BSWAP32:
      if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si];
      else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si];
      else
	return NULL_TREE;
    case CFN_BUILT_IN_BSWAP64:
      if (AARCH64_CHECK_BUILTIN_MODE (2, D))
	return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di];
      else
	return NULL_TREE;
    default:
      return NULL_TREE;
    }

  return NULL_TREE;
}

/* Return builtin for reciprocal square root.  */

tree
aarch64_builtin_rsqrt (unsigned int fn)
{
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
  return NULL_TREE;
}

#undef VAR1
#define VAR1(T, N, MAP, A) \
  case AARCH64_SIMD_BUILTIN_##T##_##N##A:

/* Try to fold a call to builtin FNDECL with arguments ARGS to a tree
   expression; return NULL_TREE if no folding is possible.  */

tree
aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
		      bool ignore ATTRIBUTE_UNUSED)
{
  int fcode = DECL_FUNCTION_CODE (fndecl);
  tree type = TREE_TYPE (TREE_TYPE (fndecl));
  switch (fcode)
    {
      BUILTIN_VDQF (UNOP, abs, 2)
	return fold_build1 (ABS_EXPR, type, args[0]);
      VAR1 (UNOP, floatv2si, 2, v2sf)
      VAR1 (UNOP, floatv4si, 2, v4sf)
      VAR1 (UNOP, floatv2di, 2, v2df)
	return fold_build1 (FLOAT_EXPR, type, args[0]);
      default:
	break;
    }

  return NULL_TREE;
}

/* Try to fold the gimple call at GSI to a simpler gimple statement; return
   true if the statement was replaced.  */

bool
aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  bool changed = false;
  gimple *stmt = gsi_stmt (*gsi);
  tree call = gimple_call_fn (stmt);
  tree fndecl;
  gimple *new_stmt = NULL;

  if (call)
    {
      fndecl = gimple_call_fndecl (stmt);
      if (fndecl)
	{
	  int fcode = DECL_FUNCTION_CODE (fndecl);
	  unsigned nargs = gimple_call_num_args (stmt);
	  tree *args = (nargs > 0
			? gimple_call_arg_ptr (stmt, 0)
			: &error_mark_node);

	  /* We use gimple's REDUC_(PLUS|MIN|MAX)_EXPRs for float, signed int
	     and unsigned int; it will distinguish according to the types of
	     the arguments to the __builtin.  */
	  switch (fcode)
	    {
	      BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
		new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						REDUC_PLUS_EXPR, args[0]);
		break;
	      BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
	      BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
		new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						REDUC_MAX_EXPR, args[0]);
		break;
	      BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
	      BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
		new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						REDUC_MIN_EXPR, args[0]);
		break;
	      BUILTIN_GPF (BINOP, fmulx, 0)
		{
		  gcc_assert (nargs == 2);
		  bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
		  bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
		  if (a0_cst_p || a1_cst_p)
		    {
		      if (a0_cst_p && a1_cst_p)
			{
			  tree t0 = TREE_TYPE (args[0]);
			  real_value a0 = (TREE_REAL_CST (args[0]));
			  real_value a1 = (TREE_REAL_CST (args[1]));
			  if (real_equal (&a1, &dconst0))
			    std::swap (a0, a1);
			  /* According to real_equal (), +0 equals -0.  */
			  if (real_equal (&a0, &dconst0) && real_isinf (&a1))
			    {
			      /* fmulx (0, inf) -> +/-2.0, per the special
				 FMULX semantics folded here.  */
			      real_value res = dconst2;
			      res.sign = a0.sign ^ a1.sign;
			      new_stmt =
				gimple_build_assign (gimple_call_lhs (stmt),
						     REAL_CST,
						     build_real (t0, res));
			    }
			  else
			    new_stmt =
			      gimple_build_assign (gimple_call_lhs (stmt),
						   MULT_EXPR, args[0],
						   args[1]);
			}
		      else /* a0_cst_p ^ a1_cst_p.  */
			{
			  real_value const_part = a0_cst_p
			    ? TREE_REAL_CST (args[0])
			    : TREE_REAL_CST (args[1]);
			  if (!real_equal (&const_part, &dconst0)
			      && !real_isinf (&const_part))
			    new_stmt =
			      gimple_build_assign (gimple_call_lhs (stmt),
						   MULT_EXPR, args[0],
						   args[1]);
			}
		    }
		  if (new_stmt)
		    {
		      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
		      gimple_set_vdef (new_stmt, gimple_vdef (stmt));
		    }
		  break;
		}
	    default:
	      break;
	    }
	}
    }

  if (new_stmt)
    {
      gsi_replace (gsi, new_stmt, true);
      changed = true;
    }

  return changed;
}

/* Build the *HOLD, *CLEAR and *UPDATE expression sequences used by
   #pragma STDC FENV_ACCESS support, in terms of the FPCR/FPSR builtins.  */

void
aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  const unsigned AARCH64_FE_INVALID = 1;
  const unsigned AARCH64_FE_DIVBYZERO = 2;
  const unsigned AARCH64_FE_OVERFLOW = 4;
  const unsigned AARCH64_FE_UNDERFLOW = 8;
  const unsigned AARCH64_FE_INEXACT = 16;
  const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
							| AARCH64_FE_DIVBYZERO
							| AARCH64_FE_OVERFLOW
							| AARCH64_FE_UNDERFLOW
							| AARCH64_FE_INEXACT);
  const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
  tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
  tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
  tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
  tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;

  /* Generate the equivalent of:

       unsigned int fenv_cr;
       fenv_cr = __builtin_aarch64_get_fpcr ();

       unsigned int fenv_sr;
       fenv_sr = __builtin_aarch64_get_fpsr ();

       Now set all exceptions to non-stop:

       unsigned int mask_cr
	 = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
       unsigned int masked_cr;
       masked_cr = fenv_cr & mask_cr;

       And clear all exception flags:

       unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
       unsigned int masked_sr;
       masked_sr = fenv_sr & mask_sr;

       __builtin_aarch64_set_fpcr (masked_cr);
       __builtin_aarch64_set_fpsr (masked_sr);  */

  fenv_cr = create_tmp_var_raw (unsigned_type_node);
  fenv_sr = create_tmp_var_raw (unsigned_type_node);

  get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
  set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
  get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
  set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];

  mask_cr = build_int_cst (unsigned_type_node,
			   ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
  mask_sr = build_int_cst (unsigned_type_node,
			   ~(AARCH64_FE_ALL_EXCEPT));

  ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node,
		       fenv_cr, build_call_expr (get_fpcr, 0));
  ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node,
		       fenv_sr, build_call_expr (get_fpsr, 0));

  masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
  masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);

  hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
  hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  hold_fnclex = build2 (COMPOUND_EXPR, void_type_node,
			hold_fnclex_cr, hold_fnclex_sr);
  masked_fenv = build2 (COMPOUND_EXPR, void_type_node,
			masked_fenv_cr, masked_fenv_sr);
  ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);

  *hold = build2 (COMPOUND_EXPR, void_type_node,
		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
		  hold_fnclex);

  /* Store the value of masked_fenv to clear the exceptions:
     __builtin_aarch64_set_fpsr (masked_fenv_sr);  */

  *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  /* Generate the equivalent of:

       unsigned int new_fenv_var;
       new_fenv_var = __builtin_aarch64_get_fpsr ();

       __builtin_aarch64_set_fpsr (fenv_sr);

       __atomic_feraiseexcept (new_fenv_var);  */

  new_fenv_var = create_tmp_var_raw (unsigned_type_node);
  reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
			new_fenv_var, build_call_expr (get_fpsr, 0));
  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  update_call = build_call_expr (atomic_feraiseexcept, 1,
				 fold_convert (integer_type_node,
					       new_fenv_var));
  *update = build2 (COMPOUND_EXPR, void_type_node,
		    build2 (COMPOUND_EXPR, void_type_node,
			    reload_fenv, restore_fnenv), update_call);
}

#undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT
#undef CF0
#undef CF1
#undef CF2
#undef CF3
#undef CF4
#undef CF10
#undef VAR1
#undef VAR2
#undef VAR3
#undef VAR4
#undef VAR5
#undef VAR6
#undef VAR7
#undef VAR8
#undef VAR9
#undef VAR10
#undef VAR11

#include "gt-aarch64-builtins.h"