Diffstat (limited to 'gcc/config')
228 files changed, 12379 insertions, 3297 deletions
diff --git a/gcc/config/aarch64/aarch64-abi-ms-protos.h b/gcc/config/aarch64/aarch64-abi-ms-protos.h new file mode 100644 index 0000000..717b60d --- /dev/null +++ b/gcc/config/aarch64/aarch64-abi-ms-protos.h @@ -0,0 +1,34 @@ +/* Windows specific ABI for AArch64 architecture. + Copyright (C) 2025 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef GCC_AARCH64_ABI_MS_PROTOS_H +#define GCC_AARCH64_ABI_MS_PROTOS_H + +extern int aarch64_ms_variadic_abi_enum_va_list (int, const char **, + tree *ptree); + +extern tree aarch64_ms_variadic_abi_fn_abi_va_list (tree fndecl); + +extern tree aarch64_ms_variadic_abi_canonical_va_list_type (tree type); + +extern int aarch64_arg_partial_bytes (cumulative_args_t, + const function_arg_info &); + +#endif diff --git a/gcc/config/aarch64/aarch64-abi-ms.cc b/gcc/config/aarch64/aarch64-abi-ms.cc new file mode 100644 index 0000000..ea0a0e5 --- /dev/null +++ b/gcc/config/aarch64/aarch64-abi-ms.cc @@ -0,0 +1,106 @@ +/* Windows specific ABI for AArch64 architecture. + Copyright (C) 2025 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "target.h" +#include "backend.h" +#include "rtl.h" +#include "tree.h" +#include "stringpool.h" +#include "attribs.h" +#include "regs.h" +#include "function-abi.h" +#include "builtins.h" +#include "aarch64-abi-ms-protos.h" + +/* Iterate through the target-specific builtin types for va_list. + IDX denotes the iterator, *PTREE is set to the result type of + the va_list builtin, and *PNAME to its internal type. + Returns zero if there is no element for this index, otherwise + IDX should be increased upon the next call. + Note, do not iterate a base builtin's name like __builtin_va_list. + Used from c_common_nodes_and_builtins. */ + +int +aarch64_ms_variadic_abi_enum_va_list (int idx, const char **pname, tree *ptree) +{ + switch (idx) + { + default: + break; + + case 0: + *ptree = ms_va_list_type_node; + *pname = "__builtin_ms_va_list"; + return 1; + } + + return 0; +} + +/* This function returns the calling abi specific va_list type node. + It returns the FNDECL specific va_list type. 
*/
+
+tree
+aarch64_ms_variadic_abi_fn_abi_va_list (tree fndecl)
+{
+  gcc_assert (fndecl != NULL_TREE);
+
+  arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id ();
+  if (pcs == ARM_PCS_MS_VARIADIC)
+    return ms_va_list_type_node;
+
+  return std_fn_abi_va_list (fndecl);
+}
+
+/* Returns the canonical va_list type specified by TYPE.
+   If there is no valid TYPE provided, it returns NULL_TREE.  */
+
+tree
+aarch64_ms_variadic_abi_canonical_va_list_type (tree type)
+{
+  if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
+    return ms_va_list_type_node;
+
+  return NULL_TREE;
+}
+
+/* Implement TARGET_ARG_PARTIAL_BYTES.  */
+
+int
+aarch64_arg_partial_bytes (cumulative_args_t pcum_v,
+			   const function_arg_info &arg ATTRIBUTE_UNUSED)
+{
+  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+
+  if (pcum->pcs_variant != ARM_PCS_MS_VARIADIC)
+    return 0;
+
+  /* Handle the case when an argument is split between the last registers
+     and the stack.  */
+  if ((pcum->aapcs_reg != NULL_RTX) && (pcum->aapcs_stack_words != 0))
+    return pcum->aapcs_stack_words * UNITS_PER_WORD;
+
+  return 0;
+}
diff --git a/gcc/config/aarch64/aarch64-abi-ms.h b/gcc/config/aarch64/aarch64-abi-ms.h
index bc9ada0..5530843 100644
--- a/gcc/config/aarch64/aarch64-abi-ms.h
+++ b/gcc/config/aarch64/aarch64-abi-ms.h
@@ -31,4 +31,22 @@ along with GCC; see the file COPYING3.  If not see
 #undef STATIC_CHAIN_REGNUM
 #define STATIC_CHAIN_REGNUM R17_REGNUM
 
+#define ASM_COMMENT_START "//"
+
+/* ASM_OUTPUT_TYPE_DIRECTIVE is not yet supported by binutils for the
+   aarch64-w64-mingw32 target.  */
+#define ASM_OUTPUT_TYPE_DIRECTIVE(STREAM, NAME, TYPE)
+
+/* Structured Exception Handling (SEH) is not yet supported by binutils,
+   so seh_endproc is emitted as an assembly comment to mark the end of a
+   function.  */
+#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) \
+  fprintf (FILE, "\t" ASM_COMMENT_START " seh_endproc\n")
+
+/* Long double is 64-bit for COFF targets.
+   Reference:
+   https://learn.microsoft.com/en-us/cpp/c-language/type-long-double.  */
+#undef TARGET_LONG_DOUBLE_128
+#define TARGET_LONG_DOUBLE_128 0
+
 #endif /* GCC_AARCH64_ABI_MS_H.  */
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 408099a..a384ba0 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2519,6 +2519,39 @@ aarch64_general_init_builtins (void)
   handle_arm_acle_h ();
 }
 
+/* Function to initialize builtin variadic functions for aarch64-w64-mingw32.
+   On this target, variadic functions are handled differently.
+
+   Implements SUBTARGET_INIT_BUILTINS.
*/ + +void +aarch64_ms_variadic_abi_init_builtins (void) +{ + tree ms_va_ref; + tree fnvoid_va_end_ms; + tree fnvoid_va_start_ms; + tree fnvoid_va_copy_ms; + tree fnattr_ms = NULL_TREE; + + fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE); + ms_va_ref = build_reference_type (ms_va_list_type_node); + + fnvoid_va_end_ms + = build_function_type_list (void_type_node, ms_va_ref, NULL_TREE); + fnvoid_va_start_ms + = build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE); + fnvoid_va_copy_ms + = build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node, + NULL_TREE); + + add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms, + BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms); + add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms, + BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms); + add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms, + BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms); +} + /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group. */ tree aarch64_general_builtin_decl (unsigned code, bool) diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index c3957c7..de4444b 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -65,6 +65,11 @@ aarch64_define_unconditional_macros (cpp_reader *pfile) builtin_define_with_int_value ("__ARM_ARCH_PROFILE", TARGET_V8R ? 'R' : 'A'); + +#if HAVE_AS_AEABI_BUILD_ATTRIBUTES + builtin_define_with_int_value ("__ARM_BUILDATTR64_FV", 'A'); +#endif + builtin_define ("__ARM_FEATURE_CLZ"); builtin_define ("__ARM_FEATURE_IDIV"); builtin_define ("__ARM_FEATURE_UNALIGNED"); diff --git a/gcc/config/aarch64/aarch64-coff.h b/gcc/config/aarch64/aarch64-coff.h index 7260726..42ae022 100644 --- a/gcc/config/aarch64/aarch64-coff.h +++ b/gcc/config/aarch64/aarch64-coff.h @@ -61,11 +61,6 @@ #define ASM_OUTPUT_SKIP(STREAM, NBYTES) \ fprintf (STREAM, "\t.space\t%d // skip\n", (int) (NBYTES)) -/* Definitions that are not yet supported by binutils for the - aarch64-w64-mingw32 target. */ -#define ASM_OUTPUT_TYPE_DIRECTIVE(STREAM, NAME, TYPE) -#define ASM_DECLARE_FUNCTION_SIZE(FILE, FNAME, DECL) - #define TEXT_SECTION_ASM_OP "\t.text" #define DATA_SECTION_ASM_OP "\t.data" #define BSS_SECTION_ASM_OP "\t.bss" diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index baf8abf..851594a 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -75,6 +75,8 @@ AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, (CRC, CRYPTO), thu AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (F16, RNG, AES, SHA3), ampere1, 0xC0, 0xac3, -1) AARCH64_CORE("ampere1a", ampere1a, cortexa57, V8_6A, (F16, RNG, AES, SHA3, SM4, MEMTAG), ampere1a, 0xC0, 0xac4, -1) AARCH64_CORE("ampere1b", ampere1b, cortexa57, V8_7A, (F16, RNG, AES, SHA3, SM4, MEMTAG, CSSC), ampere1b, 0xC0, 0xac5, -1) +AARCH64_CORE("ampere1c", ampere1c, cortexa57, V9_2A, (CSSC, CRYPTO, FP8FMA, FAMINMAX, LUT, SVE_B16B16, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversen3, 0xc0, 0xac7, -1) + /* Do not swap around "emag" and "xgene1", this order is required to handle variant correctly. 
*/ AARCH64_CORE("emag", emag, xgene1, V8A, (CRC, CRYPTO), emag, 0x50, 0x000, 3) @@ -227,6 +229,11 @@ AARCH64_CORE("grace", grace, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, SVE2_AES AARCH64_CORE("neoverse-v3", neoversev3, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE), neoversev3, 0x41, 0xd84, -1) AARCH64_CORE("neoverse-v3ae", neoversev3ae, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE), neoversev3ae, 0x41, 0xd83, -1) +AARCH64_CORE("c1-nano", c1nano, cortexa53, V9_3A, (MEMTAG, SVE2_BITPERM, F16FML, SME2, RCPC3), cortexa53, 0x41, 0xd8a, -1) +AARCH64_CORE("c1-pro", c1pro, cortexa57, V9_3A, (MEMTAG, SVE2_BITPERM, F16FML, PROFILE, SME2, RCPC3), neoversen3, 0x41, 0xd8b, -1) +AARCH64_CORE("c1-premium", c1premium, cortexa57, V9_3A, (MEMTAG, SVE2_BITPERM, F16FML, PROFILE, SME2, RCPC3), neoversev3, 0x41, 0xd90, -1) +AARCH64_CORE("c1-ultra", c1ultra, cortexa57, V9_3A, (MEMTAG, SVE2_BITPERM, F16FML, PROFILE, SME2, RCPC3), cortexx925, 0x41, 0xd8c, -1) + AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) /* NVIDIA ('N') cores. */ diff --git a/gcc/config/aarch64/aarch64-generate-json-tuning-routines.py b/gcc/config/aarch64/aarch64-generate-json-tuning-routines.py new file mode 100755 index 0000000..a4f9e4e --- /dev/null +++ b/gcc/config/aarch64/aarch64-generate-json-tuning-routines.py @@ -0,0 +1,383 @@ +#!/usr/bin/env python3 + +# Script to autogenerate the parsing and serialization routines for the +# aarch64 JSON tuning parameters. +# +# Copyright The GNU Toolchain Authors. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 3, or (at your option) any later +# version. +# +# GCC is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +DESCRIPTION = """ +Maintenance script to regenerate aarch64-json-tunings-*-generated.inc files +from the JSON schema in aarch64-json-schema.h. + +This script is run automatically whenever aarch64-json-schema.h is modified. + +Usage: + + python3 <path-to-script>/aarch64-generate-json-tuning-routines.py [options] + +Options: + --generate-only <parser|printer> Generate only parser or printer file. + If not specified, generates both. + +Note that the script can be called from any directory. 
+ +Generates (in gcc/config/aarch64/): + aarch64-json-tunings-parser-generated.inc + aarch64-json-tunings-printer-generated.inc +""" + +import json +import re +import os +import argparse +from typing import Dict, Any, List, Tuple + +def extract_schema_from_header(file_path: str) -> str: + with open(file_path, "r") as f: + content = f.read() + + # Find the schema_json variable content between R"json( and )json" + pattern = r'static const char \*schema_json = R"json\((.*?)\)json";' + match = re.search(pattern, content, re.DOTALL) + + if not match: + raise ValueError("Could not find schema_json in header file") + + return match.group(1).strip() + +def get_macro(operation: str, field_type: str) -> str: + type_map = { + "int": "INTEGER", + "uint": "UNSIGNED_INTEGER", + "boolean": "BOOLEAN", + "string": "STRING", + "enum": "ENUM", + } + if field_type not in type_map: + raise ValueError(f"Unknown field type: {field_type}") + return f"{operation}_{type_map[field_type]}_FIELD" + +def generate_field_code( + operation: str, + key: str, + value: Any, + struct_name: str, + current_path: List[str], + function_map: Dict[str, str], + obj_name: str = "jo", + indent: str = " ", +) -> List[str]: + lines = [] + + if isinstance(value, str): + macro = get_macro(operation.upper(), value) + if value == "enum": + enum_mapping = f"{key}_mappings" + lines.append( + f'{indent}{macro} ({obj_name}, "{key}", {struct_name}.{key}, {enum_mapping});' + ) + else: + lines.append(f'{indent}{macro} ({obj_name}, "{key}", {struct_name}.{key});') + + elif isinstance(value, dict): + # Nested object - find function name based on current context + key + child_path = current_path + [key] + child_path_key = "_".join(child_path) + func_name = function_map.get(child_path_key, f"{operation.lower()}_{key}") + macro_name = f"{operation.upper()}_OBJECT" + lines.append( + f'{indent}{macro_name} ({obj_name}, "{key}", {struct_name}.{key}, {func_name});' + ) + + elif isinstance(value, list) and len(value) > 0: + if isinstance(value[0], dict): + element_key = f"{key}_element" + element_path = current_path + [element_key] + element_path_key = "_".join(element_path) + func_name = function_map.get( + element_path_key, f"{operation.lower()}_{element_key}" + ) + macro_name = f"{operation.upper()}_ARRAY_FIELD" + + if operation.lower() == "serialize": + lines.append( + f'{indent}{macro_name} ({obj_name}, "{key}", {struct_name}.{key}, ARRAY_SIZE ({struct_name}.{key}), {func_name});' + ) + else: + lines.append( + f'{indent}{macro_name} ({obj_name}, "{key}", {struct_name}.{key}, {func_name});' + ) + else: + raise ValueError(f"Arrays of non-object types are not yet supported: {key}") + else: + raise ValueError(f"Unhandled field type for key '{key}': {type(value)}") + + return lines + +def generate_field_parsing( + key: str, + value: Any, + struct_name: str, + current_path: List[str], + function_map: Dict[str, str], + indent: str = " ", +) -> List[str]: + return generate_field_code( + "parse", key, value, struct_name, current_path, function_map, "jo", indent + ) + +def generate_field_serialization( + key: str, + value: Any, + struct_name: str, + obj_name: str, + current_path: List[str], + function_map: Dict[str, str], + indent: str = " ", +) -> List[str]: + return generate_field_code( + "serialize", + key, + value, + struct_name, + current_path, + function_map, + obj_name, + indent, + ) + +def generate_function( + operation: str, + full_name: str, + local_name: str, + schema: Dict[str, Any], + current_path: List[str], + function_map: Dict[str, str], +) -> 
List[str]:
+    lines = []
+    lines.append("template <typename T>")
+
+    if operation.lower() == "parse":
+        lines.append("static void")
+        lines.append(f"parse_{full_name} (const json::object *jo, T &{local_name})")
+        lines.append("{")
+
+        for key, value in schema.items():
+            field_lines = generate_field_parsing(
+                key, value, local_name, current_path, function_map
+            )
+            lines.extend(field_lines)
+
+    elif operation.lower() == "serialize":
+        lines.append("static std::unique_ptr<json::object>")
+        lines.append(f"serialize_{full_name} (const T &{local_name})")
+        lines.append("{")
+        lines.append(f"  auto {local_name}_obj = std::make_unique<json::object> ();")
+        lines.append("")
+
+        for key, value in schema.items():
+            field_lines = generate_field_serialization(
+                key, value, local_name, f"{local_name}_obj", current_path, function_map
+            )
+            lines.extend(field_lines)
+
+        lines.append("")
+        lines.append(f"  return {local_name}_obj;")
+
+    lines.append("}")
+
+    return lines
+
+"""Collect all object schemas with their full paths. This is necessary for
+generating names for the routines with the correct hierarchical path to ensure
+that identical keys in different structures are not given the same name.
+For example:
+vec_costs.issue_info.sve maps to <parse/serialize>_vec_costs_issue_info_sve
+vec_costs.sve maps to <parse/serialize>_vec_costs_sve.
+"""
+def collect_all_objects_with_paths(
+    schema: Dict[str, Any], path: List[str] = []
+) -> Dict[str, Tuple[List[str], Dict[str, Any]]]:
+    objects = {}
+
+    for key, value in schema.items():
+        current_path = path + [key]
+
+        if isinstance(value, dict):
+            path_key = "_".join(current_path)
+            objects[path_key] = (current_path, value)
+            nested = collect_all_objects_with_paths(value, current_path)
+            objects.update(nested)
+
+        elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict):
+            element_key = key.rstrip("s") if key.endswith("s") else f"{key}_element"
+            element_path = current_path[:-1] + [element_key]
+            element_path_key = "_".join(element_path)
+            objects[element_path_key] = (element_path, value[0])
+            nested = collect_all_objects_with_paths(value[0], element_path)
+            objects.update(nested)
+
+    return objects
+
+"""Calculate dependency depth of an object schema. 0 indicates no
+dependencies, i.e.
the object has only primitive types.""" +def get_dependency_depth(obj_schema: Dict[str, Any]) -> int: + max_depth = 0 + for value in obj_schema.values(): + if isinstance(value, dict): + max_depth = max(max_depth, 1 + get_dependency_depth(value)) + elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], dict): + max_depth = max(max_depth, 1 + get_dependency_depth(value[0])) + return max_depth + +def generate_enum_mappings(operation: str) -> str: + mappings = f""" +static const enum_mapping<tune_params::aarch64_autoprefetch_model> + autoprefetcher_model_mappings[] = {{ +#define AARCH64_AUTOPREFETCH_MODE(NAME, ENUM_VALUE) {{NAME, tune_params::ENUM_VALUE}}, +#include "aarch64-tuning-enums.def" +}}; + +static const enum_mapping<aarch64_ldp_stp_policy> ldp_policy_model_mappings[] = {{ +#define AARCH64_LDP_STP_POLICY(NAME, ENUM_VALUE) {{NAME, ENUM_VALUE}}, +#include "aarch64-tuning-enums.def" +}}; + +static const enum_mapping<aarch64_ldp_stp_policy> stp_policy_model_mappings[] = {{ +#define AARCH64_LDP_STP_POLICY(NAME, ENUM_VALUE) {{NAME, ENUM_VALUE}}, +#include "aarch64-tuning-enums.def" +}}; +""" + return mappings + +def generate_all_functions(schema_file: str, operation: str) -> str: + schema_str = extract_schema_from_header(schema_file) + schema = json.loads(schema_str) + tune_params_schema = schema.get("tune_params", {}) + + all_objects_with_paths = collect_all_objects_with_paths(tune_params_schema) + + function_map = {} + for path_key, (path, obj_schema) in all_objects_with_paths.items(): + if path: + full_name = "_".join(path) + function_map[path_key] = f"{operation}_{full_name}" + else: + function_map[path_key] = f"{operation}_{path_key}" + + """ Structures can have nested structures that may not have been defined yet. + Therefore, we need to sort the objects by dependency depth and define + functions for the inner structures first.""" + sorted_objects = sorted( + all_objects_with_paths.items(), key=lambda x: get_dependency_depth(x[1][1]) + ) + + generated_functions = [] + generated_functions.append(generate_enum_mappings(operation)) + + for path_key, (path, obj_schema) in sorted_objects: + # Use the full path for function generation + if path: + full_name = "_".join(path) + local_name = path[-1] + else: + full_name = path_key + local_name = path_key + + function_str = generate_function( + operation, full_name, local_name, obj_schema, path, function_map + ) + generated_functions.append("\n".join(function_str)) + + main_function = generate_function( + operation, "tunings", "tunings", tune_params_schema, [], function_map + ) + generated_functions.append("\n".join(main_function)) + return "\n\n".join(generated_functions) + +def write_generated_include_file( + output_file_path: str, generated_code: str, operation: str +) -> None: + header_comment = f"""/* This file is auto-generated by aarch64-generate-json-tuning-routines.py. */ +/* Copyright The GNU Toolchain Authors. + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. 
If not see + <http://www.gnu.org/licenses/>. */ + +/* This file contains the auto-generated {operation} functions for JSON tuning parameters. */ + +""" + + try: + with open(output_file_path, "w") as f: + f.write(header_comment) + f.write(generated_code) + print(f"Successfully generated {output_file_path}") + except Exception as e: + print(f"Error writing to {output_file_path}: {e}") + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--generate-only', + choices=['parser', 'printer'], + help='Generate only parser or printer file. If not specified, generates both.') + args = parser.parse_args() + + try: + script_dir = os.path.dirname(os.path.abspath(__file__)) + + schema_file = os.path.join(script_dir, "aarch64-json-schema.h") + parser_inc_file = os.path.join( + script_dir, "aarch64-json-tunings-parser-generated.inc" + ) + printer_inc_file = os.path.join( + script_dir, "aarch64-json-tunings-printer-generated.inc" + ) + if args.generate_only is None or args.generate_only == 'parser': + parser_generated_code = generate_all_functions(schema_file, "parse") + write_generated_include_file(parser_inc_file, parser_generated_code, "parser") + + if args.generate_only is None or args.generate_only == 'printer': + serializer_generated_code = generate_all_functions(schema_file, "serialize") + write_generated_include_file( + printer_inc_file, serializer_generated_code, "serializer" + ) + + print(f"Generated files in: {script_dir}") + + except Exception as e: + print(f"Error: {e}") + return 1 + + return 0 + +if __name__ == "__main__": + exit(main()) diff --git a/gcc/config/aarch64/aarch64-json-schema.h b/gcc/config/aarch64/aarch64-json-schema.h new file mode 100644 index 0000000..0c1863f --- /dev/null +++ b/gcc/config/aarch64/aarch64-json-schema.h @@ -0,0 +1,264 @@ +/* Raw JSON schema for the AArch64 tuning parameters. + Copyright The GNU Toolchain Authors. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. 
*/ + +#ifndef AARCH64_JSON_SCHEMA_H +#define AARCH64_JSON_SCHEMA_H + +static const char *schema_json = R"json( +{ + "metadata": { + "gcc_version": "int" + }, + "tune_params": { + "insn_extra_cost": { + "alu": { + "arith": "int", + "logical": "int", + "shift": "int", + "shift_reg": "int", + "arith_shift": "int", + "arith_shift_reg": "int", + "log_shift": "int", + "log_shift_reg": "int", + "extend": "int", + "extend_arith": "int", + "bfi": "int", + "bfx": "int", + "clz": "int", + "rev": "int", + "non_exec": "int", + "non_exec_costs_exec": "boolean" + }, + "mult": [ + { + "simple": "int", + "flag_setting": "int", + "extend": "int", + "add": "int", + "extend_add": "int", + "idiv": "int" + }, + { + "simple": "int", + "flag_setting": "int", + "extend": "int", + "add": "int", + "extend_add": "int", + "idiv": "int" + } + ], + "ldst": { + "load": "int", + "load_sign_extend": "int", + "ldrd": "int", + "ldm_1st": "int", + "ldm_regs_per_insn_1st": "int", + "ldm_regs_per_insn_subsequent": "int", + "loadf": "int", + "loadd": "int", + "load_unaligned": "int", + "store": "int", + "strd": "int", + "stm_1st": "int", + "stm_regs_per_insn_1st": "int", + "stm_regs_per_insn_subsequent": "int", + "storef": "int", + "stored": "int", + "store_unaligned": "int", + "loadv": "int", + "storev": "int" + }, + "fp": [ + { + "div": "int", + "mult": "int", + "mult_addsub": "int", + "fma": "int", + "addsub": "int", + "fpconst": "int", + "neg": "int", + "compare": "int", + "widen": "int", + "narrow": "int", + "toint": "int", + "fromint": "int", + "roundint": "int" + }, + { + "div": "int", + "mult": "int", + "mult_addsub": "int", + "fma": "int", + "addsub": "int", + "fpconst": "int", + "neg": "int", + "compare": "int", + "widen": "int", + "narrow": "int", + "toint": "int", + "fromint": "int", + "roundint": "int" + } + ], + "vect": { + "alu": "int", + "mult": "int", + "movi": "int", + "dup": "int", + "extract": "int" + } + }, + "addr_cost": { + "addr_scale_costs": { + "hi": "int", + "si": "int", + "di": "int", + "ti": "int" + }, + "pre_modify": "int", + "post_modify": "int", + "post_modify_ld3_st3": "int", + "post_modify_ld4_st4": "int", + "register_offset": "int", + "register_sextend": "int", + "register_zextend": "int", + "imm_offset": "int" + }, + "regmove_cost": { + "GP2GP": "int", + "GP2FP": "int", + "FP2GP": "int", + "FP2FP": "int" + }, + "vec_costs": { + "scalar_int_stmt_cost": "int", + "scalar_fp_stmt_cost": "int", + "scalar_load_cost": "int", + "scalar_store_cost": "int", + "cond_taken_branch_cost": "int", + "cond_not_taken_branch_cost": "int", + "advsimd": { + "int_stmt_cost": "int", + "fp_stmt_cost": "int", + "ld2_st2_permute_cost": "int", + "ld3_st3_permute_cost": "int", + "ld4_st4_permute_cost": "int", + "permute_cost": "int", + "reduc_i8_cost": "int", + "reduc_i16_cost": "int", + "reduc_i32_cost": "int", + "reduc_i64_cost": "int", + "reduc_f16_cost": "int", + "reduc_f32_cost": "int", + "reduc_f64_cost": "int", + "store_elt_extra_cost": "int", + "vec_to_scalar_cost": "int", + "scalar_to_vec_cost": "int", + "align_load_cost": "int", + "unalign_load_cost": "int", + "unalign_store_cost": "int", + "store_cost": "int" + }, + "sve": { + "clast_cost": "int", + "fadda_f16_cost": "int", + "fadda_f32_cost": "int", + "fadda_f64_cost": "int", + "gather_load_x32_cost": "uint", + "gather_load_x64_cost": "uint", + "gather_load_x32_init_cost": "int", + "gather_load_x64_init_cost": "int", + "scatter_store_elt_cost": "int" + }, + "issue_info": { + "scalar": { + "loads_stores_per_cycle": "uint", + "stores_per_cycle": "uint", + 
"general_ops_per_cycle": "uint", + "fp_simd_load_general_ops": "uint", + "fp_simd_store_general_ops": "uint" + }, + "advsimd": { + "loads_stores_per_cycle": "uint", + "stores_per_cycle": "uint", + "general_ops_per_cycle": "uint", + "fp_simd_load_general_ops": "uint", + "fp_simd_store_general_ops": "uint", + "ld2_st2_general_ops": "uint", + "ld3_st3_general_ops": "uint", + "ld4_st4_general_ops": "uint" + }, + "sve": { + "loads_stores_per_cycle": "uint", + "stores_per_cycle": "uint", + "general_ops_per_cycle": "uint", + "fp_simd_load_general_ops": "uint", + "fp_simd_store_general_ops": "uint", + "ld2_st2_general_ops": "uint", + "ld3_st3_general_ops": "uint", + "ld4_st4_general_ops": "uint", + "pred_ops_per_cycle": "uint", + "while_pred_ops": "uint", + "int_cmp_pred_ops": "uint", + "fp_cmp_pred_ops": "uint", + "gather_scatter_pair_general_ops": "uint", + "gather_scatter_pair_pred_ops": "uint" + } + } + }, + "branch_costs": { "predictable": "int", "unpredictable": "int" }, + "approx_modes": { "division": "int", "sqrt": "int", "recip_sqrt": "int" }, + "sve_width": "uint", + "memmov_cost": { + "load_int": "int", + "store_int": "int", + "load_fp": "int", + "store_fp": "int", + "load_pred": "int", + "store_pred": "int" + }, + "issue_rate": "int", + "fusible_ops": "uint", + "function_align": "string", + "jump_align": "string", + "loop_align": "string", + "int_reassoc_width": "int", + "fp_reassoc_width": "int", + "fma_reassoc_width": "int", + "vec_reassoc_width": "int", + "min_div_recip_mul_sf": "int", + "min_div_recip_mul_df": "int", + "max_case_values": "uint", + "autoprefetcher_model": "enum", + "extra_tuning_flags": "uint", + "prefetch": { + "num_slots": "int", + "l1_cache_size": "int", + "l1_cache_line_size": "int", + "l2_cache_size": "int", + "prefetch_dynamic_strides": "boolean", + "minimum_stride": "int", + "default_opt_level": "int" + }, + "ldp_policy_model": "enum", + "stp_policy_model": "enum" + } +})json"; + +#endif
\ No newline at end of file diff --git a/gcc/config/aarch64/aarch64-json-tunings-parser-generated.inc b/gcc/config/aarch64/aarch64-json-tunings-parser-generated.inc new file mode 100644 index 0000000..cf31e53 --- /dev/null +++ b/gcc/config/aarch64/aarch64-json-tunings-parser-generated.inc @@ -0,0 +1,355 @@ +/* This file is auto-generated by aarch64-generate-json-tuning-routines.py. */ +/* Copyright The GNU Toolchain Authors. + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* This file contains the auto-generated parser functions for JSON tuning parameters. */ + + +static const enum_mapping<tune_params::aarch64_autoprefetch_model> + autoprefetcher_model_mappings[] = { +#define AARCH64_AUTOPREFETCH_MODE(NAME, ENUM_VALUE) {NAME, tune_params::ENUM_VALUE}, +#include "aarch64-tuning-enums.def" +}; + +static const enum_mapping<aarch64_ldp_stp_policy> ldp_policy_model_mappings[] = { +#define AARCH64_LDP_STP_POLICY(NAME, ENUM_VALUE) {NAME, ENUM_VALUE}, +#include "aarch64-tuning-enums.def" +}; + +static const enum_mapping<aarch64_ldp_stp_policy> stp_policy_model_mappings[] = { +#define AARCH64_LDP_STP_POLICY(NAME, ENUM_VALUE) {NAME, ENUM_VALUE}, +#include "aarch64-tuning-enums.def" +}; + + +template <typename T> +static void +parse_insn_extra_cost_alu (const json::object *jo, T &alu) +{ + PARSE_INTEGER_FIELD (jo, "arith", alu.arith); + PARSE_INTEGER_FIELD (jo, "logical", alu.logical); + PARSE_INTEGER_FIELD (jo, "shift", alu.shift); + PARSE_INTEGER_FIELD (jo, "shift_reg", alu.shift_reg); + PARSE_INTEGER_FIELD (jo, "arith_shift", alu.arith_shift); + PARSE_INTEGER_FIELD (jo, "arith_shift_reg", alu.arith_shift_reg); + PARSE_INTEGER_FIELD (jo, "log_shift", alu.log_shift); + PARSE_INTEGER_FIELD (jo, "log_shift_reg", alu.log_shift_reg); + PARSE_INTEGER_FIELD (jo, "extend", alu.extend); + PARSE_INTEGER_FIELD (jo, "extend_arith", alu.extend_arith); + PARSE_INTEGER_FIELD (jo, "bfi", alu.bfi); + PARSE_INTEGER_FIELD (jo, "bfx", alu.bfx); + PARSE_INTEGER_FIELD (jo, "clz", alu.clz); + PARSE_INTEGER_FIELD (jo, "rev", alu.rev); + PARSE_INTEGER_FIELD (jo, "non_exec", alu.non_exec); + PARSE_BOOLEAN_FIELD (jo, "non_exec_costs_exec", alu.non_exec_costs_exec); +} + +template <typename T> +static void +parse_insn_extra_cost_mult_element (const json::object *jo, T &mult_element) +{ + PARSE_INTEGER_FIELD (jo, "simple", mult_element.simple); + PARSE_INTEGER_FIELD (jo, "flag_setting", mult_element.flag_setting); + PARSE_INTEGER_FIELD (jo, "extend", mult_element.extend); + PARSE_INTEGER_FIELD (jo, "add", mult_element.add); + PARSE_INTEGER_FIELD (jo, "extend_add", mult_element.extend_add); + PARSE_INTEGER_FIELD (jo, "idiv", mult_element.idiv); +} + +template <typename T> +static void +parse_insn_extra_cost_ldst (const json::object *jo, T &ldst) +{ + PARSE_INTEGER_FIELD (jo, "load", ldst.load); + PARSE_INTEGER_FIELD (jo, "load_sign_extend", ldst.load_sign_extend); + PARSE_INTEGER_FIELD (jo, "ldrd", ldst.ldrd); + PARSE_INTEGER_FIELD (jo, 
"ldm_1st", ldst.ldm_1st); + PARSE_INTEGER_FIELD (jo, "ldm_regs_per_insn_1st", ldst.ldm_regs_per_insn_1st); + PARSE_INTEGER_FIELD (jo, "ldm_regs_per_insn_subsequent", ldst.ldm_regs_per_insn_subsequent); + PARSE_INTEGER_FIELD (jo, "loadf", ldst.loadf); + PARSE_INTEGER_FIELD (jo, "loadd", ldst.loadd); + PARSE_INTEGER_FIELD (jo, "load_unaligned", ldst.load_unaligned); + PARSE_INTEGER_FIELD (jo, "store", ldst.store); + PARSE_INTEGER_FIELD (jo, "strd", ldst.strd); + PARSE_INTEGER_FIELD (jo, "stm_1st", ldst.stm_1st); + PARSE_INTEGER_FIELD (jo, "stm_regs_per_insn_1st", ldst.stm_regs_per_insn_1st); + PARSE_INTEGER_FIELD (jo, "stm_regs_per_insn_subsequent", ldst.stm_regs_per_insn_subsequent); + PARSE_INTEGER_FIELD (jo, "storef", ldst.storef); + PARSE_INTEGER_FIELD (jo, "stored", ldst.stored); + PARSE_INTEGER_FIELD (jo, "store_unaligned", ldst.store_unaligned); + PARSE_INTEGER_FIELD (jo, "loadv", ldst.loadv); + PARSE_INTEGER_FIELD (jo, "storev", ldst.storev); +} + +template <typename T> +static void +parse_insn_extra_cost_fp_element (const json::object *jo, T &fp_element) +{ + PARSE_INTEGER_FIELD (jo, "div", fp_element.div); + PARSE_INTEGER_FIELD (jo, "mult", fp_element.mult); + PARSE_INTEGER_FIELD (jo, "mult_addsub", fp_element.mult_addsub); + PARSE_INTEGER_FIELD (jo, "fma", fp_element.fma); + PARSE_INTEGER_FIELD (jo, "addsub", fp_element.addsub); + PARSE_INTEGER_FIELD (jo, "fpconst", fp_element.fpconst); + PARSE_INTEGER_FIELD (jo, "neg", fp_element.neg); + PARSE_INTEGER_FIELD (jo, "compare", fp_element.compare); + PARSE_INTEGER_FIELD (jo, "widen", fp_element.widen); + PARSE_INTEGER_FIELD (jo, "narrow", fp_element.narrow); + PARSE_INTEGER_FIELD (jo, "toint", fp_element.toint); + PARSE_INTEGER_FIELD (jo, "fromint", fp_element.fromint); + PARSE_INTEGER_FIELD (jo, "roundint", fp_element.roundint); +} + +template <typename T> +static void +parse_insn_extra_cost_vect (const json::object *jo, T &vect) +{ + PARSE_INTEGER_FIELD (jo, "alu", vect.alu); + PARSE_INTEGER_FIELD (jo, "mult", vect.mult); + PARSE_INTEGER_FIELD (jo, "movi", vect.movi); + PARSE_INTEGER_FIELD (jo, "dup", vect.dup); + PARSE_INTEGER_FIELD (jo, "extract", vect.extract); +} + +template <typename T> +static void +parse_addr_cost_addr_scale_costs (const json::object *jo, T &addr_scale_costs) +{ + PARSE_INTEGER_FIELD (jo, "hi", addr_scale_costs.hi); + PARSE_INTEGER_FIELD (jo, "si", addr_scale_costs.si); + PARSE_INTEGER_FIELD (jo, "di", addr_scale_costs.di); + PARSE_INTEGER_FIELD (jo, "ti", addr_scale_costs.ti); +} + +template <typename T> +static void +parse_regmove_cost (const json::object *jo, T ®move_cost) +{ + PARSE_INTEGER_FIELD (jo, "GP2GP", regmove_cost.GP2GP); + PARSE_INTEGER_FIELD (jo, "GP2FP", regmove_cost.GP2FP); + PARSE_INTEGER_FIELD (jo, "FP2GP", regmove_cost.FP2GP); + PARSE_INTEGER_FIELD (jo, "FP2FP", regmove_cost.FP2FP); +} + +template <typename T> +static void +parse_vec_costs_advsimd (const json::object *jo, T &advsimd) +{ + PARSE_INTEGER_FIELD (jo, "int_stmt_cost", advsimd.int_stmt_cost); + PARSE_INTEGER_FIELD (jo, "fp_stmt_cost", advsimd.fp_stmt_cost); + PARSE_INTEGER_FIELD (jo, "ld2_st2_permute_cost", advsimd.ld2_st2_permute_cost); + PARSE_INTEGER_FIELD (jo, "ld3_st3_permute_cost", advsimd.ld3_st3_permute_cost); + PARSE_INTEGER_FIELD (jo, "ld4_st4_permute_cost", advsimd.ld4_st4_permute_cost); + PARSE_INTEGER_FIELD (jo, "permute_cost", advsimd.permute_cost); + PARSE_INTEGER_FIELD (jo, "reduc_i8_cost", advsimd.reduc_i8_cost); + PARSE_INTEGER_FIELD (jo, "reduc_i16_cost", advsimd.reduc_i16_cost); + PARSE_INTEGER_FIELD (jo, 
"reduc_i32_cost", advsimd.reduc_i32_cost); + PARSE_INTEGER_FIELD (jo, "reduc_i64_cost", advsimd.reduc_i64_cost); + PARSE_INTEGER_FIELD (jo, "reduc_f16_cost", advsimd.reduc_f16_cost); + PARSE_INTEGER_FIELD (jo, "reduc_f32_cost", advsimd.reduc_f32_cost); + PARSE_INTEGER_FIELD (jo, "reduc_f64_cost", advsimd.reduc_f64_cost); + PARSE_INTEGER_FIELD (jo, "store_elt_extra_cost", advsimd.store_elt_extra_cost); + PARSE_INTEGER_FIELD (jo, "vec_to_scalar_cost", advsimd.vec_to_scalar_cost); + PARSE_INTEGER_FIELD (jo, "scalar_to_vec_cost", advsimd.scalar_to_vec_cost); + PARSE_INTEGER_FIELD (jo, "align_load_cost", advsimd.align_load_cost); + PARSE_INTEGER_FIELD (jo, "unalign_load_cost", advsimd.unalign_load_cost); + PARSE_INTEGER_FIELD (jo, "unalign_store_cost", advsimd.unalign_store_cost); + PARSE_INTEGER_FIELD (jo, "store_cost", advsimd.store_cost); +} + +template <typename T> +static void +parse_vec_costs_sve (const json::object *jo, T &sve) +{ + PARSE_INTEGER_FIELD (jo, "clast_cost", sve.clast_cost); + PARSE_INTEGER_FIELD (jo, "fadda_f16_cost", sve.fadda_f16_cost); + PARSE_INTEGER_FIELD (jo, "fadda_f32_cost", sve.fadda_f32_cost); + PARSE_INTEGER_FIELD (jo, "fadda_f64_cost", sve.fadda_f64_cost); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "gather_load_x32_cost", sve.gather_load_x32_cost); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "gather_load_x64_cost", sve.gather_load_x64_cost); + PARSE_INTEGER_FIELD (jo, "gather_load_x32_init_cost", sve.gather_load_x32_init_cost); + PARSE_INTEGER_FIELD (jo, "gather_load_x64_init_cost", sve.gather_load_x64_init_cost); + PARSE_INTEGER_FIELD (jo, "scatter_store_elt_cost", sve.scatter_store_elt_cost); +} + +template <typename T> +static void +parse_vec_costs_issue_info_scalar (const json::object *jo, T &scalar) +{ + PARSE_UNSIGNED_INTEGER_FIELD (jo, "loads_stores_per_cycle", scalar.loads_stores_per_cycle); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "stores_per_cycle", scalar.stores_per_cycle); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "general_ops_per_cycle", scalar.general_ops_per_cycle); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "fp_simd_load_general_ops", scalar.fp_simd_load_general_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "fp_simd_store_general_ops", scalar.fp_simd_store_general_ops); +} + +template <typename T> +static void +parse_vec_costs_issue_info_advsimd (const json::object *jo, T &advsimd) +{ + PARSE_UNSIGNED_INTEGER_FIELD (jo, "loads_stores_per_cycle", advsimd.loads_stores_per_cycle); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "stores_per_cycle", advsimd.stores_per_cycle); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "general_ops_per_cycle", advsimd.general_ops_per_cycle); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "fp_simd_load_general_ops", advsimd.fp_simd_load_general_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "fp_simd_store_general_ops", advsimd.fp_simd_store_general_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "ld2_st2_general_ops", advsimd.ld2_st2_general_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "ld3_st3_general_ops", advsimd.ld3_st3_general_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "ld4_st4_general_ops", advsimd.ld4_st4_general_ops); +} + +template <typename T> +static void +parse_vec_costs_issue_info_sve (const json::object *jo, T &sve) +{ + PARSE_UNSIGNED_INTEGER_FIELD (jo, "loads_stores_per_cycle", sve.loads_stores_per_cycle); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "stores_per_cycle", sve.stores_per_cycle); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "general_ops_per_cycle", sve.general_ops_per_cycle); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "fp_simd_load_general_ops", sve.fp_simd_load_general_ops); + 
PARSE_UNSIGNED_INTEGER_FIELD (jo, "fp_simd_store_general_ops", sve.fp_simd_store_general_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "ld2_st2_general_ops", sve.ld2_st2_general_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "ld3_st3_general_ops", sve.ld3_st3_general_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "ld4_st4_general_ops", sve.ld4_st4_general_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "pred_ops_per_cycle", sve.pred_ops_per_cycle); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "while_pred_ops", sve.while_pred_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "int_cmp_pred_ops", sve.int_cmp_pred_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "fp_cmp_pred_ops", sve.fp_cmp_pred_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "gather_scatter_pair_general_ops", sve.gather_scatter_pair_general_ops); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "gather_scatter_pair_pred_ops", sve.gather_scatter_pair_pred_ops); +} + +template <typename T> +static void +parse_branch_costs (const json::object *jo, T &branch_costs) +{ + PARSE_INTEGER_FIELD (jo, "predictable", branch_costs.predictable); + PARSE_INTEGER_FIELD (jo, "unpredictable", branch_costs.unpredictable); +} + +template <typename T> +static void +parse_approx_modes (const json::object *jo, T &approx_modes) +{ + PARSE_INTEGER_FIELD (jo, "division", approx_modes.division); + PARSE_INTEGER_FIELD (jo, "sqrt", approx_modes.sqrt); + PARSE_INTEGER_FIELD (jo, "recip_sqrt", approx_modes.recip_sqrt); +} + +template <typename T> +static void +parse_memmov_cost (const json::object *jo, T &memmov_cost) +{ + PARSE_INTEGER_FIELD (jo, "load_int", memmov_cost.load_int); + PARSE_INTEGER_FIELD (jo, "store_int", memmov_cost.store_int); + PARSE_INTEGER_FIELD (jo, "load_fp", memmov_cost.load_fp); + PARSE_INTEGER_FIELD (jo, "store_fp", memmov_cost.store_fp); + PARSE_INTEGER_FIELD (jo, "load_pred", memmov_cost.load_pred); + PARSE_INTEGER_FIELD (jo, "store_pred", memmov_cost.store_pred); +} + +template <typename T> +static void +parse_prefetch (const json::object *jo, T &prefetch) +{ + PARSE_INTEGER_FIELD (jo, "num_slots", prefetch.num_slots); + PARSE_INTEGER_FIELD (jo, "l1_cache_size", prefetch.l1_cache_size); + PARSE_INTEGER_FIELD (jo, "l1_cache_line_size", prefetch.l1_cache_line_size); + PARSE_INTEGER_FIELD (jo, "l2_cache_size", prefetch.l2_cache_size); + PARSE_BOOLEAN_FIELD (jo, "prefetch_dynamic_strides", prefetch.prefetch_dynamic_strides); + PARSE_INTEGER_FIELD (jo, "minimum_stride", prefetch.minimum_stride); + PARSE_INTEGER_FIELD (jo, "default_opt_level", prefetch.default_opt_level); +} + +template <typename T> +static void +parse_insn_extra_cost (const json::object *jo, T &insn_extra_cost) +{ + PARSE_OBJECT (jo, "alu", insn_extra_cost.alu, parse_insn_extra_cost_alu); + PARSE_ARRAY_FIELD (jo, "mult", insn_extra_cost.mult, parse_insn_extra_cost_mult_element); + PARSE_OBJECT (jo, "ldst", insn_extra_cost.ldst, parse_insn_extra_cost_ldst); + PARSE_ARRAY_FIELD (jo, "fp", insn_extra_cost.fp, parse_insn_extra_cost_fp_element); + PARSE_OBJECT (jo, "vect", insn_extra_cost.vect, parse_insn_extra_cost_vect); +} + +template <typename T> +static void +parse_addr_cost (const json::object *jo, T &addr_cost) +{ + PARSE_OBJECT (jo, "addr_scale_costs", addr_cost.addr_scale_costs, parse_addr_cost_addr_scale_costs); + PARSE_INTEGER_FIELD (jo, "pre_modify", addr_cost.pre_modify); + PARSE_INTEGER_FIELD (jo, "post_modify", addr_cost.post_modify); + PARSE_INTEGER_FIELD (jo, "post_modify_ld3_st3", addr_cost.post_modify_ld3_st3); + PARSE_INTEGER_FIELD (jo, "post_modify_ld4_st4", addr_cost.post_modify_ld4_st4); + PARSE_INTEGER_FIELD 
(jo, "register_offset", addr_cost.register_offset); + PARSE_INTEGER_FIELD (jo, "register_sextend", addr_cost.register_sextend); + PARSE_INTEGER_FIELD (jo, "register_zextend", addr_cost.register_zextend); + PARSE_INTEGER_FIELD (jo, "imm_offset", addr_cost.imm_offset); +} + +template <typename T> +static void +parse_vec_costs_issue_info (const json::object *jo, T &issue_info) +{ + PARSE_OBJECT (jo, "scalar", issue_info.scalar, parse_vec_costs_issue_info_scalar); + PARSE_OBJECT (jo, "advsimd", issue_info.advsimd, parse_vec_costs_issue_info_advsimd); + PARSE_OBJECT (jo, "sve", issue_info.sve, parse_vec_costs_issue_info_sve); +} + +template <typename T> +static void +parse_vec_costs (const json::object *jo, T &vec_costs) +{ + PARSE_INTEGER_FIELD (jo, "scalar_int_stmt_cost", vec_costs.scalar_int_stmt_cost); + PARSE_INTEGER_FIELD (jo, "scalar_fp_stmt_cost", vec_costs.scalar_fp_stmt_cost); + PARSE_INTEGER_FIELD (jo, "scalar_load_cost", vec_costs.scalar_load_cost); + PARSE_INTEGER_FIELD (jo, "scalar_store_cost", vec_costs.scalar_store_cost); + PARSE_INTEGER_FIELD (jo, "cond_taken_branch_cost", vec_costs.cond_taken_branch_cost); + PARSE_INTEGER_FIELD (jo, "cond_not_taken_branch_cost", vec_costs.cond_not_taken_branch_cost); + PARSE_OBJECT (jo, "advsimd", vec_costs.advsimd, parse_vec_costs_advsimd); + PARSE_OBJECT (jo, "sve", vec_costs.sve, parse_vec_costs_sve); + PARSE_OBJECT (jo, "issue_info", vec_costs.issue_info, parse_vec_costs_issue_info); +} + +template <typename T> +static void +parse_tunings (const json::object *jo, T &tunings) +{ + PARSE_OBJECT (jo, "insn_extra_cost", tunings.insn_extra_cost, parse_insn_extra_cost); + PARSE_OBJECT (jo, "addr_cost", tunings.addr_cost, parse_addr_cost); + PARSE_OBJECT (jo, "regmove_cost", tunings.regmove_cost, parse_regmove_cost); + PARSE_OBJECT (jo, "vec_costs", tunings.vec_costs, parse_vec_costs); + PARSE_OBJECT (jo, "branch_costs", tunings.branch_costs, parse_branch_costs); + PARSE_OBJECT (jo, "approx_modes", tunings.approx_modes, parse_approx_modes); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "sve_width", tunings.sve_width); + PARSE_OBJECT (jo, "memmov_cost", tunings.memmov_cost, parse_memmov_cost); + PARSE_INTEGER_FIELD (jo, "issue_rate", tunings.issue_rate); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "fusible_ops", tunings.fusible_ops); + PARSE_STRING_FIELD (jo, "function_align", tunings.function_align); + PARSE_STRING_FIELD (jo, "jump_align", tunings.jump_align); + PARSE_STRING_FIELD (jo, "loop_align", tunings.loop_align); + PARSE_INTEGER_FIELD (jo, "int_reassoc_width", tunings.int_reassoc_width); + PARSE_INTEGER_FIELD (jo, "fp_reassoc_width", tunings.fp_reassoc_width); + PARSE_INTEGER_FIELD (jo, "fma_reassoc_width", tunings.fma_reassoc_width); + PARSE_INTEGER_FIELD (jo, "vec_reassoc_width", tunings.vec_reassoc_width); + PARSE_INTEGER_FIELD (jo, "min_div_recip_mul_sf", tunings.min_div_recip_mul_sf); + PARSE_INTEGER_FIELD (jo, "min_div_recip_mul_df", tunings.min_div_recip_mul_df); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "max_case_values", tunings.max_case_values); + PARSE_ENUM_FIELD (jo, "autoprefetcher_model", tunings.autoprefetcher_model, autoprefetcher_model_mappings); + PARSE_UNSIGNED_INTEGER_FIELD (jo, "extra_tuning_flags", tunings.extra_tuning_flags); + PARSE_OBJECT (jo, "prefetch", tunings.prefetch, parse_prefetch); + PARSE_ENUM_FIELD (jo, "ldp_policy_model", tunings.ldp_policy_model, ldp_policy_model_mappings); + PARSE_ENUM_FIELD (jo, "stp_policy_model", tunings.stp_policy_model, stp_policy_model_mappings); +}
\ No newline at end of file diff --git a/gcc/config/aarch64/aarch64-json-tunings-parser.cc b/gcc/config/aarch64/aarch64-json-tunings-parser.cc new file mode 100644 index 0000000..59c745e --- /dev/null +++ b/gcc/config/aarch64/aarch64-json-tunings-parser.cc @@ -0,0 +1,630 @@ +/* Routines to parse the AArch64 tuning parameters from a JSON file. + Copyright The GNU Toolchain Authors. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#define INCLUDE_STRING +#define INCLUDE_VECTOR +#define INCLUDE_TYPE_TRAITS +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "diagnostic-core.h" +#include "json-parsing.h" +#include "aarch64-json-schema.h" +#include "aarch64-json-tunings-parser.h" +#include "aarch64-protos.h" +#include "config/arm/aarch-common-protos.h" +#include "selftest.h" +#include "version.h" + +#define PARSE_INTEGER_FIELD(obj, key, member) \ + { \ + const json::value *val = obj->get (key); \ + if (val) \ + member = extract_integer (val); \ + } + +#define PARSE_UNSIGNED_INTEGER_FIELD(obj, key, member) \ + { \ + const json::value *val = obj->get (key); \ + if (val) \ + member = extract_unsigned_integer (val); \ + } + +#define PARSE_BOOLEAN_FIELD(obj, key, member) \ + { \ + const json::value *val = obj->get (key); \ + if (val) \ + member = extract_boolean (val); \ + } + +#define PARSE_STRING_FIELD(obj, key, member) \ + { \ + const json::value *val = obj->get (key); \ + if (val) \ + member = extract_string (val); \ + } + +#define PARSE_OBJECT(obj, key, member, parse_func) \ + { \ + const json::value *field_value = obj->get (key); \ + if (field_value) \ + if (auto *field_obj = dyn_cast<const json::object *> (field_value)) \ + parse_object_helper (field_obj, (member), (parse_func)); \ + } + +#define PARSE_ARRAY_FIELD(obj, key, member, parse_func) \ + { \ + const json::value *field_value = obj->get (key); \ + if (field_value) \ + if (auto *field_array = dyn_cast<const json::array *> (field_value)) \ + for (size_t i = 0; i < field_array->size (); ++i) \ + { \ + const json::value *elem = field_array->get (i); \ + if (elem) \ + if (auto *array_obj = dyn_cast<const json::object *> (elem)) \ + parse_func (array_obj, member[i]); \ + } \ + } + +#define PARSE_ENUM_FIELD(obj, key, member, mappings) \ + parse_enum_field (obj, key, member, mappings, \ + sizeof (mappings) / sizeof (mappings[0])) + +/* Type alias for parse function pointer. */ +template <typename T> +using parse_func_type + = void (*) (const json::object *, + std::remove_const_t<std::remove_pointer_t<T>> &); + +/* Parse JSON object into non-pointer member type. */ +template <typename T> +static std::enable_if_t<!std::is_pointer<T>::value> +parse_object_helper (const json::object *field_obj, T &member, + parse_func_type<T> parse_func) +{ + parse_func (field_obj, member); +} + +/* Parse JSON object into a const pointer member by creating a temp copy. 
*/ +template <typename T> +static std::enable_if_t<std::is_pointer<T>::value + && std::is_const<std::remove_pointer_t<T>>::value> +parse_object_helper (const json::object *field_obj, T &member, + parse_func_type<T> parse_func) +{ + if (!member) + return; + + /* Use static storage for the non-const copy. + This works because tune_params does not have nested structures of the + same type, but has room for errors if we end up having pointers to the + same structure at some point. */ + static bool already_initialized = false; + if (already_initialized) + { + error ("static storage conflict - multiple pointer members of the " + "same type cannot be parsed"); + return; + } + already_initialized = true; + using NonConstType = std::remove_const_t<std::remove_pointer_t<T>>; + static NonConstType new_obj = *member; + parse_func (field_obj, new_obj); + member = &new_obj; +} + +/* Extract string value from JSON, returning allocated C string. */ +char * +extract_string (const json::value *val) +{ + if (auto *string_val = dyn_cast<const json::string *> (val)) + return xstrdup (string_val->get_string ()); + warning (0, "expected a string but got something else or NULL"); + return nullptr; +} + +/* Extract signed integer value from JSON. */ +int +extract_integer (const json::value *val) +{ + if (auto *int_val = dyn_cast<const json::integer_number *> (val)) + { + long value = int_val->get (); + gcc_assert (value >= INT_MIN && value <= INT_MAX); + return static_cast<int> (value); + } + warning (0, "expected an integer value but got something else or NULL"); + return 0; +} + +/* Extract unsigned integer value from JSON. */ +unsigned int +extract_unsigned_integer (const json::value *val) +{ + if (auto *int_val = dyn_cast<const json::integer_number *> (val)) + { + long value = int_val->get (); + gcc_assert (value >= 0 && value <= UINT_MAX); + return static_cast<unsigned int> (value); + } + warning (0, + "expected an unsigned integer value but got something else or NULL"); + return 0; +} + +/* Extract boolean value from JSON literal. */ +bool +extract_boolean (const json::value *val) +{ + if (auto *literal_val = dyn_cast<const json::literal *> (val)) + { + json::kind kind = literal_val->get_kind (); + if (kind == json::JSON_TRUE || kind == json::JSON_FALSE) + return (kind == json::JSON_TRUE); + } + warning (0, "expected a boolean value but got something else or NULL"); + return false; +} + +template <typename EnumType> struct enum_mapping +{ + const char *name; + EnumType value; +}; + +/* Parse JSON string field into enum value using string-to-enum mappings. */ +template <typename EnumType> +static void +parse_enum_field (const json::object *jo, const std::string &key, + EnumType &enum_var, const enum_mapping<EnumType> *mappings, + size_t num_mappings) +{ + const json::value *field_value = jo->get (key.c_str ()); + if (!field_value) + return; + + auto *string_val = dyn_cast<const json::string *> (field_value); + if (!string_val) + { + warning (0, "expected string for enum field %s", key.c_str ()); + enum_var = mappings[0].value; + return; + } + + const char *field_string = string_val->get_string (); + for (size_t i = 0; i < num_mappings; ++i) + { + if (strcmp (field_string, mappings[i].name) == 0) + { + enum_var = mappings[i].value; + return; + } + } + + warning (0, "%s not recognized, defaulting to %qs", key.c_str (), + mappings[0].name); + enum_var = mappings[0].value; +} + +/* Include auto-generated parsing routines. 
*/ +#include "aarch64-json-tunings-parser-generated.inc" + +/* Validate the user provided JSON data against the present schema. + Checks for correct types, fields, and expected format. */ +static bool +validate_and_traverse (const json::object *json_obj, + const json::object *schema_obj, + const std::string &parent_key = "") +{ + for (const auto &json_entry : json_obj->get_map ()) + { + const std::string &key = json_entry.first; + const json::value *json_value = json_entry.second; + + std::string full_key = parent_key.empty () ? key : parent_key + "." + key; + + const json::value *schema_value = schema_obj->get (key.c_str ()); + if (!schema_value) + { + warning (0, "key %qs is not a tuning parameter, skipping", + full_key.c_str ()); + continue; + } + + if (auto *sub_schema_obj = dyn_cast<const json::object *> (schema_value)) + { + if (auto *sub_json_obj = dyn_cast<const json::object *> (json_value)) + { + if (!validate_and_traverse (sub_json_obj, sub_schema_obj, + full_key)) + return false; + } + else + { + error ("key %qs expected to be an object", full_key.c_str ()); + return false; + } + } + else if (schema_value->get_kind () == json::JSON_ARRAY) + { + if (json_value->get_kind () != json::JSON_ARRAY) + { + error ("key %qs expected to be an array", full_key.c_str ()); + return false; + } + } + else if (auto *schema_string + = dyn_cast<const json::string *> (schema_value)) + { + const char *schema_type_str = schema_string->get_string (); + + if (strcmp (schema_type_str, "int") == 0) + { + if (json_value->get_kind () != json::JSON_INTEGER) + { + error ("key %qs expected to be an integer", + full_key.c_str ()); + return false; + } + // Check if the value is valid for signed integer + if (auto *int_val + = dyn_cast<const json::integer_number *> (json_value)) + { + long value = int_val->get (); + if (value > INT_MAX || value < INT_MIN) + { + error ("key %qs value %ld is out of range for %<int%> " + "type [%d, %d]", + full_key.c_str (), value, INT_MIN, INT_MAX); + return false; + } + } + } + else if (strcmp (schema_type_str, "uint") == 0) + { + if (json_value->get_kind () != json::JSON_INTEGER) + { + error ("key %qs expected to be an unsigned integer", + full_key.c_str ()); + return false; + } + // Check if the value is valid for unsigned integer + if (auto *int_val + = dyn_cast<const json::integer_number *> (json_value)) + { + long value = int_val->get (); + if (value < 0 || value > UINT_MAX) + { + error ("key %qs value %ld is out of range for %<uint%> " + "type [0, %u]", + full_key.c_str (), value, UINT_MAX); + return false; + } + } + } + else if (strcmp (schema_type_str, "string") == 0) + { + if (json_value->get_kind () != json::JSON_STRING) + { + error ("key %qs expected to be a string", full_key.c_str ()); + return false; + } + } + else if (strcmp (schema_type_str, "boolean") == 0) + { + if (json_value->get_kind () != json::JSON_TRUE + && json_value->get_kind () != json::JSON_FALSE) + { + error ("key %qs expected to be a boolean (true/false)", + full_key.c_str ()); + return false; + } + } + else if (strcmp (schema_type_str, "enum") == 0) + { + if (json_value->get_kind () != json::JSON_STRING) + { + error ("key %qs expected to be an enum (string)", + full_key.c_str ()); + return false; + } + } + else + { + error ("key %qs has unsupported type", full_key.c_str ()); + return false; + } + } + else + { + error ("key %qs has unexpected format in schema", full_key.c_str ()); + return false; + } + } + return true; +} + +/* Helper routine for reading the provided JSON file. 
*/ +static std::unique_ptr<std::vector<char>> +read_file (const char *path) +{ + FILE *f_in = fopen (path, "r"); + if (!f_in) + { + error ("could not open file %s", path); + return nullptr; + } + + auto result = std::make_unique<std::vector<char>> (); + char buf[4096]; + + while (size_t iter_sz_in = fread (buf, 1, sizeof (buf), f_in)) + result->insert (result->end (), buf, buf + iter_sz_in); + + if (!feof (f_in)) + { + error ("error reading file %s", path); + fclose (f_in); + return nullptr; + } + + fclose (f_in); + result->push_back ('\0'); + return result; +} + +/* Check that the JSON tuning file was produced by the same major GCC + version; warn if no version information is present at all. */ +static bool +check_version_compatibility (const json::object *root_obj) +{ + const json::value *metadata_value = root_obj->get ("metadata"); + int json_gcc_major_version = -1; + + if (metadata_value) + { + if (auto *metadata_obj = dyn_cast<const json::object *> (metadata_value)) + { + const json::value *version_value = metadata_obj->get ("gcc_version"); + if (version_value) + { + if (auto *version_int_val + = dyn_cast<const json::integer_number *> (version_value)) + json_gcc_major_version = version_int_val->get (); + } + } + } + + if (json_gcc_major_version == -1) + { + warning (0, "JSON tuning file does not contain version information; " + "compatibility cannot be verified"); + return true; + } + + if (json_gcc_major_version != GCC_major_version) + { + error ("JSON tuning file was created with GCC version %d " + "but current GCC version is %d", + json_gcc_major_version, GCC_major_version); + inform (UNKNOWN_LOCATION, "JSON tuning files must be regenerated " + "when switching between major GCC versions"); + return false; + } + + return true; +} + +/* Main routine for setting up the parsing of JSON data. */ +static void +aarch64_load_tuning_params_from_json_string (const char *json_string, + const char *schema_string, + struct tune_params *tune) +{ + /* Try parsing the JSON string. */ + json::parser_result_t data_result + = json::parse_utf8_string (strlen (json_string), json_string, true, + nullptr); + + if (auto json_err = data_result.m_err.get ()) + { + error ("error parsing JSON data: %s", json_err->get_msg ()); + return; + } + + const std::unique_ptr<json::value> &root = data_result.m_val; + if (!root) + { + error ("JSON parsing returned null data"); + return; + } + auto *root_obj = dyn_cast<const json::object *> (root.get ()); + if (!root_obj) + { + warning (0, "no JSON object found in the provided data"); + return; + } + + /* Check version compatibility before proceeding. */ + if (!check_version_compatibility (root_obj)) + return; + + json::parser_result_t schema_result + = json::parse_utf8_string (strlen (schema_string), schema_string, true, + nullptr); + + gcc_assert (!schema_result.m_err.get ()); + gcc_assert (schema_result.m_val); + + auto *schema_obj + = dyn_cast<const json::object *> (schema_result.m_val.get ()); + gcc_assert (schema_obj); + + const json::value *tune_params_value = root_obj->get ("tune_params"); + if (!tune_params_value) + { + warning (0, "key %<tune_params%> not found in JSON data"); + return; + } + + auto *jo = dyn_cast<const json::object *> (tune_params_value); + if (!jo) + { + error ("key %<tune_params%> is not a JSON object"); + return; + } + + if (!validate_and_traverse (root_obj, schema_obj)) + { + error ("validation failed for the provided JSON data"); + return; + } + + parse_tunings (jo, *tune); +} + +/* Wrapper for calling aarch64_load_tuning_params_from_json_string. 
*/ +void +aarch64_load_tuning_params_from_json (const char *data_filename, + struct tune_params *tune) +{ + std::unique_ptr<std::vector<char>> json_data = read_file (data_filename); + if (!json_data || !json_data->data ()) + { + error ("cannot read JSON data in %s", data_filename); + return; + } + aarch64_load_tuning_params_from_json_string ( + (const char *) json_data->data (), schema_json, tune); +} + +#if CHECKING_P +namespace selftest { + +#define STR_(X) #X +#define STR(X) STR_(X) + +void +test_json_integers () +{ + const char *test_json = R"json({ + "metadata": { + "gcc_version": )json" STR (GCC_major_version) R"json( + }, + "tune_params": { + "sve_width": 256, + "issue_rate": 4 + } + })json"; + + tune_params params; + + aarch64_load_tuning_params_from_json_string (test_json, schema_json, &params); + + ASSERT_EQ (params.sve_width, 256); + ASSERT_EQ (params.issue_rate, 4); +} + +void +test_json_boolean () +{ + const char *test_json = R"json({ + "metadata": { + "gcc_version": )json" STR (GCC_major_version) R"json( + }, + "tune_params": { + "insn_extra_cost": { + "alu": { + "non_exec_costs_exec": false + } + } + } + })json"; + + static const cpu_cost_table default_cost_table = {}; + + tune_params params; + params.insn_extra_cost = &default_cost_table; + + aarch64_load_tuning_params_from_json_string (test_json, schema_json, &params); + + ASSERT_EQ (params.insn_extra_cost->alu.non_exec_costs_exec, false); +} + +void +test_json_strings () +{ + const char *test_json = R"json({ + "metadata": { + "gcc_version": )json" STR (GCC_major_version) R"json( + }, + "tune_params": { + "function_align": "16", + "jump_align": "2", + "loop_align": "8" + } + })json"; + + tune_params params; + + aarch64_load_tuning_params_from_json_string (test_json, schema_json, &params); + + ASSERT_STREQ (params.function_align, "16"); + ASSERT_STREQ (params.jump_align, "2"); + ASSERT_STREQ (params.loop_align, "8"); +} + +void +test_json_enums () +{ + const char *test_json = R"json({ + "metadata": { + "gcc_version": )json" STR (GCC_major_version) R"json( + }, + "tune_params": { + "autoprefetcher_model": "AUTOPREFETCHER_OFF", + "ldp_policy_model": "AARCH64_LDP_STP_POLICY_NEVER", + "stp_policy_model": "AARCH64_LDP_STP_POLICY_DEFAULT" + } + })json"; + + tune_params params; + + aarch64_load_tuning_params_from_json_string (test_json, schema_json, &params); + + ASSERT_EQ (params.autoprefetcher_model, tune_params::AUTOPREFETCHER_OFF); + ASSERT_EQ (params.ldp_policy_model, AARCH64_LDP_STP_POLICY_NEVER); + ASSERT_EQ (params.stp_policy_model, AARCH64_LDP_STP_POLICY_DEFAULT); +} + +void +aarch64_json_tunings_tests () +{ + test_json_integers (); + test_json_boolean (); + test_json_strings (); + test_json_enums (); +} + +} // namespace selftest + +#undef STR +#undef STR_ + +#endif /* CHECKING_P */
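For reference, a stand-alone tuning file accepted by this parser mirrors the shape exercised by the selftests above: a "metadata" object carrying the producing GCC's major version, and a "tune_params" object with any subset of the schema's keys. A minimal sketch (the values, including the version number, are illustrative only, not recommended tunings):

  {
    "metadata": { "gcc_version": 16 },
    "tune_params": {
      "sve_width": 256,
      "issue_rate": 4,
      "function_align": "16",
      "autoprefetcher_model": "AUTOPREFETCHER_OFF"
    }
  }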
\ No newline at end of file diff --git a/gcc/config/aarch64/aarch64-json-tunings-parser.h b/gcc/config/aarch64/aarch64-json-tunings-parser.h new file mode 100644 index 0000000..3c5cd4c --- /dev/null +++ b/gcc/config/aarch64/aarch64-json-tunings-parser.h @@ -0,0 +1,29 @@ +/* Routines to parse the AArch64 tuning parameters from a JSON file. + Copyright The GNU Toolchain Authors. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef AARCH64_JSON_TUNINGS_PARSER_H +#define AARCH64_JSON_TUNINGS_PARSER_H + +#include "aarch64-protos.h" + +void +aarch64_load_tuning_params_from_json (const char *data_filename, + struct tune_params *tune); + +#endif
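The header exposes a single entry point that overlays a JSON file onto an existing tune_params. A hypothetical call site might look like the following; the names selected_tune and json_path are invented for illustration, and the option plumbing that would supply the file name is not part of this diff:

  /* Sketch only: start from the tunings chosen by -mcpu/-mtune, then let
     the user's JSON file override individual fields in a writable copy.  */
  tune_params user_tune = *selected_tune;
  aarch64_load_tuning_params_from_json (json_path, &user_tune);
  selected_tune = &user_tune;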
\ No newline at end of file diff --git a/gcc/config/aarch64/aarch64-json-tunings-printer-generated.inc b/gcc/config/aarch64/aarch64-json-tunings-printer-generated.inc new file mode 100644 index 0000000..6ffc442 --- /dev/null +++ b/gcc/config/aarch64/aarch64-json-tunings-printer-generated.inc @@ -0,0 +1,439 @@ +/* This file is auto-generated by aarch64-generate-json-tuning-routines.py. */ +/* Copyright The GNU Toolchain Authors. + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* This file contains the auto-generated serializer functions for JSON tuning parameters. */ + + +static const enum_mapping<tune_params::aarch64_autoprefetch_model> + autoprefetcher_model_mappings[] = { +#define AARCH64_AUTOPREFETCH_MODE(NAME, ENUM_VALUE) {NAME, tune_params::ENUM_VALUE}, +#include "aarch64-tuning-enums.def" +}; + +static const enum_mapping<aarch64_ldp_stp_policy> ldp_policy_model_mappings[] = { +#define AARCH64_LDP_STP_POLICY(NAME, ENUM_VALUE) {NAME, ENUM_VALUE}, +#include "aarch64-tuning-enums.def" +}; + +static const enum_mapping<aarch64_ldp_stp_policy> stp_policy_model_mappings[] = { +#define AARCH64_LDP_STP_POLICY(NAME, ENUM_VALUE) {NAME, ENUM_VALUE}, +#include "aarch64-tuning-enums.def" +}; + + +template <typename T> +static std::unique_ptr<json::object> +serialize_insn_extra_cost_alu (const T &alu) +{ + auto alu_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (alu_obj, "arith", alu.arith); + SERIALIZE_INTEGER_FIELD (alu_obj, "logical", alu.logical); + SERIALIZE_INTEGER_FIELD (alu_obj, "shift", alu.shift); + SERIALIZE_INTEGER_FIELD (alu_obj, "shift_reg", alu.shift_reg); + SERIALIZE_INTEGER_FIELD (alu_obj, "arith_shift", alu.arith_shift); + SERIALIZE_INTEGER_FIELD (alu_obj, "arith_shift_reg", alu.arith_shift_reg); + SERIALIZE_INTEGER_FIELD (alu_obj, "log_shift", alu.log_shift); + SERIALIZE_INTEGER_FIELD (alu_obj, "log_shift_reg", alu.log_shift_reg); + SERIALIZE_INTEGER_FIELD (alu_obj, "extend", alu.extend); + SERIALIZE_INTEGER_FIELD (alu_obj, "extend_arith", alu.extend_arith); + SERIALIZE_INTEGER_FIELD (alu_obj, "bfi", alu.bfi); + SERIALIZE_INTEGER_FIELD (alu_obj, "bfx", alu.bfx); + SERIALIZE_INTEGER_FIELD (alu_obj, "clz", alu.clz); + SERIALIZE_INTEGER_FIELD (alu_obj, "rev", alu.rev); + SERIALIZE_INTEGER_FIELD (alu_obj, "non_exec", alu.non_exec); + SERIALIZE_BOOLEAN_FIELD (alu_obj, "non_exec_costs_exec", alu.non_exec_costs_exec); + + return alu_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_insn_extra_cost_mult_element (const T &mult_element) +{ + auto mult_element_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (mult_element_obj, "simple", mult_element.simple); + SERIALIZE_INTEGER_FIELD (mult_element_obj, "flag_setting", mult_element.flag_setting); + SERIALIZE_INTEGER_FIELD (mult_element_obj, "extend", mult_element.extend); + SERIALIZE_INTEGER_FIELD (mult_element_obj, "add", mult_element.add); + SERIALIZE_INTEGER_FIELD 
(mult_element_obj, "extend_add", mult_element.extend_add); + SERIALIZE_INTEGER_FIELD (mult_element_obj, "idiv", mult_element.idiv); + + return mult_element_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_insn_extra_cost_ldst (const T &ldst) +{ + auto ldst_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (ldst_obj, "load", ldst.load); + SERIALIZE_INTEGER_FIELD (ldst_obj, "load_sign_extend", ldst.load_sign_extend); + SERIALIZE_INTEGER_FIELD (ldst_obj, "ldrd", ldst.ldrd); + SERIALIZE_INTEGER_FIELD (ldst_obj, "ldm_1st", ldst.ldm_1st); + SERIALIZE_INTEGER_FIELD (ldst_obj, "ldm_regs_per_insn_1st", ldst.ldm_regs_per_insn_1st); + SERIALIZE_INTEGER_FIELD (ldst_obj, "ldm_regs_per_insn_subsequent", ldst.ldm_regs_per_insn_subsequent); + SERIALIZE_INTEGER_FIELD (ldst_obj, "loadf", ldst.loadf); + SERIALIZE_INTEGER_FIELD (ldst_obj, "loadd", ldst.loadd); + SERIALIZE_INTEGER_FIELD (ldst_obj, "load_unaligned", ldst.load_unaligned); + SERIALIZE_INTEGER_FIELD (ldst_obj, "store", ldst.store); + SERIALIZE_INTEGER_FIELD (ldst_obj, "strd", ldst.strd); + SERIALIZE_INTEGER_FIELD (ldst_obj, "stm_1st", ldst.stm_1st); + SERIALIZE_INTEGER_FIELD (ldst_obj, "stm_regs_per_insn_1st", ldst.stm_regs_per_insn_1st); + SERIALIZE_INTEGER_FIELD (ldst_obj, "stm_regs_per_insn_subsequent", ldst.stm_regs_per_insn_subsequent); + SERIALIZE_INTEGER_FIELD (ldst_obj, "storef", ldst.storef); + SERIALIZE_INTEGER_FIELD (ldst_obj, "stored", ldst.stored); + SERIALIZE_INTEGER_FIELD (ldst_obj, "store_unaligned", ldst.store_unaligned); + SERIALIZE_INTEGER_FIELD (ldst_obj, "loadv", ldst.loadv); + SERIALIZE_INTEGER_FIELD (ldst_obj, "storev", ldst.storev); + + return ldst_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_insn_extra_cost_fp_element (const T &fp_element) +{ + auto fp_element_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (fp_element_obj, "div", fp_element.div); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "mult", fp_element.mult); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "mult_addsub", fp_element.mult_addsub); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "fma", fp_element.fma); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "addsub", fp_element.addsub); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "fpconst", fp_element.fpconst); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "neg", fp_element.neg); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "compare", fp_element.compare); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "widen", fp_element.widen); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "narrow", fp_element.narrow); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "toint", fp_element.toint); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "fromint", fp_element.fromint); + SERIALIZE_INTEGER_FIELD (fp_element_obj, "roundint", fp_element.roundint); + + return fp_element_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_insn_extra_cost_vect (const T &vect) +{ + auto vect_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (vect_obj, "alu", vect.alu); + SERIALIZE_INTEGER_FIELD (vect_obj, "mult", vect.mult); + SERIALIZE_INTEGER_FIELD (vect_obj, "movi", vect.movi); + SERIALIZE_INTEGER_FIELD (vect_obj, "dup", vect.dup); + SERIALIZE_INTEGER_FIELD (vect_obj, "extract", vect.extract); + + return vect_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_addr_cost_addr_scale_costs (const T &addr_scale_costs) +{ + auto addr_scale_costs_obj = std::make_unique<json::object> (); + + 
SERIALIZE_INTEGER_FIELD (addr_scale_costs_obj, "hi", addr_scale_costs.hi); + SERIALIZE_INTEGER_FIELD (addr_scale_costs_obj, "si", addr_scale_costs.si); + SERIALIZE_INTEGER_FIELD (addr_scale_costs_obj, "di", addr_scale_costs.di); + SERIALIZE_INTEGER_FIELD (addr_scale_costs_obj, "ti", addr_scale_costs.ti); + + return addr_scale_costs_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_regmove_cost (const T &regmove_cost) +{ + auto regmove_cost_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (regmove_cost_obj, "GP2GP", regmove_cost.GP2GP); + SERIALIZE_INTEGER_FIELD (regmove_cost_obj, "GP2FP", regmove_cost.GP2FP); + SERIALIZE_INTEGER_FIELD (regmove_cost_obj, "FP2GP", regmove_cost.FP2GP); + SERIALIZE_INTEGER_FIELD (regmove_cost_obj, "FP2FP", regmove_cost.FP2FP); + + return regmove_cost_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_vec_costs_advsimd (const T &advsimd) +{ + auto advsimd_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (advsimd_obj, "int_stmt_cost", advsimd.int_stmt_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "fp_stmt_cost", advsimd.fp_stmt_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "ld2_st2_permute_cost", advsimd.ld2_st2_permute_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "ld3_st3_permute_cost", advsimd.ld3_st3_permute_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "ld4_st4_permute_cost", advsimd.ld4_st4_permute_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "permute_cost", advsimd.permute_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "reduc_i8_cost", advsimd.reduc_i8_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "reduc_i16_cost", advsimd.reduc_i16_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "reduc_i32_cost", advsimd.reduc_i32_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "reduc_i64_cost", advsimd.reduc_i64_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "reduc_f16_cost", advsimd.reduc_f16_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "reduc_f32_cost", advsimd.reduc_f32_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "reduc_f64_cost", advsimd.reduc_f64_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "store_elt_extra_cost", advsimd.store_elt_extra_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "vec_to_scalar_cost", advsimd.vec_to_scalar_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "scalar_to_vec_cost", advsimd.scalar_to_vec_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "align_load_cost", advsimd.align_load_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "unalign_load_cost", advsimd.unalign_load_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "unalign_store_cost", advsimd.unalign_store_cost); + SERIALIZE_INTEGER_FIELD (advsimd_obj, "store_cost", advsimd.store_cost); + + return advsimd_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_vec_costs_sve (const T &sve) +{ + auto sve_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (sve_obj, "clast_cost", sve.clast_cost); + SERIALIZE_INTEGER_FIELD (sve_obj, "fadda_f16_cost", sve.fadda_f16_cost); + SERIALIZE_INTEGER_FIELD (sve_obj, "fadda_f32_cost", sve.fadda_f32_cost); + SERIALIZE_INTEGER_FIELD (sve_obj, "fadda_f64_cost", sve.fadda_f64_cost); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "gather_load_x32_cost", sve.gather_load_x32_cost); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "gather_load_x64_cost", sve.gather_load_x64_cost); + SERIALIZE_INTEGER_FIELD (sve_obj, "gather_load_x32_init_cost", sve.gather_load_x32_init_cost); + SERIALIZE_INTEGER_FIELD (sve_obj, 
"gather_load_x64_init_cost", sve.gather_load_x64_init_cost); + SERIALIZE_INTEGER_FIELD (sve_obj, "scatter_store_elt_cost", sve.scatter_store_elt_cost); + + return sve_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_vec_costs_issue_info_scalar (const T &scalar) +{ + auto scalar_obj = std::make_unique<json::object> (); + + SERIALIZE_UNSIGNED_INTEGER_FIELD (scalar_obj, "loads_stores_per_cycle", scalar.loads_stores_per_cycle); + SERIALIZE_UNSIGNED_INTEGER_FIELD (scalar_obj, "stores_per_cycle", scalar.stores_per_cycle); + SERIALIZE_UNSIGNED_INTEGER_FIELD (scalar_obj, "general_ops_per_cycle", scalar.general_ops_per_cycle); + SERIALIZE_UNSIGNED_INTEGER_FIELD (scalar_obj, "fp_simd_load_general_ops", scalar.fp_simd_load_general_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (scalar_obj, "fp_simd_store_general_ops", scalar.fp_simd_store_general_ops); + + return scalar_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_vec_costs_issue_info_advsimd (const T &advsimd) +{ + auto advsimd_obj = std::make_unique<json::object> (); + + SERIALIZE_UNSIGNED_INTEGER_FIELD (advsimd_obj, "loads_stores_per_cycle", advsimd.loads_stores_per_cycle); + SERIALIZE_UNSIGNED_INTEGER_FIELD (advsimd_obj, "stores_per_cycle", advsimd.stores_per_cycle); + SERIALIZE_UNSIGNED_INTEGER_FIELD (advsimd_obj, "general_ops_per_cycle", advsimd.general_ops_per_cycle); + SERIALIZE_UNSIGNED_INTEGER_FIELD (advsimd_obj, "fp_simd_load_general_ops", advsimd.fp_simd_load_general_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (advsimd_obj, "fp_simd_store_general_ops", advsimd.fp_simd_store_general_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (advsimd_obj, "ld2_st2_general_ops", advsimd.ld2_st2_general_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (advsimd_obj, "ld3_st3_general_ops", advsimd.ld3_st3_general_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (advsimd_obj, "ld4_st4_general_ops", advsimd.ld4_st4_general_ops); + + return advsimd_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_vec_costs_issue_info_sve (const T &sve) +{ + auto sve_obj = std::make_unique<json::object> (); + + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "loads_stores_per_cycle", sve.loads_stores_per_cycle); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "stores_per_cycle", sve.stores_per_cycle); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "general_ops_per_cycle", sve.general_ops_per_cycle); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "fp_simd_load_general_ops", sve.fp_simd_load_general_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "fp_simd_store_general_ops", sve.fp_simd_store_general_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "ld2_st2_general_ops", sve.ld2_st2_general_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "ld3_st3_general_ops", sve.ld3_st3_general_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "ld4_st4_general_ops", sve.ld4_st4_general_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "pred_ops_per_cycle", sve.pred_ops_per_cycle); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "while_pred_ops", sve.while_pred_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "int_cmp_pred_ops", sve.int_cmp_pred_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "fp_cmp_pred_ops", sve.fp_cmp_pred_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "gather_scatter_pair_general_ops", sve.gather_scatter_pair_general_ops); + SERIALIZE_UNSIGNED_INTEGER_FIELD (sve_obj, "gather_scatter_pair_pred_ops", sve.gather_scatter_pair_pred_ops); + + return sve_obj; +} + +template <typename T> +static 
std::unique_ptr<json::object> +serialize_branch_costs (const T &branch_costs) +{ + auto branch_costs_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (branch_costs_obj, "predictable", branch_costs.predictable); + SERIALIZE_INTEGER_FIELD (branch_costs_obj, "unpredictable", branch_costs.unpredictable); + + return branch_costs_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_approx_modes (const T &approx_modes) +{ + auto approx_modes_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (approx_modes_obj, "division", approx_modes.division); + SERIALIZE_INTEGER_FIELD (approx_modes_obj, "sqrt", approx_modes.sqrt); + SERIALIZE_INTEGER_FIELD (approx_modes_obj, "recip_sqrt", approx_modes.recip_sqrt); + + return approx_modes_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_memmov_cost (const T &memmov_cost) +{ + auto memmov_cost_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (memmov_cost_obj, "load_int", memmov_cost.load_int); + SERIALIZE_INTEGER_FIELD (memmov_cost_obj, "store_int", memmov_cost.store_int); + SERIALIZE_INTEGER_FIELD (memmov_cost_obj, "load_fp", memmov_cost.load_fp); + SERIALIZE_INTEGER_FIELD (memmov_cost_obj, "store_fp", memmov_cost.store_fp); + SERIALIZE_INTEGER_FIELD (memmov_cost_obj, "load_pred", memmov_cost.load_pred); + SERIALIZE_INTEGER_FIELD (memmov_cost_obj, "store_pred", memmov_cost.store_pred); + + return memmov_cost_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_prefetch (const T &prefetch) +{ + auto prefetch_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (prefetch_obj, "num_slots", prefetch.num_slots); + SERIALIZE_INTEGER_FIELD (prefetch_obj, "l1_cache_size", prefetch.l1_cache_size); + SERIALIZE_INTEGER_FIELD (prefetch_obj, "l1_cache_line_size", prefetch.l1_cache_line_size); + SERIALIZE_INTEGER_FIELD (prefetch_obj, "l2_cache_size", prefetch.l2_cache_size); + SERIALIZE_BOOLEAN_FIELD (prefetch_obj, "prefetch_dynamic_strides", prefetch.prefetch_dynamic_strides); + SERIALIZE_INTEGER_FIELD (prefetch_obj, "minimum_stride", prefetch.minimum_stride); + SERIALIZE_INTEGER_FIELD (prefetch_obj, "default_opt_level", prefetch.default_opt_level); + + return prefetch_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_insn_extra_cost (const T &insn_extra_cost) +{ + auto insn_extra_cost_obj = std::make_unique<json::object> (); + + SERIALIZE_OBJECT (insn_extra_cost_obj, "alu", insn_extra_cost.alu, serialize_insn_extra_cost_alu); + SERIALIZE_ARRAY_FIELD (insn_extra_cost_obj, "mult", insn_extra_cost.mult, ARRAY_SIZE (insn_extra_cost.mult), serialize_insn_extra_cost_mult_element); + SERIALIZE_OBJECT (insn_extra_cost_obj, "ldst", insn_extra_cost.ldst, serialize_insn_extra_cost_ldst); + SERIALIZE_ARRAY_FIELD (insn_extra_cost_obj, "fp", insn_extra_cost.fp, ARRAY_SIZE (insn_extra_cost.fp), serialize_insn_extra_cost_fp_element); + SERIALIZE_OBJECT (insn_extra_cost_obj, "vect", insn_extra_cost.vect, serialize_insn_extra_cost_vect); + + return insn_extra_cost_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_addr_cost (const T &addr_cost) +{ + auto addr_cost_obj = std::make_unique<json::object> (); + + SERIALIZE_OBJECT (addr_cost_obj, "addr_scale_costs", addr_cost.addr_scale_costs, serialize_addr_cost_addr_scale_costs); + SERIALIZE_INTEGER_FIELD (addr_cost_obj, "pre_modify", addr_cost.pre_modify); + SERIALIZE_INTEGER_FIELD (addr_cost_obj, "post_modify", 
addr_cost.post_modify); + SERIALIZE_INTEGER_FIELD (addr_cost_obj, "post_modify_ld3_st3", addr_cost.post_modify_ld3_st3); + SERIALIZE_INTEGER_FIELD (addr_cost_obj, "post_modify_ld4_st4", addr_cost.post_modify_ld4_st4); + SERIALIZE_INTEGER_FIELD (addr_cost_obj, "register_offset", addr_cost.register_offset); + SERIALIZE_INTEGER_FIELD (addr_cost_obj, "register_sextend", addr_cost.register_sextend); + SERIALIZE_INTEGER_FIELD (addr_cost_obj, "register_zextend", addr_cost.register_zextend); + SERIALIZE_INTEGER_FIELD (addr_cost_obj, "imm_offset", addr_cost.imm_offset); + + return addr_cost_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_vec_costs_issue_info (const T &issue_info) +{ + auto issue_info_obj = std::make_unique<json::object> (); + + SERIALIZE_OBJECT (issue_info_obj, "scalar", issue_info.scalar, serialize_vec_costs_issue_info_scalar); + SERIALIZE_OBJECT (issue_info_obj, "advsimd", issue_info.advsimd, serialize_vec_costs_issue_info_advsimd); + SERIALIZE_OBJECT (issue_info_obj, "sve", issue_info.sve, serialize_vec_costs_issue_info_sve); + + return issue_info_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_vec_costs (const T &vec_costs) +{ + auto vec_costs_obj = std::make_unique<json::object> (); + + SERIALIZE_INTEGER_FIELD (vec_costs_obj, "scalar_int_stmt_cost", vec_costs.scalar_int_stmt_cost); + SERIALIZE_INTEGER_FIELD (vec_costs_obj, "scalar_fp_stmt_cost", vec_costs.scalar_fp_stmt_cost); + SERIALIZE_INTEGER_FIELD (vec_costs_obj, "scalar_load_cost", vec_costs.scalar_load_cost); + SERIALIZE_INTEGER_FIELD (vec_costs_obj, "scalar_store_cost", vec_costs.scalar_store_cost); + SERIALIZE_INTEGER_FIELD (vec_costs_obj, "cond_taken_branch_cost", vec_costs.cond_taken_branch_cost); + SERIALIZE_INTEGER_FIELD (vec_costs_obj, "cond_not_taken_branch_cost", vec_costs.cond_not_taken_branch_cost); + SERIALIZE_OBJECT (vec_costs_obj, "advsimd", vec_costs.advsimd, serialize_vec_costs_advsimd); + SERIALIZE_OBJECT (vec_costs_obj, "sve", vec_costs.sve, serialize_vec_costs_sve); + SERIALIZE_OBJECT (vec_costs_obj, "issue_info", vec_costs.issue_info, serialize_vec_costs_issue_info); + + return vec_costs_obj; +} + +template <typename T> +static std::unique_ptr<json::object> +serialize_tunings (const T &tunings) +{ + auto tunings_obj = std::make_unique<json::object> (); + + SERIALIZE_OBJECT (tunings_obj, "insn_extra_cost", tunings.insn_extra_cost, serialize_insn_extra_cost); + SERIALIZE_OBJECT (tunings_obj, "addr_cost", tunings.addr_cost, serialize_addr_cost); + SERIALIZE_OBJECT (tunings_obj, "regmove_cost", tunings.regmove_cost, serialize_regmove_cost); + SERIALIZE_OBJECT (tunings_obj, "vec_costs", tunings.vec_costs, serialize_vec_costs); + SERIALIZE_OBJECT (tunings_obj, "branch_costs", tunings.branch_costs, serialize_branch_costs); + SERIALIZE_OBJECT (tunings_obj, "approx_modes", tunings.approx_modes, serialize_approx_modes); + SERIALIZE_UNSIGNED_INTEGER_FIELD (tunings_obj, "sve_width", tunings.sve_width); + SERIALIZE_OBJECT (tunings_obj, "memmov_cost", tunings.memmov_cost, serialize_memmov_cost); + SERIALIZE_INTEGER_FIELD (tunings_obj, "issue_rate", tunings.issue_rate); + SERIALIZE_UNSIGNED_INTEGER_FIELD (tunings_obj, "fusible_ops", tunings.fusible_ops); + SERIALIZE_STRING_FIELD (tunings_obj, "function_align", tunings.function_align); + SERIALIZE_STRING_FIELD (tunings_obj, "jump_align", tunings.jump_align); + SERIALIZE_STRING_FIELD (tunings_obj, "loop_align", tunings.loop_align); + SERIALIZE_INTEGER_FIELD (tunings_obj, "int_reassoc_width", 
tunings.int_reassoc_width); + SERIALIZE_INTEGER_FIELD (tunings_obj, "fp_reassoc_width", tunings.fp_reassoc_width); + SERIALIZE_INTEGER_FIELD (tunings_obj, "fma_reassoc_width", tunings.fma_reassoc_width); + SERIALIZE_INTEGER_FIELD (tunings_obj, "vec_reassoc_width", tunings.vec_reassoc_width); + SERIALIZE_INTEGER_FIELD (tunings_obj, "min_div_recip_mul_sf", tunings.min_div_recip_mul_sf); + SERIALIZE_INTEGER_FIELD (tunings_obj, "min_div_recip_mul_df", tunings.min_div_recip_mul_df); + SERIALIZE_UNSIGNED_INTEGER_FIELD (tunings_obj, "max_case_values", tunings.max_case_values); + SERIALIZE_ENUM_FIELD (tunings_obj, "autoprefetcher_model", tunings.autoprefetcher_model, autoprefetcher_model_mappings); + SERIALIZE_UNSIGNED_INTEGER_FIELD (tunings_obj, "extra_tuning_flags", tunings.extra_tuning_flags); + SERIALIZE_OBJECT (tunings_obj, "prefetch", tunings.prefetch, serialize_prefetch); + SERIALIZE_ENUM_FIELD (tunings_obj, "ldp_policy_model", tunings.ldp_policy_model, ldp_policy_model_mappings); + SERIALIZE_ENUM_FIELD (tunings_obj, "stp_policy_model", tunings.stp_policy_model, stp_policy_model_mappings); + + return tunings_obj; +}
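Each generated serializer above is a thin wrapper over json::object setters; the SERIALIZE_* macros it relies on are defined in aarch64-json-tunings-printer.cc below. With the macros expanded by hand, serialize_branch_costs reduces to roughly the following (an illustrative expansion, not part of the generated file):

  template <typename T>
  static std::unique_ptr<json::object>
  serialize_branch_costs (const T &branch_costs)
  {
    auto branch_costs_obj = std::make_unique<json::object> ();
    /* SERIALIZE_INTEGER_FIELD (obj, key, member) is just set_integer.  */
    branch_costs_obj->set_integer ("predictable", branch_costs.predictable);
    branch_costs_obj->set_integer ("unpredictable", branch_costs.unpredictable);
    return branch_costs_obj;
  }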
\ No newline at end of file diff --git a/gcc/config/aarch64/aarch64-json-tunings-printer.cc b/gcc/config/aarch64/aarch64-json-tunings-printer.cc new file mode 100644 index 0000000..7f28dde --- /dev/null +++ b/gcc/config/aarch64/aarch64-json-tunings-printer.cc @@ -0,0 +1,146 @@ +/* Routines to print the AArch64 tuning parameters to a JSON file. + Copyright The GNU Toolchain Authors. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#define INCLUDE_TYPE_TRAITS +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "pretty-print.h" +#include "tm.h" +#include "diagnostic-core.h" +#include "aarch64-json-tunings-printer.h" +#include "aarch64-protos.h" +#include "config/arm/aarch-common-protos.h" +#include "json.h" +#include "version.h" + +#define SERIALIZE_INTEGER_FIELD(obj, key, member) \ + (obj)->set_integer ((key), (member)) + +#define SERIALIZE_UNSIGNED_INTEGER_FIELD(obj, key, member) \ + (obj)->set_integer ((key), (member)) + +#define SERIALIZE_BOOLEAN_FIELD(obj, key, member) \ + (obj)->set_bool ((key), (member)) + +#define SERIALIZE_STRING_FIELD(obj, key, member) \ + (obj)->set_string ((key), (member)) + +#define SERIALIZE_OBJECT(obj, key, member, serialize_func) \ + { \ + auto field_obj = serialize_object_helper ((member), (serialize_func)); \ + if (field_obj) \ + (obj)->set ((key), std::move (field_obj)); \ + } + +#define SERIALIZE_ARRAY_FIELD(obj, key, member, size, serialize_func) \ + { \ + auto field_array = std::make_unique<json::array> (); \ + for (size_t i = 0; i < (size); ++i) \ + { \ + auto element_obj = serialize_func ((member)[i]); \ + if (element_obj) \ + field_array->append (std::move (element_obj)); \ + } \ + (obj)->set ((key), std::move (field_array)); \ + } + +#define SERIALIZE_ENUM_FIELD(obj, key, member, mappings) \ + (obj)->set_string ((key), serialize_enum ((member), (mappings), \ + sizeof (mappings) \ + / sizeof (mappings[0]))) + +/* Type alias for serialize function pointer. */ +template <typename T> +using serialize_func_type = std::unique_ptr<json::object> (*) ( + const typename std::remove_pointer<T>::type &); + +/* Serialize JSON object from non-pointer members. */ +template <typename T> +static typename std::enable_if<!std::is_pointer<T>::value, + std::unique_ptr<json::object>>::type +serialize_object_helper (const T &member, serialize_func_type<T> serialize_func) +{ + return serialize_func (member); +} + +/* Serialize JSON object from pointer members. */ +template <typename T> +static typename std::enable_if<std::is_pointer<T>::value, + std::unique_ptr<json::object>>::type +serialize_object_helper (const T &member, serialize_func_type<T> serialize_func) +{ + if (member) + return serialize_func (*member); + return std::make_unique<json::object> (); +} + +/* Mapping structure for enum-to-string conversion. 
*/ +template <typename EnumType> struct enum_mapping +{ + const char *name; + EnumType value; +}; + +/* Convert enum value to string using enum-to-string mappings. */ +template <typename EnumType> +static const char * +serialize_enum (EnumType enum_value, const enum_mapping<EnumType> *mappings, + size_t num_mappings) +{ + for (size_t i = 0; i < num_mappings; ++i) + if (enum_value == mappings[i].value) + return mappings[i].name; + return mappings[0].name; +} + +/* Include auto-generated printing routines. */ +#include "aarch64-json-tunings-printer-generated.inc" + +/* Print tune_params structure to JSON file. */ +void +aarch64_print_tune_params (const tune_params &params, const char *filename) +{ + /* Use default filename if none provided or empty string given. */ + const char *output_filename = filename; + if (!output_filename || *output_filename == '\0') + output_filename = "aarch64-tuning.json"; + + auto aarch64_tune_params_json = std::make_unique<json::object> (); + + auto metadata = std::make_unique<json::object> (); + metadata->set_integer ("gcc_version", GCC_major_version); + aarch64_tune_params_json->set ("metadata", std::move (metadata)); + + aarch64_tune_params_json->set ("tune_params", serialize_tunings (params)); + + pretty_printer pp; + aarch64_tune_params_json->print (&pp, true); + + FILE *output_file = fopen (output_filename, "w"); + if (!output_file) + { + error ("could not open file %s", output_filename); + return; + } + + fprintf (output_file, "%s", pp_formatted_text (&pp)); + fclose (output_file); +}
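A minimal usage sketch, assuming the aarch64_tune_params global that holds the active tunings (how the dump is triggered is outside this diff):

  /* Dump the tunings currently in effect; the resulting file can be edited
     and fed back through aarch64_load_tuning_params_from_json.  */
  aarch64_print_tune_params (aarch64_tune_params, "active-tuning.json");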
\ No newline at end of file diff --git a/gcc/config/aarch64/aarch64-json-tunings-printer.h b/gcc/config/aarch64/aarch64-json-tunings-printer.h new file mode 100644 index 0000000..a65c005 --- /dev/null +++ b/gcc/config/aarch64/aarch64-json-tunings-printer.h @@ -0,0 +1,28 @@ +/* Routine to print the AArch64 tuning parameters to a JSON file. + Copyright The GNU Toolchain Authors. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef AARCH64_JSON_TUNINGS_PRINTER_H +#define AARCH64_JSON_TUNINGS_PRINTER_H + +#include "aarch64-protos.h" + +void +aarch64_print_tune_params (const tune_params &params, const char *filename); + +#endif
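The hunks below switch several hand-written enum bodies over to X-macro includes of a new aarch64-tuning-enums.def, so the enumerators and the string mappings used by the JSON parser and printer are generated from one list. That file is not shown in this diff; judging from the macro shapes used, its entries presumably look like:

  /* Sketch of aarch64-tuning-enums.def (assumed contents).  */
  AARCH64_LDP_STP_POLICY ("AARCH64_LDP_STP_POLICY_DEFAULT", AARCH64_LDP_STP_POLICY_DEFAULT)
  AARCH64_LDP_STP_POLICY ("AARCH64_LDP_STP_POLICY_ALIGNED", AARCH64_LDP_STP_POLICY_ALIGNED)
  AARCH64_LDP_STP_POLICY ("AARCH64_LDP_STP_POLICY_ALWAYS", AARCH64_LDP_STP_POLICY_ALWAYS)
  AARCH64_LDP_STP_POLICY ("AARCH64_LDP_STP_POLICY_NEVER", AARCH64_LDP_STP_POLICY_NEVER)
  #undef AARCH64_LDP_STP_POLICY

  AARCH64_AUTOPREFETCH_MODE ("AUTOPREFETCHER_OFF", AUTOPREFETCHER_OFF)
  AARCH64_AUTOPREFETCH_MODE ("AUTOPREFETCHER_WEAK", AUTOPREFETCHER_WEAK)
  AARCH64_AUTOPREFETCH_MODE ("AUTOPREFETCHER_STRONG", AUTOPREFETCHER_STRONG)
  #undef AARCH64_AUTOPREFETCH_MODE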
\ No newline at end of file diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index a70375c05..083515d 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -155,6 +155,12 @@ AARCH64_OPT_EXTENSION("fp16fml", F16FML, (), (F16), (), "asimdfhm") AARCH64_FMV_FEATURE("fp16fml", FP16FML, (F16FML)) +AARCH64_FMV_FEATURE("dit", DIT, ()) + +AARCH64_FMV_FEATURE("dpb", DPB, ()) + +AARCH64_FMV_FEATURE("dpb2", DPB2, ()) + AARCH64_OPT_FMV_EXTENSION("jscvt", JSCVT, (FP), (), (), "jscvt") AARCH64_OPT_FMV_EXTENSION("fcma", FCMA, (SIMD), (), (), "fcma") @@ -209,13 +215,15 @@ AARCH64_OPT_EXTENSION("sve2p1", SVE2p1, (SVE2), (), (), "sve2p1") AARCH64_OPT_FMV_EXTENSION("sme", SME, (BF16, FCMA, F16, F16FML), (), (), "sme") -AARCH64_OPT_EXTENSION("memtag", MEMTAG, (), (), (), "") +AARCH64_OPT_FMV_EXTENSION("memtag", MEMTAG, (), (), (), "") AARCH64_OPT_FMV_EXTENSION("sb", SB, (), (), (), "sb") AARCH64_OPT_EXTENSION("predres", PREDRES, (), (), (), "") -AARCH64_OPT_EXTENSION("ssbs", SSBS, (), (), (), "ssbs") +AARCH64_OPT_FMV_EXTENSION("ssbs", SSBS, (), (), (), "ssbs") + +AARCH64_FMV_FEATURE("bti", BTI, ()) AARCH64_OPT_EXTENSION("profile", PROFILE, (), (), (), "") diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h index a6ca5cf..a25b217 100644 --- a/gcc/config/aarch64/aarch64-opts.h +++ b/gcc/config/aarch64/aarch64-opts.h @@ -142,10 +142,8 @@ enum aarch64_autovec_preference_enum { - LDP_STP_POLICY_ALWAYS: Emit ldp/stp regardless of alignment. - LDP_STP_POLICY_NEVER: Do not emit ldp/stp. */ enum aarch64_ldp_stp_policy { - AARCH64_LDP_STP_POLICY_DEFAULT, - AARCH64_LDP_STP_POLICY_ALIGNED, - AARCH64_LDP_STP_POLICY_ALWAYS, - AARCH64_LDP_STP_POLICY_NEVER +#define AARCH64_LDP_STP_POLICY(NAME, ENUM_VALUE) ENUM_VALUE, +#include "aarch64-tuning-enums.def" }; /* An enum specifying when the early-ra pass should be run: diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index a9e407b..da1d734 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -166,88 +166,88 @@ enum aarch64_salt_type { struct scale_addr_mode_cost { - const int hi; - const int si; - const int di; - const int ti; + int hi; + int si; + int di; + int ti; }; /* Additional cost for addresses. */ struct cpu_addrcost_table { - const struct scale_addr_mode_cost addr_scale_costs; - const int pre_modify; - const int post_modify; - const int post_modify_ld3_st3; - const int post_modify_ld4_st4; - const int register_offset; - const int register_sextend; - const int register_zextend; - const int imm_offset; + struct scale_addr_mode_cost addr_scale_costs; + int pre_modify; + int post_modify; + int post_modify_ld3_st3; + int post_modify_ld4_st4; + int register_offset; + int register_sextend; + int register_zextend; + int imm_offset; }; /* Additional costs for register copies. Cost is for one register. */ struct cpu_regmove_cost { - const int GP2GP; - const int GP2FP; - const int FP2GP; - const int FP2FP; + int GP2GP; + int GP2FP; + int FP2GP; + int FP2FP; }; struct simd_vec_cost { /* Cost of any integer vector operation, excluding the ones handled specially below. */ - const int int_stmt_cost; + int int_stmt_cost; /* Cost of any fp vector operation, excluding the ones handled specially below. 
*/ - const int fp_stmt_cost; + int fp_stmt_cost; /* Per-vector cost of permuting vectors after an LD2, LD3 or LD4, as well as the per-vector cost of permuting vectors before an ST2, ST3 or ST4. */ - const int ld2_st2_permute_cost; - const int ld3_st3_permute_cost; - const int ld4_st4_permute_cost; + int ld2_st2_permute_cost; + int ld3_st3_permute_cost; + int ld4_st4_permute_cost; /* Cost of a permute operation. */ - const int permute_cost; + int permute_cost; /* Cost of reductions for various vector types: iN is for N-bit integer elements and fN is for N-bit floating-point elements. We need to single out the element type because it affects the depth of the reduction. */ - const int reduc_i8_cost; - const int reduc_i16_cost; - const int reduc_i32_cost; - const int reduc_i64_cost; - const int reduc_f16_cost; - const int reduc_f32_cost; - const int reduc_f64_cost; + int reduc_i8_cost; + int reduc_i16_cost; + int reduc_i32_cost; + int reduc_i64_cost; + int reduc_f16_cost; + int reduc_f32_cost; + int reduc_f64_cost; /* Additional cost of storing a single vector element, on top of the normal cost of a scalar store. */ - const int store_elt_extra_cost; + int store_elt_extra_cost; /* Cost of a vector-to-scalar operation. */ - const int vec_to_scalar_cost; + int vec_to_scalar_cost; /* Cost of a scalar-to-vector operation. */ - const int scalar_to_vec_cost; + int scalar_to_vec_cost; /* Cost of an aligned vector load. */ - const int align_load_cost; + int align_load_cost; /* Cost of an unaligned vector load. */ - const int unalign_load_cost; + int unalign_load_cost; /* Cost of an unaligned vector store. */ - const int unalign_store_cost; + int unalign_store_cost; /* Cost of a vector store. */ - const int store_cost; + int store_cost; }; typedef struct simd_vec_cost advsimd_vec_cost; @@ -280,27 +280,27 @@ struct sve_vec_cost : simd_vec_cost /* The cost of a vector-to-scalar CLASTA or CLASTB instruction, with the scalar being stored in FP registers. This cost is assumed to be a cycle latency. */ - const int clast_cost; + int clast_cost; /* The costs of FADDA for the three data types that it supports. These costs are assumed to be cycle latencies. */ - const int fadda_f16_cost; - const int fadda_f32_cost; - const int fadda_f64_cost; + int fadda_f16_cost; + int fadda_f32_cost; + int fadda_f64_cost; /* The cost of a gather load instruction. The x32 value is for loads of 32-bit elements and the x64 value is for loads of 64-bit elements. */ - const unsigned int gather_load_x32_cost; - const unsigned int gather_load_x64_cost; + unsigned int gather_load_x32_cost; + unsigned int gather_load_x64_cost; /* Additional loop initialization cost of using a gather load instruction. The x32 value is for loads of 32-bit elements and the x64 value is for loads of 64-bit elements. */ - const int gather_load_x32_init_cost; - const int gather_load_x64_init_cost; + int gather_load_x32_init_cost; + int gather_load_x64_init_cost; /* The per-element cost of a scatter store. */ - const int scatter_store_elt_cost; + int scatter_store_elt_cost; }; /* Base information about how the CPU issues code, containing @@ -319,10 +319,10 @@ struct sve_vec_cost : simd_vec_cost struct aarch64_base_vec_issue_info { /* How many loads and stores can be issued per cycle. */ - const unsigned int loads_stores_per_cycle; + unsigned int loads_stores_per_cycle; /* How many stores can be issued per cycle. */ - const unsigned int stores_per_cycle; + unsigned int stores_per_cycle; /* How many integer or FP/SIMD operations can be issued per cycle. 
@@ -338,7 +338,7 @@ struct aarch64_base_vec_issue_info This is not very precise, but it's only meant to be a heuristic. We could certainly try to do better in future if there's an example of something that would benefit. */ - const unsigned int general_ops_per_cycle; + unsigned int general_ops_per_cycle; /* How many FP/SIMD operations to count for a floating-point or vector load operation. @@ -347,7 +347,7 @@ struct aarch64_base_vec_issue_info been loaded from memory, these values apply to each individual load. When using an SVE gather load, the values apply to each element of the gather. */ - const unsigned int fp_simd_load_general_ops; + unsigned int fp_simd_load_general_ops; /* How many FP/SIMD operations to count for a floating-point or vector store operation. @@ -355,7 +355,7 @@ struct aarch64_base_vec_issue_info When storing individual elements of an Advanced SIMD vector out to memory, these values apply to each individual store. When using an SVE scatter store, these values apply to each element of the scatter. */ - const unsigned int fp_simd_store_general_ops; + unsigned int fp_simd_store_general_ops; }; using aarch64_scalar_vec_issue_info = aarch64_base_vec_issue_info; @@ -382,9 +382,9 @@ struct aarch64_simd_vec_issue_info : aarch64_base_vec_issue_info load ops: 3 general ops: 3 * (fp_simd_load_general_ops + ld3_st3_general_ops). */ - const unsigned int ld2_st2_general_ops; - const unsigned int ld3_st3_general_ops; - const unsigned int ld4_st4_general_ops; + unsigned int ld2_st2_general_ops; + unsigned int ld3_st3_general_ops; + unsigned int ld4_st4_general_ops; }; using aarch64_advsimd_vec_issue_info = aarch64_simd_vec_issue_info; @@ -411,19 +411,19 @@ struct aarch64_sve_vec_issue_info : aarch64_simd_vec_issue_info {} /* How many predicate operations can be issued per cycle. */ - const unsigned int pred_ops_per_cycle; + unsigned int pred_ops_per_cycle; /* How many predicate operations are generated by a WHILExx instruction. */ - const unsigned int while_pred_ops; + unsigned int while_pred_ops; /* How many predicate operations are generated by an integer comparison instruction. */ - const unsigned int int_cmp_pred_ops; + unsigned int int_cmp_pred_ops; /* How many predicate operations are generated by a floating-point comparison instruction. */ - const unsigned int fp_cmp_pred_ops; + unsigned int fp_cmp_pred_ops; /* How many general and predicate operations are generated by each pair of elements in a gather load or scatter store. These values apply @@ -433,38 +433,38 @@ struct aarch64_sve_vec_issue_info : aarch64_simd_vec_issue_info The reason for using pairs is that that is the largest possible granule size for 128-bit SVE, which can load and store 2 64-bit elements or 4 32-bit elements. */ - const unsigned int gather_scatter_pair_general_ops; - const unsigned int gather_scatter_pair_pred_ops; + unsigned int gather_scatter_pair_general_ops; + unsigned int gather_scatter_pair_pred_ops; }; /* Information related to instruction issue for a particular CPU. */ struct aarch64_vec_issue_info { - const aarch64_base_vec_issue_info *const scalar; - const aarch64_simd_vec_issue_info *const advsimd; - const aarch64_sve_vec_issue_info *const sve; + const aarch64_base_vec_issue_info *scalar; + const aarch64_simd_vec_issue_info *advsimd; + const aarch64_sve_vec_issue_info *sve; }; /* Cost for vector insn classes. */ struct cpu_vector_cost { /* Cost of any integer scalar operation, excluding load and store. 
*/ - const int scalar_int_stmt_cost; + int scalar_int_stmt_cost; /* Cost of any fp scalar operation, excluding load and store. */ - const int scalar_fp_stmt_cost; + int scalar_fp_stmt_cost; /* Cost of a scalar load. */ - const int scalar_load_cost; + int scalar_load_cost; /* Cost of a scalar store. */ - const int scalar_store_cost; + int scalar_store_cost; /* Cost of a taken branch. */ - const int cond_taken_branch_cost; + int cond_taken_branch_cost; /* Cost of a not-taken branch. */ - const int cond_not_taken_branch_cost; + int cond_not_taken_branch_cost; /* Cost of an Advanced SIMD operations. */ const advsimd_vec_cost *advsimd; @@ -473,14 +473,14 @@ struct cpu_vector_cost const sve_vec_cost *sve; /* Issue information, or null if none is provided. */ - const aarch64_vec_issue_info *const issue_info; + const aarch64_vec_issue_info *issue_info; }; /* Branch costs. */ struct cpu_branch_cost { - const int predictable; /* Predictable branch or optimizing for size. */ - const int unpredictable; /* Unpredictable branch or optimizing for speed. */ + int predictable; /* Predictable branch or optimizing for size. */ + int unpredictable; /* Unpredictable branch or optimizing for speed. */ }; /* Control approximate alternatives to certain FP operators. */ @@ -497,25 +497,25 @@ struct cpu_branch_cost /* Allowed modes for approximations. */ struct cpu_approx_modes { - const uint64_t division; /* Division. */ - const uint64_t sqrt; /* Square root. */ - const uint64_t recip_sqrt; /* Reciprocal square root. */ + uint64_t division; /* Division. */ + uint64_t sqrt; /* Square root. */ + uint64_t recip_sqrt; /* Reciprocal square root. */ }; /* Cache prefetch settings for prefetch-loop-arrays. */ struct cpu_prefetch_tune { - const int num_slots; - const int l1_cache_size; - const int l1_cache_line_size; - const int l2_cache_size; + int num_slots; + int l1_cache_size; + int l1_cache_line_size; + int l2_cache_size; /* Whether software prefetch hints should be issued for non-constant strides. */ - const bool prefetch_dynamic_strides; + bool prefetch_dynamic_strides; /* The minimum constant stride beyond which we should use prefetch hints for. 
*/ - const int minimum_stride; - const int default_opt_level; + int minimum_stride; + int default_opt_level; }; /* Model the costs for loads/stores for the register allocators so that it can @@ -568,9 +568,8 @@ struct tune_params enum aarch64_autoprefetch_model { - AUTOPREFETCHER_OFF, - AUTOPREFETCHER_WEAK, - AUTOPREFETCHER_STRONG +#define AARCH64_AUTOPREFETCH_MODE(NAME, ENUM_VALUE) ENUM_VALUE, +#include "aarch64-tuning-enums.def" } autoprefetcher_model; unsigned int extra_tuning_flags; @@ -1140,6 +1139,7 @@ gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *, rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); tree aarch64_general_builtin_decl (unsigned, bool); tree aarch64_general_builtin_rsqrt (unsigned int); +void aarch64_ms_variadic_abi_init_builtins (void); void handle_arm_acle_h (void); void handle_arm_neon_h (void); @@ -1272,6 +1272,7 @@ tree aarch64_resolve_overloaded_builtin_general (location_t, tree, void *); const char *aarch64_sls_barrier (int); const char *aarch64_indirect_call_asm (rtx); +extern const char *aarch64_indirect_branch_asm (rtx); extern bool aarch64_harden_sls_retbr_p (void); extern bool aarch64_harden_sls_blr_p (void); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e7c459d..c02ffd6 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1155,7 +1155,7 @@ ) ;; For EOR (vector, register) and SVE EOR (vector, immediate) -(define_insn "xor<mode>3<vczle><vczbe>" +(define_insn "@xor<mode>3<vczle><vczbe>" [(set (match_operand:VDQ_I 0 "register_operand") (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand") (match_operand:VDQ_I 2 "aarch64_reg_or_xor_imm")))] @@ -3544,10 +3544,10 @@ rtx reduc = gen_lowpart (V4SImode, tmp); rtx res = gen_reg_rtx (V4SImode); emit_insn (gen_aarch64_uminpv4si (res, reduc, reduc)); - emit_move_insn (tmp, gen_lowpart (<MODE>mode, res)); + tmp = gen_lowpart (<MODE>mode, res); } - rtx val = gen_reg_rtx (DImode); - emit_move_insn (val, gen_lowpart (DImode, tmp)); + + rtx val = force_lowpart_subreg (DImode, tmp, <MODE>mode); rtx cc_reg = aarch64_gen_compare_reg (EQ, val, constm1_rtx); rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, constm1_rtx); rtx tmp2 = gen_reg_rtx (SImode); @@ -3607,10 +3607,10 @@ rtx reduc = gen_lowpart (V4SImode, tmp); rtx res = gen_reg_rtx (V4SImode); emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc)); - emit_move_insn (tmp, gen_lowpart (<MODE>mode, res)); + tmp = gen_lowpart (<MODE>mode, res); } - rtx val = gen_reg_rtx (DImode); - emit_move_insn (val, gen_lowpart (DImode, tmp)); + + rtx val = force_lowpart_subreg (DImode, tmp, <MODE>mode); rtx cc_reg = aarch64_gen_compare_reg (NE, val, const0_rtx); rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx); rtx tmp2 = gen_reg_rtx (SImode); @@ -4156,6 +4156,8 @@ ;; Patterns comparing two vectors and conditionally jump +;; Define cbranch on masks. This optab is only called for BOOLEAN_VECTOR_TYPE_P +;; which allows optimizing compares with zero. (define_expand "cbranch<mode>4" [(set (pc) (if_then_else @@ -4196,6 +4198,83 @@ DONE; }) +;; Define vec_cbranch_any and vec_cbranch_all +;; Vector comparison and branch for Adv. SIMD Integer types using SVE +;; instructions. 
+(define_expand "<optab><mode>" + [(set (pc) + (unspec:VALL + [(if_then_else + (match_operator 0 "aarch64_cbranch_compare_operation" + [(match_operand:VALL 1 "register_operand") + (match_operand:VALL 2 "aarch64_simd_reg_or_zero")]) + (label_ref (match_operand 3 "")) + (pc))] + CBRANCH_CMP))] + "TARGET_SIMD" +{ + auto code = GET_CODE (operands[0]); + if (TARGET_SVE) + { + machine_mode full_mode = aarch64_full_sve_mode (<VEL>mode).require (); + + rtx in1 = force_lowpart_subreg (full_mode, operands[1], <MODE>mode); + rtx in2; + if (CONST0_RTX (<MODE>mode) == operands[2]) + in2 = CONST0_RTX (full_mode); + else + in2 = force_lowpart_subreg (full_mode, operands[2], <MODE>mode); + + unsigned lanes + = exact_div (GET_MODE_BITSIZE (<MODE>mode), 8).to_constant (); + machine_mode pred_mode = aarch64_sve_pred_mode (full_mode); + rtx ptrue = aarch64_ptrue_reg (VNx16BImode, lanes); + rtx hint = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode); + + rtx tmp = gen_reg_rtx (pred_mode); + rtx cast_ptrue = gen_lowpart (pred_mode, ptrue); + + if (FLOAT_MODE_P (full_mode)) + { + aarch64_expand_sve_vec_cmp<sve_cmp_suff> (tmp, code, in1, in2); + emit_insn (gen_and3 (pred_mode, tmp, tmp, cast_ptrue)); + emit_insn (gen_aarch64_ptest (pred_mode, ptrue, cast_ptrue, hint, + tmp)); + } + else + emit_insn (gen_aarch64_pred_cmp_ptest (code, full_mode, tmp, ptrue, in1, + in2, cast_ptrue, hint, + cast_ptrue, hint)); + + rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); + rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx); + emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[3])); + DONE; + } + + rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode); + emit_insn (gen_vec_cmp<mode><v_int_equiv> (tmp, operands[0], operands[1], + operands[2])); + + /* For 128-bit vectors we need a reduction to 64-bit first. */ + if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode))) + { + /* Always reduce using a V4SI. */ + rtx reduc = gen_lowpart (V4SImode, tmp); + rtx res = gen_reg_rtx (V4SImode); + emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc)); + emit_move_insn (tmp, gen_lowpart (<V_INT_EQUIV>mode, res)); + } + + rtx val = gen_reg_rtx (DImode); + emit_move_insn (val, gen_lowpart (DImode, tmp)); + + rtx cc_reg = aarch64_gen_compare_reg (<cbranch_op>, val, const0_rtx); + rtx cmp_rtx = gen_rtx_fmt_ee (<cbranch_op>, DImode, cc_reg, const0_rtx); + emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[3])); + DONE; +}) + ;; Patterns comparing two vectors to produce a mask. (define_expand "vec_cmp<mode><mode>" @@ -9449,6 +9528,18 @@ [(set_attr "type" "crypto_sha3")] ) +(define_insn "*eor3q<mode>4" + [(set (match_operand:ALLI 0 "register_operand" "=w") + (xor:ALLI + (xor:ALLI + (match_operand:ALLI 2 "register_operand" "w") + (match_operand:ALLI 3 "register_operand" "w")) + (match_operand:ALLI 1 "register_operand" "w")))] + "TARGET_SHA3 && reload_completed" + "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b" + [(set_attr "type" "crypto_sha3")] +) + (define_insn "aarch64_rax1qv2di" [(set (match_operand:V2DI 0 "register_operand" "=w") (xor:V2DI diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index b2b03dc8c..dbd80ca 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -4691,9 +4691,6 @@ register_builtin_types () vectype = build_truth_vector_type_for_mode (BYTES_PER_SVE_VECTOR, VNx16BImode); num_pr = 1; - /* Leave svbool_t as indivisible for now. We don't yet support - C/C++ operators on predicates. 
*/ - TYPE_INDIVISIBLE_P (vectype) = 1; } else { @@ -4710,12 +4707,12 @@ register_builtin_types () && TYPE_ALIGN (vectype) == 128 && known_eq (size, BITS_PER_SVE_VECTOR)); num_zr = 1; - TYPE_INDIVISIBLE_P (vectype) = 0; } vectype = build_distinct_type_copy (vectype); gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype)); SET_TYPE_STRUCTURAL_EQUALITY (vectype); TYPE_ARTIFICIAL (vectype) = 1; + TYPE_INDIVISIBLE_P (vectype) = 0; make_type_sizeless (vectype); } if (num_pr) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index f459f63..6d2e87c 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3112,6 +3112,48 @@ } ) +;; Don't allow expansions of SVE to Adv. SIMD registers immediately as subregs. +;; Doing so prevents combine from matching instructions generated by the +;; SVE/Adv. SIMD bridge as the SVE modes are not valid inside the instructions. +;; Eventually early-ra or reload will split them but by then we've lost the +;; combinations. Instead split them early and allow fwprop or combine to +;; push them into instructions where they are actually supported as part of the +;; instruction. +(define_expand "vec_extract<mode><v128>" + [(match_operand:<V128> 0 "register_operand") + (match_operand:SVE_FULL 1 "register_operand") + (match_operand:SI 2 "const0_operand")] + "TARGET_SVE" +{ + emit_move_insn (operands[0], + force_lowpart_subreg (<V128>mode, operands[1], <MODE>mode)); + DONE; +}) + +;; Similarly for extractions of 64-bit Adv. SIMD vectors from SVE vectors. For +;; these extractions we can support offsets 0 and 1 by first extracting a +;; 128-bit vector and then selecting the appropriate half. +(define_expand "vec_extract<mode><v64>" + [(match_operand:<V64> 0 "register_operand") + (match_operand:SVE_FULL_BHS 1 "register_operand") + (match_operand:SI 2 "const0_to_1_operand")] + "TARGET_SVE" +{ + if (CONST0_RTX (SImode) == operands[2]) + emit_move_insn (operands[0], + force_lowpart_subreg (<V64>mode, operands[1], + <MODE>mode)); + else + { + rtx tmp = gen_reg_rtx (<V128>mode); + emit_move_insn (tmp, + force_lowpart_subreg (<V128>mode, operands[1], + <MODE>mode)); + emit_insn (gen_vec_extract<v128><v64> (operands[0], tmp, operands[2])); + } + DONE; +}) + ;; Extract element zero. This is a special case because we want to force ;; the registers to be the same for the second alternative, and then ;; split the instruction into nothing after RA. @@ -3520,6 +3562,47 @@ } ) +;; Unpredicated sign and zero extension from a boolean mode. 
+(define_expand "extend<vpred><mode>2" + [(set (match_operand:SVE_ALL 0 "register_operand") + (unspec:SVE_ALL + [(match_operand:<VPRED> 1 "register_operand") + (match_dup 2) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + { + operands[2] = CONSTM1_RTX (<MODE>mode); + operands[3] = CONST0_RTX (<MODE>mode); + } +) + +(define_expand "zero_extend<vpred><mode>2" + [(set (match_operand:SVE_ALL 0 "register_operand") + (unspec:SVE_ALL + [(match_operand:<VPRED> 1 "register_operand") + (match_dup 2) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + { + operands[2] = CONST1_RTX (<MODE>mode); + operands[3] = CONST0_RTX (<MODE>mode); + } +) + +(define_expand "trunc<mode><vpred>2" + [(match_operand:<VPRED> 0 "register_operand") + (match_operand:SVE_I 1 "register_operand")] + "TARGET_SVE" + { + rtx mone = CONSTM1_RTX (<MODE>mode); + rtx cmp = gen_rtx_EQ (<MODE>mode, operands[1], mone); + emit_insn (gen_vec_cmp<mode><vpred> (operands[0], cmp, operands[1], mone)); + DONE; + } +) + ;; Predicated sign and zero extension from a narrower mode. (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2" [(set (match_operand:SVE_HSDI 0 "register_operand") @@ -8528,6 +8611,33 @@ } ) +(define_expand "vec_cmp<mode><mode>" + [(parallel + [(set (match_operand:PRED_ALL 0 "register_operand") + (match_operator:PRED_ALL 1 "aarch64_equality_operator" + [(match_operand:PRED_ALL 2 "register_operand") + (match_operand:PRED_ALL 3 "register_operand")]))])] + "TARGET_SVE" + { + rtx ptrue = aarch64_ptrue_reg (<MODE>mode); + if (GET_CODE (operands[1]) == EQ) + { + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_aarch64_pred_xor<mode>_z (tmp, ptrue, + operands[2], operands[3])); + emit_insn (gen_aarch64_pred_xor<mode>_z (operands[0], ptrue, + tmp, ptrue)); + } + else if (GET_CODE (operands[1]) == NE) + emit_insn (gen_aarch64_pred_xor<mode>_z (operands[0], ptrue, + operands[2], operands[3])); + else + gcc_unreachable (); + + DONE; + } +) + ;; Unsigned integer comparisons. Don't enforce an immediate range here, since ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int ;; instead. @@ -9693,7 +9803,8 @@ ;; - PTEST ;; ------------------------------------------------------------------------- -;; Branch based on predicate equality or inequality. +;; Branch based on predicate equality or inequality. This allows PTEST to be +;; combined with other flag setting instructions like ORR -> ORRS. (define_expand "cbranch<mode>4" [(set (pc) (if_then_else @@ -9722,8 +9833,78 @@ } ) +;; Define vec_cbranch_any and vec_cbranch_all +;; Branch based on predicate equality or inequality. 
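+;; As an illustrative sketch (not taken from this patch's tests), these
+;; patterns give the vectorizer a branch shape for early-exit loops such as
+;;   for (i = 0; i < n; i++)
+;;     if (a[i] == b[i])
+;;       break;
+;; where it must branch depending on whether any lane (or, for the _all
+;; forms, every lane) of a predicate is set.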
+(define_expand "<optab><mode>"
+  [(set (pc)
+	(unspec:PRED_ALL
+	  [(if_then_else
+	     (match_operator 0 "aarch64_equality_operator"
+	       [(match_operand:PRED_ALL 1 "register_operand")
+		(match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
+	     (label_ref (match_operand 3 ""))
+	     (pc))]
+	  CBRANCH_CMP))]
+  ""
+  {
+    rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
+    rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
+    rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
+    rtx pred;
+    if (operands[2] == CONST0_RTX (<MODE>mode))
+      pred = operands[1];
+    else
+      {
+	pred = gen_reg_rtx (<MODE>mode);
+	emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
+						 operands[2]));
+      }
+    emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
+
+    rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+    rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+    emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[3]));
+    DONE;
+  }
+)
+
+;; Define cond_vec_cbranch_any and cond_vec_cbranch_all.
+;; Vector compare-and-branch for SVE floating-point types, but only for EQ
+;; and NE comparisons, which allows us to use integer compares instead and
+;; avoid the PTEST.
+(define_expand "<optab><mode>"
+  [(set (pc)
+	(unspec:SVE_I
+	  [(if_then_else
+	     (match_operator 0 "aarch64_comparison_operator"
+	       [(match_operand:<VPRED> 1 "register_operand")
+		(match_operand:SVE_I 2 "register_operand")
+		(match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")])
+	     (label_ref (match_operand 4 ""))
+	     (pc))]
+	  COND_CBRANCH_CMP))]
+  ""
+{
+  auto code = GET_CODE (operands[0]);
+  rtx in1 = operands[2];
+  rtx in2 = operands[3];
+
+  rtx res = gen_reg_rtx (<VPRED>mode);
+  rtx gp = gen_lowpart (VNx16BImode, operands[1]);
+  rtx cast_gp = operands[1];
+  rtx flag = gen_int_mode (SVE_MAYBE_NOT_PTRUE, SImode);
+
+  emit_insn (gen_aarch64_pred_cmp_ptest (code, <MODE>mode, res, gp, in1, in2,
+					 cast_gp, flag, cast_gp, flag));
+
+  rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+  rtx cmp_reg = gen_rtx_<cbranch_op> (VOIDmode, cc_reg, const0_rtx);
+  emit_jump_insn (gen_aarch64_bcond (cmp_reg, cc_reg, operands[4]));
+  DONE;
+})
+
 ;; See "Description of UNSPEC_PTEST" above for details.
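 ;; (As an aside on the change below: the "@" prefix added to the pattern
 ;; name is what generates the parameterized
 ;; gen_aarch64_ptest (machine_mode, ...) helper that the Advanced SIMD
 ;; cbranch expander above calls with pred_mode as its first argument.)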
-(define_insn "aarch64_ptest<mode>" +(define_insn "@aarch64_ptest<mode>" [(set (reg:CC_NZC CC_REGNUM) (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") (match_operand 1) diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index dc10f70..d6f1bbc 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,applea12,applem1_0,applem1_1,applem1_2,applem1_3,applem2_0,applem2_1,applem2_2,applem2_3,applem3_0,applem3_1,applem3_2,applem4_0,applem4_1,applem4_2,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,gb10,generic,generic_armv8_a,generic_armv9_a" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,ampere1c,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,applea12,applem1_0,applem1_1,applem1_2,applem1_3,applem2_0,applem2_1,applem2_2,applem2_3,applem3_0,applem3_1,applem3_2,applem4_0,applem4_1,applem4_2,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,c1nano,c1pro,c1premium,c1ultra,demeter,olympus,gb10,generic,generic_armv8_a,generic_armv9_a" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/config/aarch64/aarch64-tuning-enums.def b/gcc/config/aarch64/aarch64-tuning-enums.def new file mode 100644 index 0000000..701b64f --- /dev/null +++ b/gcc/config/aarch64/aarch64-tuning-enums.def @@ -0,0 +1,37 @@ +/* AArch64 tuning parameter enum definitions. + Copyright The GNU Toolchain Authors. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* This file contains the enum definitions for AArch64 tuning parameters + that are used in both the JSON parser/printer and the tuning structures. */ + +#ifdef AARCH64_AUTOPREFETCH_MODE +AARCH64_AUTOPREFETCH_MODE("AUTOPREFETCHER_OFF", AUTOPREFETCHER_OFF) +AARCH64_AUTOPREFETCH_MODE("AUTOPREFETCHER_WEAK", AUTOPREFETCHER_WEAK) +AARCH64_AUTOPREFETCH_MODE("AUTOPREFETCHER_STRONG", AUTOPREFETCHER_STRONG) +#endif + +#ifdef AARCH64_LDP_STP_POLICY +AARCH64_LDP_STP_POLICY("AARCH64_LDP_STP_POLICY_DEFAULT", AARCH64_LDP_STP_POLICY_DEFAULT) +AARCH64_LDP_STP_POLICY("AARCH64_LDP_STP_POLICY_ALIGNED", AARCH64_LDP_STP_POLICY_ALIGNED) +AARCH64_LDP_STP_POLICY("AARCH64_LDP_STP_POLICY_ALWAYS", AARCH64_LDP_STP_POLICY_ALWAYS) +AARCH64_LDP_STP_POLICY("AARCH64_LDP_STP_POLICY_NEVER", AARCH64_LDP_STP_POLICY_NEVER) +#endif + +#undef AARCH64_AUTOPREFETCH_MODE +#undef AARCH64_LDP_STP_POLICY diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 6f6dea6..0ef22e8 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -83,6 +83,7 @@ #include "rtlanal.h" #include "tree-dfa.h" #include "asan.h" +#include "aarch64-abi-ms-protos.h" #include "aarch64-elf-metadata.h" #include "aarch64-feature-deps.h" #include "config/arm/aarch-common.h" @@ -99,6 +100,8 @@ #include "ipa-fnsummary.h" #include "hash-map.h" #include "aarch64-sched-dispatch.h" +#include "aarch64-json-tunings-printer.h" +#include "aarch64-json-tunings-parser.h" /* This file should be included last. */ #include "target-def.h" @@ -114,6 +117,11 @@ #define HAVE_AS_AEABI_BUILD_ATTRIBUTES 0 #endif +/* Not on Windows ABI unless explicitly set. */ +#ifndef TARGET_AARCH64_MS_ABI +#define TARGET_AARCH64_MS_ABI 0 +#endif + /* Flags that describe how a function shares certain architectural state with its callers. @@ -749,6 +757,9 @@ handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree, *no_add_attrs = true; return NULL_TREE; + case ARM_PCS_MS_VARIADIC: + /* Rely on the exclusions list for preserve_none. */ + case ARM_PCS_PRESERVE_NONE: case ARM_PCS_TLSDESC: case ARM_PCS_UNKNOWN: break; @@ -851,6 +862,16 @@ handle_arm_shared (tree *node, tree name, tree args, return NULL_TREE; } +/* Mutually-exclusive function type attributes for various PCS variants. */ +static const struct attribute_spec::exclusions aarch64_pcs_exclusions[] = +{ + /* Attribute name exclusion applies to: + function, type, variable */ + { "aarch64_vector_pcs", false, true, false }, + { "preserve_none", false, true, false }, + { NULL, false, false, false } +}; + /* Mutually-exclusive function type attributes for controlling PSTATE.SM. 
*/ static const struct attribute_spec::exclusions attr_streaming_exclusions[] = { @@ -867,7 +888,10 @@ static const attribute_spec aarch64_gnu_attributes[] = /* { name, min_len, max_len, decl_req, type_req, fn_type_req, affects_type_identity, handler, exclude } */ { "aarch64_vector_pcs", 0, 0, false, true, true, true, - handle_aarch64_vector_pcs_attribute, NULL }, + handle_aarch64_vector_pcs_attribute, + aarch64_pcs_exclusions }, + { "preserve_none", 0, 0, false, true, true, true, NULL, + aarch64_pcs_exclusions }, { "indirect_return", 0, 0, false, true, true, true, NULL, NULL }, { "arm_sve_vector_bits", 1, 1, false, true, false, true, aarch64_sve::handle_arm_sve_vector_bits_attribute, @@ -1317,6 +1341,23 @@ aarch64_sve_abi (void) return sve_abi; } +/* Return the descriptor of the preserve_none PCS. */ + +static const predefined_function_abi & +aarch64_preserve_none_abi (void) +{ + auto &preserve_none_abi = function_abis[ARM_PCS_PRESERVE_NONE]; + if (!preserve_none_abi.initialized_p ()) + { + HARD_REG_SET preserved_regs = {}; + if (!CALL_USED_X18) + SET_HARD_REG_BIT (preserved_regs, R18_REGNUM); + auto full_reg_clobbers = reg_class_contents[ALL_REGS] & ~preserved_regs; + preserve_none_abi.initialize (ARM_PCS_PRESERVE_NONE, full_reg_clobbers); + } + return preserve_none_abi; +} + /* If X is an UNSPEC_SALT_ADDR expression, return the address that it wraps, otherwise return X itself. */ @@ -2304,14 +2345,35 @@ aarch64_takes_arguments_in_sve_regs_p (const_tree fntype) return false; } +/* Return the descriptor of the Windows Arm64 variadic function call ABI. */ + +static const predefined_function_abi & +aarch64_ms_variadic_abi (void) +{ + predefined_function_abi &ms_variadic_abi = function_abis[ARM_PCS_MS_VARIADIC]; + if (!ms_variadic_abi.initialized_p ()) + { + HARD_REG_SET full_reg_clobbers + = default_function_abi.full_reg_clobbers (); + ms_variadic_abi.initialize (ARM_PCS_MS_VARIADIC, full_reg_clobbers); + } + return ms_variadic_abi; +} + /* Implement TARGET_FNTYPE_ABI. */ static const predefined_function_abi & aarch64_fntype_abi (const_tree fntype) { + if (TARGET_AARCH64_MS_ABI && stdarg_p (fntype)) + return aarch64_ms_variadic_abi (); + if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype))) return aarch64_simd_abi (); + if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (fntype))) + return aarch64_preserve_none_abi (); + if (aarch64_returns_value_in_sve_regs_p (fntype) || aarch64_takes_arguments_in_sve_regs_p (fntype)) return aarch64_sve_abi (); @@ -2521,8 +2583,16 @@ aarch64_reg_save_mode (unsigned int regno) { case ARM_PCS_AAPCS64: /* Only the low 64 bits are saved by the base PCS. */ + case ARM_PCS_PRESERVE_NONE: + /* In preserve_none all fpr registers are caller saved, so the choice + here should not matter. Nevertheless, fall back to the base AAPCS + for consistency. */ return DFmode; + case ARM_PCS_MS_VARIADIC: + /* Windows only uses GP registers for variadic arguments. */ + return DImode; + case ARM_PCS_SIMD: /* The vector PCS saves the low 128 bits (which is the full register on non-SVE targets). 
*/
@@ -2649,7 +2719,9 @@
 aarch64_hard_regno_call_part_clobbered (unsigned int abi_id,
 					unsigned int regno,
 					machine_mode mode)
 {
-  if (FP_REGNUM_P (regno) && abi_id != ARM_PCS_SVE)
+  if (FP_REGNUM_P (regno)
+      && abi_id != ARM_PCS_SVE
+      && abi_id != ARM_PCS_PRESERVE_NONE)
     {
       poly_int64 per_register_size = GET_MODE_SIZE (mode);
       unsigned int nregs = hard_regno_nregs (regno, mode);
@@ -6826,6 +6898,10 @@ aarch64_function_ok_for_sibcall (tree, tree exp)
   auto from_abi = crtl->abi->id ();
   auto to_abi = expr_callee_abi (exp).id ();
 
+  /* preserve_none functions can tail-call anything that the base PCS can.  */
+  if (from_abi != to_abi && from_abi == ARM_PCS_PRESERVE_NONE)
+    from_abi = ARM_PCS_AAPCS64;
+
   /* ARM_PCS_SVE preserves strictly more than ARM_PCS_SIMD, which in turn
      preserves strictly more than the base PCS.  The callee must preserve
      everything that the caller is required to preserve.  */
@@ -7287,6 +7363,122 @@ bitint_or_aggr_of_bitint_p (tree type)
   return false;
 }
 
+/* How many GPRs are available for argument passing in the procedure call
+   standard.  */
+static int
+num_pcs_arg_regs (enum arm_pcs pcs)
+{
+  switch (pcs)
+    {
+    case ARM_PCS_PRESERVE_NONE:
+      return NUM_PRESERVE_NONE_ARG_REGS;
+    case ARM_PCS_AAPCS64:
+    case ARM_PCS_MS_VARIADIC:
+    case ARM_PCS_SIMD:
+    case ARM_PCS_SVE:
+    case ARM_PCS_TLSDESC:
+    case ARM_PCS_UNKNOWN:
+      return NUM_ARG_REGS;
+    }
+  gcc_unreachable ();
+}
+
+/* Get the NUM'th GPR argument passing register from the PCS procedure call
+   standard.  */
+
+static int
+get_pcs_arg_reg (enum arm_pcs pcs, int num)
+{
+  static const int ARM_PCS_PRESERVE_NONE_REGISTERS[] = PRESERVE_NONE_REGISTERS;
+
+  gcc_assert (num < num_pcs_arg_regs (pcs));
+
+  switch (pcs)
+    {
+    case ARM_PCS_PRESERVE_NONE:
+      return ARM_PCS_PRESERVE_NONE_REGISTERS[num];
+    case ARM_PCS_AAPCS64:
+    case ARM_PCS_MS_VARIADIC:
+    case ARM_PCS_SIMD:
+    case ARM_PCS_SVE:
+    case ARM_PCS_TLSDESC:
+    case ARM_PCS_UNKNOWN:
+      return R0_REGNUM + num;
+    }
+  gcc_unreachable ();
+}
+
+static int
+aarch64_arg_size (const function_arg_info &arg)
+{
+  HOST_WIDE_INT size;
+
+  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
+  if (arg.type)
+    size = int_size_in_bytes (arg.type);
+  else
+    /* No frontends can create types with variable-sized modes, so we
+       shouldn't be asked to pass or return them.  */
+    size = GET_MODE_SIZE (arg.mode).to_constant ();
+
+  return ROUND_UP (size, UNITS_PER_WORD);
+}
+
+/* The Windows Arm64 variadic function call ABI uses only the C.12-C.15 rules.
+   See: https://learn.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions#addendum-variadic-functions.  */
+
+static void
+aarch64_ms_variadic_abi_layout_arg (cumulative_args_t pcum_v,
+				    const function_arg_info &arg)
+{
+  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+  auto mode = arg.mode;
+  auto ncrn = pcum->aapcs_ncrn;
+  HOST_WIDE_INT size = aarch64_arg_size (arg);
+  auto nregs = size / UNITS_PER_WORD;
+
+  if (ncrn < NUM_ARG_REGS)
+    {
+      /* The argument bytes are copied to the core registers.  */
+      if (nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
+	pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
+      else
+	{
+	  /* Handle the case when the argument is split between the last
+	     registers and the stack.  */
+	  if (ncrn + nregs > NUM_ARG_REGS)
+	    {
+	      pcum->aapcs_stack_words = ncrn + nregs - NUM_ARG_REGS;
+	      nregs -= pcum->aapcs_stack_words;
+	    }
+
+	  /* Generate the instructions that load the argument into registers.
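+	     As a worked example (values chosen for illustration only): a
+	     24-byte struct arriving when ncrn == 6 has nregs == 3; the code
+	     above records one stack word and leaves nregs == 2, so bytes
+	     0-15 go in x6/x7 and the final 8 bytes go on the stack.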
*/ + rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs)); + for (auto i = 0; i < nregs; i++) + { + rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, + GEN_INT (i * UNITS_PER_WORD)); + XVECEXP (par, 0, i) = tmp; + } + pcum->aapcs_reg = par; + } + + pcum->aapcs_nextncrn = ncrn + nregs; + } + else + { + /* The remaining arguments are passed on stack; record the needed + number of words for this argument and align the total size if + necessary. */ + pcum->aapcs_nextncrn = NUM_ARG_REGS; + pcum->aapcs_stack_words = nregs; + } + + pcum->aapcs_arg_processed = true; +} + /* Layout a function argument according to the AAPCS64 rules. The rule numbers refer to the rule numbers in the AAPCS64. ORIG_MODE is the mode that was originally given to us by the target hook, whereas the @@ -7310,6 +7502,12 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg) if (pcum->aapcs_arg_processed) return; + if (pcum->pcs_variant == ARM_PCS_MS_VARIADIC) + { + aarch64_ms_variadic_abi_layout_arg (pcum_v, arg); + return; + } + bool warn_pcs_change = (warn_psabi && !pcum->silent_p @@ -7385,7 +7583,9 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg) unprototyped function. There is no ABI-defined location we can return in this case, so we have no real choice but to raise an error immediately, even though this is only a query function. */ - if (arg.named && pcum->pcs_variant != ARM_PCS_SVE) + if (arg.named + && pcum->pcs_variant != ARM_PCS_SVE + && pcum->pcs_variant != ARM_PCS_PRESERVE_NONE) { gcc_assert (!pcum->silent_p); error ("SVE type %qT cannot be passed to an unprototyped function", @@ -7400,7 +7600,6 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg) pcum->aapcs_nextnvrn = pcum->aapcs_nvrn + pst_info.num_zr (); pcum->aapcs_nextnprn = pcum->aapcs_nprn + pst_info.num_pr (); gcc_assert (arg.named - && pcum->pcs_variant == ARM_PCS_SVE && pcum->aapcs_nextnvrn <= NUM_FP_ARG_REGS && pcum->aapcs_nextnprn <= NUM_PR_ARG_REGS); pcum->aapcs_reg = pst_info.get_rtx (mode, V0_REGNUM + pcum->aapcs_nvrn, @@ -7426,15 +7625,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg) && (aarch64_some_values_include_pst_objects_p (type) || (vec_flags & VEC_PARTIAL))); - /* Size in bytes, rounded to the nearest multiple of 8 bytes. */ - if (type) - size = int_size_in_bytes (type); - else - /* No frontends can create types with variable-sized modes, so we - shouldn't be asked to pass or return them. */ - size = GET_MODE_SIZE (mode).to_constant (); - size = ROUND_UP (size, UNITS_PER_WORD); - + size = aarch64_arg_size (arg); allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode); allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, mode, @@ -7514,7 +7705,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg) /* C6 - C9. though the sign and zero extension semantics are handled elsewhere. This is the case where the argument fits entirely general registers. 
*/
-  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
+  if (allocate_ncrn && (ncrn + nregs <= num_pcs_arg_regs (pcum->pcs_variant)))
     {
       gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
 
@@ -7550,7 +7741,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
 	    inform (input_location, "parameter passing for argument of type "
 		    "%qT changed in GCC 9.1", type);
 	  ++ncrn;
-	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
+	  gcc_assert (ncrn + nregs <= num_pcs_arg_regs (pcum->pcs_variant));
 	}
     }
 
@@ -7567,12 +7758,13 @@
 	}
 
       /* NREGS can be 0 when e.g. an empty structure is to be passed.
-	 A reg is still generated for it, but the caller should be smart
-	 enough not to use it.  */
-      if (nregs == 0
-	  || (nregs == 1 && !sve_p)
-	  || GET_MODE_CLASS (mode) == MODE_INT)
-	pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
+	 In this situation the register should never be used, so assign
+	 NULL_RTX.  */
+      if (nregs == 0)
+	pcum->aapcs_reg = NULL_RTX;
+      else if ((nregs == 1 && !sve_p) || GET_MODE_CLASS (mode) == MODE_INT)
+	pcum->aapcs_reg
+	  = gen_rtx_REG (mode, get_pcs_arg_reg (pcum->pcs_variant, ncrn));
       else
 	{
 	  rtx par;
@@ -7584,7 +7776,8 @@
 	      scalar_int_mode reg_mode = word_mode;
 	      if (nregs == 1)
 		reg_mode = int_mode_for_mode (mode).require ();
-	      rtx tmp = gen_rtx_REG (reg_mode, R0_REGNUM + ncrn + i);
+	      int reg = get_pcs_arg_reg (pcum->pcs_variant, ncrn + i);
+	      rtx tmp = gen_rtx_REG (reg_mode, reg);
 	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
 				       GEN_INT (i * UNITS_PER_WORD));
 	      XVECEXP (par, 0, i) = tmp;
@@ -7597,7 +7790,7 @@
     }
 
   /* C.11 */
-  pcum->aapcs_nextncrn = NUM_ARG_REGS;
+  pcum->aapcs_nextncrn = num_pcs_arg_regs (pcum->pcs_variant);
 
   /* The argument is passed on stack; record the needed number of words for
      this argument and align the total size if necessary.  */
@@ -7674,9 +7867,10 @@
 aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
 {
   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
   gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64
+	      || pcum->pcs_variant == ARM_PCS_PRESERVE_NONE
+	      || pcum->pcs_variant == ARM_PCS_MS_VARIADIC
 	      || pcum->pcs_variant == ARM_PCS_SIMD
 	      || pcum->pcs_variant == ARM_PCS_SVE);
-
   if (arg.end_marker_p ())
     {
       rtx abi_cookie = aarch64_gen_callee_cookie (pcum->isa_mode,
@@ -7767,13 +7961,12 @@ aarch64_function_arg_advance (cumulative_args_t pcum_v,
   CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
   if (pcum->pcs_variant == ARM_PCS_AAPCS64
       || pcum->pcs_variant == ARM_PCS_SIMD
-      || pcum->pcs_variant == ARM_PCS_SVE)
+      || pcum->pcs_variant == ARM_PCS_SVE
+      || pcum->pcs_variant == ARM_PCS_PRESERVE_NONE
+      || pcum->pcs_variant == ARM_PCS_MS_VARIADIC)
     {
       aarch64_layout_arg (pcum_v, arg);
-      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
-		  != (pcum->aapcs_stack_words != 0));
-      if (pcum->aapcs_reg
-	  && aarch64_call_switches_pstate_sm (pcum->isa_mode))
+      if (pcum->aapcs_reg && aarch64_call_switches_pstate_sm (pcum->isa_mode))
 	aarch64_record_sme_mode_switch_args (pcum);
 
       pcum->aapcs_arg_processed = false;
@@ -7786,13 +7979,42 @@
     }
 }
 
-bool
-aarch64_function_arg_regno_p (unsigned regno)
+/* Checks if a register is live at entry to a preserve_none pcs function.
+   That is, whether it is used for passing arguments.
See ARM_PCS_PRESERVE_NONE_REGISTERS + for full list and order of argument passing registers. */ + +static bool +function_arg_preserve_none_regno_p (unsigned regno) { - return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS) + return ((GP_REGNUM_P (regno) && regno != R8_REGNUM && regno != R15_REGNUM + && regno != R16_REGNUM && regno != R17_REGNUM && regno != R18_REGNUM + && regno != R19_REGNUM && regno != R29_REGNUM && regno != R30_REGNUM) || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS) || (PR_REGNUM_P (regno) && regno < P0_REGNUM + NUM_PR_ARG_REGS)); } +/* Implements FUNCTION_ARG_REGNO_P. */ +bool +aarch64_function_arg_regno_p (unsigned regno) +{ + enum arm_pcs pcs + = cfun ? (arm_pcs) fndecl_abi (cfun->decl).id () : ARM_PCS_AAPCS64; + + switch (pcs) + { + case ARM_PCS_AAPCS64: + case ARM_PCS_MS_VARIADIC: + case ARM_PCS_SIMD: + case ARM_PCS_SVE: + case ARM_PCS_TLSDESC: + case ARM_PCS_UNKNOWN: + return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS) + || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS) + || (PR_REGNUM_P (regno) && regno < P0_REGNUM + NUM_PR_ARG_REGS)); + case ARM_PCS_PRESERVE_NONE: + return function_arg_preserve_none_regno_p (regno); + } + gcc_unreachable (); +} /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least PARM_BOUNDARY bits of alignment, but will be given anything up @@ -16307,6 +16529,10 @@ aarch64_init_builtins () { aarch64_general_init_builtins (); aarch64_sve::init_builtins (); + if (TARGET_AARCH64_MS_ABI) + { + aarch64_ms_variadic_abi_init_builtins (); + } #ifdef SUBTARGET_INIT_BUILTINS SUBTARGET_INIT_BUILTINS; #endif @@ -19051,12 +19277,30 @@ aarch64_override_options_internal (struct gcc_options *opts) aarch64_parse_override_string (opts->x_aarch64_override_tune_string, &aarch64_tune_params); + /* We need to parse the JSON file only once per program execution. */ + if (opts->x_muser_provided_CPU) + { + static bool json_parsed = false; + static struct tune_params aarch64_json_params; + if (!json_parsed) + { + aarch64_json_params = *(tune->tune); + aarch64_load_tuning_params_from_json (opts->x_muser_provided_CPU, + &aarch64_json_params); + json_parsed = true; + } + aarch64_tune_params = aarch64_json_params; + } + if (opts->x_aarch64_ldp_policy_param) aarch64_tune_params.ldp_policy_model = opts->x_aarch64_ldp_policy_param; if (opts->x_aarch64_stp_policy_param) aarch64_tune_params.stp_policy_model = opts->x_aarch64_stp_policy_param; + if (opts->x_fdump_tuning_model) + aarch64_print_tune_params (aarch64_tune_params, opts->x_fdump_tuning_model); + /* This target defaults to strict volatile bitfields. */ if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2)) opts->x_flag_strict_volatile_bitfields = 1; @@ -20380,6 +20624,8 @@ typedef struct /* The "rdma" alias uses a different FEAT_NAME to avoid a duplicate feature_deps name. */ #define FEAT_RDMA FEAT_RDM +#define FEAT_SSBS FEAT_SSBS2 +#define FEAT_MEMTAG FEAT_MEMTAG2 /* FMV features are listed in priority order, to make it easier to sort target strings. */ @@ -21678,6 +21924,24 @@ aarch64_load_tp (rtx target) return target; } +/* Windows Arm64 variadic function call ABI specific va_list type node. */ +tree ms_va_list_type_node = NULL_TREE; + +/* Setup the builtin va_list data type and for 64-bit the additional + calling convention specific va_list data types. */ + +static tree +aarch64_ms_variadic_abi_build_builtin_va_list (void) +{ + /* For MS_ABI we use plain pointer to argument area. 
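+   As an illustrative sketch (behaviour inferred from this layout, not
+   quoted from the ABI documents): with a char * va_list, va_arg
+   degenerates to pointer arithmetic of roughly the form
+     addr = ap;
+     ap += ROUND_UP (sizeof (type), 8);
+     result = *(type *) addr;
+   since each variadic slot is a GP-register-sized unit.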
*/ + tree char_ptr_type = build_pointer_type (char_type_node); + tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE, + TYPE_ATTRIBUTES (char_ptr_type)); + ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr); + + return ms_va_list_type_node; +} + /* On AAPCS systems, this is the "struct __va_list". */ static GTY(()) tree va_list_type; @@ -21693,11 +21957,17 @@ static GTY(()) tree va_list_type; void *__vr_top; int __gr_offs; int __vr_offs; - }; */ + }; + + Windows ABI is handled using + aarch64_ms_variadic_abi_build_builtin_va_list (void). */ static tree aarch64_build_builtin_va_list (void) { + if (TARGET_AARCH64_MS_ABI) + return aarch64_ms_variadic_abi_build_builtin_va_list (); + tree va_list_name; tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; @@ -21761,10 +22031,29 @@ aarch64_build_builtin_va_list (void) return va_list_type; } +static void +aarch64_ms_variadic_abi_expand_builtin_va_start (tree valist, rtx nextarg) +{ + rtx va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE); + + /* ??? Should we initialize and use cfun->va_list_gpr_size instead of + * defining single purpose + * cfun->machine->frame.unaligned_saved_varargs_size field. + * Currently, the cfun->va_list_gpr_size contains only value 255. */ + int offset = cfun->machine->frame.unaligned_saved_varargs_size; + nextarg = plus_constant (GET_MODE (nextarg), nextarg, -offset); + + convert_move (va_r, nextarg, 0); +} + /* Implement TARGET_EXPAND_BUILTIN_VA_START. */ + static void -aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) +aarch64_expand_builtin_va_start (tree valist, rtx nextarg) { + if (TARGET_AARCH64_MS_ABI) + return aarch64_ms_variadic_abi_expand_builtin_va_start (valist, nextarg); + const CUMULATIVE_ARGS *cum; tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; tree stack, grtop, vrtop, groff, vroff; @@ -21775,8 +22064,9 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) cum = &crtl->args.info; if (cfun->va_list_gpr_size) - gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD, - cfun->va_list_gpr_size); + gr_save_area_size = MIN ((num_pcs_arg_regs (cum->pcs_variant) + - cum->aapcs_ncrn) + * UNITS_PER_WORD, cfun->va_list_gpr_size); if (cfun->va_list_fpr_size) vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG, cfun->va_list_fpr_size); @@ -21846,6 +22136,7 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ +#if TARGET_AARCH64_MS_ABI == 0 static tree aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, gimple_seq *post_p ATTRIBUTE_UNUSED) @@ -22138,6 +22429,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, return addr; } +#endif /* Implement TARGET_SETUP_INCOMING_VARARGS. */ @@ -22161,13 +22453,15 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, /* Found out how many registers we need to save. Honor tree-stdvar analysis results. */ if (cfun->va_list_gpr_size) - gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn, + gr_saved = MIN (num_pcs_arg_regs (local_cum.pcs_variant) + - local_cum.aapcs_ncrn, cfun->va_list_gpr_size / UNITS_PER_WORD); if (cfun->va_list_fpr_size) vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn, cfun->va_list_fpr_size / UNITS_PER_VREG); - if (!TARGET_FLOAT) + /* Windows variadic function calls ABI never uses vector registers. 
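+     (For example, in a call such as printf ("%f", 1.0) the double is
+     passed in x1 rather than v0 under this ABI.)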
*/ + if (TARGET_AARCH64_MS_ABI || !TARGET_FLOAT) { gcc_assert (local_cum.aapcs_nvrn == 0); vr_saved = 0; @@ -22185,8 +22479,22 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, mem = gen_frame_mem (BLKmode, ptr); set_mem_alias_set (mem, get_varargs_alias_set ()); - move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM, - mem, gr_saved); + /* For preserve_none pcs we can't use move_block_from_reg as the + argument passing register order is not consecutive. */ + if (local_cum.pcs_variant == ARM_PCS_PRESERVE_NONE) + { + for (int i = 0; i < gr_saved; ++i) + { + rtx tem = operand_subword (mem, i, 1, BLKmode); + gcc_assert (tem); + int reg = get_pcs_arg_reg (local_cum.pcs_variant, + local_cum.aapcs_ncrn + i); + emit_move_insn (tem, gen_rtx_REG (word_mode, reg)); + } + } + else + move_block_from_reg (R0_REGNUM + local_cum.aapcs_ncrn, mem, + gr_saved); } if (vr_saved > 0) { @@ -22218,8 +22526,9 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, /* We don't save the size into *PRETEND_SIZE because we want to avoid any complication of having crtl->args.pretend_args_size changed. */ + cfun->machine->frame.unaligned_saved_varargs_size = gr_saved * UNITS_PER_WORD; cfun->machine->frame.saved_varargs_size - = (ROUND_UP (gr_saved * UNITS_PER_WORD, + = (ROUND_UP (cfun->machine->frame.unaligned_saved_varargs_size, STACK_BOUNDARY / BITS_PER_UNIT) + vr_saved * UNITS_PER_VREG); } @@ -23006,14 +23315,35 @@ aarch64_autovectorize_vector_modes (vector_modes *modes, bool) return flags; } +/* Implement TARGET_CONVERT_TO_TYPE. Convert EXPR to TYPE. */ + +static tree +aarch64_convert_to_type (tree type, tree expr) +{ + /* If TYPE is a non-standard boolean type invented by the target, check if + EXPR can be converted to TYPE. */ + if (TREE_CODE (type) == BOOLEAN_TYPE + && TREE_CODE (TREE_TYPE (expr)) == BOOLEAN_TYPE + && !VECTOR_TYPE_P (type) + && !VECTOR_TYPE_P (TREE_TYPE (expr)) + && TYPE_CANONICAL (type) != TYPE_CANONICAL (TREE_TYPE (expr))) + return build1 (VIEW_CONVERT_EXPR, type, expr); + + /* Use standard rules. */ + return NULL_TREE; +} + /* Implement TARGET_MANGLE_TYPE. */ static const char * aarch64_mangle_type (const_tree type) { /* The AArch64 ABI documents say that "__va_list" has to be - mangled as if it is in the "std" namespace. */ - if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) + mangled as if it is in the "std" namespace. + The Windows Arm64 ABI uses just an address of the first variadic + argument. */ + if (!TARGET_AARCH64_MS_ABI + && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) return "St9__va_list"; /* Half-precision floating point types. */ @@ -25492,7 +25822,7 @@ aarch64_is_variant_pcs (tree fndecl) { /* Check for ABIs that preserve more registers than usual. */ arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id (); - if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE) + if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE || pcs == ARM_PCS_PRESERVE_NONE) return true; /* Check for ABIs that allow PSTATE.SM to be 1 on entry. */ @@ -25661,6 +25991,28 @@ aarch64_post_cfi_startproc (FILE *f, tree ignored ATTRIBUTE_UNUSED) asm_fprintf (f, "\t.cfi_b_key_frame\n"); } +/* Implement TARGET_STRICT_ARGUMENT_NAMING. + + Return true if the location where a function argument is passed + depends on whether or not it is a named argument. + + For Windows ABI of variadic function calls, treat the named arguments as + unnamed as they are handled the same way as variadic arguments. 
*/
+
+static bool
+aarch64_variadic_abi_strict_argument_naming (cumulative_args_t pcum_v)
+{
+  if (!TARGET_AARCH64_MS_ABI)
+    return hook_bool_CUMULATIVE_ARGS_true (pcum_v);
+
+  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+
+  if (pcum->pcs_variant == ARM_PCS_MS_VARIADIC)
+    return false;
+
+  return hook_bool_CUMULATIVE_ARGS_true (pcum_v);
+}
+
 /* Implements TARGET_ASM_FILE_START.  Output the assembly header.  */
 
 static void
@@ -28305,6 +28657,12 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
   struct expand_operand ops[6];
   int aarch64_cond;
 
+  /* Exit early for modes that are not handled, to avoid the O(n^2) part
+     of expand_operands.  */
+  op_mode = TYPE_MODE (TREE_TYPE (treeop0));
+  if (!(op_mode == QImode || op_mode == HImode || op_mode == SImode
+	|| op_mode == DImode || op_mode == SFmode || op_mode == DFmode))
+    return NULL_RTX;
+
   push_to_sequence (*prep_seq);
   expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
 
@@ -29609,6 +29967,9 @@ aarch64_scalar_mode_supported_p (scalar_mode mode)
   if (DECIMAL_FLOAT_MODE_P (mode))
     return default_decimal_float_supported_p ();
 
+  if (mode == TFmode)
+    return true;
+
   return ((mode == HFmode || mode == BFmode)
 	  ? true
 	  : default_scalar_mode_supported_p (mode));
@@ -29699,7 +30060,7 @@ aarch64_bitint_type_info (int n, struct bitint_info *info)
 static machine_mode
 aarch64_c_mode_for_floating_type (enum tree_index ti)
 {
-  if (ti == TI_LONG_DOUBLE_TYPE)
+  if (TARGET_LONG_DOUBLE_128 && ti == TI_LONG_DOUBLE_TYPE)
     return TFmode;
   return default_mode_for_floating_type (ti);
 }
@@ -30223,6 +30584,8 @@ aarch64_comp_type_attributes (const_tree type1, const_tree type2)
   if (!check_attr ("gnu", "aarch64_vector_pcs"))
     return 0;
+  if (!check_attr ("gnu", "preserve_none"))
+    return 0;
   if (!check_attr ("gnu", "indirect_return"))
     return 0;
   if (!check_attr ("gnu", "Advanced SIMD type"))
@@ -30299,11 +30662,58 @@ aarch64_stack_protect_guard (void)
   return NULL_TREE;
 }
 
-/* Implement TARGET_INVALID_UNARY_OP.  */
+
+static const char *
+aarch64_valid_vector_boolean_op (int code)
+{
+  switch ((enum tree_code)code)
+    {
+    case PREINCREMENT_EXPR:
+      return N_ ("preincrement operation not permitted on svbool_t");
+    case PREDECREMENT_EXPR:
+      return N_ ("predecrement operation not permitted on svbool_t");
+    case POSTINCREMENT_EXPR:
+      return N_ ("postincrement operation not permitted on svbool_t");
+    case POSTDECREMENT_EXPR:
+      return N_ ("postdecrement operation not permitted on svbool_t");
+    case NEGATE_EXPR:
+      return N_ ("negation operation not permitted on svbool_t");
+    case PLUS_EXPR:
+      return N_ ("plus operation not permitted on svbool_t");
+    case MINUS_EXPR:
+      return N_ ("minus operation not permitted on svbool_t");
+    case MULT_EXPR:
+      return N_ ("multiply operation not permitted on svbool_t");
+    case TRUNC_DIV_EXPR:
+      return N_ ("divide operation not permitted on svbool_t");
+    case LSHIFT_EXPR:
+    case RSHIFT_EXPR:
+      return N_ ("shift operation not permitted on svbool_t");
+    case LT_EXPR:
+    case LE_EXPR:
+    case GT_EXPR:
+    case GE_EXPR:
+      return N_ ("only == and != operations permitted on svbool_t");
+    case ARRAY_REF:
+      return N_ ("subscript operation not supported on svbool_t");
+    default:
+      /* Operation permitted.  */
+      return NULL;
+    }
+}
+
+/* Implement TARGET_INVALID_UNARY_OP.
+   Return the diagnostic message string if the unary operation OP is
+   not permitted on TYPE, NULL otherwise.
*/ static const char * aarch64_invalid_unary_op (int op, const_tree type) { + if (VECTOR_BOOLEAN_TYPE_P (type) + && !TYPE_INDIVISIBLE_P (type) + && aarch64_sve::builtin_type_p (type)) + return aarch64_valid_vector_boolean_op (op); + /* Reject all single-operand operations on __mfp8 except for &. */ if (TYPE_MAIN_VARIANT (type) == aarch64_mfp8_type_node && op != ADDR_EXPR) return N_ ("operation not permitted on type %<mfloat8_t%>"); @@ -30312,19 +30722,29 @@ aarch64_invalid_unary_op (int op, const_tree type) return NULL; } -/* Implement TARGET_INVALID_BINARY_OP. */ +/* Implement TARGET_INVALID_BINARY_OP. + Return the diagnostic message string if the binary operation OP is + not permitted on TYPE1 and TYPE2, NULL otherwise. */ static const char * -aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, +aarch64_invalid_binary_op (int op, const_tree type1, const_tree type2) { if (VECTOR_TYPE_P (type1) && VECTOR_TYPE_P (type2) && !TYPE_INDIVISIBLE_P (type1) - && !TYPE_INDIVISIBLE_P (type2) - && (aarch64_sve::builtin_type_p (type1) + && !TYPE_INDIVISIBLE_P (type2)) + { + if ((aarch64_sve::builtin_type_p (type1) != aarch64_sve::builtin_type_p (type2))) - return N_("cannot combine GNU and SVE vectors in a binary operation"); + return N_("cannot combine GNU and SVE vectors in a binary operation"); + + if (aarch64_sve::builtin_type_p (type1) + && aarch64_sve::builtin_type_p (type2) + && VECTOR_BOOLEAN_TYPE_P (type1) + && VECTOR_BOOLEAN_TYPE_P (type2)) + return aarch64_valid_vector_boolean_op (op); + } /* Reject all 2-operand operations on __mfp8. */ if (TYPE_MAIN_VARIANT (type1) == aarch64_mfp8_type_node @@ -30609,6 +31029,18 @@ aarch64_indirect_call_asm (rtx addr) return ""; } +/* Generate assembly for AArch64 indirect branch instruction. ADDR is the + target address register. Returns any additional barrier instructions + needed for SLS (Straight Line Speculation) mitigation. */ + +const char * +aarch64_indirect_branch_asm (rtx addr) +{ + gcc_assert (REG_P (addr)); + output_asm_insn ("br\t%0", &addr); + return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); +} + /* Emit the assembly instruction to load the thread pointer into DEST. Select between different tpidr_elN registers depending on -mtp= setting. */ @@ -32196,6 +32628,8 @@ aarch64_test_sve_folding () } } +extern void aarch64_json_tunings_tests (); + /* Run all target-specific selftests. 
*/ static void @@ -32205,6 +32639,7 @@ aarch64_run_selftests (void) aarch64_test_fractional_cost (); aarch64_test_sysreg_encoding_clashes (); aarch64_test_sve_folding (); + aarch64_json_tunings_tests (); } } // namespace selftest @@ -32335,6 +32770,21 @@ aarch64_run_selftests (void) #undef TARGET_EXPAND_BUILTIN_VA_START #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start +#if TARGET_AARCH64_MS_ABI == 1 +#undef TARGET_ENUM_VA_LIST_P +#define TARGET_ENUM_VA_LIST_P aarch64_ms_variadic_abi_enum_va_list + +#undef TARGET_FN_ABI_VA_LIST +#define TARGET_FN_ABI_VA_LIST aarch64_ms_variadic_abi_fn_abi_va_list + +#undef TARGET_CANONICAL_VA_LIST_TYPE +#define TARGET_CANONICAL_VA_LIST_TYPE \ + aarch64_ms_variadic_abi_canonical_va_list_type + +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES aarch64_arg_partial_bytes +#endif + #undef TARGET_FOLD_BUILTIN #define TARGET_FOLD_BUILTIN aarch64_fold_builtin @@ -32373,8 +32823,10 @@ aarch64_run_selftests (void) #undef TARGET_GIMPLE_FOLD_BUILTIN #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin +#if TARGET_AARCH64_MS_ABI == 0 #undef TARGET_GIMPLIFY_VA_ARG_EXPR #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr +#endif #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS aarch64_init_builtins @@ -32412,6 +32864,9 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_INVALID_BINARY_OP #define TARGET_INVALID_BINARY_OP aarch64_invalid_binary_op +#undef TARGET_INVALID_UNARY_OP +#define TARGET_INVALID_UNARY_OP aarch64_invalid_unary_op + #undef TARGET_VERIFY_TYPE_CONTEXT #define TARGET_VERIFY_TYPE_CONTEXT aarch64_verify_type_context @@ -32602,6 +33057,9 @@ aarch64_libgcc_floating_mode_supported_p #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ aarch64_autovectorize_vector_modes +#undef TARGET_CONVERT_TO_TYPE +#define TARGET_CONVERT_TO_TYPE aarch64_convert_to_type + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \ aarch64_atomic_assign_expand_fenv @@ -32790,7 +33248,8 @@ aarch64_libgcc_floating_mode_supported_p #define TARGET_ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc #undef TARGET_STRICT_ARGUMENT_NAMING -#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true +#define TARGET_STRICT_ARGUMENT_NAMING \ + aarch64_variadic_abi_strict_argument_naming #undef TARGET_MODE_EMIT #define TARGET_MODE_EMIT aarch64_mode_emit diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 2cd929d..5a1d5a9 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -696,6 +696,31 @@ through +ssve-fp8dot2. */ #define NUM_FP_ARG_REGS 8 #define NUM_PR_ARG_REGS 4 +/* The argument passing regs for preserve_none pcs. 
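+   For illustration only: with the ordering below, a declaration such as
+     void f (int, int) __attribute__((preserve_none));
+   takes its first two arguments in x20 and x21 rather than x0 and x1.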
*/
+#if TARGET_AARCH64_MS_ABI
+#define NUM_PRESERVE_NONE_ARG_REGS 23
+#define PRESERVE_NONE_REGISTERS \
+{ \
+  R20_REGNUM, R21_REGNUM, R22_REGNUM, R23_REGNUM, R24_REGNUM, R25_REGNUM,\
+  R26_REGNUM, R27_REGNUM, R28_REGNUM,\
+  R0_REGNUM, R1_REGNUM, R2_REGNUM, R3_REGNUM, R4_REGNUM, R5_REGNUM,\
+  R6_REGNUM, R7_REGNUM,\
+  R10_REGNUM, R11_REGNUM, R12_REGNUM, R13_REGNUM, R14_REGNUM, R9_REGNUM\
+}
+#else
+#define NUM_PRESERVE_NONE_ARG_REGS 24
+#define PRESERVE_NONE_REGISTERS \
+{ \
+  R20_REGNUM, R21_REGNUM, R22_REGNUM, R23_REGNUM, R24_REGNUM, R25_REGNUM,\
+  R26_REGNUM, R27_REGNUM, R28_REGNUM,\
+  R0_REGNUM, R1_REGNUM, R2_REGNUM, R3_REGNUM, R4_REGNUM, R5_REGNUM,\
+  R6_REGNUM, R7_REGNUM,\
+  R10_REGNUM, R11_REGNUM, R12_REGNUM, R13_REGNUM, R14_REGNUM, R9_REGNUM,\
+  R15_REGNUM\
+}
+#endif
+
+
 /* A Homogeneous Floating-Point or Short-Vector Aggregate may have at most
    four members.  */
 #define HA_MAX_NUM_FLDS 4
@@ -920,6 +945,11 @@ enum reg_class
 /* CPU/ARCH option handling.  */
 #include "config/aarch64/aarch64-opts.h"
 
+/* Long double is stored in 128 bits by default.  */
+#ifndef TARGET_LONG_DOUBLE_128
+#define TARGET_LONG_DOUBLE_128 1
+#endif
+
 /* If there is no CPU defined at configure, use generic as default.  */
 #ifndef TARGET_CPU_DEFAULT
 # define TARGET_CPU_DEFAULT AARCH64_CPU_generic_armv8_a
@@ -987,6 +1017,9 @@ struct GTY (()) aarch64_frame
      STACK_BOUNDARY.  */
   HOST_WIDE_INT saved_varargs_size;
 
+  /* The same as above except it is the original unaligned stack size.  */
+  HOST_WIDE_INT unaligned_saved_varargs_size;
+
   /* The number of bytes between the bottom of the static frame (the bottom
      of the outgoing arguments) and the bottom of the register save area.
     This value is always a multiple of STACK_BOUNDARY.  */
@@ -1150,6 +1183,13 @@ enum arm_pcs
   ARM_PCS_SVE,			/* For functions that pass or return
 				   values in SVE registers.  */
   ARM_PCS_TLSDESC,		/* For targets of tlsdesc calls.  */
+  ARM_PCS_PRESERVE_NONE,	/* PCS variant with no call-preserved
+				   registers except X29.  */
+  ARM_PCS_MS_VARIADIC,		/* PCS variant for Windows variadic
+				   functions.  All composites are
+				   treated alike.  SIMD and
+				   floating-point registers
+				   aren't used.  */
   ARM_PCS_UNKNOWN
 };
@@ -1533,6 +1573,9 @@ extern GTY(()) tree aarch64_fp16_ptr_type_node;
   bfloat16_type_node.  Defined in aarch64-builtins.cc.  */
 extern GTY(()) tree aarch64_bf16_ptr_type_node;
 
+/* Windows Arm64 variadic function call ABI specific va_list type node.  */
+extern GTY(()) tree ms_va_list_type_node;
+
 /* The generic unwind code in libgcc does not initialize the frame pointer.
   So in order to unwind a function using a frame pointer, the very first
   function that is unwound must save the frame pointer.  That way the frame
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 98c65a7..8beeefc 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -479,7 +479,7 @@
 ;; Q registers and is equivalent to "simd".
(define_enum "arches" [any rcpc8_4 fp fp_q base_simd nobase_simd - simd nosimd sve fp16 sme]) + simd nosimd sve fp16 sme cssc]) (define_enum_attr "arch" "arches" (const_string "any")) @@ -551,6 +551,9 @@ (and (eq_attr "arch" "fp16") (match_test "TARGET_FP_F16INST")) + (and (eq_attr "arch" "cssc") + (match_test "TARGET_CSSC")) + (and (eq_attr "arch" "sve") (match_test "TARGET_SVE")) @@ -1581,10 +1584,7 @@ "SIBLING_CALL_P (insn)" { if (which_alternative == 0) - { - output_asm_insn ("br\\t%0", operands); - return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); - } + return aarch64_indirect_branch_asm (operands[0]); return "b\\t%c0"; } [(set_attr "type" "branch, branch") @@ -1601,10 +1601,7 @@ "SIBLING_CALL_P (insn)" { if (which_alternative == 0) - { - output_asm_insn ("br\\t%1", operands); - return aarch64_sls_barrier (aarch64_harden_sls_retbr_p ()); - } + return aarch64_indirect_branch_asm (operands[1]); return "b\\t%c1"; } [(set_attr "type" "branch, branch") @@ -4488,6 +4485,66 @@ [(set_attr "type" "<su>div")] ) +;; umax (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? sum : a +;; umin (a, add (a, b)) => [sum, ovf] = adds (a, b); !ovf ? a : sum +;; ... and the commutated versions: +;; umax (a, add (b, a)) => [sum, ovf] = adds (b, a); !ovf ? sum : a +;; umin (a, add (b, a)) => [sum, ovf] = adds (b, a); !ovf ? a : sum +(define_insn_and_split "*aarch64_plus_within_<optab><mode>3_<ovf_commutate>" + [(set (match_operand:GPI 0 "register_operand" "=r") + (UMAXMIN:GPI + (plus:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")) + (match_dup ovf_commutate))) + (clobber (match_scratch:GPI 3 "=r"))] + "!TARGET_CSSC" + "#" + "&& 1" + [(parallel + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C (plus:GPI (match_dup ovf_commutate) + (match_dup <ovf_comm_opp>)) + (match_dup ovf_commutate))) + (set (match_dup 3) (plus:GPI (match_dup ovf_commutate) + (match_dup <ovf_comm_opp>)))]) + (set (match_dup 0) + (if_then_else:GPI (<ovf_add_cmp> (reg:CC_C CC_REGNUM) + (const_int 0)) + (match_dup 3) + (match_dup ovf_commutate)))] + { + if (GET_CODE (operands[3]) == SCRATCH) + operands[3] = gen_reg_rtx (<MODE>mode); + } +) + +;; umax (a, sub (a, b)) => [diff, udf] = subs (a, b); udf ? diff : a +;; umin (a, sub (a, b)) => [diff, udf] = subs (a, b); udf ? 
a : diff +(define_insn_and_split "*aarch64_minus_within_<optab><mode>3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (UMAXMIN:GPI + (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "register_operand" "r")) + (match_dup 1))) + (clobber (match_scratch:GPI 3 "=r"))] + "!TARGET_CSSC" + "#" + "&& 1" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 2))) + (set (match_dup 3) (minus:GPI (match_dup 1) (match_dup 2)))]) + (set (match_dup 0) + (if_then_else:GPI (<udf_sub_cmp> (reg:CC CC_REGNUM) + (const_int 0)) + (match_dup 3) + (match_dup 1)))] + { + if (GET_CODE (operands[3]) == SCRATCH) + operands[3] = gen_reg_rtx (<MODE>mode); + } +) + ;; ------------------------------------------------------------------- ;; Comparison insns ;; ------------------------------------------------------------------- @@ -4797,24 +4854,18 @@ [(set_attr "type" "fcsel")] ) -(define_expand "mov<mode>cc" - [(set (match_operand:ALLI 0 "register_operand") - (if_then_else:ALLI (match_operand 1 "aarch64_comparison_operator") - (match_operand:ALLI 2 "register_operand") - (match_operand:ALLI 3 "register_operand")))] +(define_expand "mov<ALLI_GPF:mode>cc" + [(set (match_operand:ALLI_GPF 0 "register_operand") + (if_then_else:ALLI_GPF (match_operand 1 "aarch64_comparison_operator_cc") + (match_operand:ALLI_GPF 2 "register_operand") + (match_operand:ALLI_GPF 3 "register_operand")))] "" { enum rtx_code code = GET_CODE (operands[1]); - if (code == UNEQ || code == LTGT) - FAIL; - rtx ccreg = XEXP (operands[1], 0); enum machine_mode ccmode = GET_MODE (ccreg); - if (GET_MODE_CLASS (ccmode) == MODE_CC) - gcc_assert (XEXP (operands[1], 1) == const0_rtx); - else if (ccmode == QImode || ccmode == HImode) - FAIL; - else + + if (GET_MODE_CLASS (ccmode) != MODE_CC) { ccreg = aarch64_gen_compare_reg (code, ccreg, XEXP (operands[1], 1)); operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); @@ -4822,60 +4873,22 @@ } ) -(define_expand "mov<GPF:mode><GPI:mode>cc" - [(set (match_operand:GPI 0 "register_operand") - (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator") - (match_operand:GPF 2 "register_operand") - (match_operand:GPF 3 "register_operand")))] - "" - { - rtx ccreg; - enum rtx_code code = GET_CODE (operands[1]); - - if (code == UNEQ || code == LTGT) - FAIL; - - ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), - XEXP (operands[1], 1)); - operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); - } -) - -(define_expand "mov<mode>cc" - [(set (match_operand:GPF 0 "register_operand") - (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator") - (match_operand:GPF 2 "register_operand") - (match_operand:GPF 3 "register_operand")))] - "" - { - rtx ccreg; - enum rtx_code code = GET_CODE (operands[1]); - - if (code == UNEQ || code == LTGT) - FAIL; - - ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), - XEXP (operands[1], 1)); - operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); - } -) - (define_expand "<neg_not_op><mode>cc" [(set (match_operand:GPI 0 "register_operand") - (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator") + (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator_cc") (NEG_NOT:GPI (match_operand:GPI 2 "register_operand")) (match_operand:GPI 3 "register_operand")))] "" { - rtx ccreg; enum rtx_code code = GET_CODE (operands[1]); - if (code == UNEQ || code == LTGT) - FAIL; - - ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), - XEXP (operands[1], 1)); - 
operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + rtx ccreg = XEXP (operands[1], 0); + enum machine_mode ccmode = GET_MODE (ccreg); + if (GET_MODE_CLASS (ccmode) != MODE_CC) + { + ccreg = aarch64_gen_compare_reg (code, ccreg, XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + } } ) @@ -4904,7 +4917,7 @@ ;; data (match_operand:ALLI 2 "register_operand" "r") ;; polynomial without leading 1 - (match_operand:ALLX 3)] + (match_operand:ALLX 3 "const_int_operand")] "" { /* If the polynomial is the same as the polynomial of crc32c* instruction, @@ -4943,7 +4956,7 @@ ;; data (match_operand:ALLI 2 "register_operand" "r") ;; polynomial without leading 1 - (match_operand:ALLX 3)] + (match_operand:ALLX 3 "const_int_operand")] "TARGET_AES && <ALLI:sizen> <= <ALLX:sizen>" { aarch64_expand_crc_using_pmull (<ALLX:MODE>mode, <ALLI:MODE>mode, @@ -5687,6 +5700,8 @@ [(set_attr "type" "logics_shift_imm")] ) +;; CLZ, CTZ, CLS, RBIT instructions. + (define_insn "clz<mode>2" [(set (match_operand:GPI 0 "register_operand" "=r") (clz:GPI (match_operand:GPI 1 "register_operand" "r")))] @@ -5695,6 +5710,35 @@ [(set_attr "type" "clz")] ) +;; Model ctz as a target instruction. +;; If TARGET_CSSC is not available, emit rbit and clz. + +(define_insn "ctz<mode>2" + [(set (match_operand:GPI 0 "register_operand") + (ctz:GPI (match_operand:GPI 1 "register_operand")))] + "" + {@ [ cons: =0, 1; attrs: type, arch, length ] + [ r , r; clz, cssc, 4 ] ctz\\t%<w>0, %<w>1 + [ r , r; clz, * , 8 ] rbit\\t%<w>0, %<w>1\;clz\\t%<w>0, %<w>0 + } +) + +(define_insn "clrsb<mode>2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))] + "" + "cls\\t%<w>0, %<w>1" + [(set_attr "type" "clz")] +) + +(define_insn "@aarch64_rbit<mode>" + [(set (match_operand:GPI 0 "register_operand" "=r") + (bitreverse:GPI (match_operand:GPI 1 "register_operand" "r")))] + "" + "rbit\\t%<w>0, %<w>1" + [(set_attr "type" "rbit")] +) + (define_expand "ffs<mode>2" [(match_operand:GPI 0 "register_operand") (match_operand:GPI 1 "register_operand")] @@ -5702,9 +5746,7 @@ { rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx); rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx); - - emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); - emit_insn (gen_clz<mode>2 (operands[0], operands[0])); + emit_insn (gen_ctz<mode>2 (operands[0], operands[1])); emit_insn (gen_csinc3<mode>_insn (operands[0], x, operands[0], const0_rtx)); DONE; } @@ -5799,40 +5841,6 @@ DONE; }) -(define_insn "clrsb<mode>2" - [(set (match_operand:GPI 0 "register_operand" "=r") - (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))] - "" - "cls\\t%<w>0, %<w>1" - [(set_attr "type" "clz")] -) - -(define_insn "@aarch64_rbit<mode>" - [(set (match_operand:GPI 0 "register_operand" "=r") - (bitreverse:GPI (match_operand:GPI 1 "register_operand" "r")))] - "" - "rbit\\t%<w>0, %<w>1" - [(set_attr "type" "rbit")] -) - -;; Split after reload into RBIT + CLZ. Since RBIT is represented as an UNSPEC -;; it is unlikely to fold with any other operation, so keep this as a CTZ -;; expression and split after reload to enable scheduling them apart if -;; needed. For TARGET_CSSC we have a single CTZ instruction that can do this. - -(define_insn_and_split "ctz<mode>2" - [(set (match_operand:GPI 0 "register_operand" "=r") - (ctz:GPI (match_operand:GPI 1 "register_operand" "r")))] - "" - { return TARGET_CSSC ? 
"ctz\\t%<w>0, %<w>1" : "#"; } - "reload_completed && !TARGET_CSSC" - [(const_int 0)] - " - emit_insn (gen_aarch64_rbit (<MODE>mode, operands[0], operands[1])); - emit_insn (gen_clz<mode>2 (operands[0], operands[0])); - DONE; -") - (define_insn "*and<mode>_compare0" [(set (reg:CC_Z CC_REGNUM) (compare:CC_Z diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt index fc3f632..6c0cbc7 100644 --- a/gcc/config/aarch64/aarch64.opt +++ b/gcc/config/aarch64/aarch64.opt @@ -193,6 +193,14 @@ mabi= Target RejectNegative Joined Enum(aarch64_abi) Var(aarch64_abi) Init(AARCH64_ABI_DEFAULT) Generate code that conforms to the specified ABI. +fdump-tuning-model= +Target Undocumented RejectNegative Negative(fdump-tuning-model=) Joined Var(fdump_tuning_model) +-fdump-tuning-model=<filename> Dump current tuning model to a JSON file. + +muser-provided-CPU= +Target Undocumented RejectNegative Negative(muser-provided-CPU=) Joined Var(muser_provided_CPU) +-muser-provided-CPU=<json-tuning-file> User specific CPU tunings. + moverride= Target RejectNegative ToLower Joined Var(aarch64_override_tune_string) Save -moverride=<string> Power users only! Override CPU optimization parameters. @@ -449,5 +457,5 @@ also try to opportunistically form writeback opportunities by folding in trailing destructive updates of the base register used by a pair. Wexperimental-fmv-target -Target Var(warn_experimental_fmv) Warning Init(1) +Target Var(warn_experimental_fmv) Warning Init(1) Undocumented This option is deprecated. diff --git a/gcc/config/aarch64/aarch64.opt.urls b/gcc/config/aarch64/aarch64.opt.urls index 993e0fc..f0087b2 100644 --- a/gcc/config/aarch64/aarch64.opt.urls +++ b/gcc/config/aarch64/aarch64.opt.urls @@ -77,8 +77,14 @@ UrlSuffix(gcc/AArch64-Options.html#index-mearly-ra) msve-vector-bits= UrlSuffix(gcc/AArch64-Options.html#index-msve-vector-bits) -mverbose-cost-dump -UrlSuffix(gcc/AArch64-Options.html#index-mverbose-cost-dump) +mautovec-preference= +UrlSuffix(gcc/AArch64-Options.html#index-mautovec-preference) + +mmax-vectorization +UrlSuffix(gcc/AArch64-Options.html#index-mmax-vectorization) + +mtrack-speculation +UrlSuffix(gcc/AArch64-Options.html#index-mtrack-speculation) mearly-ldp-fusion UrlSuffix(gcc/AArch64-Options.html#index-mearly-ldp-fusion) @@ -95,6 +101,6 @@ UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-reg) mstack-protector-guard-offset= UrlSuffix(gcc/AArch64-Options.html#index-mstack-protector-guard-offset) -Wexperimental-fmv-target -UrlSuffix(gcc/AArch64-Options.html#index-Wexperimental-fmv-target) +moutline-atomics +UrlSuffix(gcc/AArch64-Options.html#index-moutline-atomics) diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index ea4a936..d4b4afb 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -870,7 +870,13 @@ enum memmodel model = memmodel_from_int (INTVAL (operands[1])); if (is_mm_acquire (model)) return "dmb\\tishld"; + else if (is_mm_release (model)) + return "dmb\\tishld\;dmb\\tishst"; else return "dmb\\tish"; } + [(set (attr "length") + (if_then_else + (match_test "is_mm_release (memmodel_from_int (INTVAL (operands[1])))") + (const_int 8) (const_int 4)))] ) diff --git a/gcc/config/aarch64/cygming.h b/gcc/config/aarch64/cygming.h index 7e2203c..1c7f8f5 100644 --- a/gcc/config/aarch64/cygming.h +++ b/gcc/config/aarch64/cygming.h @@ -205,7 +205,10 @@ still needed for compilation. 
*/ #define SUBTARGET_ATTRIBUTE_TABLE \ { "selectany", 0, 0, true, false, false, false, \ - mingw_handle_selectany_attribute, NULL } + mingw_handle_selectany_attribute, NULL }, \ + { "ms_abi", 0, 0, false, true, true, true, \ + aarch64_handle_ms_abi_attribute, NULL }, \ + { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL } #undef SUB_TARGET_RECORD_STUB #define SUB_TARGET_RECORD_STUB(NAME, DECL) mingw_pe_record_stub((NAME), \ diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc index 0333746..be98c5b 100644 --- a/gcc/config/aarch64/driver-aarch64.cc +++ b/gcc/config/aarch64/driver-aarch64.cc @@ -368,18 +368,30 @@ host_detect_local_cpu (int argc, const char **argv) continue; } + /* This may be a multi-token feature string. We need to match + all parts in one of the "|" separated sublists. */ bool enabled = true; - - /* This may be a multi-token feature string. We need - to match all parts, which could be in any order. */ - std::set<std::string> tokens; - split_words (val, tokens); - std::set<std::string>::iterator it; - - /* Iterate till the first feature isn't found or all of them - are found. */ - for (it = tokens.begin (); enabled && it != tokens.end (); ++it) - enabled = enabled && features.count (*it); + size_t cur = 0; + while (cur < val.length ()) + { + size_t end = val.find_first_of (" ", cur); + if (end == std::string::npos) + end = val.length (); + std::string word = val.substr (cur, end - cur); + cur = end + 1; + + if (word == "|") + { + /* If we've matched everything in the current sublist, we + can stop now. */ + if (enabled) + break; + /* Otherwise, start again with the next sublist. */ + enabled = true; + continue; + } + enabled = enabled && features.count (word); + } if (enabled) extension_flags |= aarch64_extensions[i].flag; diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 517b280..ff56885 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -65,6 +65,10 @@ ;; Iterator for all 16-bit scalar floating point modes (HF, BF) (define_mode_iterator HFBF [HF BF]) +;; Iterator for all integer modes (up to 64-bit) plus all General Purpose +;; Floating-point registers (32- and 64-bit modes). +(define_mode_iterator ALLI_GPF [ALLI GPF]) + ;; Iterator for all scalar floating point modes suitable for moving, including ;; special BF type and decimal floating point types (HF, SF, DF, TF, BF, ;; SD, DD and TD) @@ -837,6 +841,10 @@ UNSPEC_SSHLL ; Used in aarch64-simd.md. UNSPEC_USHLL ; Used in aarch64-simd.md. UNSPEC_ADDP ; Used in aarch64-simd.md. + UNSPEC_CMP_ALL ; Used in aarch64-simd.md. + UNSPEC_CMP_ANY ; Used in aarch64-simd.md. + UNSPEC_COND_CMP_ALL ; Used in aarch64-simd.md. + UNSPEC_COND_CMP_ANY ; Used in aarch64-simd.md. UNSPEC_TBL ; Used in vector permute patterns. UNSPEC_TBLQ ; Used in vector permute patterns. UNSPEC_TBX ; Used in vector permute patterns. @@ -1816,6 +1824,18 @@ (VNx4SI "v4si") (VNx4SF "v4sf") (VNx2DI "v2di") (VNx2DF "v2df")]) +;; Gives the mode of the 64-bit lowpart of an SVE vector. +(define_mode_attr V64 [(VNx16QI "V8QI") + (VNx8HI "V4HI") (VNx8HF "V4HF") (VNx8BF "V4BF") + (VNx4SI "V2SI") (VNx4SF "V2SF") + (VNx2DI "DI") (VNx2DF "DF")]) + +;; ...and again in lower case. +(define_mode_attr v64 [(VNx16QI "v8qi") + (VNx8HI "v4hi") (VNx8HF "v4hf") (VNx8BF "v4bf") + (VNx4SI "v2si") (VNx4SF "v2sf") + (VNx2DI "di") (VNx2DF "df")]) + (define_mode_attr vnx [(V4SI "vnx4si") (V2DI "vnx2di")]) ;; 64-bit container modes the inner or scalar source mode. 
@@ -2612,6 +2632,12 @@ (VNx16SI "vnx4bi") (VNx16SF "vnx4bi") (VNx8DI "vnx2bi") (VNx8DF "vnx2bi")]) +;; Map mode to suffix for using an SVE comparison +(define_mode_attr sve_cmp_suff [(V8QI "_int") (V16QI "_int") + (V4HI "_int") (V8HI "_int") (V2SI "_int") + (V4SI "_int") (V2DI "_int") + (V2SF "_float") (V4SF "_float") (V2DF "_float")]) + (define_mode_attr VDOUBLE [(VNx16QI "VNx32QI") (VNx8HI "VNx16HI") (VNx8HF "VNx16HF") (VNx8BF "VNx16BF") @@ -2827,6 +2853,8 @@ (define_code_iterator FMAXMIN [smax smin]) +(define_code_iterator UMAXMIN [umax umin]) + ;; Signed and unsigned max operations. (define_code_iterator USMAX [smax umax]) @@ -3115,6 +3143,9 @@ (define_code_attr maxminand [(smax "bic") (smin "and")]) +(define_code_attr ovf_add_cmp [(umax "geu") (umin "ltu")]) +(define_code_attr udf_sub_cmp [(umax "ltu") (umin "geu")]) + ;; MLA/MLS attributes. (define_code_attr as [(ss_plus "a") (ss_minus "s")]) @@ -3272,6 +3303,9 @@ (define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD]) +(define_int_iterator CBRANCH_CMP [UNSPEC_CMP_ALL UNSPEC_CMP_ANY]) +(define_int_iterator COND_CBRANCH_CMP [UNSPEC_COND_CMP_ALL UNSPEC_COND_CMP_ANY]) + (define_int_iterator BSL_DUP [1 2]) (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT]) @@ -4215,7 +4249,16 @@ (UNSPEC_COND_SCVTF "float") (UNSPEC_COND_SMAX "smax") (UNSPEC_COND_SMIN "smin") - (UNSPEC_COND_UCVTF "floatuns")]) + (UNSPEC_COND_UCVTF "floatuns") + (UNSPEC_CMP_ALL "vec_cbranch_all") + (UNSPEC_CMP_ANY "vec_cbranch_any") + (UNSPEC_COND_CMP_ALL "cond_vec_cbranch_all") + (UNSPEC_COND_CMP_ANY "cond_vec_cbranch_any")]) + +(define_int_attr cbranch_op [(UNSPEC_CMP_ALL "EQ") + (UNSPEC_CMP_ANY "NE") + (UNSPEC_COND_CMP_ALL "EQ") + (UNSPEC_COND_CMP_ANY "NE")]) (define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan") (UNSPEC_FMAXNM "fmax") @@ -5126,3 +5169,7 @@ (UNSPEC_F2CVT "f2cvt") (UNSPEC_F1CVTLT "f1cvtlt") (UNSPEC_F2CVTLT "f2cvtlt")]) + +;; Operand numbers for commutative operations +(define_int_iterator ovf_commutate [1 2]) +(define_int_attr ovf_comm_opp [(1 "2") (2 "1")]) diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 42304ce..de1d7d8 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -46,6 +46,10 @@ (and (match_code "const_int") (match_test "op == CONST0_RTX (mode)"))) +(define_predicate "const0_to_1_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 1)"))) + (define_predicate "const_0_to_7_operand" (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) @@ -459,9 +463,31 @@ return aarch64_get_condition_code (op) >= 0; }) +(define_predicate "aarch64_comparison_operator_cc" + (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered, + ordered,unlt,unle,unge,ungt") +{ + rtx ccreg = XEXP (op, 0); + enum machine_mode ccmode = GET_MODE (ccreg); + + if (GET_MODE_CLASS (ccmode) == MODE_CC) + gcc_assert (XEXP (op, 1) == const0_rtx); + else if (ccmode == QImode || ccmode == HImode) + return false; + + return true; +}) + (define_special_predicate "aarch64_equality_operator" (match_code "eq,ne")) +(define_special_predicate "aarch64_cbranch_compare_operation" + (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered, + ordered,unlt,unle,unge,ungt") +{ + return TARGET_SIMD; +}) + (define_special_predicate "aarch64_carry_operation" (match_code "ltu,geu") { diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 index 71242f0..5df4dd2 100644 --- a/gcc/config/aarch64/t-aarch64 +++ b/gcc/config/aarch64/t-aarch64 @@ -20,6 +20,7 @@ TM_H += 
$(srcdir)/config/aarch64/aarch64-fusion-pairs.def \ $(srcdir)/config/aarch64/aarch64-tuning-flags.def \ + $(srcdir)/config/aarch64/aarch64-tuning-enums.def \ $(srcdir)/config/aarch64/aarch64-option-extensions.def \ $(srcdir)/config/aarch64/aarch64-cores.def \ $(srcdir)/config/aarch64/aarch64-isa-modes.def \ @@ -47,6 +48,12 @@ else endif $(STAMP) s-aarch64-tune-md +# Regenerate the JSON tuning files if the schema has changed +$(srcdir)/config/aarch64/aarch64-json-tunings-%-generated.inc: \ +$(srcdir)/config/aarch64/aarch64-json-schema.h \ +$(srcdir)/config/aarch64/aarch64-generate-json-tuning-routines.py + $(PYTHON) $(srcdir)/config/aarch64/aarch64-generate-json-tuning-routines.py --generate-only $* + s-mddeps: s-aarch64-tune-md aarch64-builtins.o: $(srcdir)/config/aarch64/aarch64-builtins.cc $(CONFIG_H) \ @@ -210,6 +217,27 @@ aarch64-sched-dispatch.o: $(srcdir)/config/aarch64/aarch64-sched-dispatch.cc \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/aarch64/aarch64-sched-dispatch.cc +aarch64-json-tunings-printer.o: $(srcdir)/config/aarch64/aarch64-json-tunings-printer.cc \ + $(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(TM_H) $(DIAGNOSTIC_CORE_H) \ + $(PRETTY_PRINT_H) json.h \ + $(srcdir)/config/aarch64/aarch64-json-tunings-printer.h \ + $(srcdir)/config/aarch64/aarch64-json-tunings-printer-generated.inc \ + $(srcdir)/config/aarch64/aarch64-protos.h \ + $(srcdir)/config/arm/aarch-common-protos.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-json-tunings-printer.cc + +aarch64-json-tunings-parser.o: $(srcdir)/config/aarch64/aarch64-json-tunings-parser.cc \ + $(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(TM_H) $(DIAGNOSTIC_CORE_H) \ + json-parsing.h \ + $(srcdir)/config/aarch64/aarch64-json-schema.h \ + $(srcdir)/config/aarch64/aarch64-json-tunings-parser.h \ + $(srcdir)/config/aarch64/aarch64-json-tunings-parser-generated.inc \ + $(srcdir)/config/aarch64/aarch64-protos.h \ + $(srcdir)/config/arm/aarch-common-protos.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-json-tunings-parser.cc + comma=, MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) diff --git a/gcc/config/aarch64/t-aarch64-mingw b/gcc/config/aarch64/t-aarch64-mingw new file mode 100644 index 0000000..fea7ae5 --- /dev/null +++ b/gcc/config/aarch64/t-aarch64-mingw @@ -0,0 +1,25 @@ +# Windows specific ABI for AArch64 architecture. +# Copyright (C) 2025 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. 
+ +aarch64-abi-ms.o: \ + $(srcdir)/config/aarch64/aarch64-abi-ms.cc \ + $(TREE_H) + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/aarch64/aarch64-abi-ms.cc diff --git a/gcc/config/alpha/alpha.opt b/gcc/config/alpha/alpha.opt index 3c1320b..31f4cb5 100644 --- a/gcc/config/alpha/alpha.opt +++ b/gcc/config/alpha/alpha.opt @@ -27,7 +27,7 @@ Target Mask(FPREGS) Use fp registers. mgas -Target Ignore +Target Ignore Undocumented Does nothing. Preserved for backward compatibility. mieee-conformant diff --git a/gcc/config/alpha/alpha.opt.urls b/gcc/config/alpha/alpha.opt.urls index 9361587..df814cd 100644 --- a/gcc/config/alpha/alpha.opt.urls +++ b/gcc/config/alpha/alpha.opt.urls @@ -1,7 +1,7 @@ ; Autogenerated by regenerate-opt-urls.py from gcc/config/alpha/alpha.opt and generated HTML msoft-float -UrlSuffix(gcc/DEC-Alpha-Options.html#index-msoft-float-2) +UrlSuffix(gcc/DEC-Alpha-Options.html#index-msoft-float-1) ; skipping UrlSuffix for 'mgas' due to finding no URLs @@ -56,9 +56,14 @@ UrlSuffix(gcc/DEC-Alpha-Options.html#index-msmall-text) mlarge-text UrlSuffix(gcc/DEC-Alpha-Options.html#index-mlarge-text) -; skipping UrlSuffix for 'mlong-double-128' due to finding no URLs +mtls-kernel +UrlSuffix(gcc/DEC-Alpha-Options.html#index-mtls-kernel) -; skipping UrlSuffix for 'mlong-double-64' due to finding no URLs +mlong-double-128 +UrlSuffix(gcc/DEC-Alpha-Options.html#index-mlong-double-128) + +mlong-double-64 +UrlSuffix(gcc/DEC-Alpha-Options.html#index-mlong-double-64) mcpu= UrlSuffix(gcc/DEC-Alpha-Options.html#index-mcpu-4) @@ -78,5 +83,6 @@ UrlSuffix(gcc/DEC-Alpha-Options.html#index-mtrap-precision) mmemory-latency= UrlSuffix(gcc/DEC-Alpha-Options.html#index-mmemory-latency) -; skipping UrlSuffix for 'mtls-size=' due to finding no URLs +mtls-size= +UrlSuffix(gcc/DEC-Alpha-Options.html#index-mtls-size-1) diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index bb5db97..5c34d9c 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -6705,7 +6705,7 @@ arc_cannot_force_const_mem (machine_mode mode, rtx x) enum arc_builtin_id { -#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \ +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK, ATTRS) \ ARC_BUILTIN_ ## NAME, #include "builtins.def" #undef DEF_BUILTIN @@ -6723,7 +6723,7 @@ struct GTY(()) arc_builtin_description static GTY(()) struct arc_builtin_description arc_bdesc[ARC_BUILTIN_COUNT] = { -#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \ +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK, ATTRS) \ { (enum insn_code) CODE_FOR_ ## ICODE, N_ARGS, NULL_TREE }, #include "builtins.def" #undef DEF_BUILTIN @@ -6855,8 +6855,11 @@ arc_init_builtins (void) = build_function_type_list (long_long_integer_type_node, V2SI_type_node, V2HI_type_node, NULL_TREE); + /* Create const attribute for mathematical functions. */ + tree attr_const = tree_cons (get_identifier ("const"), NULL, NULL); + /* Add the builtins. 
*/ -#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \ +#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK, ATTRS) \ { \ int id = ARC_BUILTIN_ ## NAME; \ const char *Name = "__builtin_arc_" #NAME; \ @@ -6866,7 +6869,7 @@ arc_init_builtins (void) if (MASK) \ arc_bdesc[id].fndecl \ = add_builtin_function (arc_tolower(name, Name), TYPE, id, \ - BUILT_IN_MD, NULL, NULL_TREE); \ + BUILT_IN_MD, NULL, ATTRS); \ } #include "builtins.def" #undef DEF_BUILTIN diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 8f7e537..2b16ac5 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -3555,7 +3555,14 @@ archs4x, archs4xd" [(set (match_operand:SI 0 "dest_reg_operand" "") (ANY_SHIFT_ROTATE:SI (match_operand:SI 1 "register_operand" "") (match_operand:SI 2 "nonmemory_operand" "")))] - "") + "" +{ + if (!TARGET_BARREL_SHIFTER && operands[2] != const1_rtx) + { + emit_insn (gen_<insn>si3_loop (operands[0], operands[1], operands[2])); + DONE; + } +}) ; asl, asr, lsr patterns: ; There is no point in including an 'I' alternative since only the lowest 5 @@ -3654,35 +3661,23 @@ archs4x, archs4xd" [(set_attr "type" "shift") (set_attr "length" "8")]) -(define_insn_and_split "*<insn>si3_nobs" - [(set (match_operand:SI 0 "dest_reg_operand") - (ANY_SHIFT_ROTATE:SI (match_operand:SI 1 "register_operand") - (match_operand:SI 2 "nonmemory_operand")))] +(define_insn_and_split "<insn>si3_loop" + [(set (match_operand:SI 0 "dest_reg_operand" "=r,r") + (ANY_SHIFT_ROTATE:SI (match_operand:SI 1 "register_operand" "0,0") + (match_operand:SI 2 "nonmemory_operand" "rn,Cal"))) + (clobber (reg:SI LP_COUNT)) + (clobber (reg:CC CC_REG))] "!TARGET_BARREL_SHIFTER - && operands[2] != const1_rtx - && arc_pre_reload_split ()" - "#" - "&& 1" + && operands[2] != const1_rtx" + "* return output_shift_loop (<CODE>, operands);" + "&& arc_pre_reload_split ()" [(const_int 0)] { arc_split_<insn> (operands); DONE; -}) - -;; <ANY_SHIFT_ROTATE>si3_loop appears after <ANY_SHIFT_ROTATE>si3_nobs -(define_insn "<insn>si3_loop" - [(set (match_operand:SI 0 "dest_reg_operand" "=r,r") - (ANY_SHIFT_ROTATE:SI - (match_operand:SI 1 "register_operand" "0,0") - (match_operand:SI 2 "nonmemory_operand" "rn,Cal"))) - (clobber (reg:SI LP_COUNT)) - (clobber (reg:CC CC_REG)) - ] - "!TARGET_BARREL_SHIFTER - && operands[2] != const1_rtx" - "* return output_shift_loop (<CODE>, operands);" - [(set_attr "type" "shift") - (set_attr "length" "16,20")]) +} +[(set_attr "type" "shift") + (set_attr "length" "16,20")]) ;; DImode shifts @@ -6286,15 +6281,15 @@ archs4x, archs4xd" (define_insn_and_split "*extvsi_n_0" [(set (match_operand:SI 0 "register_operand" "=r") - (sign_extract:SI (match_operand:SI 1 "register_operand" "0") + (sign_extract:SI (match_operand:SI 1 "register_operand" "r") (match_operand:QI 2 "const_int_operand") (const_int 0)))] "!TARGET_BARREL_SHIFTER && IN_RANGE (INTVAL (operands[2]), 2, (optimize_insn_for_size_p () ? 
28 : 30))" "#" - "&& 1" -[(set (match_dup 0) (and:SI (match_dup 0) (match_dup 3))) + "&& reload_completed" +[(set (match_dup 0) (and:SI (match_dup 1) (match_dup 3))) (set (match_dup 0) (xor:SI (match_dup 0) (match_dup 4))) (set (match_dup 0) (minus:SI (match_dup 0) (match_dup 4)))] { @@ -6414,6 +6409,21 @@ archs4x, archs4xd" (set_attr "length" "4") (set_attr "predicable" "no")]) +;; Match <insn>si3_loop pattern if operand 2 has become const_int 1 in the meantime +(define_insn_and_split "<insn>si3_cnt1_clobber" + [(set (match_operand:SI 0 "dest_reg_operand") + (ANY_SHIFT_ROTATE:SI (match_operand:SI 1 "register_operand") + (const_int 1))) + (clobber (reg:SI LP_COUNT)) + (clobber (reg:CC CC_REG))] + "!TARGET_BARREL_SHIFTER" + "#" + "&& arc_pre_reload_split ()" + [(set (match_dup 0) (ANY_SHIFT_ROTATE:SI (match_dup 1) (const_int 1)))] + "" +[(set_attr "type" "shift") + (set_attr "length" "4")]) + (define_peephole2 [(set (match_operand:SI 0 "register_operand" "") (zero_extract:SI (match_dup 0) diff --git a/gcc/config/arc/builtins.def b/gcc/config/arc/builtins.def index e3c5780..ae230dc 100644 --- a/gcc/config/arc/builtins.def +++ b/gcc/config/arc/builtins.def @@ -20,7 +20,7 @@ builtins defined in the ARC part of the GNU compiler. Before including this file, define a macro - DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) + DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK, ATTRS) NAME: `__builtin_arc_name' will be the user-level name of the builtin. `ARC_BUILTIN_NAME' will be the internal builtin's id. @@ -29,194 +29,196 @@ TYPE: A tree node describing the prototype of the built-in. ICODE: Name of attached insn or expander. If special treatment in arc.cc is needed to expand the built-in, use `nothing'. - MASK: CPU selector mask. */ + MASK: CPU selector mask. + ATTRS: Function attributes like "attr_const" for the `const' attribute + or "NULL_TREE" for no attribute. */ /* Special builtins. 
*/
-DEF_BUILTIN (NOP, 0, void_ftype_void, nothing, 1)
-DEF_BUILTIN (RTIE, 0, void_ftype_void, rtie, !TARGET_ARC600_FAMILY)
-DEF_BUILTIN (SYNC, 0, void_ftype_void, sync, 1)
-DEF_BUILTIN (BRK, 0, void_ftype_void, brk, 1)
-DEF_BUILTIN (SWI, 0, void_ftype_void, swi, 1)
-DEF_BUILTIN (UNIMP_S, 0, void_ftype_void, unimp_s, !TARGET_ARC600_FAMILY)
-DEF_BUILTIN (TRAP_S, 1, void_ftype_usint, trap_s, !TARGET_ARC600_FAMILY)
-DEF_BUILTIN (ALIGNED, 2, int_ftype_pcvoid_int, nothing, 1)
-DEF_BUILTIN (CLRI, 0, int_ftype_void, clri, TARGET_V2)
-DEF_BUILTIN (SLEEP, 1, void_ftype_usint, sleep, 1)
-
-DEF_BUILTIN (FLAG, 1, void_ftype_usint, flag, 1)
-DEF_BUILTIN (SR, 2, void_ftype_usint_usint, sr, 1)
-DEF_BUILTIN (KFLAG, 1, void_ftype_usint, kflag, TARGET_V2)
-DEF_BUILTIN (CORE_WRITE, 2, void_ftype_usint_usint, core_write, 1)
-DEF_BUILTIN (SETI, 1, void_ftype_int, seti, TARGET_V2)
+DEF_BUILTIN (NOP, 0, void_ftype_void, nothing, 1, NULL_TREE)
+DEF_BUILTIN (RTIE, 0, void_ftype_void, rtie, !TARGET_ARC600_FAMILY, NULL_TREE)
+DEF_BUILTIN (SYNC, 0, void_ftype_void, sync, 1, NULL_TREE)
+DEF_BUILTIN (BRK, 0, void_ftype_void, brk, 1, NULL_TREE)
+DEF_BUILTIN (SWI, 0, void_ftype_void, swi, 1, NULL_TREE)
+DEF_BUILTIN (UNIMP_S, 0, void_ftype_void, unimp_s, !TARGET_ARC600_FAMILY, NULL_TREE)
+DEF_BUILTIN (TRAP_S, 1, void_ftype_usint, trap_s, !TARGET_ARC600_FAMILY, NULL_TREE)
+DEF_BUILTIN (ALIGNED, 2, int_ftype_pcvoid_int, nothing, 1, NULL_TREE)
+DEF_BUILTIN (CLRI, 0, int_ftype_void, clri, TARGET_V2, NULL_TREE)
+DEF_BUILTIN (SLEEP, 1, void_ftype_usint, sleep, 1, NULL_TREE)
+
+DEF_BUILTIN (FLAG, 1, void_ftype_usint, flag, 1, NULL_TREE)
+DEF_BUILTIN (SR, 2, void_ftype_usint_usint, sr, 1, NULL_TREE)
+DEF_BUILTIN (KFLAG, 1, void_ftype_usint, kflag, TARGET_V2, NULL_TREE)
+DEF_BUILTIN (CORE_WRITE, 2, void_ftype_usint_usint, core_write, 1, NULL_TREE)
+DEF_BUILTIN (SETI, 1, void_ftype_int, seti, TARGET_V2, NULL_TREE)
/* Regular builtins. */
-DEF_BUILTIN (NORM, 1, int_ftype_int, clrsbsi2, TARGET_NORM)
-DEF_BUILTIN (NORMW, 1, int_ftype_short, normw, TARGET_NORM)
-DEF_BUILTIN (SWAP, 1, int_ftype_int, rotlsi2_cnt16, TARGET_SWAP)
-DEF_BUILTIN (DIVAW, 2, int_ftype_int_int, divaw, TARGET_EA_SET)
-DEF_BUILTIN (CORE_READ, 1, usint_ftype_usint, core_read, 1)
-DEF_BUILTIN (LR, 1, usint_ftype_usint, lr, 1)
-DEF_BUILTIN (FFS, 1, int_ftype_int, ffs, (TARGET_EM && TARGET_NORM) || TARGET_HS)
-DEF_BUILTIN (FLS, 1, int_ftype_int, fls, (TARGET_EM && TARGET_NORM) || TARGET_HS)
+DEF_BUILTIN (NORM, 1, int_ftype_int, clrsbsi2, TARGET_NORM, attr_const)
+DEF_BUILTIN (NORMW, 1, int_ftype_short, normw, TARGET_NORM, attr_const)
+DEF_BUILTIN (SWAP, 1, int_ftype_int, rotlsi2_cnt16, TARGET_SWAP, attr_const)
+DEF_BUILTIN (DIVAW, 2, int_ftype_int_int, divaw, TARGET_EA_SET, NULL_TREE)
+DEF_BUILTIN (CORE_READ, 1, usint_ftype_usint, core_read, 1, NULL_TREE)
+DEF_BUILTIN (LR, 1, usint_ftype_usint, lr, 1, NULL_TREE)
+DEF_BUILTIN (FFS, 1, int_ftype_int, ffs, (TARGET_EM && TARGET_NORM) || TARGET_HS, attr_const)
+DEF_BUILTIN (FLS, 1, int_ftype_int, fls, (TARGET_EM && TARGET_NORM) || TARGET_HS, attr_const)
/* ARC SIMD extension. */
/* BEGIN SIMD marker.
*/ -DEF_BUILTIN (SIMD_BEGIN, 0, void_ftype_void, nothing, 0) - -DEF_BUILTIN ( VADDAW, 2, v8hi_ftype_v8hi_v8hi, vaddaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VADDW, 2, v8hi_ftype_v8hi_v8hi, vaddw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VAVB, 2, v8hi_ftype_v8hi_v8hi, vavb_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VAVRB, 2, v8hi_ftype_v8hi_v8hi, vavrb_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VDIFAW, 2, v8hi_ftype_v8hi_v8hi, vdifaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VDIFW, 2, v8hi_ftype_v8hi_v8hi, vdifw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMAXAW, 2, v8hi_ftype_v8hi_v8hi, vmaxaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMAXW, 2, v8hi_ftype_v8hi_v8hi, vmaxw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMINAW, 2, v8hi_ftype_v8hi_v8hi, vminaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMINW, 2, v8hi_ftype_v8hi_v8hi, vminw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMULAW, 2, v8hi_ftype_v8hi_v8hi, vmulaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VMULFAW, 2, v8hi_ftype_v8hi_v8hi, vmulfaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMULFW, 2, v8hi_ftype_v8hi_v8hi, vmulfw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMULW, 2, v8hi_ftype_v8hi_v8hi, vmulw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VSUBAW, 2, v8hi_ftype_v8hi_v8hi, vsubaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VSUBW, 2, v8hi_ftype_v8hi_v8hi, vsubw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VSUMMW, 2, v8hi_ftype_v8hi_v8hi, vsummw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VAND, 2, v8hi_ftype_v8hi_v8hi, vand_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VANDAW, 2, v8hi_ftype_v8hi_v8hi, vandaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VBIC, 2, v8hi_ftype_v8hi_v8hi, vbic_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VBICAW, 2, v8hi_ftype_v8hi_v8hi, vbicaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VOR, 2, v8hi_ftype_v8hi_v8hi, vor_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VXOR, 2, v8hi_ftype_v8hi_v8hi, vxor_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VXORAW, 2, v8hi_ftype_v8hi_v8hi, vxoraw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VEQW, 2, v8hi_ftype_v8hi_v8hi, veqw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VLEW, 2, v8hi_ftype_v8hi_v8hi, vlew_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VLTW, 2, v8hi_ftype_v8hi_v8hi, vltw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VNEW, 2, v8hi_ftype_v8hi_v8hi, vnew_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR1AW, 2, v8hi_ftype_v8hi_v8hi, vmr1aw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR1W, 2, v8hi_ftype_v8hi_v8hi, vmr1w_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR2AW, 2, v8hi_ftype_v8hi_v8hi, vmr2aw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR2W, 2, v8hi_ftype_v8hi_v8hi, vmr2w_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR3AW, 2, v8hi_ftype_v8hi_v8hi, vmr3aw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR3W, 2, v8hi_ftype_v8hi_v8hi, vmr3w_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR4AW, 2, v8hi_ftype_v8hi_v8hi, vmr4aw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR4W, 2, v8hi_ftype_v8hi_v8hi, vmr4w_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR5AW, 2, v8hi_ftype_v8hi_v8hi, vmr5aw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR5W, 2, v8hi_ftype_v8hi_v8hi, vmr5w_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR6AW, 2, v8hi_ftype_v8hi_v8hi, vmr6aw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR6W, 2, v8hi_ftype_v8hi_v8hi, vmr6w_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR7AW, 2, v8hi_ftype_v8hi_v8hi, vmr7aw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMR7W, 2, v8hi_ftype_v8hi_v8hi, vmr7w_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMRB, 2, v8hi_ftype_v8hi_v8hi, vmrb_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VH264F, 2, v8hi_ftype_v8hi_v8hi, vh264f_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VH264FT, 2, v8hi_ftype_v8hi_v8hi, vh264ft_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VH264FW, 2, v8hi_ftype_v8hi_v8hi, 
vh264fw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VVC1F, 2, v8hi_ftype_v8hi_v8hi, vvc1f_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VVC1FT, 2, v8hi_ftype_v8hi_v8hi, vvc1ft_insn, TARGET_SIMD_SET) - -DEF_BUILTIN ( VBADDW, 2, v8hi_ftype_v8hi_int, vbaddw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VBMAXW, 2, v8hi_ftype_v8hi_int, vbmaxw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VBMINW, 2, v8hi_ftype_v8hi_int, vbminw_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VBMULAW, 2, v8hi_ftype_v8hi_int, vbmulaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VBMULFW, 2, v8hi_ftype_v8hi_int, vbmulfw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VBMULW, 2, v8hi_ftype_v8hi_int, vbmulw_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VBRSUBW, 2, v8hi_ftype_v8hi_int, vbrsubw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VBSUBW, 2, v8hi_ftype_v8hi_int, vbsubw_insn, TARGET_SIMD_SET) +DEF_BUILTIN (SIMD_BEGIN, 0, void_ftype_void, nothing, 0, NULL_TREE) + +DEF_BUILTIN ( VADDAW, 2, v8hi_ftype_v8hi_v8hi, vaddaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VADDW, 2, v8hi_ftype_v8hi_v8hi, vaddw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VAVB, 2, v8hi_ftype_v8hi_v8hi, vavb_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VAVRB, 2, v8hi_ftype_v8hi_v8hi, vavrb_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VDIFAW, 2, v8hi_ftype_v8hi_v8hi, vdifaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VDIFW, 2, v8hi_ftype_v8hi_v8hi, vdifw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMAXAW, 2, v8hi_ftype_v8hi_v8hi, vmaxaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMAXW, 2, v8hi_ftype_v8hi_v8hi, vmaxw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMINAW, 2, v8hi_ftype_v8hi_v8hi, vminaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMINW, 2, v8hi_ftype_v8hi_v8hi, vminw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMULAW, 2, v8hi_ftype_v8hi_v8hi, vmulaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VMULFAW, 2, v8hi_ftype_v8hi_v8hi, vmulfaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMULFW, 2, v8hi_ftype_v8hi_v8hi, vmulfw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMULW, 2, v8hi_ftype_v8hi_v8hi, vmulw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VSUBAW, 2, v8hi_ftype_v8hi_v8hi, vsubaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VSUBW, 2, v8hi_ftype_v8hi_v8hi, vsubw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VSUMMW, 2, v8hi_ftype_v8hi_v8hi, vsummw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VAND, 2, v8hi_ftype_v8hi_v8hi, vand_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VANDAW, 2, v8hi_ftype_v8hi_v8hi, vandaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VBIC, 2, v8hi_ftype_v8hi_v8hi, vbic_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VBICAW, 2, v8hi_ftype_v8hi_v8hi, vbicaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VOR, 2, v8hi_ftype_v8hi_v8hi, vor_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VXOR, 2, v8hi_ftype_v8hi_v8hi, vxor_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VXORAW, 2, v8hi_ftype_v8hi_v8hi, vxoraw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VEQW, 2, v8hi_ftype_v8hi_v8hi, veqw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VLEW, 2, v8hi_ftype_v8hi_v8hi, vlew_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VLTW, 2, v8hi_ftype_v8hi_v8hi, vltw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VNEW, 2, v8hi_ftype_v8hi_v8hi, vnew_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR1AW, 2, v8hi_ftype_v8hi_v8hi, vmr1aw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR1W, 2, v8hi_ftype_v8hi_v8hi, vmr1w_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR2AW, 2, v8hi_ftype_v8hi_v8hi, vmr2aw_insn, 
TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR2W, 2, v8hi_ftype_v8hi_v8hi, vmr2w_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR3AW, 2, v8hi_ftype_v8hi_v8hi, vmr3aw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR3W, 2, v8hi_ftype_v8hi_v8hi, vmr3w_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR4AW, 2, v8hi_ftype_v8hi_v8hi, vmr4aw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR4W, 2, v8hi_ftype_v8hi_v8hi, vmr4w_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR5AW, 2, v8hi_ftype_v8hi_v8hi, vmr5aw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR5W, 2, v8hi_ftype_v8hi_v8hi, vmr5w_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR6AW, 2, v8hi_ftype_v8hi_v8hi, vmr6aw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR6W, 2, v8hi_ftype_v8hi_v8hi, vmr6w_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR7AW, 2, v8hi_ftype_v8hi_v8hi, vmr7aw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMR7W, 2, v8hi_ftype_v8hi_v8hi, vmr7w_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMRB, 2, v8hi_ftype_v8hi_v8hi, vmrb_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VH264F, 2, v8hi_ftype_v8hi_v8hi, vh264f_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VH264FT, 2, v8hi_ftype_v8hi_v8hi, vh264ft_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VH264FW, 2, v8hi_ftype_v8hi_v8hi, vh264fw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VVC1F, 2, v8hi_ftype_v8hi_v8hi, vvc1f_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VVC1FT, 2, v8hi_ftype_v8hi_v8hi, vvc1ft_insn, TARGET_SIMD_SET, NULL_TREE) + +DEF_BUILTIN ( VBADDW, 2, v8hi_ftype_v8hi_int, vbaddw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VBMAXW, 2, v8hi_ftype_v8hi_int, vbmaxw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VBMINW, 2, v8hi_ftype_v8hi_int, vbminw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VBMULAW, 2, v8hi_ftype_v8hi_int, vbmulaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VBMULFW, 2, v8hi_ftype_v8hi_int, vbmulfw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VBMULW, 2, v8hi_ftype_v8hi_int, vbmulw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VBRSUBW, 2, v8hi_ftype_v8hi_int, vbrsubw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VBSUBW, 2, v8hi_ftype_v8hi_int, vbsubw_insn, TARGET_SIMD_SET, NULL_TREE) /* Va, Vb, Ic instructions. */ -DEF_BUILTIN ( VASRW, 2, v8hi_ftype_v8hi_int, vasrw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VSR8, 2, v8hi_ftype_v8hi_int, vsr8_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VSR8AW, 2, v8hi_ftype_v8hi_int, vsr8aw_insn, TARGET_SIMD_SET) +DEF_BUILTIN ( VASRW, 2, v8hi_ftype_v8hi_int, vasrw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VSR8, 2, v8hi_ftype_v8hi_int, vsr8_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VSR8AW, 2, v8hi_ftype_v8hi_int, vsr8aw_insn, TARGET_SIMD_SET, NULL_TREE) /* Va, Vb, u6 instructions. 
*/ -DEF_BUILTIN ( VASRRWi, 2, v8hi_ftype_v8hi_int, vasrrwi_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VASRSRWi, 2, v8hi_ftype_v8hi_int, vasrsrwi_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VASRWi, 2, v8hi_ftype_v8hi_int, vasrwi_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VASRPWBi, 2, v8hi_ftype_v8hi_int, vasrpwbi_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VASRRPWBi, 2, v8hi_ftype_v8hi_int, vasrrpwbi_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VSR8AWi, 2, v8hi_ftype_v8hi_int, vsr8awi_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VSR8i, 2, v8hi_ftype_v8hi_int, vsr8i_insn, TARGET_SIMD_SET) +DEF_BUILTIN ( VASRRWi, 2, v8hi_ftype_v8hi_int, vasrrwi_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VASRSRWi, 2, v8hi_ftype_v8hi_int, vasrsrwi_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VASRWi, 2, v8hi_ftype_v8hi_int, vasrwi_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VASRPWBi, 2, v8hi_ftype_v8hi_int, vasrpwbi_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VASRRPWBi, 2, v8hi_ftype_v8hi_int, vasrrpwbi_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VSR8AWi, 2, v8hi_ftype_v8hi_int, vsr8awi_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VSR8i, 2, v8hi_ftype_v8hi_int, vsr8i_insn, TARGET_SIMD_SET, NULL_TREE) /* Va, Vb, u8 (simm) instructions. */ -DEF_BUILTIN ( VMVAW, 2, v8hi_ftype_v8hi_int, vmvaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMVW, 2, v8hi_ftype_v8hi_int, vmvw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMVZW, 2, v8hi_ftype_v8hi_int, vmvzw_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VD6TAPF, 2, v8hi_ftype_v8hi_int, vd6tapf_insn, TARGET_SIMD_SET) +DEF_BUILTIN ( VMVAW, 2, v8hi_ftype_v8hi_int, vmvaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMVW, 2, v8hi_ftype_v8hi_int, vmvw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMVZW, 2, v8hi_ftype_v8hi_int, vmvzw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VD6TAPF, 2, v8hi_ftype_v8hi_int, vd6tapf_insn, TARGET_SIMD_SET, NULL_TREE) /* Va, rlimm, u8 (simm) instructions. */ -DEF_BUILTIN (VMOVAW, 2, v8hi_ftype_int_int, vmovaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VMOVW, 2, v8hi_ftype_int_int, vmovw_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VMOVZW, 2, v8hi_ftype_int_int, vmovzw_insn, TARGET_SIMD_SET) +DEF_BUILTIN (VMOVAW, 2, v8hi_ftype_int_int, vmovaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VMOVW, 2, v8hi_ftype_int_int, vmovw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VMOVZW, 2, v8hi_ftype_int_int, vmovzw_insn, TARGET_SIMD_SET, NULL_TREE) /* Va, Vb instructions. 
*/ -DEF_BUILTIN ( VABSAW, 1, v8hi_ftype_v8hi, vabsaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VABSW, 1, v8hi_ftype_v8hi, vabsw_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VADDSUW, 1, v8hi_ftype_v8hi, vaddsuw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VSIGNW, 1, v8hi_ftype_v8hi, vsignw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VEXCH1, 1, v8hi_ftype_v8hi, vexch1_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VEXCH2, 1, v8hi_ftype_v8hi, vexch2_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VEXCH4, 1, v8hi_ftype_v8hi, vexch4_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VUPBAW, 1, v8hi_ftype_v8hi, vupbaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VUPBW, 1, v8hi_ftype_v8hi, vupbw_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VUPSBAW, 1, v8hi_ftype_v8hi, vupsbaw_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VUPSBW, 1, v8hi_ftype_v8hi, vupsbw_insn, TARGET_SIMD_SET) +DEF_BUILTIN ( VABSAW, 1, v8hi_ftype_v8hi, vabsaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VABSW, 1, v8hi_ftype_v8hi, vabsw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VADDSUW, 1, v8hi_ftype_v8hi, vaddsuw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VSIGNW, 1, v8hi_ftype_v8hi, vsignw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VEXCH1, 1, v8hi_ftype_v8hi, vexch1_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VEXCH2, 1, v8hi_ftype_v8hi, vexch2_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VEXCH4, 1, v8hi_ftype_v8hi, vexch4_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VUPBAW, 1, v8hi_ftype_v8hi, vupbaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VUPBW, 1, v8hi_ftype_v8hi, vupbw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VUPSBAW, 1, v8hi_ftype_v8hi, vupsbaw_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VUPSBW, 1, v8hi_ftype_v8hi, vupsbw_insn, TARGET_SIMD_SET, NULL_TREE) /* SIMD special DIb, rlimm, rlimm instructions. */ -DEF_BUILTIN (VDIRUN, 2, void_ftype_int_int, vdirun_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VDORUN, 2, void_ftype_int_int, vdorun_insn, TARGET_SIMD_SET) +DEF_BUILTIN (VDIRUN, 2, void_ftype_int_int, vdirun_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VDORUN, 2, void_ftype_int_int, vdorun_insn, TARGET_SIMD_SET, NULL_TREE) /* SIMD special DIb, limm, rlimm instructions. */ -DEF_BUILTIN (VDIWR, 2, void_ftype_int_int, vdiwr_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VDOWR, 2, void_ftype_int_int, vdowr_insn, TARGET_SIMD_SET) +DEF_BUILTIN (VDIWR, 2, void_ftype_int_int, vdiwr_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VDOWR, 2, void_ftype_int_int, vdowr_insn, TARGET_SIMD_SET, NULL_TREE) /* rlimm instructions. */ -DEF_BUILTIN ( VREC, 1, void_ftype_int, vrec_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VRUN, 1, void_ftype_int, vrun_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VRECRUN, 1, void_ftype_int, vrecrun_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VENDREC, 1, void_ftype_int, vendrec_insn, TARGET_SIMD_SET) +DEF_BUILTIN ( VREC, 1, void_ftype_int, vrec_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VRUN, 1, void_ftype_int, vrun_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VRECRUN, 1, void_ftype_int, vrecrun_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VENDREC, 1, void_ftype_int, vendrec_insn, TARGET_SIMD_SET, NULL_TREE) /* Va, [Ib,u8] instructions. 
*/ -DEF_BUILTIN (VLD32WH, 3, v8hi_ftype_v8hi_int_int, vld32wh_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VLD32WL, 3, v8hi_ftype_v8hi_int_int, vld32wl_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VLD64, 3, v8hi_ftype_v8hi_int_int, vld64_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VLD32, 3, v8hi_ftype_v8hi_int_int, vld32_insn, TARGET_SIMD_SET) +DEF_BUILTIN (VLD32WH, 3, v8hi_ftype_v8hi_int_int, vld32wh_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VLD32WL, 3, v8hi_ftype_v8hi_int_int, vld32wl_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VLD64, 3, v8hi_ftype_v8hi_int_int, vld64_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VLD32, 3, v8hi_ftype_v8hi_int_int, vld32_insn, TARGET_SIMD_SET, NULL_TREE) -DEF_BUILTIN (VLD64W, 2, v8hi_ftype_int_int, vld64w_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VLD128, 2, v8hi_ftype_int_int, vld128_insn, TARGET_SIMD_SET) +DEF_BUILTIN (VLD64W, 2, v8hi_ftype_int_int, vld64w_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VLD128, 2, v8hi_ftype_int_int, vld128_insn, TARGET_SIMD_SET, NULL_TREE) -DEF_BUILTIN (VST128, 3, void_ftype_v8hi_int_int, vst128_insn, TARGET_SIMD_SET) -DEF_BUILTIN ( VST64, 3, void_ftype_v8hi_int_int, vst64_insn, TARGET_SIMD_SET) +DEF_BUILTIN (VST128, 3, void_ftype_v8hi_int_int, vst128_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN ( VST64, 3, void_ftype_v8hi_int_int, vst64_insn, TARGET_SIMD_SET, NULL_TREE) /* Va, [Ib, u8] instructions. */ -DEF_BUILTIN (VST16_N, 4, void_ftype_v8hi_int_int_int, vst16_n_insn, TARGET_SIMD_SET) -DEF_BUILTIN (VST32_N, 4, void_ftype_v8hi_int_int_int, vst32_n_insn, TARGET_SIMD_SET) +DEF_BUILTIN (VST16_N, 4, void_ftype_v8hi_int_int_int, vst16_n_insn, TARGET_SIMD_SET, NULL_TREE) +DEF_BUILTIN (VST32_N, 4, void_ftype_v8hi_int_int_int, vst32_n_insn, TARGET_SIMD_SET, NULL_TREE) -DEF_BUILTIN (VINTI, 1, void_ftype_int, vinti_insn, TARGET_SIMD_SET) +DEF_BUILTIN (VINTI, 1, void_ftype_int, vinti_insn, TARGET_SIMD_SET, NULL_TREE) /* END SIMD marker. */ -DEF_BUILTIN (SIMD_END, 0, void_ftype_void, nothing, 0) +DEF_BUILTIN (SIMD_END, 0, void_ftype_void, nothing, 0, NULL_TREE) /* ARCv2 SIMD instructions that use/clobber the accumulator reg. 
*/ -DEF_BUILTIN (QMACH, 2, long_ftype_v4hi_v4hi, qmach, TARGET_PLUS_QMACW) -DEF_BUILTIN (QMACHU, 2, long_ftype_v4hi_v4hi, qmachu, TARGET_PLUS_QMACW) -DEF_BUILTIN (QMPYH, 2, long_ftype_v4hi_v4hi, qmpyh, TARGET_PLUS_QMACW) -DEF_BUILTIN (QMPYHU, 2, long_ftype_v4hi_v4hi, qmpyhu, TARGET_PLUS_QMACW) +DEF_BUILTIN (QMACH, 2, long_ftype_v4hi_v4hi, qmach, TARGET_PLUS_QMACW, NULL_TREE) +DEF_BUILTIN (QMACHU, 2, long_ftype_v4hi_v4hi, qmachu, TARGET_PLUS_QMACW, NULL_TREE) +DEF_BUILTIN (QMPYH, 2, long_ftype_v4hi_v4hi, qmpyh, TARGET_PLUS_QMACW, NULL_TREE) +DEF_BUILTIN (QMPYHU, 2, long_ftype_v4hi_v4hi, qmpyhu, TARGET_PLUS_QMACW, NULL_TREE) -DEF_BUILTIN (DMACH, 2, int_ftype_v2hi_v2hi, dmach, TARGET_PLUS_DMPY) -DEF_BUILTIN (DMACHU, 2, int_ftype_v2hi_v2hi, dmachu, TARGET_PLUS_DMPY) -DEF_BUILTIN (DMPYH, 2, int_ftype_v2hi_v2hi, dmpyh, TARGET_PLUS_DMPY) -DEF_BUILTIN (DMPYHU, 2, int_ftype_v2hi_v2hi, dmpyhu, TARGET_PLUS_DMPY) +DEF_BUILTIN (DMACH, 2, int_ftype_v2hi_v2hi, dmach, TARGET_PLUS_DMPY, NULL_TREE) +DEF_BUILTIN (DMACHU, 2, int_ftype_v2hi_v2hi, dmachu, TARGET_PLUS_DMPY, NULL_TREE) +DEF_BUILTIN (DMPYH, 2, int_ftype_v2hi_v2hi, dmpyh, TARGET_PLUS_DMPY, NULL_TREE) +DEF_BUILTIN (DMPYHU, 2, int_ftype_v2hi_v2hi, dmpyhu, TARGET_PLUS_DMPY, NULL_TREE) -DEF_BUILTIN (DMACWH, 2, long_ftype_v2si_v2hi, dmacwh, TARGET_PLUS_QMACW) -DEF_BUILTIN (DMACWHU, 2, long_ftype_v2si_v2hi, dmacwhu, TARGET_PLUS_QMACW) +DEF_BUILTIN (DMACWH, 2, long_ftype_v2si_v2hi, dmacwh, TARGET_PLUS_QMACW, NULL_TREE) +DEF_BUILTIN (DMACWHU, 2, long_ftype_v2si_v2hi, dmacwhu, TARGET_PLUS_QMACW, NULL_TREE) -DEF_BUILTIN (VMAC2H, 2, v2si_ftype_v2hi_v2hi, vmac2h, TARGET_PLUS_MACD) -DEF_BUILTIN (VMAC2HU, 2, v2si_ftype_v2hi_v2hi, vmac2hu, TARGET_PLUS_MACD) -DEF_BUILTIN (VMPY2H, 2, v2si_ftype_v2hi_v2hi, vmpy2h, TARGET_PLUS_MACD) -DEF_BUILTIN (VMPY2HU, 2, v2si_ftype_v2hi_v2hi, vmpy2hu, TARGET_PLUS_MACD) +DEF_BUILTIN (VMAC2H, 2, v2si_ftype_v2hi_v2hi, vmac2h, TARGET_PLUS_MACD, NULL_TREE) +DEF_BUILTIN (VMAC2HU, 2, v2si_ftype_v2hi_v2hi, vmac2hu, TARGET_PLUS_MACD, NULL_TREE) +DEF_BUILTIN (VMPY2H, 2, v2si_ftype_v2hi_v2hi, vmpy2h, TARGET_PLUS_MACD, NULL_TREE) +DEF_BUILTIN (VMPY2HU, 2, v2si_ftype_v2hi_v2hi, vmpy2hu, TARGET_PLUS_MACD, NULL_TREE) /* Combined add/sub HS SIMD instructions. 
*/ -DEF_BUILTIN (VADDSUB2H, 2, v2hi_ftype_v2hi_v2hi, addsubv2hi3, TARGET_PLUS_DMPY) -DEF_BUILTIN (VSUBADD2H, 2, v2hi_ftype_v2hi_v2hi, subaddv2hi3, TARGET_PLUS_DMPY) -DEF_BUILTIN (VADDSUB, 2, v2si_ftype_v2si_v2si, addsubv2si3, TARGET_PLUS_QMACW) -DEF_BUILTIN (VSUBADD, 2, v2si_ftype_v2si_v2si, subaddv2si3, TARGET_PLUS_QMACW) -DEF_BUILTIN (VADDSUB4H, 2, v4hi_ftype_v4hi_v4hi, addsubv4hi3, TARGET_PLUS_QMACW) -DEF_BUILTIN (VSUBADD4H, 2, v4hi_ftype_v4hi_v4hi, subaddv4hi3, TARGET_PLUS_QMACW) +DEF_BUILTIN (VADDSUB2H, 2, v2hi_ftype_v2hi_v2hi, addsubv2hi3, TARGET_PLUS_DMPY, NULL_TREE) +DEF_BUILTIN (VSUBADD2H, 2, v2hi_ftype_v2hi_v2hi, subaddv2hi3, TARGET_PLUS_DMPY, NULL_TREE) +DEF_BUILTIN (VADDSUB, 2, v2si_ftype_v2si_v2si, addsubv2si3, TARGET_PLUS_QMACW, NULL_TREE) +DEF_BUILTIN (VSUBADD, 2, v2si_ftype_v2si_v2si, subaddv2si3, TARGET_PLUS_QMACW, NULL_TREE) +DEF_BUILTIN (VADDSUB4H, 2, v4hi_ftype_v4hi_v4hi, addsubv4hi3, TARGET_PLUS_QMACW, NULL_TREE) +DEF_BUILTIN (VSUBADD4H, 2, v4hi_ftype_v4hi_v4hi, subaddv4hi3, TARGET_PLUS_QMACW, NULL_TREE) diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md index a53b2ba..53e0c83 100644 --- a/gcc/config/arc/simdext.md +++ b/gcc/config/arc/simdext.md @@ -1438,11 +1438,15 @@ "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR" [(set (match_dup 0) (match_dup 2))] { - HOST_WIDE_INT intval = INTVAL (XVECEXP (operands[1], 0, 1)) << 16; - intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF; - - operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); - operands[2] = GEN_INT (trunc_int_for_mode (intval, SImode)); + int hi = TARGET_BIG_ENDIAN ? 0 : 1; + int lo = TARGET_BIG_ENDIAN ? 1 : 0; + HOST_WIDE_INT hi_val = INTVAL (XVECEXP (operands[1], 0, hi)); + HOST_WIDE_INT lo_val = INTVAL (XVECEXP (operands[1], 0, lo)); + hi_val = zext_hwi (hi_val, 16); + lo_val = zext_hwi (lo_val, 16); + HOST_WIDE_INT intval = lo_val | (hi_val << 16); + operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); + operands[2] = GEN_INT (trunc_int_for_mode (intval, SImode)); } [(set_attr "type" "move,move,load,store") (set_attr "predicable" "yes,yes,no,no") diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h index 077387b..1bafdba 100644 --- a/gcc/config/arm/aarch-common-protos.h +++ b/gcc/config/arm/aarch-common-protos.h @@ -57,33 +57,33 @@ extern bool aarch_fun_is_indirect_return (rtx_insn *); Costs may not have a negative value. */ struct alu_cost_table { - const int arith; /* ADD/SUB. */ - const int logical; /* AND/ORR/EOR/BIC, etc. */ - const int shift; /* Simple shift. */ - const int shift_reg; /* Simple shift by reg. */ - const int arith_shift; /* Additional when arith also shifts... */ - const int arith_shift_reg; /* ... and when the shift is by a reg. */ - const int log_shift; /* Additional when logic also shifts... */ - const int log_shift_reg; /* ... and when the shift is by a reg. */ - const int extend; /* Zero/sign extension. */ - const int extend_arith; /* Extend and arith. */ - const int bfi; /* Bit-field insert. */ - const int bfx; /* Bit-field extraction. */ - const int clz; /* Count Leading Zeros. */ - const int rev; /* Reverse bits/bytes. */ - const int non_exec; /* Extra cost when not executing insn. */ - const bool non_exec_costs_exec; /* True if non-execution must add the exec + int arith; /* ADD/SUB. */ + int logical; /* AND/ORR/EOR/BIC, etc. */ + int shift; /* Simple shift. */ + int shift_reg; /* Simple shift by reg. */ + int arith_shift; /* Additional when arith also shifts... */ + int arith_shift_reg; /* ... 
and when the shift is by a reg. */ + int log_shift; /* Additional when logic also shifts... */ + int log_shift_reg; /* ... and when the shift is by a reg. */ + int extend; /* Zero/sign extension. */ + int extend_arith; /* Extend and arith. */ + int bfi; /* Bit-field insert. */ + int bfx; /* Bit-field extraction. */ + int clz; /* Count Leading Zeros. */ + int rev; /* Reverse bits/bytes. */ + int non_exec; /* Extra cost when not executing insn. */ + bool non_exec_costs_exec; /* True if non-execution must add the exec cost. */ }; struct mult_cost_table { - const int simple; - const int flag_setting; /* Additional cost if multiply sets flags. */ - const int extend; - const int add; - const int extend_add; - const int idiv; + int simple; + int flag_setting; /* Additional cost if multiply sets flags. */ + int extend; + int add; + int extend_add; + int idiv; }; /* Calculations of LDM costs are complex. We assume an initial cost @@ -98,60 +98,60 @@ struct mult_cost_table */ struct mem_cost_table { - const int load; - const int load_sign_extend; /* Additional to load cost. */ - const int ldrd; /* Cost of LDRD. */ - const int ldm_1st; - const int ldm_regs_per_insn_1st; - const int ldm_regs_per_insn_subsequent; - const int loadf; /* SFmode. */ - const int loadd; /* DFmode. */ - const int load_unaligned; /* Extra for unaligned loads. */ - const int store; - const int strd; - const int stm_1st; - const int stm_regs_per_insn_1st; - const int stm_regs_per_insn_subsequent; - const int storef; /* SFmode. */ - const int stored; /* DFmode. */ - const int store_unaligned; /* Extra for unaligned stores. */ - const int loadv; /* Vector load. */ - const int storev; /* Vector store. */ + int load; + int load_sign_extend; /* Additional to load cost. */ + int ldrd; /* Cost of LDRD. */ + int ldm_1st; + int ldm_regs_per_insn_1st; + int ldm_regs_per_insn_subsequent; + int loadf; /* SFmode. */ + int loadd; /* DFmode. */ + int load_unaligned; /* Extra for unaligned loads. */ + int store; + int strd; + int stm_1st; + int stm_regs_per_insn_1st; + int stm_regs_per_insn_subsequent; + int storef; /* SFmode. */ + int stored; /* DFmode. */ + int store_unaligned; /* Extra for unaligned stores. */ + int loadv; /* Vector load. */ + int storev; /* Vector store. */ }; struct fp_cost_table { - const int div; - const int mult; - const int mult_addsub; /* Non-fused. */ - const int fma; /* Fused. */ - const int addsub; - const int fpconst; /* Immediate. */ - const int neg; /* NEG and ABS. */ - const int compare; - const int widen; /* Widen to this size. */ - const int narrow; /* Narrow from this size. */ - const int toint; - const int fromint; - const int roundint; /* V8 round to integral, remains FP format. */ + int div; + int mult; + int mult_addsub; /* Non-fused. */ + int fma; /* Fused. */ + int addsub; + int fpconst; /* Immediate. */ + int neg; /* NEG and ABS. */ + int compare; + int widen; /* Widen to this size. */ + int narrow; /* Narrow from this size. */ + int toint; + int fromint; + int roundint; /* V8 round to integral, remains FP format. */ }; struct vector_cost_table { - const int alu; - const int mult; - const int movi; - const int dup; - const int extract; + int alu; + int mult; + int movi; + int dup; + int extract; }; struct cpu_cost_table { - const struct alu_cost_table alu; - const struct mult_cost_table mult[2]; /* SImode and DImode. */ - const struct mem_cost_table ldst; - const struct fp_cost_table fp[2]; /* SFmode and DFmode. 
*/ - const struct vector_cost_table vect; + struct alu_cost_table alu; + struct mult_cost_table mult[2]; /* SImode and DImode. */ + struct mem_cost_table ldst; + struct fp_cost_table fp[2]; /* SFmode and DFmode. */ + struct vector_cost_table vect; }; rtx_insn *arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/, diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc index 3bb2566..b421cac 100644 --- a/gcc/config/arm/arm-builtins.cc +++ b/gcc/config/arm/arm-builtins.cc @@ -1449,7 +1449,7 @@ arm_init_cde_builtins (void) static void arm_init_mve_builtins (void) { - volatile unsigned int i, fcode = ARM_BUILTIN_MVE_PATTERN_START; + unsigned int i, fcode = ARM_BUILTIN_MVE_PATTERN_START; arm_init_simd_builtin_scalar_types (); arm_init_simd_builtin_types (); @@ -1685,9 +1685,7 @@ arm_init_fp16_builtins (void) arm_fp16_type_node = make_node (REAL_TYPE); TYPE_PRECISION (arm_fp16_type_node) = GET_MODE_PRECISION (HFmode); layout_type (arm_fp16_type_node); - if (arm_fp16_format) - (*lang_hooks.types.register_builtin_type) (arm_fp16_type_node, - "__fp16"); + (*lang_hooks.types.register_builtin_type) (arm_fp16_type_node, "__fp16"); } void diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc index fd0be2c..6df469d 100644 --- a/gcc/config/arm/arm-mve-builtins-base.cc +++ b/gcc/config/arm/arm-mve-builtins-base.cc @@ -1167,6 +1167,194 @@ public: } }; + +/* Map the function directly to the appropriate scalar shift builtin. */ +enum which_scalar_shift { + ss_ASRL, + ss_LSLL, + ss_SQRSHR, + ss_SQRSHRL, + ss_SQRSHRL_SAT48, + ss_SQSHL, + ss_SQSHLL, + ss_SRSHR, + ss_SRSHRL, + ss_UQRSHL, + ss_UQRSHLL, + ss_UQRSHLL_SAT48, + ss_UQSHL, + ss_UQSHLL, + ss_URSHR, + ss_URSHRL +}; + +class mve_function_scalar_shift : public function_base +{ +public: + CONSTEXPR mve_function_scalar_shift (enum which_scalar_shift shl) + : m_scalar_shift (shl) + {} + + /* Which scalar_shift builtin to map. */ + enum which_scalar_shift m_scalar_shift; + + rtx + expand (function_expander &e) const override + { + insn_code code; + + switch (m_scalar_shift) + { + case ss_ASRL: + e.args[1] = simplify_gen_subreg (QImode, e.args[1], SImode, 0); + code = CODE_FOR_mve_asrl; + break; + + case ss_LSLL: + e.args[1] = simplify_gen_subreg (QImode, e.args[1], SImode, 0); + code = CODE_FOR_mve_lsll; + break; + + case ss_SQRSHR: + code = CODE_FOR_mve_sqrshr_si; + break; + + case ss_SQRSHRL: + code = code_for_mve_sqrshrl_sat_di (SQRSHRL_64); + break; + + case ss_SQRSHRL_SAT48: + code = code_for_mve_sqrshrl_sat_di (SQRSHRL_48); + break; + + case ss_SQSHL: + code = CODE_FOR_mve_sqshl_si; + break; + + case ss_SRSHR: + code = CODE_FOR_mve_srshr_si; + break; + + case ss_UQRSHL: + code = CODE_FOR_mve_uqrshl_si; + break; + + case ss_SQSHLL: + code = CODE_FOR_mve_sqshll_di; + break; + + case ss_SRSHRL: + code = CODE_FOR_mve_srshrl_di; + break; + + case ss_UQRSHLL: + code = code_for_mve_uqrshll_sat_di (UQRSHLL_64); + break; + + case ss_UQRSHLL_SAT48: + code = code_for_mve_uqrshll_sat_di (UQRSHLL_48); + break; + + case ss_UQSHL: + code = CODE_FOR_mve_uqshl_si; + break; + + case ss_UQSHLL: + code = CODE_FOR_mve_uqshll_di; + break; + + case ss_URSHR: + code = CODE_FOR_mve_urshr_si; + break; + + case ss_URSHRL: + code = CODE_FOR_mve_urshrl_di; + break; + + default: + gcc_unreachable (); + } + + return e.use_unpred_insn (code); + } +}; + + +/* Map the function directly to mve_vpnotv16bi, and convert the result into + HImode like we do for vcmp. 
*/ +class mve_function_vpnot : public function_base +{ +public: + CONSTEXPR mve_function_vpnot (void) + {} + + rtx + expand (function_expander &e) const override + { + rtx target = e.use_unpred_insn (CODE_FOR_mve_vpnotv16bi); + rtx HItarget = gen_reg_rtx (HImode); + emit_move_insn (HItarget, gen_lowpart (HImode, target)); + return HItarget; + } +}; + + /* Map the function directly to mve_vec_set_internal (M) or mve_vec_extract + (M, M) where M is the vector mode associated with type suffix 0, except when + mode is V2DI where the builtin name is hardcoded. */ +class mve_function_vsetq_vgetq_lane : public function_base +{ +public: + CONSTEXPR mve_function_vsetq_vgetq_lane (bool is_get) + : m_is_get (is_get) + {} + + /* True for vgetq_lane, false for vsetq_lane. */ + bool m_is_get; + + rtx + expand (function_expander &e) const override + { + machine_mode mode = e.vector_mode (0); + insn_code code; + HOST_WIDE_INT elem; + + code = (mode == V2DImode) + ? (m_is_get + ? CODE_FOR_mve_vec_extractv2didi + : CODE_FOR_mve_vec_setv2di_internal) + : (m_is_get + ? code_for_mve_vec_extract (mode, mode) + : code_for_mve_vec_set_internal (mode)); + + if (!m_is_get) + { + /* mve_vec_set has vector and lane number arguments in opposite order + compared to the intrinsic: swap them now... */ + std::swap (e.args[1], e.args[2]); + } + + elem = INTVAL (e.args[1]); + + /* For big-endian, GCC's vector indices are reversed within each 64 bits + compared to the architectural lane indices used by MVE intrinsics. */ + if (BYTES_BIG_ENDIAN) + { + unsigned int num_lanes = 128 / e.type_suffix (0).element_bits; + elem ^= (num_lanes / 2) - 1; + } + + if (!m_is_get) + { + /* ... and convert the lane number into a mask as expected by the + builtin. */ + elem = HOST_WIDE_INT_1 << elem; + } + e.args[1] = GEN_INT (elem); + + return e.use_unpred_insn (code); + } +}; + } /* end anonymous namespace */ namespace arm_mve { @@ -1334,6 +1522,22 @@ namespace arm_mve { (-1, -1, UNSPEC##_F, \ -1, -1, UNSPEC##_P_F)) +FUNCTION (asrl, mve_function_scalar_shift, (ss_ASRL)) +FUNCTION (lsll, mve_function_scalar_shift, (ss_LSLL)) +FUNCTION (sqrshr, mve_function_scalar_shift, (ss_SQRSHR)) +FUNCTION (sqrshrl, mve_function_scalar_shift, (ss_SQRSHRL)) +FUNCTION (sqrshrl_sat48, mve_function_scalar_shift, (ss_SQRSHRL_SAT48)) +FUNCTION (sqshl, mve_function_scalar_shift, (ss_SQSHL)) +FUNCTION (sqshll, mve_function_scalar_shift, (ss_SQSHLL)) +FUNCTION (srshr, mve_function_scalar_shift, (ss_SRSHR)) +FUNCTION (srshrl, mve_function_scalar_shift, (ss_SRSHRL)) +FUNCTION (uqrshl, mve_function_scalar_shift, (ss_UQRSHL)) +FUNCTION (uqrshll, mve_function_scalar_shift, (ss_UQRSHLL)) +FUNCTION (uqrshll_sat48, mve_function_scalar_shift, (ss_UQRSHLL_SAT48)) +FUNCTION (uqshl, mve_function_scalar_shift, (ss_UQSHL)) +FUNCTION (uqshll, mve_function_scalar_shift, (ss_UQSHLL)) +FUNCTION (urshr, mve_function_scalar_shift, (ss_URSHR)) +FUNCTION (urshrl, mve_function_scalar_shift, (ss_URSHRL)) FUNCTION_PRED_P_S_U (vabavq, VABAVQ) FUNCTION_WITHOUT_N (vabdq, VABDQ) FUNCTION (vabsq, unspec_based_mve_function_exact_insn, (ABS, ABS, ABS, -1, -1, -1, VABSQ_M_S, -1, VABSQ_M_F, -1, -1, -1)) @@ -1390,6 +1594,7 @@ FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ) FUNCTION (vfmaq, unspec_mve_function_exact_insn, (-1, -1, VFMAQ_F, -1, -1, VFMAQ_N_F, -1, -1, VFMAQ_M_F, -1, -1, VFMAQ_M_N_F)) FUNCTION (vfmasq, unspec_mve_function_exact_insn, (-1, -1, -1, -1, -1, VFMASQ_N_F, -1, -1, -1, -1, -1, VFMASQ_M_N_F)) FUNCTION (vfmsq, unspec_mve_function_exact_insn, (-1, -1, VFMSQ_F, -1, -1, -1, -1, -1, 
VFMSQ_M_F, -1, -1, -1)) +FUNCTION (vgetq_lane, mve_function_vsetq_vgetq_lane, (true)) FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ) FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ) FUNCTION (vld1q, vld1_impl,) @@ -1453,6 +1658,7 @@ FUNCTION (vmulltq_poly, unspec_mve_function_exact_insn_vmull_poly, (VMULLTQ_POLY FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ) FUNCTION_WITH_RTX_M_N_NO_F (vmvnq, NOT, VMVNQ) FUNCTION (vnegq, unspec_based_mve_function_exact_insn, (NEG, NEG, NEG, -1, -1, -1, VNEGQ_M_S, -1, VNEGQ_M_F, -1, -1, -1)) +FUNCTION (vpnot, mve_function_vpnot, ) FUNCTION_WITHOUT_M_N (vpselq, VPSELQ) FUNCTION (vornq, unspec_based_mve_function_exact_insn_vorn, (-1, -1, VORNQ_M_S, VORNQ_M_U, VORNQ_M_F, -1, -1)) FUNCTION_WITH_RTX_M_N_NO_N_F (vorrq, IOR, VORRQ) @@ -1517,6 +1723,7 @@ FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ) FUNCTION_ONLY_N_NO_F (vrshrq, VRSHRQ) FUNCTION (vsbciq, vadc_vsbc_impl, (true, false)) FUNCTION (vsbcq, vadc_vsbc_impl, (false, false)) +FUNCTION (vsetq_lane, mve_function_vsetq_vgetq_lane, (false)) FUNCTION (vshlcq, vshlc_impl,) FUNCTION_ONLY_N_NO_F (vshllbq, VSHLLBQ) FUNCTION_ONLY_N_NO_F (vshlltq, VSHLLTQ) diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def index 0400c3c..9b9603e 100644 --- a/gcc/config/arm/arm-mve-builtins-base.def +++ b/gcc/config/arm/arm-mve-builtins-base.def @@ -18,6 +18,22 @@ <http://www.gnu.org/licenses/>. */ #define REQUIRES_FLOAT false +DEF_MVE_FUNCTION (asrl, scalar_s64_shift, none, none) +DEF_MVE_FUNCTION (lsll, scalar_u64_shift, none, none) +DEF_MVE_FUNCTION (sqrshr, scalar_s32_shift, none, none) +DEF_MVE_FUNCTION (sqrshrl, scalar_s64_shift, none, none) +DEF_MVE_FUNCTION (sqrshrl_sat48, scalar_s64_shift, none, none) +DEF_MVE_FUNCTION (sqshl, scalar_s32_shift_imm, none, none) +DEF_MVE_FUNCTION (sqshll, scalar_s64_shift_imm, none, none) +DEF_MVE_FUNCTION (srshr, scalar_s32_shift_imm, none, none) +DEF_MVE_FUNCTION (srshrl, scalar_s64_shift_imm, none, none) +DEF_MVE_FUNCTION (uqrshl, scalar_u32_shift, none, none) +DEF_MVE_FUNCTION (uqrshll, scalar_u64_shift, none, none) +DEF_MVE_FUNCTION (uqrshll_sat48, scalar_u64_shift, none, none) +DEF_MVE_FUNCTION (uqshl, scalar_u32_shift_imm, none, none) +DEF_MVE_FUNCTION (uqshll, scalar_u64_shift_imm, none, none) +DEF_MVE_FUNCTION (urshr, scalar_u32_shift_imm, none, none) +DEF_MVE_FUNCTION (urshrl, scalar_u64_shift_imm, none, none) DEF_MVE_FUNCTION (vabavq, binary_acca_int32, all_integer, p_or_none) DEF_MVE_FUNCTION (vabdq, binary, all_integer, mx_or_none) DEF_MVE_FUNCTION (vabsq, unary, all_signed, mx_or_none) @@ -52,6 +68,7 @@ DEF_MVE_FUNCTION (vddupq, viddup, all_unsigned, mx_or_none) DEF_MVE_FUNCTION (vdupq, unary_n, all_integer, mx_or_none) DEF_MVE_FUNCTION (vdwdupq, vidwdup, all_unsigned, mx_or_none) DEF_MVE_FUNCTION (veorq, binary, all_integer, mx_or_none) +DEF_MVE_FUNCTION (vgetq_lane, getq_lane, all_integer_with_64, none) DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none) DEF_MVE_FUNCTION (vhcaddq_rot270, binary, all_signed, mx_or_none) DEF_MVE_FUNCTION (vhcaddq_rot90, binary, all_signed, mx_or_none) @@ -113,6 +130,7 @@ DEF_MVE_FUNCTION (vmvnq, mvn, all_integer, mx_or_none) DEF_MVE_FUNCTION (vnegq, unary, all_signed, mx_or_none) DEF_MVE_FUNCTION (vornq, binary_orrq, all_integer, mx_or_none) DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none) +DEF_MVE_FUNCTION (vpnot, vpnot, none, none) DEF_MVE_FUNCTION (vpselq, vpsel, all_integer_with_64, none) DEF_MVE_FUNCTION (vqabsq, unary, all_signed, m_or_none) DEF_MVE_FUNCTION (vqaddq, binary_opt_n, all_integer, 
m_or_none) @@ -171,6 +189,7 @@ DEF_MVE_FUNCTION (vrshrq, binary_rshift, all_integer, mx_or_none) DEF_MVE_FUNCTION (vsbciq, vadc_vsbc, integer_32, m_or_none) DEF_MVE_FUNCTION (vsbcq, vadc_vsbc, integer_32, m_or_none) DEF_MVE_FUNCTION (vshlcq, vshlc, all_integer, m_or_none) +DEF_MVE_FUNCTION (vsetq_lane, setq_lane, all_integer_with_64, none) DEF_MVE_FUNCTION (vshllbq, binary_widen_n, integer_8_16, mx_or_none) DEF_MVE_FUNCTION (vshlltq, binary_widen_n, integer_8_16, mx_or_none) DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none) @@ -237,6 +256,7 @@ DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none) DEF_MVE_FUNCTION (vfmaq, ternary_opt_n, all_float, m_or_none) DEF_MVE_FUNCTION (vfmasq, ternary_n, all_float, m_or_none) DEF_MVE_FUNCTION (vfmsq, ternary, all_float, m_or_none) +DEF_MVE_FUNCTION (vgetq_lane, getq_lane, all_float, none) DEF_MVE_FUNCTION (vld1q, load, all_float, z_or_none) DEF_MVE_FUNCTION (vld2q, load, all_float, none) DEF_MVE_FUNCTION (vld4q, load, all_float, none) @@ -269,6 +289,7 @@ DEF_MVE_FUNCTION (vrndnq, unary, all_float, mx_or_none) DEF_MVE_FUNCTION (vrndpq, unary, all_float, mx_or_none) DEF_MVE_FUNCTION (vrndq, unary, all_float, mx_or_none) DEF_MVE_FUNCTION (vrndxq, unary, all_float, mx_or_none) +DEF_MVE_FUNCTION (vsetq_lane, setq_lane, all_float, none) DEF_MVE_FUNCTION (vst1q, store, all_float, p_or_none) DEF_MVE_FUNCTION (vst2q, store, all_float, none) DEF_MVE_FUNCTION (vst4q, store, all_float, none) diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h index 6ff3195..60bf51b 100644 --- a/gcc/config/arm/arm-mve-builtins-base.h +++ b/gcc/config/arm/arm-mve-builtins-base.h @@ -23,6 +23,22 @@ namespace arm_mve { namespace functions { +extern const function_base *const asrl; +extern const function_base *const lsll; +extern const function_base *const sqrshr; +extern const function_base *const sqrshrl; +extern const function_base *const sqrshrl_sat48; +extern const function_base *const sqshl; +extern const function_base *const sqshll; +extern const function_base *const srshr; +extern const function_base *const srshrl; +extern const function_base *const uqrshl; +extern const function_base *const uqrshll; +extern const function_base *const uqrshll_sat48; +extern const function_base *const uqshl; +extern const function_base *const uqshll; +extern const function_base *const urshr; +extern const function_base *const urshrl; extern const function_base *const vabavq; extern const function_base *const vabdq; extern const function_base *const vabsq; @@ -75,6 +91,7 @@ extern const function_base *const veorq; extern const function_base *const vfmaq; extern const function_base *const vfmasq; extern const function_base *const vfmsq; +extern const function_base *const vgetq_lane; extern const function_base *const vhaddq; extern const function_base *const vhcaddq_rot270; extern const function_base *const vhcaddq_rot90; @@ -144,6 +161,7 @@ extern const function_base *const vmvnq; extern const function_base *const vnegq; extern const function_base *const vornq; extern const function_base *const vorrq; +extern const function_base *const vpnot; extern const function_base *const vpselq; extern const function_base *const vqabsq; extern const function_base *const vqaddq; @@ -206,6 +224,7 @@ extern const function_base *const vrshrntq; extern const function_base *const vrshrq; extern const function_base *const vsbciq; extern const function_base *const vsbcq; +extern const function_base *const vsetq_lane; extern const function_base *const vshlcq; 
extern const function_base *const vshllbq; extern const function_base *const vshlltq; diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc index aeb1453..b1dc1dd 100644 --- a/gcc/config/arm/arm-mve-builtins-shapes.cc +++ b/gcc/config/arm/arm-mve-builtins-shapes.cc @@ -249,7 +249,8 @@ static void build_one (function_builder &b, const char *signature, const function_group_info &group, mode_suffix_index mode_suffix_id, unsigned int ti, unsigned int pi, bool preserve_user_namespace, - bool force_direct_overloads) + bool force_direct_overloads, + unsigned int which_overload = NONOVERLOADED_FORM | OVERLOADED_FORM) { /* Current functions take at most five arguments. Match parse_signature parameter below. */ @@ -261,7 +262,7 @@ build_one (function_builder &b, const char *signature, apply_predication (instance, return_type, argument_types); b.add_unique_function (instance, return_type, argument_types, preserve_user_namespace, group.requires_float, - force_direct_overloads); + force_direct_overloads, which_overload); } /* Add a function instance for every type and predicate combination in @@ -1467,19 +1468,93 @@ struct create_def : public nonoverloaded_base }; SHAPE (create) +/* <S0>_t vfoo[_t0](<T0>_t, const int) + + Check that 'idx' is in the [0..#num_lanes - 1] range. + + Example: vgetq_lane. + int8_t [__arm_]vgetq_lane[_s8](int8x16_t a, const int idx) */ + +struct getq_lane_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group, + bool preserve_user_namespace) const override + { + b.add_overloaded_functions (group, MODE_none, preserve_user_namespace); + build_all (b, "s0,v0,su64", group, MODE_none, preserve_user_namespace); + } + + tree + resolve (function_resolver &r) const override + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (2, i, nargs) + || (type = r.infer_vector_type (i-1)) == NUM_TYPE_SUFFIXES + || !r.require_integer_immediate (i)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } + + bool + check (function_checker &c) const override + { + unsigned int num_lanes = 128 / c.type_suffix (0).element_bits; + + return c.require_immediate_range (1, 0, num_lanes - 1); + } + +}; +SHAPE (getq_lane) + /* <T0>[xN]_t vfoo_t0(). + <T0>[xN]_t vfoo(<T0>_t). Example: vuninitializedq. int8x16_t [__arm_]vuninitializedq_s8(void) int8x16_t [__arm_]vuninitializedq(int8x16_t t) */ -struct inherent_def : public nonoverloaded_base +struct inherent_def : public overloaded_base<0> { void build (function_builder &b, const function_group_info &group, bool preserve_user_namespace) const override { - build_all (b, "t0", group, MODE_none, preserve_user_namespace); + b.add_overloaded_functions (group, MODE_none, preserve_user_namespace); + + /* Overloaded and non-overloaded forms have different signatures, so call + build_one with either OVERLOADED_FORM or NONOVERLOADED_FORM. */ + unsigned int pi = 0; + bool force_direct_overloads = false; + for (unsigned int ti = 0; + ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti) + { + /* For int8x16_t [__arm_]vuninitializedq(int8x16_t t), generate only + the overloaded form, i.e. without type suffix. */ + build_one (b, "t0,t0", group, MODE_none, ti, pi, + preserve_user_namespace, force_direct_overloads, + OVERLOADED_FORM); + /* For int8x16_t [__arm_]vuninitializedq_s8(void), generate only the + non-overloaded form, i.e. with type suffix. 
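The split into OVERLOADED_FORM and NONOVERLOADED_FORM is easiest to see from the user side; a minimal sketch (hypothetical function f, assuming an MVE-enabled target with arm_mve.h included):

  #include <arm_mve.h>

  int8x16_t
  f (void)
  {
    /* Non-overloaded form: type suffix, no argument.  */
    int8x16_t a = vuninitializedq_s8 ();
    /* Overloaded form: no suffix; the argument exists only to select the
       type, which is why the resolver pops it before resolving to the
       argument-less declaration.  */
    int8x16_t b = vuninitializedq (a);
    return b;
  }
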
*/ + build_one (b, "t0", group, MODE_none, ti, pi, + preserve_user_namespace, force_direct_overloads, + NONOVERLOADED_FORM); + } + } + + tree + resolve (function_resolver &r) const override + { + type_suffix_index type; + if (!r.check_num_arguments (1) + || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES) + return error_mark_node; + + /* We need to pop the useless argument for the non-overloaded function. */ + return r.pop_and_resolve_to (r.mode_suffix_id, type); } + }; SHAPE (inherent) @@ -1682,6 +1757,198 @@ struct mvn_def : public overloaded_base<0> }; SHAPE (mvn) +/* int32_t foo(int32_t, int32_t) + + Example: sqrshr. + int32_t [__arm_]sqrshr(int32_t value, int32_t shift) */ +struct scalar_s32_shift_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group, + bool preserve_user_namespace) const override + { + build_all (b, "ss32,ss32,ss32", group, MODE_none, preserve_user_namespace); + } +}; +SHAPE (scalar_s32_shift) + +/* int32_t foo(int32_t, const int) + + Check that 'shift' is in the [1,32] range. + + Example: sqshl. + int32_t [__arm_]sqshl(int32_t value, const int shift) */ +struct scalar_s32_shift_imm_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group, + bool preserve_user_namespace) const override + { + build_all (b, "ss32,ss32,su64", group, MODE_none, preserve_user_namespace); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_range (1, 1, 32); + } +}; +SHAPE (scalar_s32_shift_imm) + +/* uint32_t foo(uint32_t, int32_t) + + Example: uqrshl. + uint32_t [__arm_]uqrshl(uint32_t value, int32_t shift) */ +struct scalar_u32_shift_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group, + bool preserve_user_namespace) const override + { + build_all (b, "su32,su32,ss32", group, MODE_none, preserve_user_namespace); + } +}; +SHAPE (scalar_u32_shift) + +/* uint32_t foo(uint32_t, const int) + + Check that 'shift' is in the [1,32] range. + + Example: uqshl. + uint32_t [__arm_]uqshl(uint32_t value, const int shift) */ +struct scalar_u32_shift_imm_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group, + bool preserve_user_namespace) const override + { + build_all (b, "su32,su32,su64", group, MODE_none, preserve_user_namespace); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_range (1, 1, 32); + } +}; +SHAPE (scalar_u32_shift_imm) + +/* int64_t foo(int64_t, int32_t) + + Example: asrl. + int64_t [__arm_]asrl(int64_t value, int32_t shift) */ +struct scalar_s64_shift_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group, + bool preserve_user_namespace) const override + { + build_all (b, "ss64,ss64,ss32", group, MODE_none, preserve_user_namespace); + } +}; +SHAPE (scalar_s64_shift) + +/* int64_t foo(int64_t, const int) + + Check that 'shift' is in the [1,32] range. + + Example: sqshll.
+ int64_t [__arm_]sqshll(int64_t value, const int shift) */ +struct scalar_s64_shift_imm_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group, + bool preserve_user_namespace) const override + { + build_all (b, "ss64,ss64,su64", group, MODE_none, preserve_user_namespace); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_range (1, 1, 32); + } +}; +SHAPE (scalar_s64_shift_imm) + +/* uint64_t foo(uint64_t, int32_t) + + Example: lsll. + uint64_t [__arm_]lsll(uint64_t value, int32_t shift) */ +struct scalar_u64_shift_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group, + bool preserve_user_namespace) const override + { + build_all (b, "su64,su64,ss32", group, MODE_none, preserve_user_namespace); + } +}; +SHAPE (scalar_u64_shift) + +/* uint64_t foo(uint64_t, const int) + + Check that 'shift' is in the [1,32] range. + + Example: uqshll. + uint64_t [__arm_]uqshll(uint64_t value, const int shift) */ +struct scalar_u64_shift_imm_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group, + bool preserve_user_namespace) const override + { + build_all (b, "su64,su64,su64", group, MODE_none, preserve_user_namespace); + } + + bool + check (function_checker &c) const override + { + return c.require_immediate_range (1, 1, 32); + } +}; +SHAPE (scalar_u64_shift_imm) + +/* <T0>_t vfoo[_t0](<S0>_t, <T0>_t, const int) + + Check that 'idx' is in the [0..#num_lanes - 1] range. + + Example: vsetq_lane. + int8x16_t [__arm_]vsetq_lane[_s8](int8_t a, int8x16_t b, const int idx) */ +struct setq_lane_def : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group, + bool preserve_user_namespace) const override + { + b.add_overloaded_functions (group, MODE_none, preserve_user_namespace); + build_all (b, "v0,s0,v0,su64", group, MODE_none, preserve_user_namespace); + } + + tree + resolve (function_resolver &r) const override + { + unsigned int i, nargs; + type_suffix_index type; + if (!r.check_gp_argument (3, i, nargs) + || (type = r.infer_vector_type (i-1)) == NUM_TYPE_SUFFIXES + || !r.require_derived_scalar_type (i - 2, r.SAME_TYPE_CLASS) + || !r.require_integer_immediate (i)) + return error_mark_node; + + return r.resolve_to (r.mode_suffix_id, type); + } + + bool + check (function_checker &c) const override + { + unsigned int num_lanes = 128 / c.type_suffix (0).element_bits; + + return c.require_immediate_range (2, 0, num_lanes - 1); + } +}; +SHAPE (setq_lane) + /* void vfoo[_t0](<X>_t *, <T0>[xN]_t) where <X> might be tied to <t0> (for non-truncating stores) or might @@ -2710,6 +2977,21 @@ struct vidwdup_def : public overloaded_base<0> }; SHAPE (vidwdup) +/* mve_pred16_t foo_t0(mve_pred16_t) + + Example: vpnot. + mve_pred16_t [__arm_]vpnot(mve_pred16_t a) */ +struct vpnot_def : public nonoverloaded_base +{ + void + build (function_builder &b, const function_group_info &group, + bool preserve_user_namespace) const override + { + build_all (b, "p,p", group, MODE_none, preserve_user_namespace); + } +}; +SHAPE (vpnot) + /* <T0>_t vfoo[_t0](<T0>_t, <T0>_t, mve_pred16_t) i.e.
a version of the standard ternary shape in which diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h index 6b839c5..56bba6d 100644 --- a/gcc/config/arm/arm-mve-builtins-shapes.h +++ b/gcc/config/arm/arm-mve-builtins-shapes.h @@ -29,7 +29,8 @@ namespace arm_mve Also: - - "inherent" means that the function takes no arguments. */ + - "inherent" means that the function takes no arguments, except in its + overloaded form. */ namespace shapes { @@ -60,12 +61,22 @@ namespace arm_mve extern const function_shape *const binary_widen_poly; extern const function_shape *const cmp; extern const function_shape *const create; + extern const function_shape *const getq_lane; extern const function_shape *const inherent; extern const function_shape *const load; extern const function_shape *const load_ext; extern const function_shape *const load_ext_gather_offset; extern const function_shape *const load_gather_base; extern const function_shape *const mvn; + extern const function_shape *const scalar_s32_shift; + extern const function_shape *const scalar_s32_shift_imm; + extern const function_shape *const scalar_u32_shift; + extern const function_shape *const scalar_u32_shift_imm; + extern const function_shape *const scalar_s64_shift; + extern const function_shape *const scalar_s64_shift_imm; + extern const function_shape *const scalar_u64_shift; + extern const function_shape *const scalar_u64_shift_imm; + extern const function_shape *const setq_lane; extern const function_shape *const store; extern const function_shape *const store_scatter_base; extern const function_shape *const store_scatter_offset; @@ -90,6 +101,7 @@ namespace arm_mve extern const function_shape *const vcvtx; extern const function_shape *const viddup; extern const function_shape *const vidwdup; + extern const function_shape *const vpnot; extern const function_shape *const vpsel; extern const function_shape *const vshlc; diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc index 42b53cc0..ecf5196 100644 --- a/gcc/config/arm/arm-mve-builtins.cc +++ b/gcc/config/arm/arm-mve-builtins.cc @@ -410,8 +410,6 @@ register_builtin_types () #include "arm-mve-builtins.def" for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i) { - if (vector_types[i].requires_float && !TARGET_HAVE_MVE_FLOAT) - continue; tree eltype = scalar_types[i]; tree vectype; if (eltype == boolean_type_node) @@ -433,18 +431,6 @@ register_builtin_types () static void register_vector_type (vector_type_index type) { - - /* If the target does not have the mve.fp extension, but the type requires - it, then it needs to be assigned a non-dummy type so that functions - with those types in their signature can be registered. This allows for - diagnostics about the missing extension, rather than about a missing - function definition. */ - if (vector_types[type].requires_float && !TARGET_HAVE_MVE_FLOAT) - { - acle_vector_types[0][type] = void_type_node; - return; - } - tree vectype = abi_vector_types[type]; tree id = get_identifier (vector_types[type].acle_name); tree decl = build_decl (input_location, TYPE_DECL, id, vectype); @@ -470,13 +456,7 @@ register_builtin_tuple_types (vector_type_index type) { const vector_type_info* info = &vector_types[type]; - /* If the target does not have the mve.fp extension, but the type requires - it, then it needs to be assigned a non-dummy type so that functions - with those types in their signature can be registered. 
This allows for - diagnostics about the missing extension, rather than about a missing - function definition. */ - if (scalar_types[type] == boolean_type_node - || (info->requires_float && !TARGET_HAVE_MVE_FLOAT)) + if (scalar_types[type] == boolean_type_node) { for (unsigned int num_vectors = 2; num_vectors <= 4; num_vectors += 2) acle_vector_types[num_vectors >> 1][type] = void_type_node; @@ -1014,7 +994,8 @@ function_builder::add_unique_function (const function_instance &instance, vec<tree> &argument_types, bool preserve_user_namespace, bool requires_float, - bool force_direct_overloads) + bool force_direct_overloads, + unsigned int which_overload) { /* Add the function under its full (unique) name with prefix. */ char *name = get_name (instance, true, false); @@ -1022,27 +1003,31 @@ function_builder::add_unique_function (const function_instance &instance, argument_types.length (), argument_types.address ()); tree attrs = get_attributes (instance); - registered_function &rfn = add_function (instance, name, fntype, attrs, - requires_float, false, false); - - /* Enter the function into the hash table. */ - hashval_t hash = instance.hash (); - registered_function **rfn_slot - = function_table->find_slot_with_hash (instance, hash, INSERT); - gcc_assert (!*rfn_slot); - *rfn_slot = &rfn; - - /* Also add the non-prefixed non-overloaded function, as placeholder - if the user namespace does not need to be preserved. */ - char *noprefix_name = get_name (instance, false, false); - attrs = get_attributes (instance); - add_function (instance, noprefix_name, fntype, attrs, requires_float, - false, preserve_user_namespace); + if (which_overload & NONOVERLOADED_FORM) + { + registered_function &rfn = add_function (instance, name, fntype, attrs, + requires_float, false, false); + + /* Enter the function into the hash table. */ + hashval_t hash = instance.hash (); + registered_function **rfn_slot + = function_table->find_slot_with_hash (instance, hash, INSERT); + gcc_assert (!*rfn_slot); + *rfn_slot = &rfn; + + /* Also add the non-prefixed non-overloaded function, as placeholder + if the user namespace does not need to be preserved. */ + char *noprefix_name = get_name (instance, false, false); + attrs = get_attributes (instance); + add_function (instance, noprefix_name, fntype, attrs, requires_float, + false, preserve_user_namespace); + } /* Also add the function under its overloaded alias, if we want a separate decl for each instance of an overloaded function. */ char *overload_name = get_name (instance, true, true); - if (strcmp (name, overload_name) != 0) + if ((which_overload & OVERLOADED_FORM) + && (strcmp (name, overload_name) != 0)) { /* Attribute lists shouldn't be shared. */ attrs = get_attributes (instance); @@ -1251,6 +1236,18 @@ function_resolver::resolve_to (mode_suffix_index mode, return res; } +/* Pop an argument and resolve the function to one with the mode suffix given + by MODE and the type suffixes given by TYPE0 and TYPE1. Return its function + decl on success, otherwise report an error and return error_mark_node. */ +tree +function_resolver::pop_and_resolve_to (mode_suffix_index mode, + type_suffix_index type0, + type_suffix_index type1) +{ + m_arglist.pop (); + return resolve_to (mode, type0, type1); +} + /* Require argument ARGNO to be a pointer to a scalar type that has a corresponding type suffix. Return that type suffix on success, otherwise report an error and return NUM_TYPE_SUFFIXES. 
*/ diff --git a/gcc/config/arm/arm-mve-builtins.h b/gcc/config/arm/arm-mve-builtins.h index 3a0d50d..5d25b16 100644 --- a/gcc/config/arm/arm-mve-builtins.h +++ b/gcc/config/arm/arm-mve-builtins.h @@ -94,6 +94,13 @@ const unsigned int CP_RAISE_FP_EXCEPTIONS = 1U << 1; const unsigned int CP_READ_MEMORY = 1U << 2; const unsigned int CP_WRITE_MEMORY = 1U << 3; +/* Flags that describe which forms of an intrinsic to generate: non-overloaded + and/or overloaded ones. In general we want both, but for vuninitializedq the + two forms have different signatures and we need to generate them + separately. */ +const unsigned int NONOVERLOADED_FORM = 1U << 0; +const unsigned int OVERLOADED_FORM = 1U << 1; + /* Enumerates the MVE predicate and (data) vector types, together called "vector types" for brevity. */ enum vector_type_index @@ -311,7 +318,7 @@ public: ~function_builder (); void add_unique_function (const function_instance &, tree, - vec<tree> &, bool, bool, bool); + vec<tree> &, bool, bool, bool, unsigned int); void add_overloaded_function (const function_instance &, bool, bool); void add_overloaded_functions (const function_group_info &, mode_suffix_index, bool); @@ -383,6 +390,9 @@ public: tree resolve_to (mode_suffix_index, type_suffix_index = NUM_TYPE_SUFFIXES, type_suffix_index = NUM_TYPE_SUFFIXES); + tree pop_and_resolve_to (mode_suffix_index, + type_suffix_index = NUM_TYPE_SUFFIXES, + type_suffix_index = NUM_TYPE_SUFFIXES); type_suffix_index infer_pointer_type (unsigned int); type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int); diff --git a/gcc/config/arm/arm-opts.h b/gcc/config/arm/arm-opts.h index 5c543bf..d2384ed 100644 --- a/gcc/config/arm/arm-opts.h +++ b/gcc/config/arm/arm-opts.h @@ -35,6 +35,7 @@ */ enum arm_fp16_format_type { + ARM_FP16_FORMAT_DEFAULT = -1, ARM_FP16_FORMAT_NONE = 0, ARM_FP16_FORMAT_IEEE = 1, ARM_FP16_FORMAT_ALTERNATIVE = 2 diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 6df2fa0..1f413b6 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -3923,13 +3923,19 @@ arm_option_reconfigure_globals (void) arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16); arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16); - if (arm_fp16_inst) + + /* Set arm_fp16_format to IEEE if the target has fp16 support unless the + user forced ARM_FP16_FORMAT_NONE. */ + if (arm_fp16_inst && (arm_fp16_format != ARM_FP16_FORMAT_NONE)) { if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) error ("selected fp16 options are incompatible"); arm_fp16_format = ARM_FP16_FORMAT_IEEE; } + if (arm_fp16_format == ARM_FP16_FORMAT_DEFAULT) + arm_fp16_format = ARM_FP16_FORMAT_NONE; + arm_arch_cde = 0; arm_arch_cde_coproc = 0; int cde_bits[] = {isa_bit_cdecp0, isa_bit_cdecp1, isa_bit_cdecp2, @@ -5694,8 +5700,6 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, if (mode == VOIDmode) mode = GET_MODE (*op1); - maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1; - /* For floating-point comparisons, prefer >= and > over <= and < since the former are supported by VSEL on some architectures. Only do this if both operands are registers. */ @@ -5712,6 +5716,13 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, return; } + /* Everything below assumes an integer mode. */ + if (GET_MODE_CLASS (mode) != MODE_INT + || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT) + return; + + maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1; + /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).
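Sinking the 'maxval' computation below the new early-out avoids an undefined shift for wide or non-integer modes; a standalone illustration, not GCC code, using a hypothetical 128-bit mode:

  #include <stdint.h>
  #include <stdio.h>

  int
  main (void)
  {
    unsigned bitsize = 128;   /* e.g. a 128-bit vector mode */
    if (bitsize > 64)         /* mirrors the new GET_MODE_BITSIZE guard */
      {
        puts ("wider than HOST_WIDE_INT, leave the comparison alone");
        return 0;
      }
    uint64_t maxval = (UINT64_C (1) << (bitsize - 1)) - 1;  /* now safe */
    printf ("maxval = %llu\n", (unsigned long long) maxval);
    return 0;
  }

Previously the shift ran before any of the bail-out cases, so a mode whose bit size exceeds HOST_BITS_PER_WIDE_INT could shift a 64-bit one by 127 bits, which is undefined behaviour.
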
In ARM mode we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either reversed or (for constant OP1) adjusted to GE/LT. @@ -18498,6 +18509,9 @@ comp_not_to_clear_mask_str_un (tree arg_type, int * regno, if (*last_used_bit != offset) { + /* We never clear padding bits in any registers other than the + first 4 GPRs. */ + gcc_assert (*regno < 4); if (offset < *last_used_bit) { /* This field's offset is before the 'last_used_bit', that @@ -18590,19 +18604,25 @@ comp_not_to_clear_mask_str_un (tree arg_type, int * regno, last_used_bit_t = (starting_bit + field_size) % 32; } - for (i = *regno; i < regno_t; i++) + /* We only clear padding bits in the first 4 GPRs. No need to check + regno_t, since there is no way this field could have been split + between a GPR and an FP register. */ + if (*regno < 4) { - /* For all but the last register used by this field only keep the - padding bits that were padding bits in this field. */ - padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i]; - } + for (i = *regno; i < regno_t; i++) + { + /* For all but the last register used by this field only keep + the padding bits that were padding bits in this field. */ + padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i]; + } - /* For the last register, keep all padding bits that were padding - bits in this field and any padding bits that are still valid - as padding bits but fall outside of this field's size. */ - mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1; - padding_bits_to_clear_res[regno_t] - &= padding_bits_to_clear_t[regno_t] | mask; + /* For the last register, keep all padding bits that were padding + bits in this field and any padding bits that are still valid + as padding bits but fall outside of this field's size. */ + mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1; + padding_bits_to_clear_res[regno_t] + &= padding_bits_to_clear_t[regno_t] | mask; + } /* Update the maximum size of the fields in terms of registers used ('max_reg') and the 'last_used_bit' in said register. */ @@ -18617,16 +18637,25 @@ comp_not_to_clear_mask_str_un (tree arg_type, int * regno, field = TREE_CHAIN (field); } - /* Update the current padding_bits_to_clear using the intersection of the - padding bits of all the fields. */ - for (i=*regno; i < max_reg; i++) - padding_bits_to_clear[i] |= padding_bits_to_clear_res[i]; + /* We only clear padding bits in the first 4 GPRs. No need to check + regno_t, since there is no way this field could have been split + between a GPR and an FP register. */ + if (*regno < 4) + { + /* Update the current padding_bits_to_clear using the intersection of the + padding bits of all the fields. */ + for (i=*regno; i < max_reg; i++) + padding_bits_to_clear[i] |= padding_bits_to_clear_res[i]; + + /* Do not keep trailing padding bits, we do not know yet whether this + is the end of the argument. */ + mask = ((uint32_t) 1 << max_bit) - 1; + padding_bits_to_clear[max_reg] + |= padding_bits_to_clear_res[max_reg] & mask; + } - /* Do not keep trailing padding bits, we do not know yet whether this - is the end of the argument. */ - mask = ((uint32_t) 1 << max_bit) - 1; - padding_bits_to_clear[max_reg] - |= padding_bits_to_clear_res[max_reg] & mask; + for (int i = *regno; i < max_reg; ++i) + not_to_clear_reg_mask |= HOST_WIDE_INT_1U << i; *regno = max_reg; *last_used_bit = max_bit; @@ -18668,8 +18697,9 @@ compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno, /* If the 'last_used_bit' is not zero, that means we are still using a part of the last 'regno'.
In such cases we must clear the trailing bits. Otherwise we are not using regno and we should mark it as to - clear. */ - if (last_used_bit != 0) + clear. We only clear padding bits for scalar values that are passed + in registers, so regno is never 4 or higher. */ + if (regno < 4 && last_used_bit != 0) padding_bits_to_clear[regno] |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1; else @@ -24039,7 +24069,7 @@ arm_print_condition (FILE *stream) /* Globally reserved letters: acln - Punctuation letters currently used: @_|?().!# + Punctuation letters currently used: @_-|?().!# Lower case letters currently used: bcdefhimpqtvwxyz Upper case letters currently used: ABCDEFGHIJKLMOPQRSTUV Letters previously used, but now deprecated/obsolete: sNWXYZ. @@ -24072,6 +24102,11 @@ arm_print_operand (FILE *stream, rtx x, int code) case '_': fputs (user_label_prefix, stream); return; + case '-': +#ifdef LOCAL_LABEL_PREFIX + fputs (LOCAL_LABEL_PREFIX, stream); +#endif + return; case '|': fputs (REGISTER_PREFIX, stream); @@ -24888,9 +24923,9 @@ arm_print_operand_punct_valid_p (unsigned char code) { return (code == '@' || code == '|' || code == '.' || code == '(' || code == ')' || code == '#' + || code == '-' || code == '_' || (TARGET_32BIT && (code == '?')) - || (TARGET_THUMB2 && (code == '!')) - || (TARGET_THUMB && (code == '_'))); + || (TARGET_THUMB2 && (code == '!'))); } /* Target hook for assembling integer objects. The ARM version needs to @@ -29620,11 +29655,7 @@ arm_vector_mode_supported_p (machine_mode mode) return true; if (TARGET_HAVE_MVE - && (VALID_MVE_SI_MODE (mode) || VALID_MVE_PRED_MODE (mode))) - return true; - - if (TARGET_HAVE_MVE_FLOAT - && (mode == V2DFmode || mode == V4SFmode || mode == V8HFmode)) + && (VALID_MVE_MODE (mode) || VALID_MVE_PRED_MODE (mode))) return true; return false; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 422ae54..b5d92ae 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -4588,10 +4588,8 @@ if (arm_reg_or_long_shift_imm (operands[2], GET_MODE (operands[2])) && (REG_P (operands[2]) || INTVAL(operands[2]) != 32)) { - if (!reg_overlap_mentioned_p(operands[0], operands[1])) - emit_insn (gen_movdi (operands[0], operands[1])); - - emit_insn (gen_thumb2_lsll (operands[0], operands[2])); + operands[2] = convert_modes (QImode, SImode, operands[2], 0); + emit_insn (gen_mve_lsll (operands[0], operands[1], operands[2])); DONE; } } @@ -4627,10 +4625,8 @@ if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN && arm_reg_or_long_shift_imm (operands[2], GET_MODE (operands[2]))) { - if (!reg_overlap_mentioned_p(operands[0], operands[1])) - emit_insn (gen_movdi (operands[0], operands[1])); - - emit_insn (gen_thumb2_asrl (operands[0], operands[2])); + operands[2] = convert_modes (QImode, SImode, operands[2], 0); + emit_insn (gen_mve_asrl (operands[0], operands[1], operands[2])); DONE; } @@ -4662,10 +4658,7 @@ if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN && long_shift_imm (operands[2], GET_MODE (operands[2]))) { - if (!reg_overlap_mentioned_p(operands[0], operands[1])) - emit_insn (gen_movdi (operands[0], operands[1])); - - emit_insn (gen_thumb2_lsrl (operands[0], operands[2])); + emit_insn (gen_mve_lsrl (operands[0], operands[1], operands[2])); DONE; } @@ -8350,7 +8343,7 @@ (define_expand "movhfcc" [(set (match_operand:HF 0 "s_register_operand") - (if_then_else:HF (match_operand 1 "arm_cond_move_operator") + (if_then_else:HF (match_operand 1 "expandable_comparison_operator") (match_operand:HF 2 "s_register_operand") (match_operand:HF 3 "s_register_operand")))]
"TARGET_VFP_FP16INST" @@ -8372,7 +8365,7 @@ (define_expand "movsfcc" [(set (match_operand:SF 0 "s_register_operand") - (if_then_else:SF (match_operand 1 "arm_cond_move_operator") + (if_then_else:SF (match_operand 1 "expandable_comparison_operator") (match_operand:SF 2 "s_register_operand") (match_operand:SF 3 "s_register_operand")))] "TARGET_32BIT && TARGET_HARD_FLOAT" @@ -8394,7 +8387,7 @@ (define_expand "movdfcc" [(set (match_operand:DF 0 "s_register_operand") - (if_then_else:DF (match_operand 1 "arm_cond_move_operator") + (if_then_else:DF (match_operand 1 "expandable_comparison_operator") (match_operand:DF 2 "s_register_operand") (match_operand:DF 3 "s_register_operand")))] "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE" diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index d5eeeae..f340fee 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -72,11 +72,12 @@ Target Mask(APCS_FRAME) Generate APCS conformant stack frames. mapcs-reentrant -Target Mask(APCS_REENT) -Generate re-entrant, PIC code. +Target Mask(APCS_REENT) Undocumented +Unimplemented option to generate re-entrant, PIC code. mapcs-stack-check Target Mask(APCS_STACK) Undocumented +Unimplemented option to generate stack checking code on function entry. march= Target Save RejectNegative Negative(march=) ToLower Joined Var(arm_arch_string) @@ -139,7 +140,7 @@ Target Var(TARGET_FLIP_THUMB) Undocumented Switch ARM/Thumb modes on alternating functions for compiler testing. mfp16-format= -Target RejectNegative Joined Enum(arm_fp16_format_type) Var(arm_fp16_format) Init(ARM_FP16_FORMAT_NONE) +Target RejectNegative Joined Enum(arm_fp16_format_type) Var(arm_fp16_format) Init(ARM_FP16_FORMAT_DEFAULT) Specify the __fp16 floating-point format. Enum @@ -248,7 +249,7 @@ Target Save RejectNegative Negative(mtune=) ToLower Joined Var(arm_tune_string) Tune code for the given processor. mprint-tune-info -Target RejectNegative Var(print_tune_info) Init(0) +Target RejectNegative Var(print_tune_info) Init(0) Undocumented Print CPU tuning information as comment in assembler file. This is an option used only for regression testing of the compiler and not intended for ordinary use in compiling code. @@ -302,7 +303,7 @@ Target Var(unaligned_access) Init(2) Save Enable unaligned word and halfword accesses to packed data. mneon-for-64bits -Target WarnRemoved +Target WarnRemoved Undocumented This option is deprecated and has no effect. 
mslow-flash-data diff --git a/gcc/config/arm/arm.opt.urls b/gcc/config/arm/arm.opt.urls index b3696f7..93c6fce 100644 --- a/gcc/config/arm/arm.opt.urls +++ b/gcc/config/arm/arm.opt.urls @@ -36,9 +36,6 @@ UrlSuffix(gcc/ARM-Options.html#index-mfloat-abi) mcmse UrlSuffix(gcc/ARM-Options.html#index-mcmse) -mflip-thumb -UrlSuffix(gcc/ARM-Options.html#index-mflip-thumb) - mfp16-format= UrlSuffix(gcc/ARM-Options.html#index-mfp16-format) @@ -94,12 +91,6 @@ UrlSuffix(gcc/ARM-Options.html#index-mtpcs-leaf-frame) mtune= UrlSuffix(gcc/ARM-Options.html#index-mtune-4) -mprint-tune-info -UrlSuffix(gcc/ARM-Options.html#index-mprint-tune-info) - -mverbose-cost-dump -UrlSuffix(gcc/ARM-Options.html#index-mverbose-cost-dump-1) - mword-relocations UrlSuffix(gcc/ARM-Options.html#index-mword-relocations) @@ -115,9 +106,6 @@ UrlSuffix(gcc/ARM-Options.html#index-mfix-cmse-cve-2021-35465) munaligned-access UrlSuffix(gcc/ARM-Options.html#index-munaligned-access) -mneon-for-64bits -UrlSuffix(gcc/ARM-Options.html#index-mneon-for-64bits) - mslow-flash-data UrlSuffix(gcc/ARM-Options.html#index-mslow-flash-data) diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index ee18a47..b12d704 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -44,898 +44,5 @@ #pragma GCC arm "arm_mve.h" false #endif -#ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE -#define vuninitializedq(__v) __arm_vuninitializedq(__v) -#define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx) -#define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx) - - -#define vpnot(__a) __arm_vpnot(__a) -#define vuninitializedq_u8(void) __arm_vuninitializedq_u8(void) -#define vuninitializedq_u16(void) __arm_vuninitializedq_u16(void) -#define vuninitializedq_u32(void) __arm_vuninitializedq_u32(void) -#define vuninitializedq_u64(void) __arm_vuninitializedq_u64(void) -#define vuninitializedq_s8(void) __arm_vuninitializedq_s8(void) -#define vuninitializedq_s16(void) __arm_vuninitializedq_s16(void) -#define vuninitializedq_s32(void) __arm_vuninitializedq_s32(void) -#define vuninitializedq_s64(void) __arm_vuninitializedq_s64(void) -#define vuninitializedq_f16(void) __arm_vuninitializedq_f16(void) -#define vuninitializedq_f32(void) __arm_vuninitializedq_f32(void) -#define vsetq_lane_f16(__a, __b, __idx) __arm_vsetq_lane_f16(__a, __b, __idx) -#define vsetq_lane_f32(__a, __b, __idx) __arm_vsetq_lane_f32(__a, __b, __idx) -#define vsetq_lane_s16(__a, __b, __idx) __arm_vsetq_lane_s16(__a, __b, __idx) -#define vsetq_lane_s32(__a, __b, __idx) __arm_vsetq_lane_s32(__a, __b, __idx) -#define vsetq_lane_s8(__a, __b, __idx) __arm_vsetq_lane_s8(__a, __b, __idx) -#define vsetq_lane_s64(__a, __b, __idx) __arm_vsetq_lane_s64(__a, __b, __idx) -#define vsetq_lane_u8(__a, __b, __idx) __arm_vsetq_lane_u8(__a, __b, __idx) -#define vsetq_lane_u16(__a, __b, __idx) __arm_vsetq_lane_u16(__a, __b, __idx) -#define vsetq_lane_u32(__a, __b, __idx) __arm_vsetq_lane_u32(__a, __b, __idx) -#define vsetq_lane_u64(__a, __b, __idx) __arm_vsetq_lane_u64(__a, __b, __idx) -#define vgetq_lane_f16(__a, __idx) __arm_vgetq_lane_f16(__a, __idx) -#define vgetq_lane_f32(__a, __idx) __arm_vgetq_lane_f32(__a, __idx) -#define vgetq_lane_s16(__a, __idx) __arm_vgetq_lane_s16(__a, __idx) -#define vgetq_lane_s32(__a, __idx) __arm_vgetq_lane_s32(__a, __idx) -#define vgetq_lane_s8(__a, __idx) __arm_vgetq_lane_s8(__a, __idx) -#define vgetq_lane_s64(__a, __idx) __arm_vgetq_lane_s64(__a, __idx) -#define vgetq_lane_u8(__a, __idx) __arm_vgetq_lane_u8(__a, __idx) -#define vgetq_lane_u16(__a, 
__idx) __arm_vgetq_lane_u16(__a, __idx) -#define vgetq_lane_u32(__a, __idx) __arm_vgetq_lane_u32(__a, __idx) -#define vgetq_lane_u64(__a, __idx) __arm_vgetq_lane_u64(__a, __idx) -#define sqrshr(__p0, __p1) __arm_sqrshr(__p0, __p1) -#define sqrshrl(__p0, __p1) __arm_sqrshrl(__p0, __p1) -#define sqrshrl_sat48(__p0, __p1) __arm_sqrshrl_sat48(__p0, __p1) -#define sqshl(__p0, __p1) __arm_sqshl(__p0, __p1) -#define sqshll(__p0, __p1) __arm_sqshll(__p0, __p1) -#define srshr(__p0, __p1) __arm_srshr(__p0, __p1) -#define srshrl(__p0, __p1) __arm_srshrl(__p0, __p1) -#define uqrshl(__p0, __p1) __arm_uqrshl(__p0, __p1) -#define uqrshll(__p0, __p1) __arm_uqrshll(__p0, __p1) -#define uqrshll_sat48(__p0, __p1) __arm_uqrshll_sat48(__p0, __p1) -#define uqshl(__p0, __p1) __arm_uqshl(__p0, __p1) -#define uqshll(__p0, __p1) __arm_uqshll(__p0, __p1) -#define urshr(__p0, __p1) __arm_urshr(__p0, __p1) -#define urshrl(__p0, __p1) __arm_urshrl(__p0, __p1) -#define lsll(__p0, __p1) __arm_lsll(__p0, __p1) -#define asrl(__p0, __p1) __arm_asrl(__p0, __p1) -#endif - -/* For big-endian, GCC's vector indices are reversed within each 64 bits - compared to the architectural lane indices used by MVE intrinsics. */ -#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0])) -#ifdef __ARM_BIG_ENDIAN -#define __ARM_LANEQ(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec)/2 - 1)) -#else -#define __ARM_LANEQ(__vec, __idx) __idx -#endif -#define __ARM_CHECK_LANEQ(__vec, __idx) \ - __builtin_arm_lane_check (__ARM_NUM_LANES(__vec), \ - __ARM_LANEQ(__vec, __idx)) - -__extension__ extern __inline mve_pred16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vpnot (mve_pred16_t __a) -{ - return __builtin_mve_vpnotv16bi (__a); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane_s16 (int16_t __a, int16x8_t __b, const int __idx) -{ - __ARM_CHECK_LANEQ (__b, __idx); - __b[__ARM_LANEQ(__b,__idx)] = __a; - return __b; -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __idx) -{ - __ARM_CHECK_LANEQ (__b, __idx); - __b[__ARM_LANEQ(__b,__idx)] = __a; - return __b; -} - -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __idx) -{ - __ARM_CHECK_LANEQ (__b, __idx); - __b[__ARM_LANEQ(__b,__idx)] = __a; - return __b; -} - -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __idx) -{ - __ARM_CHECK_LANEQ (__b, __idx); - __b[__ARM_LANEQ(__b,__idx)] = __a; - return __b; -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __idx) -{ - __ARM_CHECK_LANEQ (__b, __idx); - __b[__ARM_LANEQ(__b,__idx)] = __a; - return __b; -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __idx) -{ - __ARM_CHECK_LANEQ (__b, __idx); - __b[__ARM_LANEQ(__b,__idx)] = __a; - return __b; -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __idx) -{ - 
__ARM_CHECK_LANEQ (__b, __idx); - __b[__ARM_LANEQ(__b,__idx)] = __a; - return __b; -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __idx) -{ - __ARM_CHECK_LANEQ (__b, __idx); - __b[__ARM_LANEQ(__b,__idx)] = __a; - return __b; -} - -__extension__ extern __inline int16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane_s16 (int16x8_t __a, const int __idx) -{ - __ARM_CHECK_LANEQ (__a, __idx); - return __a[__ARM_LANEQ(__a,__idx)]; -} - -__extension__ extern __inline int32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane_s32 (int32x4_t __a, const int __idx) -{ - __ARM_CHECK_LANEQ (__a, __idx); - return __a[__ARM_LANEQ(__a,__idx)]; -} - -__extension__ extern __inline int8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane_s8 (int8x16_t __a, const int __idx) -{ - __ARM_CHECK_LANEQ (__a, __idx); - return __a[__ARM_LANEQ(__a,__idx)]; -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane_s64 (int64x2_t __a, const int __idx) -{ - __ARM_CHECK_LANEQ (__a, __idx); - return __a[__ARM_LANEQ(__a,__idx)]; -} - -__extension__ extern __inline uint8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane_u8 (uint8x16_t __a, const int __idx) -{ - __ARM_CHECK_LANEQ (__a, __idx); - return __a[__ARM_LANEQ(__a,__idx)]; -} - -__extension__ extern __inline uint16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane_u16 (uint16x8_t __a, const int __idx) -{ - __ARM_CHECK_LANEQ (__a, __idx); - return __a[__ARM_LANEQ(__a,__idx)]; -} - -__extension__ extern __inline uint32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane_u32 (uint32x4_t __a, const int __idx) -{ - __ARM_CHECK_LANEQ (__a, __idx); - return __a[__ARM_LANEQ(__a,__idx)]; -} - -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane_u64 (uint64x2_t __a, const int __idx) -{ - __ARM_CHECK_LANEQ (__a, __idx); - return __a[__ARM_LANEQ(__a,__idx)]; -} - -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_lsll (uint64_t value, int32_t shift) -{ - return (value << shift); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_asrl (int64_t value, int32_t shift) -{ - return (value >> shift); -} - -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_uqrshll (uint64_t value, int32_t shift) -{ - return __builtin_mve_uqrshll_sat64_di (value, shift); -} - -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_uqrshll_sat48 (uint64_t value, int32_t shift) -{ - return __builtin_mve_uqrshll_sat48_di (value, shift); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_sqrshrl (int64_t value, int32_t shift) -{ - return __builtin_mve_sqrshrl_sat64_di (value, shift); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_sqrshrl_sat48 (int64_t value, int32_t shift) -{ - return __builtin_mve_sqrshrl_sat48_di 
(value, shift); -} - -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_uqshll (uint64_t value, const int shift) -{ - return __builtin_mve_uqshll_di (value, shift); -} - -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_urshrl (uint64_t value, const int shift) -{ - return __builtin_mve_urshrl_di (value, shift); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_srshrl (int64_t value, const int shift) -{ - return __builtin_mve_srshrl_di (value, shift); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_sqshll (int64_t value, const int shift) -{ - return __builtin_mve_sqshll_di (value, shift); -} - -__extension__ extern __inline uint32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_uqrshl (uint32_t value, int32_t shift) -{ - return __builtin_mve_uqrshl_si (value, shift); -} - -__extension__ extern __inline int32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_sqrshr (int32_t value, int32_t shift) -{ - return __builtin_mve_sqrshr_si (value, shift); -} - -__extension__ extern __inline uint32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_uqshl (uint32_t value, const int shift) -{ - return __builtin_mve_uqshl_si (value, shift); -} - -__extension__ extern __inline uint32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_urshr (uint32_t value, const int shift) -{ - return __builtin_mve_urshr_si (value, shift); -} - -__extension__ extern __inline int32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_sqshl (int32_t value, const int shift) -{ - return __builtin_mve_sqshl_si (value, shift); -} - -__extension__ extern __inline int32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_srshr (int32_t value, const int shift) -{ - return __builtin_mve_srshr_si (value, shift); -} - -#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. 
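The inline scalar-shift definitions removed here are superseded by the declarations added to arm-mve-builtins-base.def earlier in the patch, so user code is unaffected. A minimal usage sketch (hypothetical function g, assuming an MVE-enabled target):

  #include <arm_mve.h>

  uint64_t
  g (uint64_t acc, int64_t s, uint32_t u, int32_t amount)
  {
    uint64_t a = lsll (acc, amount);  /* 64-bit logical shift left */
    int64_t b = asrl (s, amount);     /* 64-bit arithmetic shift right */
    uint32_t c = urshr (u, 4);        /* rounding shift right, imm in [1,32] */
    return a + (uint64_t) b + c;
  }
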
*/ - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane_f16 (float16_t __a, float16x8_t __b, const int __idx) -{ - __ARM_CHECK_LANEQ (__b, __idx); - __b[__ARM_LANEQ(__b,__idx)] = __a; - return __b; -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __idx) -{ - __ARM_CHECK_LANEQ (__b, __idx); - __b[__ARM_LANEQ(__b,__idx)] = __a; - return __b; -} - -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane_f16 (float16x8_t __a, const int __idx) -{ - __ARM_CHECK_LANEQ (__a, __idx); - return __a[__ARM_LANEQ(__a,__idx)]; -} - -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane_f32 (float32x4_t __a, const int __idx) -{ - __ARM_CHECK_LANEQ (__a, __idx); - return __a[__ARM_LANEQ(__a,__idx)]; -} -#endif - -#ifdef __cplusplus - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane (int16_t __a, int16x8_t __b, const int __idx) -{ - return __arm_vsetq_lane_s16 (__a, __b, __idx); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane (int32_t __a, int32x4_t __b, const int __idx) -{ - return __arm_vsetq_lane_s32 (__a, __b, __idx); -} - -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane (int8_t __a, int8x16_t __b, const int __idx) -{ - return __arm_vsetq_lane_s8 (__a, __b, __idx); -} - -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane (int64_t __a, int64x2_t __b, const int __idx) -{ - return __arm_vsetq_lane_s64 (__a, __b, __idx); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane (uint8_t __a, uint8x16_t __b, const int __idx) -{ - return __arm_vsetq_lane_u8 (__a, __b, __idx); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane (uint16_t __a, uint16x8_t __b, const int __idx) -{ - return __arm_vsetq_lane_u16 (__a, __b, __idx); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane (uint32_t __a, uint32x4_t __b, const int __idx) -{ - return __arm_vsetq_lane_u32 (__a, __b, __idx); -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane (uint64_t __a, uint64x2_t __b, const int __idx) -{ - return __arm_vsetq_lane_u64 (__a, __b, __idx); -} - -__extension__ extern __inline int16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane (int16x8_t __a, const int __idx) -{ - return __arm_vgetq_lane_s16 (__a, __idx); -} - -__extension__ extern __inline int32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane (int32x4_t __a, const int __idx) -{ - return __arm_vgetq_lane_s32 (__a, __idx); -} - -__extension__ extern __inline int8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane (int8x16_t __a, const int __idx) -{ - return __arm_vgetq_lane_s8 
(__a, __idx); -} - -__extension__ extern __inline int64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane (int64x2_t __a, const int __idx) -{ - return __arm_vgetq_lane_s64 (__a, __idx); -} - -__extension__ extern __inline uint8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane (uint8x16_t __a, const int __idx) -{ - return __arm_vgetq_lane_u8 (__a, __idx); -} - -__extension__ extern __inline uint16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane (uint16x8_t __a, const int __idx) -{ - return __arm_vgetq_lane_u16 (__a, __idx); -} - -__extension__ extern __inline uint32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane (uint32x4_t __a, const int __idx) -{ - return __arm_vgetq_lane_u32 (__a, __idx); -} - -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane (uint64x2_t __a, const int __idx) -{ - return __arm_vgetq_lane_u64 (__a, __idx); -} - -#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ - -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane (float16_t __a, float16x8_t __b, const int __idx) -{ - return __arm_vsetq_lane_f16 (__a, __b, __idx); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vsetq_lane (float32_t __a, float32x4_t __b, const int __idx) -{ - return __arm_vsetq_lane_f32 (__a, __b, __idx); -} - -__extension__ extern __inline float16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane (float16x8_t __a, const int __idx) -{ - return __arm_vgetq_lane_f16 (__a, __idx); -} - -__extension__ extern __inline float32_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vgetq_lane (float32x4_t __a, const int __idx) -{ - return __arm_vgetq_lane_f32 (__a, __idx); -} -#endif /* MVE Floating point. 
*/ - - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (uint8x16_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_u8 (); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (uint16x8_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_u16 (); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (uint32x4_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_u32 (); -} - -__extension__ extern __inline uint64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (uint64x2_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_u64 (); -} - -__extension__ extern __inline int8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (int8x16_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_s8 (); -} - -__extension__ extern __inline int16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (int16x8_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_s16 (); -} - -__extension__ extern __inline int32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (int32x4_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_s32 (); -} - -__extension__ extern __inline int64x2_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (int64x2_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_s64 (); -} - -#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. 
*/ -__extension__ extern __inline float16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (float16x8_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_f16 (); -} - -__extension__ extern __inline float32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vuninitializedq (float32x4_t /* __v ATTRIBUTE UNUSED */) -{ - return __arm_vuninitializedq_f32 (); -} -#endif /* __ARM_FEATURE_MVE & 2 (MVE floating point) */ - -#else -enum { - __ARM_mve_type_fp_n = 1, - __ARM_mve_type_int_n, - __ARM_mve_type_float16_t_ptr, - __ARM_mve_type_float16x8_t, - __ARM_mve_type_float16x8x2_t, - __ARM_mve_type_float16x8x4_t, - __ARM_mve_type_float32_t_ptr, - __ARM_mve_type_float32x4_t, - __ARM_mve_type_float32x4x2_t, - __ARM_mve_type_float32x4x4_t, - __ARM_mve_type_int16_t_ptr, - __ARM_mve_type_int16x8_t, - __ARM_mve_type_int16x8x2_t, - __ARM_mve_type_int16x8x4_t, - __ARM_mve_type_int32_t_ptr, - __ARM_mve_type_int32x4_t, - __ARM_mve_type_int32x4x2_t, - __ARM_mve_type_int32x4x4_t, - __ARM_mve_type_int64_t_ptr, - __ARM_mve_type_int64x2_t, - __ARM_mve_type_int8_t_ptr, - __ARM_mve_type_int8x16_t, - __ARM_mve_type_int8x16x2_t, - __ARM_mve_type_int8x16x4_t, - __ARM_mve_type_uint16_t_ptr, - __ARM_mve_type_uint16x8_t, - __ARM_mve_type_uint16x8x2_t, - __ARM_mve_type_uint16x8x4_t, - __ARM_mve_type_uint32_t_ptr, - __ARM_mve_type_uint32x4_t, - __ARM_mve_type_uint32x4x2_t, - __ARM_mve_type_uint32x4x4_t, - __ARM_mve_type_uint64_t_ptr, - __ARM_mve_type_uint64x2_t, - __ARM_mve_type_uint8_t_ptr, - __ARM_mve_type_uint8x16_t, - __ARM_mve_type_uint8x16x2_t, - __ARM_mve_type_uint8x16x4_t, - __ARM_mve_unsupported_type -}; - -#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ -#define __ARM_mve_typeid(x) _Generic(x, \ - float16_t: __ARM_mve_type_fp_n, \ - float16_t *: __ARM_mve_type_float16_t_ptr, \ - float16_t const *: __ARM_mve_type_float16_t_ptr, \ - float16x8_t: __ARM_mve_type_float16x8_t, \ - float16x8x2_t: __ARM_mve_type_float16x8x2_t, \ - float16x8x4_t: __ARM_mve_type_float16x8x4_t, \ - float32_t: __ARM_mve_type_fp_n, \ - float32_t *: __ARM_mve_type_float32_t_ptr, \ - float32_t const *: __ARM_mve_type_float32_t_ptr, \ - float32x4_t: __ARM_mve_type_float32x4_t, \ - float32x4x2_t: __ARM_mve_type_float32x4x2_t, \ - float32x4x4_t: __ARM_mve_type_float32x4x4_t, \ - int16_t: __ARM_mve_type_int_n, \ - int16_t *: __ARM_mve_type_int16_t_ptr, \ - int16_t const *: __ARM_mve_type_int16_t_ptr, \ - int16x8_t: __ARM_mve_type_int16x8_t, \ - int16x8x2_t: __ARM_mve_type_int16x8x2_t, \ - int16x8x4_t: __ARM_mve_type_int16x8x4_t, \ - int32_t: __ARM_mve_type_int_n, \ - int32_t *: __ARM_mve_type_int32_t_ptr, \ - int32_t const *: __ARM_mve_type_int32_t_ptr, \ - int32x4_t: __ARM_mve_type_int32x4_t, \ - int32x4x2_t: __ARM_mve_type_int32x4x2_t, \ - int32x4x4_t: __ARM_mve_type_int32x4x4_t, \ - int64_t: __ARM_mve_type_int_n, \ - int64_t *: __ARM_mve_type_int64_t_ptr, \ - int64_t const *: __ARM_mve_type_int64_t_ptr, \ - int64x2_t: __ARM_mve_type_int64x2_t, \ - int8_t: __ARM_mve_type_int_n, \ - int8_t *: __ARM_mve_type_int8_t_ptr, \ - int8_t const *: __ARM_mve_type_int8_t_ptr, \ - int8x16_t: __ARM_mve_type_int8x16_t, \ - int8x16x2_t: __ARM_mve_type_int8x16x2_t, \ - int8x16x4_t: __ARM_mve_type_int8x16x4_t, \ - uint16_t: __ARM_mve_type_int_n, \ - uint16_t *: __ARM_mve_type_uint16_t_ptr, \ - uint16_t const *: __ARM_mve_type_uint16_t_ptr, \ - uint16x8_t: __ARM_mve_type_uint16x8_t, \ - uint16x8x2_t: __ARM_mve_type_uint16x8x2_t, \ - uint16x8x4_t: __ARM_mve_type_uint16x8x4_t, 
\ - uint32_t: __ARM_mve_type_int_n, \ - uint32_t *: __ARM_mve_type_uint32_t_ptr, \ - uint32_t const *: __ARM_mve_type_uint32_t_ptr, \ - uint32x4_t: __ARM_mve_type_uint32x4_t, \ - uint32x4x2_t: __ARM_mve_type_uint32x4x2_t, \ - uint32x4x4_t: __ARM_mve_type_uint32x4x4_t, \ - uint64_t: __ARM_mve_type_int_n, \ - uint64_t *: __ARM_mve_type_uint64_t_ptr, \ - uint64_t const *: __ARM_mve_type_uint64_t_ptr, \ - uint64x2_t: __ARM_mve_type_uint64x2_t, \ - uint8_t: __ARM_mve_type_int_n, \ - uint8_t *: __ARM_mve_type_uint8_t_ptr, \ - uint8_t const *: __ARM_mve_type_uint8_t_ptr, \ - uint8x16_t: __ARM_mve_type_uint8x16_t, \ - uint8x16x2_t: __ARM_mve_type_uint8x16x2_t, \ - uint8x16x4_t: __ARM_mve_type_uint8x16x4_t, \ - default: _Generic(x, \ - signed char: __ARM_mve_type_int_n, \ - short: __ARM_mve_type_int_n, \ - int: __ARM_mve_type_int_n, \ - long: __ARM_mve_type_int_n, \ - long long: __ARM_mve_type_int_n, \ - _Float16: __ARM_mve_type_fp_n, \ - __fp16: __ARM_mve_type_fp_n, \ - float: __ARM_mve_type_fp_n, \ - double: __ARM_mve_type_fp_n, \ - unsigned char: __ARM_mve_type_int_n, \ - unsigned short: __ARM_mve_type_int_n, \ - unsigned int: __ARM_mve_type_int_n, \ - unsigned long: __ARM_mve_type_int_n, \ - unsigned long long: __ARM_mve_type_int_n, \ - signed char*: __ARM_mve_type_int8_t_ptr, \ - short*: __ARM_mve_type_int16_t_ptr, \ - int*: __ARM_mve_type_int32_t_ptr, \ - long*: __ARM_mve_type_int32_t_ptr, \ - long long*: __ARM_mve_type_int64_t_ptr, \ - _Float16*: __ARM_mve_type_float16_t_ptr, \ - __fp16*: __ARM_mve_type_float16_t_ptr, \ - float*: __ARM_mve_type_float32_t_ptr, \ - unsigned char*: __ARM_mve_type_uint8_t_ptr, \ - unsigned short*: __ARM_mve_type_uint16_t_ptr, \ - unsigned int*: __ARM_mve_type_uint32_t_ptr, \ - unsigned long*: __ARM_mve_type_uint32_t_ptr, \ - unsigned long long*: __ARM_mve_type_uint64_t_ptr, \ - default: __ARM_mve_unsupported_type)) -#else -#define __ARM_mve_typeid(x) _Generic(x, \ - int16_t: __ARM_mve_type_int_n, \ - int16_t *: __ARM_mve_type_int16_t_ptr, \ - int16_t const *: __ARM_mve_type_int16_t_ptr, \ - int16x8_t: __ARM_mve_type_int16x8_t, \ - int16x8x2_t: __ARM_mve_type_int16x8x2_t, \ - int16x8x4_t: __ARM_mve_type_int16x8x4_t, \ - int32_t: __ARM_mve_type_int_n, \ - int32_t *: __ARM_mve_type_int32_t_ptr, \ - int32_t const *: __ARM_mve_type_int32_t_ptr, \ - int32x4_t: __ARM_mve_type_int32x4_t, \ - int32x4x2_t: __ARM_mve_type_int32x4x2_t, \ - int32x4x4_t: __ARM_mve_type_int32x4x4_t, \ - int64_t: __ARM_mve_type_int_n, \ - int64_t *: __ARM_mve_type_int64_t_ptr, \ - int64_t const *: __ARM_mve_type_int64_t_ptr, \ - int64x2_t: __ARM_mve_type_int64x2_t, \ - int8_t: __ARM_mve_type_int_n, \ - int8_t *: __ARM_mve_type_int8_t_ptr, \ - int8_t const *: __ARM_mve_type_int8_t_ptr, \ - int8x16_t: __ARM_mve_type_int8x16_t, \ - int8x16x2_t: __ARM_mve_type_int8x16x2_t, \ - int8x16x4_t: __ARM_mve_type_int8x16x4_t, \ - uint16_t: __ARM_mve_type_int_n, \ - uint16_t *: __ARM_mve_type_uint16_t_ptr, \ - uint16_t const *: __ARM_mve_type_uint16_t_ptr, \ - uint16x8_t: __ARM_mve_type_uint16x8_t, \ - uint16x8x2_t: __ARM_mve_type_uint16x8x2_t, \ - uint16x8x4_t: __ARM_mve_type_uint16x8x4_t, \ - uint32_t: __ARM_mve_type_int_n, \ - uint32_t *: __ARM_mve_type_uint32_t_ptr, \ - uint32_t const *: __ARM_mve_type_uint32_t_ptr, \ - uint32x4_t: __ARM_mve_type_uint32x4_t, \ - uint32x4x2_t: __ARM_mve_type_uint32x4x2_t, \ - uint32x4x4_t: __ARM_mve_type_uint32x4x4_t, \ - uint64_t: __ARM_mve_type_int_n, \ - uint64_t *: __ARM_mve_type_uint64_t_ptr, \ - uint64_t const *: __ARM_mve_type_uint64_t_ptr, \ - uint64x2_t: 
__ARM_mve_type_uint64x2_t, \ - uint8_t: __ARM_mve_type_int_n, \ - uint8_t *: __ARM_mve_type_uint8_t_ptr, \ - uint8_t const *: __ARM_mve_type_uint8_t_ptr, \ - uint8x16_t: __ARM_mve_type_uint8x16_t, \ - uint8x16x2_t: __ARM_mve_type_uint8x16x2_t, \ - uint8x16x4_t: __ARM_mve_type_uint8x16x4_t, \ - default: _Generic(x, \ - signed char: __ARM_mve_type_int_n, \ - short: __ARM_mve_type_int_n, \ - int: __ARM_mve_type_int_n, \ - long: __ARM_mve_type_int_n, \ - long long: __ARM_mve_type_int_n, \ - unsigned char: __ARM_mve_type_int_n, \ - unsigned short: __ARM_mve_type_int_n, \ - unsigned int: __ARM_mve_type_int_n, \ - unsigned long: __ARM_mve_type_int_n, \ - unsigned long long: __ARM_mve_type_int_n, \ - signed char*: __ARM_mve_type_int8_t_ptr, \ - short*: __ARM_mve_type_int16_t_ptr, \ - int*: __ARM_mve_type_int32_t_ptr, \ - long*: __ARM_mve_type_int32_t_ptr, \ - long long*: __ARM_mve_type_int64_t_ptr, \ - unsigned char*: __ARM_mve_type_uint8_t_ptr, \ - unsigned short*: __ARM_mve_type_uint16_t_ptr, \ - unsigned int*: __ARM_mve_type_uint32_t_ptr, \ - unsigned long*: __ARM_mve_type_uint32_t_ptr, \ - unsigned long long*: __ARM_mve_type_uint64_t_ptr, \ - default: __ARM_mve_unsupported_type)) -#endif /* MVE Floating point. */ - -extern void *__ARM_undef; -#define __ARM_mve_coerce(param, type) \ - _Generic(param, type: param, default: *(type *)__ARM_undef) -#define __ARM_mve_coerce_i_scalar(param, type) \ - _Generic(param, type: param, const type: param, default: _Generic (param, int8_t: param, int16_t: param, int32_t: param, int64_t: param, uint8_t: param, uint16_t: param, uint32_t: param, uint64_t: param, default: *(type *)__ARM_undef)) - -#define __ARM_mve_coerce_s8_ptr(param, type) \ - _Generic(param, type: param, const type: param, default: _Generic (param, signed char*: param, default: *(type *)__ARM_undef)) -#define __ARM_mve_coerce_u8_ptr(param, type) \ - _Generic(param, type: param, const type: param, default: _Generic (param, unsigned char*: param, default: *(type *)__ARM_undef)) - -#define __ARM_mve_coerce_s16_ptr(param, type) \ - _Generic(param, type: param, const type: param, default: _Generic (param, short*: param, default: *(type *)__ARM_undef)) -#define __ARM_mve_coerce_u16_ptr(param, type) \ - _Generic(param, type: param, const type: param, default: _Generic (param, unsigned short*: param, default: *(type *)__ARM_undef)) - -#define __ARM_mve_coerce_s32_ptr(param, type) \ - _Generic(param, type: param, const type: param, default: _Generic (param, int*: param, long*: param, default: *(type *)__ARM_undef)) -#define __ARM_mve_coerce_u32_ptr(param, type) \ - _Generic(param, type: param, const type: param, default: _Generic (param, unsigned int*: param, unsigned long*: param, default: *(type *)__ARM_undef)) - -#define __ARM_mve_coerce_s64_ptr(param, type) \ - _Generic(param, type: param, const type: param, default: _Generic (param, long long*: param, default: *(type *)__ARM_undef)) -#define __ARM_mve_coerce_u64_ptr(param, type) \ - _Generic(param, type: param, const type: param, default: _Generic (param, unsigned long long*: param, default: *(type *)__ARM_undef)) - -#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. 
*/ -#define __ARM_mve_coerce_f_scalar(param, type) \ - _Generic(param, type: param, const type: param, __fp16: param, default: _Generic (param, _Float16: param, float16_t: param, float32_t: param, default: *(type *)__ARM_undef)) -#define __ARM_mve_coerce_f16_ptr(param, type) \ - _Generic(param, type: param, const type: param, default: _Generic (param, __fp16*: param, _Float16*: param, default: *(type *)__ARM_undef)) -#define __ARM_mve_coerce_f32_ptr(param, type) \ - _Generic(param, type: param, const type: param, default: _Generic (param, float*: param, default: *(type *)__ARM_undef)) -#endif - -#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ - -#define __arm_vuninitializedq(p0) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int8x16_t]: __arm_vuninitializedq_s8 (), \ - int (*)[__ARM_mve_type_int16x8_t]: __arm_vuninitializedq_s16 (), \ - int (*)[__ARM_mve_type_int32x4_t]: __arm_vuninitializedq_s32 (), \ - int (*)[__ARM_mve_type_int64x2_t]: __arm_vuninitializedq_s64 (), \ - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vuninitializedq_u8 (), \ - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vuninitializedq_u16 (), \ - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vuninitializedq_u32 (), \ - int (*)[__ARM_mve_type_uint64x2_t]: __arm_vuninitializedq_u64 (), \ - int (*)[__ARM_mve_type_float16x8_t]: __arm_vuninitializedq_f16 (), \ - int (*)[__ARM_mve_type_float32x4_t]: __arm_vuninitializedq_f32 ());}) - -#define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int8x16_t]: __arm_vgetq_lane_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ - int (*)[__ARM_mve_type_int16x8_t]: __arm_vgetq_lane_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ - int (*)[__ARM_mve_type_int32x4_t]: __arm_vgetq_lane_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \ - int (*)[__ARM_mve_type_int64x2_t]: __arm_vgetq_lane_s64 (__ARM_mve_coerce(__p0, int64x2_t), p1), \ - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vgetq_lane_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vgetq_lane_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vgetq_lane_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1), \ - int (*)[__ARM_mve_type_uint64x2_t]: __arm_vgetq_lane_u64 (__ARM_mve_coerce(__p0, uint64x2_t), p1), \ - int (*)[__ARM_mve_type_float16x8_t]: __arm_vgetq_lane_f16 (__ARM_mve_coerce(__p0, float16x8_t), p1), \ - int (*)[__ARM_mve_type_float32x4_t]: __arm_vgetq_lane_f32 (__ARM_mve_coerce(__p0, float32x4_t), p1));}) - -#define __arm_vsetq_lane(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vsetq_lane_s8 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, int8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vsetq_lane_s16 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vsetq_lane_s32 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, int32x4_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int64x2_t]: __arm_vsetq_lane_s64 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, int64x2_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vsetq_lane_u8 (__ARM_mve_coerce_i_scalar(__p0, int), 
__ARM_mve_coerce(__p1, uint8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vsetq_lane_u16 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vsetq_lane_u32 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint64x2_t]: __arm_vsetq_lane_u64 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, uint64x2_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vsetq_lane_f16 (__ARM_mve_coerce_f_scalar(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vsetq_lane_f32 (__ARM_mve_coerce_f_scalar(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) - -#else /* MVE Integer. */ - -#define __arm_vuninitializedq(p0) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int8x16_t]: __arm_vuninitializedq_s8 (), \ - int (*)[__ARM_mve_type_int16x8_t]: __arm_vuninitializedq_s16 (), \ - int (*)[__ARM_mve_type_int32x4_t]: __arm_vuninitializedq_s32 (), \ - int (*)[__ARM_mve_type_int64x2_t]: __arm_vuninitializedq_s64 (), \ - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vuninitializedq_u8 (), \ - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vuninitializedq_u16 (), \ - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vuninitializedq_u32 (), \ - int (*)[__ARM_mve_type_uint64x2_t]: __arm_vuninitializedq_u64 ());}) - -#define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ - int (*)[__ARM_mve_type_int8x16_t]: __arm_vgetq_lane_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \ - int (*)[__ARM_mve_type_int16x8_t]: __arm_vgetq_lane_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \ - int (*)[__ARM_mve_type_int32x4_t]: __arm_vgetq_lane_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \ - int (*)[__ARM_mve_type_int64x2_t]: __arm_vgetq_lane_s64 (__ARM_mve_coerce(__p0, int64x2_t), p1), \ - int (*)[__ARM_mve_type_uint8x16_t]: __arm_vgetq_lane_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1), \ - int (*)[__ARM_mve_type_uint16x8_t]: __arm_vgetq_lane_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \ - int (*)[__ARM_mve_type_uint32x4_t]: __arm_vgetq_lane_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1), \ - int (*)[__ARM_mve_type_uint64x2_t]: __arm_vgetq_lane_u64 (__ARM_mve_coerce(__p0, uint64x2_t), p1));}) - -#define __arm_vsetq_lane(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ - __typeof(p1) __p1 = (p1); \ - _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vsetq_lane_s8 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, int8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vsetq_lane_s16 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vsetq_lane_s32 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, int32x4_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int64x2_t]: __arm_vsetq_lane_s64 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, int64x2_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vsetq_lane_u8 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: 
__arm_vsetq_lane_u16 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vsetq_lane_u32 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint64x2_t]: __arm_vsetq_lane_u64 (__ARM_mve_coerce_i_scalar(__p0, int), __ARM_mve_coerce(__p1, uint64x2_t), p2));}) - -#endif /* MVE Integer. */ - -#endif /* __cplusplus */ #endif /* __ARM_FEATURE_MVE */ #endif /* _GCC_ARM_MVE_H. */ diff --git a/gcc/config/arm/arm_mve_types.h b/gcc/config/arm/arm_mve_types.h index 42e7466..d1889c6 100644 --- a/gcc/config/arm/arm_mve_types.h +++ b/gcc/config/arm/arm_mve_types.h @@ -26,10 +26,8 @@ #ifndef _GCC_ARM_MVE_TYPES_H #define _GCC_ARM_MVE_TYPES_H -#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ typedef __fp16 float16_t; typedef float float32_t; -#endif #pragma GCC arm "arm_mve_types.h" diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 24743a8..e3809d3 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -35,9 +35,8 @@ ;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, DN, Dm, Dl, DL, Do, Dv, Dy, Di, ;; Dj, Ds, Dt, Dp, Dz, Tu, Te ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe -;; in Thumb-2 state: Ha, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Rd, Rf, Rb, Ra, -;; Rg, Ri -;; in all states: Pg +;; in Thumb-2 state: Ha, Pg, Ph, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Ra, +;; Rb, Rd, Rf, Rg, Ri ;; The following memory constraints have been used: ;; in ARM/Thumb-2 state: Uh, Ut, Uv, Un, Um, Us, Uo, Up, Uf, Ux, Ul, Uz @@ -238,6 +237,11 @@ (and (match_code "const_int") (match_test "TARGET_THUMB2 && ival >= 1 && ival <= 32"))) +(define_constraint "Ph" + "@internal In Thumb-2 state a constant in range 32 to 255" + (and (match_code "const_int") + (match_test "TARGET_THUMB2 && ival >= 32 && ival <= 255"))) + (define_constraint "Ps" "@internal In Thumb-2 state a constant in the range -255 to +255" (and (match_code "const_int") diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index dfbe027..592613d 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -2061,6 +2061,14 @@ (V2QI "v2qi")]) (define_mode_attr MVE_vctp [(V16BI "8") (V8BI "16") (V4BI "32") (V2QI "64")]) +;; Assembly modifier for a const_int operand to narrow it to a +;; specific mode. For vector modes this is the element size. +;; Currently only supports SI and HI. 
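+;; E.g. "%<asm_const_size>2" in an output template prints as "%L2" for
+;; the 16-bit cases (HI, V8HI, V4HI), narrowing the constant to the low
+;; 16 bits, and as plain "%2" for the 32-bit ones.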
+ +(define_mode_attr asm_const_size [(SI "") (HI "L") + (V4SI "") (V2SI "") + (V8HI "L") (V4HI "L")]) + ;;---------------------------------------------------------------------------- ;; Code attributes ;;---------------------------------------------------------------------------- diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 87b45b2..6a163e4 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -417,7 +417,7 @@ VMVNQ_N)) ] "TARGET_HAVE_MVE" - "<mve_insn>.i%#<V_sz_elem>\t%q0, %1" + "<mve_insn>.i%#<V_sz_elem>\t%q0, %<asm_const_size>1" [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>")) (set_attr "type" "mve_move") ]) @@ -1444,7 +1444,7 @@ MVE_INT_N_BINARY_LOGIC)) ] "TARGET_HAVE_MVE" - "<mve_insn>.i%#<V_sz_elem> %q0, %2" + "<mve_insn>.i%#<V_sz_elem> %q0, %<asm_const_size>2" [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>")) (set_attr "type" "mve_move") ]) @@ -2335,7 +2335,7 @@ VMVNQ_M_N)) ] "TARGET_HAVE_MVE" - "vpst\;<mve_insn>t.i%#<V_sz_elem>\t%q0, %2" + "vpst\;<mve_insn>t.i%#<V_sz_elem>\t%q0, %<asm_const_size>2" [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>")) (set_attr "type" "mve_move") (set_attr "length""8")]) @@ -2353,7 +2353,7 @@ MVE_INT_M_N_BINARY_LOGIC)) ] "TARGET_HAVE_MVE" - "vpst\;<mve_insn>t.i%#<V_sz_elem>\t%q0, %2" + "vpst\;<mve_insn>t.i%#<V_sz_elem>\t%q0, %<asm_const_size>2" [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>")) (set_attr "type" "mve_move") (set_attr "length""8")]) @@ -4158,10 +4158,11 @@ return ""; } [(set_attr "length" "16")]) + ;; ;; [vgetq_lane_u, vgetq_lane_s, vgetq_lane_f]) ;; -(define_insn "mve_vec_extract<mode><V_elem_l>" +(define_insn "@mve_vec_extract<mode><V_elem_l>" [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=r") (vec_select:<V_elem> (match_operand:MVE_VLD_ST 1 "s_register_operand" "w") @@ -4236,7 +4237,7 @@ ;; ;; [vsetq_lane_u, vsetq_lane_s, vsetq_lane_f]) ;; -(define_insn "mve_vec_set<mode>_internal" +(define_insn "@mve_vec_set<mode>_internal" [(set (match_operand:VQ2 0 "s_register_operand" "=w") (vec_merge:VQ2 (vec_duplicate:VQ2 @@ -4278,7 +4279,7 @@ ;; ;; [uqrshll_di] ;; -(define_insn "mve_uqrshll_sat<supf>_di" +(define_insn "@mve_uqrshll_sat<supf>_di" [(set (match_operand:DI 0 "arm_low_register_operand" "=l") (unspec:DI [(match_operand:DI 1 "arm_low_register_operand" "0") (match_operand:SI 2 "register_operand" "r")] @@ -4290,7 +4291,7 @@ ;; ;; [sqrshrl_di] ;; -(define_insn "mve_sqrshrl_sat<supf>_di" +(define_insn "@mve_sqrshrl_sat<supf>_di" [(set (match_operand:DI 0 "arm_low_register_operand" "=l") (unspec:DI [(match_operand:DI 1 "arm_low_register_operand" "0") (match_operand:SI 2 "register_operand" "r")] @@ -4374,7 +4375,7 @@ ;; (define_insn "mve_sqshl_si" [(set (match_operand:SI 0 "arm_general_register_operand" "=r") - (ss_ashift:SI (match_operand:DI 1 "arm_general_register_operand" "0") + (ss_ashift:SI (match_operand:SI 1 "arm_general_register_operand" "0") (match_operand:SI 2 "immediate_operand" "Pg")))] "TARGET_HAVE_MVE" "sqshl%?\\t%1, %2" @@ -4385,7 +4386,7 @@ ;; (define_insn "mve_srshr_si" [(set (match_operand:SI 0 "arm_general_register_operand" "=r") - (unspec:SI [(match_operand:DI 1 "arm_general_register_operand" "0") + (unspec:SI [(match_operand:SI 1 "arm_general_register_operand" "0") (match_operand:SI 2 "immediate_operand" "Pg")] SRSHR))] "TARGET_HAVE_MVE" @@ -4714,3 +4715,215 @@ "TARGET_HAVE_MVE" "dlstp.<dlstp_elemsize>\t%|lr, 
%0" [(set_attr "type" "mve_misc")]) + + +;; +;; Scalar shifts +;; +;; immediate shift amounts have to be in the [1..32] range +;; +;; shift amounts stored in a register can be negative, in which case +;; the shift is reversed (asrl, lsll only) +;; since RTL expects shift amounts to be unsigned, make sure the +;; negative case is handled, in case simplify_rtx could optimize: +;; (set (reg:SI 1) (const_int -5)) +;; (set (reg:DI 2) (ashift:DI (reg:DI 3) (reg:SI 1))) +;; into: +;; (set (reg:DI 2) (ashift:DI (reg:DI 3) (const_int -5))) + +;; General pattern for asrl +(define_expand "mve_asrl" + [(set (match_operand:DI 0 "arm_general_register_operand" "") + (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "") + (match_operand:QI 2 "reg_or_int_operand" "")))] + "TARGET_HAVE_MVE" +{ + rtx amount = operands[2]; + if (CONST_INT_P (amount)) + { + HOST_WIDE_INT ival = INTVAL (amount); + + if (ival >= 0) + /* Right shift. */ + emit_insn (gen_mve_asrl_imm (operands[0], operands[1], amount)); + else + /* Left shift. */ + emit_insn (gen_mve_lsll_imm (operands[0], operands[1], + GEN_INT (-ival))); + DONE; + } + + emit_insn (gen_mve_asrl_internal (operands[0], operands[1], operands[2])); + DONE; +}) + +;; immediate shift amount +;; we have to split the insn if the amount is not in the [1..32] range +(define_insn_and_split "mve_asrl_imm" + [(set (match_operand:DI 0 "arm_general_register_operand" "=r,r") + (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "0,r") + (match_operand:QI 2 "immediate_operand" "Pg,Ph")))] + "TARGET_HAVE_MVE" + "asrl%?\\t%Q0, %R0, %2" + "&& satisfies_constraint_Ph (operands[2])" + [(clobber (const_int 0))] + " + rtx amount = operands[2]; + HOST_WIDE_INT ival = INTVAL (amount); + + /* shift amount in [1..32] is already handled by the Pg constraint. */ + + /* Shift by 0, it is just a move. */ + if (ival == 0) + { + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + + /* ival < 0 should have already been handled by mve_asrl. */ + gcc_assert (ival >= 32); + + rtx in_hi = gen_highpart (SImode, operands[1]); + rtx out_lo = gen_lowpart (SImode, operands[0]); + rtx out_hi = gen_highpart (SImode, operands[0]); + + if (ival == 32) + /* out_hi gets the sign bit + out_lo gets in_hi. */ + emit_insn (gen_movsi (out_lo, in_hi)); + else + /* Shift amount above immediate range (ival > 32). + out_hi gets the sign bit + out_lo gets in_hi << (ival - 32) or << 31 if ival >= 64. + If ival >= 64, the result is either 0 or -1, depending on the + input sign. */ + emit_insn (gen_rtx_SET (out_lo, + gen_rtx_fmt_ee (ASHIFTRT, + SImode, + in_hi, + GEN_INT (MIN (ival - 32, + 31))))); + + /* Copy sign bit, which is OK even if out_lo == in_hi. 
*/ + emit_insn (gen_rtx_SET (out_hi, + gen_rtx_fmt_ee (ASHIFTRT, + SImode, + in_hi, + GEN_INT (31)))); + DONE; + " + [(set_attr "predicable" "yes,yes") + (set_attr "length" "4,8")]) + +(define_insn "mve_asrl_internal" + [(set (match_operand:DI 0 "arm_general_register_operand" "=r") + (if_then_else:DI + (ge:QI (match_operand:QI 2 "arm_general_register_operand" "r") + (const_int 0)) + (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "0") + (match_dup 2)) + (ashift:DI (match_dup 1) (neg:QI (match_dup 2)))))] + "TARGET_HAVE_MVE" + "asrl%?\\t%Q0, %R0, %2" + [(set_attr "predicable" "yes")]) + +;; General pattern for lsll +(define_expand "mve_lsll" + [(set (match_operand:DI 0 "arm_general_register_operand" "") + (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "") + (match_operand:QI 2 "reg_or_int_operand" "")))] + "TARGET_HAVE_MVE" +{ + rtx amount = operands[2]; + if (CONST_INT_P (amount)) + { + HOST_WIDE_INT ival = INTVAL (amount); + + if (ival >= 0) + /* Left shift. */ + emit_insn (gen_mve_lsll_imm (operands[0], operands[1], amount)); + else + /* Right shift. */ + emit_insn (gen_lshrdi3 (operands[0], operands[1], + GEN_INT (-ival))); + DONE; + } + + emit_insn (gen_mve_lsll_internal (operands[0], operands[1], operands[2])); + DONE; +}) + +;; immediate shift amount +;; we have to split the insn if the amount is not in the [1..32] range +(define_insn_and_split "mve_lsll_imm" + [(set (match_operand:DI 0 "arm_general_register_operand" "=r,r") + (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "0,r") + (match_operand:QI 2 "immediate_operand" "Pg,Ph")))] + "TARGET_HAVE_MVE" + "lsll%?\\t%Q0, %R0, %2" + "&& satisfies_constraint_Ph (operands[2])" + [(clobber (const_int 0))] + " + rtx amount = operands[2]; + HOST_WIDE_INT ival = INTVAL (amount); + + /* shift amount in [1..32] is already handled by the Pg constraint. */ + + /* Shift by 0, it is just a move. */ + if (ival == 0) + { + emit_insn (gen_movdi (operands[0], operands[1])); + DONE; + } + + /* Shift amount is the whole input width or more, result is 0. */ + if (ival >= 64) + { + emit_insn (gen_movdi (operands[0], const0_rtx)); + DONE; + } + + /* ival < 0 should have already been handled by the mve_lsll expander. */ + gcc_assert (ival >= 32); + + rtx in_lo = gen_lowpart (SImode, operands[1]); + rtx out_lo = gen_lowpart (SImode, operands[0]); + rtx out_hi = gen_highpart (SImode, operands[0]); + + if (ival == 32) + /* Shift by 32 is just a move. */ + emit_insn (gen_movsi (out_hi, in_lo)); + else + /* Shift amount above immediate range: 32 < ival < 64. */ + emit_insn (gen_rtx_SET (out_hi, + gen_rtx_fmt_ee (ASHIFT, + SImode, + in_lo, + GEN_INT (ival - 32)))); + + /* Clear low 32 bits.
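E.g. splitting an lsll by 40 sets out_hi = in_lo << 8 above (moving bits 23..0 of the input to bits 63..40 of the result); the low word is zeroed below.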
*/ + emit_insn (gen_rtx_SET (out_lo, const0_rtx)); + DONE; + " + [(set_attr "predicable" "yes,yes") + (set_attr "length" "4,8")]) + +(define_insn "mve_lsll_internal" + [(set (match_operand:DI 0 "arm_general_register_operand" "=r") + (if_then_else:DI + (ge:QI (match_operand:QI 2 "arm_general_register_operand" "r") + (const_int 0)) + (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "0") + (match_dup 2)) + (lshiftrt:DI (match_dup 1) (neg:QI (match_dup 2)))))] + "TARGET_HAVE_MVE" + "lsll%?\\t%Q0, %R0, %2" + [(set_attr "predicable" "yes")]) + +(define_insn "mve_lsrl" + [(set (match_operand:DI 0 "arm_general_register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "0") + (match_operand:SI 2 "long_shift_imm" "Pg")))] + "TARGET_HAVE_MVE" + "lsrl%?\\t%Q0, %R0, %2" + [(set_attr "predicable" "yes")]) diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 2c2026b..c353995 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -1464,19 +1464,24 @@ (pc))) (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" - "* - if (get_attr_length (insn) == 2) - return \"cbz\\t%0, %l1\"; - else - return \"cmp\\t%0, #0\;beq\\t%l1\"; - " + { + int offset = (INSN_ADDRESSES (INSN_UID (operands[1])) + - INSN_ADDRESSES (INSN_UID (insn))); + if (get_attr_length (insn) == 2) + return "cbz\t%0, %l1"; + else if (offset >= -1048564 && offset <= 1048576) + return "cmp\t%0, #0\;beq\t%l1"; + else if (which_alternative == 0) + return "cbnz\t%0, %-LCB%=\;b\t%l1\n%-LCB%=:"; + return "cmp\t%0, #0\;bne\t%-LCB%=\;b\t%l1\n%-LCB%=:"; + } [(set (attr "length") (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int 2)) (le (minus (match_dup 1) (pc)) (const_int 128)) (not (match_test "which_alternative"))) (const_int 2) - (const_int 8))) + (const_int 10))) (set_attr "type" "branch,multiple")] ) @@ -1488,19 +1493,24 @@ (pc))) (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" - "* - if (get_attr_length (insn) == 2) - return \"cbnz\\t%0, %l1\"; - else - return \"cmp\\t%0, #0\;bne\\t%l1\"; - " + { + int offset = (INSN_ADDRESSES (INSN_UID (operands[1])) + - INSN_ADDRESSES (INSN_UID (insn))); + if (get_attr_length (insn) == 2) + return "cbnz\t%0, %l1"; + else if (offset >= -1048564 && offset <= 1048576) + return "cmp\t%0, #0\;bne\t%l1"; + else if (which_alternative == 0) + return "cbz\t%0, %-LCB%=\;b\t%l1\n%-LCB%=:"; + return "cmp\t%0, #0\;beq\t%-LCB%=\;b\t%l1\n%-LCB%=:"; + } [(set (attr "length") (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int 2)) (le (minus (match_dup 1) (pc)) (const_int 128)) (not (match_test "which_alternative"))) (const_int 2) - (const_int 8))) + (const_int 10))) (set_attr "type" "branch,multiple")] ) @@ -1733,30 +1743,6 @@ [(set_attr "predicable" "yes")] ) -(define_insn "thumb2_asrl" - [(set (match_operand:DI 0 "arm_general_register_operand" "+r") - (ashiftrt:DI (match_dup 0) - (match_operand:SI 1 "arm_reg_or_long_shift_imm" "rPg")))] - "TARGET_HAVE_MVE" - "asrl%?\\t%Q0, %R0, %1" - [(set_attr "predicable" "yes")]) - -(define_insn "thumb2_lsll" - [(set (match_operand:DI 0 "arm_general_register_operand" "+r") - (ashift:DI (match_dup 0) - (match_operand:SI 1 "arm_reg_or_long_shift_imm" "rPg")))] - "TARGET_HAVE_MVE" - "lsll%?\\t%Q0, %R0, %1" - [(set_attr "predicable" "yes")]) - -(define_insn "thumb2_lsrl" - [(set (match_operand:DI 0 "arm_general_register_operand" "+r") - (lshiftrt:DI (match_dup 0) - (match_operand:SI 1 "long_shift_imm" "Pg")))] - "TARGET_HAVE_MVE" - "lsrl%?\\t%Q0, %R0, %1" - [(set_attr "predicable" "yes")]) - ;; Originally 
expanded by 'doloop_end'. (define_insn "*doloop_end_internal" [(set (pc) diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def index 896623d..8b05af5 100644 --- a/gcc/config/avr/avr-mcus.def +++ b/gcc/config/avr/avr-mcus.def @@ -421,6 +421,14 @@ AVR_MCU ("avr16ea48", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR AVR_MCU ("avr32ea28", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32EA28__", 0x7000, 0x0, 0x8000, 0x8000) AVR_MCU ("avr32ea32", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32EA32__", 0x7000, 0x0, 0x8000, 0x8000) AVR_MCU ("avr32ea48", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32EA48__", 0x7000, 0x0, 0x8000, 0x8000) +AVR_MCU ("avr16la14", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR16LA14__", 0x7800, 0x0, 0x4000, 0x8000) +AVR_MCU ("avr16la20", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR16LA20__", 0x7800, 0x0, 0x4000, 0x8000) +AVR_MCU ("avr16la28", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR16LA28__", 0x7800, 0x0, 0x4000, 0x8000) +AVR_MCU ("avr16la32", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR16LA32__", 0x7800, 0x0, 0x4000, 0x8000) +AVR_MCU ("avr32la14", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32LA14__", 0x7800, 0x0, 0x8000, 0x8000) +AVR_MCU ("avr32la20", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32LA20__", 0x7800, 0x0, 0x8000, 0x8000) +AVR_MCU ("avr32la28", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32LA28__", 0x7800, 0x0, 0x8000, 0x8000) +AVR_MCU ("avr32la32", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32LA32__", 0x7800, 0x0, 0x8000, 0x8000) AVR_MCU ("avr32sd20", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32SD20__", 0x7000, 0x0, 0x8000, 0x8000) AVR_MCU ("avr32sd28", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32SD28__", 0x7000, 0x0, 0x8000, 0x8000) AVR_MCU ("avr32sd32", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32SD32__", 0x7000, 0x0, 0x8000, 0x8000) diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 227c12a..775be80 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -3272,7 +3272,8 @@ avr_load_libgcc_p (rtx op) return (n_bytes > 2 && !AVR_HAVE_LPMX - && avr_mem_flash_p (op)); + && avr_mem_flash_p (op) + && MEM_ADDR_SPACE (op) == ADDR_SPACE_FLASH); } @@ -3624,6 +3625,46 @@ avr_out_lpm_no_lpmx (rtx_insn *insn, rtx *xop, int *plen) avr_asm_len ("sbiw %2,1", xop, plen, 1); break; /* 2 */ + + /* cases 3 and 4 are only needed with ELPM but no ELPMx. 
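Case N loads an N-byte value through the Z pointer; the REG_Z - 2 checks below deal with destinations whose upper bytes overlap Z (R31:R30).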
*/ + + case 3: + if (REGNO (dest) == REG_Z - 2 + && !reg_unused_after (insn, all_regs_rtx[REG_31])) + avr_asm_len ("push r31", xop, plen, 1); + + avr_asm_len ("%4lpm $ mov %A0,%3 $ adiw %2,1", xop, plen, 3); + avr_asm_len ("%4lpm $ mov %B0,%3 $ adiw %2,1", xop, plen, 3); + avr_asm_len ("%4lpm $ mov %C0,%3", xop, plen, 2); + + if (REGNO (dest) == REG_Z - 2) + { + if (!reg_unused_after (insn, all_regs_rtx[REG_31])) + avr_asm_len ("pop r31", xop, plen, 1); + } + else if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,2", xop, plen, 1); + + break; /* 3 */ + + case 4: + avr_asm_len ("%4lpm $ mov %A0,%3 $ adiw %2,1", xop, plen, 3); + avr_asm_len ("%4lpm $ mov %B0,%3 $ adiw %2,1", xop, plen, 3); + if (REGNO (dest) != REG_Z - 2) + { + avr_asm_len ("%4lpm $ mov %C0,%3 $ adiw %2,1", xop, plen, 3); + avr_asm_len ("%4lpm $ mov %D0,%3", xop, plen, 2); + if (!reg_unused_after (insn, addr)) + avr_asm_len ("sbiw %2,3", xop, plen, 1); + } + else + { + avr_asm_len ("%4lpm $ push %3 $ adiw %2,1", xop, plen, 3); + avr_asm_len ("%4lpm $ mov %D0,%3", xop, plen, 2); + avr_asm_len ("pop %C0", xop, plen, 1); + } + + break; /* 4 */ } break; /* REG */ @@ -14353,6 +14394,16 @@ avr_output_addr_vec (rtx_insn *labl, rtx table) { FILE *stream = asm_out_file; + // AVR-SD: On functional safety devices, each executed instruction must + // be followed by a valid opcode. This is because instruction validation + // runs at fetch and decode for the next instruction and while the 2-stage + // pipeline is executing the current one. There is no multilib option for + // these devices, so take all multilib variants that contain AVR-SD. + const bool maybe_sd = (AVR_HAVE_JMP_CALL + && (avr_arch_index == ARCH_AVRXMEGA2 + || avr_arch_index == ARCH_AVRXMEGA3)); + bool uses_subsection = false; + app_disable (); // Switch to appropriate (sub)section. @@ -14366,6 +14417,7 @@ avr_output_addr_vec (rtx_insn *labl, rtx table) switch_to_section (current_function_section ()); fprintf (stream, "\t.subsection\t1\n"); + uses_subsection = true; } else { @@ -14388,10 +14440,21 @@ avr_output_addr_vec (rtx_insn *labl, rtx table) AVR_HAVE_JMP_CALL ? "a" : "ax"); } - // Output the label that precedes the table. - ASM_OUTPUT_ALIGN (stream, 1); + if (maybe_sd && uses_subsection) + { + // Insert a valid opcode prior to the first gs() label. + // Any valid opcode will do. Use CLH since it disassembles + // more nicely than NOP = 0x0000. This is all GCC can do. + // Other cases, like inserting CLH after the vector table and + // after the last instruction, are handled by other parts of + // the toolchain. + fprintf (stream, "\tclh\n"); + } + + // Output the label that precedes the table. + char s_labl[40]; targetm.asm_out.generate_internal_label (s_labl, "L", CODE_LABEL_NUMBER (labl)); diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 30a02a4..d73cf96 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -3947,9 +3947,17 @@ (match_operand:PSI 2 "nonmemory_operand" ""))) (clobber (reg:HI 26)) (clobber (reg:DI 18))])] - "AVR_HAVE_MUL" + "AVR_HAVE_MUL + || (avropt_pr118012 + /* AVR_TINY passes args on the stack, so we cannot work + around PR118012 like this. */ + && !
AVR_TINY)" { - if (s8_operand (operands[2], PSImode)) + if (!AVR_HAVE_MUL) + { + operands[2] = force_reg (PSImode, operands[2]); + } + else if (s8_operand (operands[2], PSImode)) { rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode)); emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1])); @@ -4038,7 +4046,9 @@ (match_operand:PSI 2 "pseudo_register_or_const_int_operand" "rn"))) (clobber (reg:HI 26)) (clobber (reg:DI 18))] - "AVR_HAVE_MUL && !reload_completed" + "!reload_completed + && (AVR_HAVE_MUL + || (avropt_pr118012 && !AVR_TINY))" { gcc_unreachable(); } "&& 1" [(set (reg:PSI 18) @@ -4048,13 +4058,30 @@ (parallel [(set (reg:PSI 22) (mult:PSI (reg:PSI 22) (reg:PSI 18))) - (clobber (reg:QI 21)) - (clobber (reg:QI 25)) - (clobber (reg:HI 26))]) + (clobber (match_dup 3)) + (clobber (match_dup 4)) + (clobber (match_dup 5))]) (set (match_dup 0) (reg:PSI 22))] { - if (s8_operand (operands[2], PSImode)) + if (AVR_HAVE_MUL) + { + operands[3] = gen_rtx_REG (QImode, REG_21); + operands[4] = gen_rtx_REG (QImode, REG_25); + operands[5] = gen_rtx_REG (HImode, REG_26); + } + else + { + operands[3] = gen_rtx_REG (SImode, REG_18); + operands[4] = gen_rtx_SCRATCH (QImode); + operands[5] = gen_rtx_SCRATCH (HImode); + } + + if (!AVR_HAVE_MUL) + { + operands[2] = force_reg (PSImode, operands[2]); + } + else if (s8_operand (operands[2], PSImode)) { rtx reg = force_reg (QImode, gen_int_mode (INTVAL (operands[2]), QImode)); emit_insn (gen_mulsqipsi3 (operands[0], reg, operands[1])); @@ -4106,6 +4133,32 @@ "%~call __mulpsi3" [(set_attr "type" "xcall")]) +(define_insn_and_split "*mulpsi3-nomul.libgcc_split" + [(set (reg:PSI 22) + (mult:PSI (reg:PSI 22) + (reg:PSI 18))) + (clobber (reg:SI 18)) + (clobber (scratch:QI)) + (clobber (scratch:HI))] + "!AVR_HAVE_MUL && avropt_pr118012 && !AVR_TINY" + "#" + "&& reload_completed" + [(scratch)] + { DONE_ADD_CCC }) + +(define_insn "*mulpsi3-nomul.libgcc" + [(set (reg:PSI 22) + (mult:PSI (reg:PSI 22) + (reg:PSI 18))) + (clobber (reg:SI 18)) + (clobber (scratch:QI)) + (clobber (scratch:HI)) + (clobber (reg:CC REG_CC))] + "reload_completed + && !AVR_HAVE_MUL && avropt_pr118012 && !AVR_TINY" + "%~call __mulpsi3" + [(set_attr "type" "xcall")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; 24-bit signed/unsigned division and modulo. diff --git a/gcc/config/avr/avr.opt.urls b/gcc/config/avr/avr.opt.urls index fa560bc..b46c9be 100644 --- a/gcc/config/avr/avr.opt.urls +++ b/gcc/config/avr/avr.opt.urls @@ -28,7 +28,7 @@ muse-nonzero-bits UrlSuffix(gcc/AVR-Options.html#index-muse-nonzero-bits) mshort-calls -UrlSuffix(gcc/AVR-Options.html#index-mshort-calls) +UrlSuffix(gcc/AVR-Options.html#index-mshort-calls-1) mint8 UrlSuffix(gcc/AVR-Options.html#index-mint8) diff --git a/gcc/config/avr/elf.h b/gcc/config/avr/elf.h index d240f85..e0f8a87 100644 --- a/gcc/config/avr/elf.h +++ b/gcc/config/avr/elf.h @@ -18,6 +18,19 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +/* defaults.h requires HAVE_INITFINI_ARRAY_SUPPORT to be present + in order for attribute "retain" to be recognized. This is due + to some quirks in crtstuff.h -- which isn't even used by avr. + All we need is that Binutils supports the "R"etain section flag. + If that's the case, define SUPPORTS_SHF_GNU_RETAIN so that + defaults.h doesn't define it to 0. 
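With that flag available, objects marked __attribute__((used, retain)) end up in sections carrying the "R" flag, which the linker keeps even under --gc-sections.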
*/ +#if defined(IN_GCC) && !defined(USED_FOR_TARGET) && !defined(GENERATOR_FILE) +#include "auto-host.h" /* HAVE_GAS_SHF_GNU_RETAIN */ +#if HAVE_GAS_SHF_GNU_RETAIN +#undef SUPPORTS_SHF_GNU_RETAIN +#define SUPPORTS_SHF_GNU_RETAIN 1 +#endif +#endif /* Overriding some definitions from elfos.h for AVR. */ diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc index a28018b..39168f6 100644 --- a/gcc/config/bpf/bpf.cc +++ b/gcc/config/bpf/bpf.cc @@ -1252,13 +1252,11 @@ static void emit_move_loop (rtx src, rtx dst, machine_mode mode, int offset, int inc, unsigned iters, unsigned remainder) { - rtx reg = gen_reg_rtx (mode); - /* First copy in chunks as large as alignment permits. */ for (unsigned int i = 0; i < iters; i++) { - emit_move_insn (reg, adjust_address (src, mode, offset)); - emit_move_insn (adjust_address (dst, mode, offset), reg); + emit_insn (gen_rtx_SET (adjust_address (dst, mode, offset), + adjust_address (src, mode, offset))); offset += inc; } @@ -1266,22 +1264,22 @@ emit_move_loop (rtx src, rtx dst, machine_mode mode, int offset, int inc, used above. */ if (remainder & 4) { - emit_move_insn (reg, adjust_address (src, SImode, offset)); - emit_move_insn (adjust_address (dst, SImode, offset), reg); + emit_insn (gen_rtx_SET (adjust_address (dst, SImode, offset), + adjust_address (src, SImode, offset))); offset += (inc < 0 ? -4 : 4); remainder -= 4; } if (remainder & 2) { - emit_move_insn (reg, adjust_address (src, HImode, offset)); - emit_move_insn (adjust_address (dst, HImode, offset), reg); + emit_insn (gen_rtx_SET (adjust_address (dst, HImode, offset), + adjust_address (src, HImode, offset))); offset += (inc < 0 ? -2 : 2); remainder -= 2; } if (remainder & 1) { - emit_move_insn (reg, adjust_address (src, QImode, offset)); - emit_move_insn (adjust_address (dst, QImode, offset), reg); + emit_insn (gen_rtx_SET (adjust_address (dst, QImode, offset), + adjust_address (src, QImode, offset))); } } @@ -1351,13 +1349,13 @@ bpf_expand_cpymem (rtx *operands, bool is_move) fwd_label = gen_label_rtx (); done_label = gen_label_rtx (); - rtx dst_addr = copy_to_mode_reg (Pmode, XEXP (dst, 0)); - rtx src_addr = copy_to_mode_reg (Pmode, XEXP (src, 0)); + rtx src_addr = force_operand (XEXP (src, 0), NULL_RTX); + rtx dst_addr = force_operand (XEXP (dst, 0), NULL_RTX); emit_cmp_and_jump_insns (src_addr, dst_addr, GEU, NULL_RTX, Pmode, true, fwd_label, profile_probability::even ()); /* Emit the "backwards" unrolled loop. 
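This path is reached when src < dst, so an overlapping move has to copy from the high addresses downwards to avoid clobbering source bytes that have not been read yet.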
*/ - emit_move_loop (src, dst, mode, size_bytes, -inc, iters, remainder); + emit_move_loop (src, dst, mode, (size_bytes - 1), -inc, iters, remainder); emit_jump_insn (gen_jump (done_label)); emit_barrier (); diff --git a/gcc/config/bpf/bpf.opt.urls b/gcc/config/bpf/bpf.opt.urls index 1e8873a..afa5f6c6 100644 --- a/gcc/config/bpf/bpf.opt.urls +++ b/gcc/config/bpf/bpf.opt.urls @@ -1,5 +1,8 @@ ; Autogenerated by regenerate-opt-urls.py from gcc/config/bpf/bpf.opt and generated HTML +mxbpf +UrlSuffix(gcc/eBPF-Options.html#index-mxbpf) + mbig-endian UrlSuffix(gcc/eBPF-Options.html#index-mbig-endian-5) diff --git a/gcc/config/c6x/c6x.opt.urls b/gcc/config/c6x/c6x.opt.urls index 5b1c103..7813a80 100644 --- a/gcc/config/c6x/c6x.opt.urls +++ b/gcc/config/c6x/c6x.opt.urls @@ -11,7 +11,11 @@ UrlSuffix(gcc/C6X-Options.html#index-msim-1) ; skipping UrlSuffix for 'msdata=' due to finding no URLs -; skipping UrlSuffix for 'mlong-calls' due to finding no URLs +mdsbt +UrlSuffix(gcc/C6X-Options.html#index-mdsbt) + +mlong-calls +UrlSuffix(gcc/C6X-Options.html#index-mlong-calls-4) march= UrlSuffix(gcc/C6X-Options.html#index-march-3) diff --git a/gcc/config/cris/cris.opt b/gcc/config/cris/cris.opt index 9fa9cbf..55b03ee 100644 --- a/gcc/config/cris/cris.opt +++ b/gcc/config/cris/cris.opt @@ -108,28 +108,32 @@ Do not tune code and read-only data alignment. ; See cris_handle_option. m32-bit -Target RejectNegative Undocumented +Target RejectNegative +Align code and data to 32 bits. ; See cris_handle_option. m32bit -Target RejectNegative +Target RejectNegative Undocumented Align code and data to 32 bits. ; See cris_handle_option. m16-bit -Target RejectNegative Undocumented +Target RejectNegative +Align code and data to 16 bits. ; See cris_handle_option. m16bit Target RejectNegative Undocumented +Align code and data to 16 bits. ; See cris_handle_option. m8-bit -Target RejectNegative Undocumented +Target RejectNegative +Don't align items in code or data. ; See cris_handle_option. m8bit -Target RejectNegative +Target RejectNegative Undocumented Don't align items in code or data. 
; TARGET_PROLOGUE_EPILOGUE: Whether or not to omit function diff --git a/gcc/config/cris/cris.opt.urls b/gcc/config/cris/cris.opt.urls index 56eeaa2..20b31b3 100644 --- a/gcc/config/cris/cris.opt.urls +++ b/gcc/config/cris/cris.opt.urls @@ -9,6 +9,9 @@ UrlSuffix(gcc/CRIS-Options.html#index-metrax4) metrax100 UrlSuffix(gcc/CRIS-Options.html#index-metrax100) +mno-etrax100 +UrlSuffix(gcc/CRIS-Options.html#index-mno-etrax100) + mpdebug UrlSuffix(gcc/CRIS-Options.html#index-mpdebug) @@ -54,6 +57,12 @@ UrlSuffix(gcc/CRIS-Options.html#index-mprologue-epilogue) mno-prologue-epilogue UrlSuffix(gcc/CRIS-Options.html#index-mno-prologue-epilogue) +mbest-lib-options +UrlSuffix(gcc/CRIS-Options.html#index-mbest-lib-options) + +moverride-best-lib-options +UrlSuffix(gcc/CRIS-Options.html#index-moverride-best-lib-options) + mcpu= UrlSuffix(gcc/CRIS-Options.html#index-mcpu-3) @@ -63,3 +72,12 @@ UrlSuffix(gcc/CRIS-Options.html#index-march-4) mtune= UrlSuffix(gcc/CRIS-Options.html#index-mtune-5) +mtrap-using-break8 +UrlSuffix(gcc/CRIS-Options.html#index-mtrap-using-break8) + +mtrap-unaligned-atomic +UrlSuffix(gcc/CRIS-Options.html#index-mtrap-unaligned-atomic) + +munaligned-atomic-may-use-library +UrlSuffix(gcc/CRIS-Options.html#index-munaligned-atomic-may-use-library) + diff --git a/gcc/config/cris/elf.opt.urls b/gcc/config/cris/elf.opt.urls index bdfa01e..f7969fb 100644 --- a/gcc/config/cris/elf.opt.urls +++ b/gcc/config/cris/elf.opt.urls @@ -1,7 +1,6 @@ ; Autogenerated by regenerate-opt-urls.py from gcc/config/cris/elf.opt and generated HTML -melf -UrlSuffix(gcc/CRIS-Options.html#index-melf) +; skipping UrlSuffix for 'melf' due to finding no URLs sim UrlSuffix(gcc/CRIS-Options.html#index-sim) diff --git a/gcc/config/csky/csky.opt.urls b/gcc/config/csky/csky.opt.urls index 96b0b17..50f7b87 100644 --- a/gcc/config/csky/csky.opt.urls +++ b/gcc/config/csky/csky.opt.urls @@ -9,20 +9,16 @@ UrlSuffix(gcc/C-SKY-Options.html#index-mcpu_003d-1) mbig-endian UrlSuffix(gcc/C-SKY-Options.html#index-mbig-endian-4) -EB -UrlSuffix(gcc/C-SKY-Options.html#index-EB-1) +; skipping UrlSuffix for 'EB' due to finding no URLs mlittle-endian UrlSuffix(gcc/C-SKY-Options.html#index-mlittle-endian-4) -EL -UrlSuffix(gcc/C-SKY-Options.html#index-EL-1) +; skipping UrlSuffix for 'EL' due to finding no URLs -mhard-float -UrlSuffix(gcc/C-SKY-Options.html#index-mhard-float) +; skipping UrlSuffix for 'mhard-float' due to finding no URLs -msoft-float -UrlSuffix(gcc/C-SKY-Options.html#index-msoft-float-1) +; skipping UrlSuffix for 'msoft-float' due to finding no URLs mfloat-abi= UrlSuffix(gcc/C-SKY-Options.html#index-mfloat-abi-1) @@ -88,7 +84,7 @@ mconstpool UrlSuffix(gcc/C-SKY-Options.html#index-mconstpool) mstack-size -UrlSuffix(gcc/C-SKY-Options.html#index-mstack-size-1) +UrlSuffix(gcc/C-SKY-Options.html#index-mstack-size) mccrt UrlSuffix(gcc/C-SKY-Options.html#index-mccrt) diff --git a/gcc/config/darwin-c.cc b/gcc/config/darwin-c.cc index 7257015..c3a1cd5 100644 --- a/gcc/config/darwin-c.cc +++ b/gcc/config/darwin-c.cc @@ -537,17 +537,18 @@ find_subframework_header (cpp_reader *pfile, const char *header, cpp_dir **dirp) const char *n; for (b = cpp_get_buffer (pfile); - b && cpp_get_file (b) && cpp_get_path (cpp_get_file (b)); + b && cpp_get_file (b) && _cpp_get_file_path (cpp_get_file (b)); b = cpp_get_prev (b)) { - n = find_subframework_file (fname, cpp_get_path (cpp_get_file (b))); + n = find_subframework_file (fname, + _cpp_get_file_path (cpp_get_file (b))); if (n) { /* Logically, the place where we found the subframework is the place 
where we found the Framework that contains the subframework. This is useful for tracking whether or not we are in a system header. */ - *dirp = cpp_get_dir (cpp_get_file (b)); + *dirp = _cpp_get_file_dir (cpp_get_file (b)); return n; } } diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index e23414c..ef356ad 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -523,6 +523,7 @@ extern GTY(()) int darwin_ms_struct; %{static|static-libgcc|static-libgcobol:%:replace-outfile(-lgcobol libgcobol.a%s)}\ %{static|static-libgcc|static-libstdc++|static-libgfortran:%:replace-outfile(-lgomp libgomp.a%s)}\ %{static|static-libgcc|static-libstdc++:%:replace-outfile(-lstdc++ libstdc++.a%s)}\ + %{static|static-libga68:%:replace-outfile(-lga68 libga68.a%s)}\ %{static|static-libgm2:%:replace-outfile(-lm2pim libm2pim.a%s)}\ %{static|static-libgm2:%:replace-outfile(-lm2iso libm2iso.a%s)}\ %{static|static-libgm2:%:replace-outfile(-lm2min libm2min.a%s)}\ @@ -1301,4 +1302,8 @@ extern void darwin_driver_init (unsigned int *,struct cl_decoded_option **); #undef BTF_INFO_SECTION_NAME #define BTF_INFO_SECTION_NAME "__CTF_BTF,__btf,regular,debug" +/* Algol68 */ +#undef A68_EXPORT_SECTION_NAME +#define A68_EXPORT_SECTION_NAME "__a68_exports" + #endif /* CONFIG_DARWIN_H */ diff --git a/gcc/config/darwin.opt b/gcc/config/darwin.opt index e275d84..d6e6271 100644 --- a/gcc/config/darwin.opt +++ b/gcc/config/darwin.opt @@ -169,8 +169,9 @@ filelist Driver RejectNegative Separate Supply a list of objects to be linked from a file, rather than the command line. +; specs turn this into fapple-kext. findirect-virtual-calls -Driver RejectNegative +Driver RejectNegative Undocumented Used for generating code for some older kernel revisions. flat_namespace @@ -189,8 +190,9 @@ framework Driver RejectNegative Separate -framework <name> The linker should search for the framework <name> in the framework search path. +; specs turn this into fapple-kext. fterminated-vtables -Driver RejectNegative +Driver RejectNegative Undocumented Used for generating code for some older kernel revisions. gfull @@ -222,19 +224,19 @@ Driver RejectNegative Usually \"private extern\" (hidden) symbols are made local when linking, this command suppresses that such that they remain exported. multi_module -Driver RejectNegative +Driver RejectNegative Undocumented (Obsolete after 10.4) Multi modules are ignored at runtime since macOS 10.4. multiply_defined -Driver RejectNegative Separate +Driver RejectNegative Separate Undocumented (Obsolete after 10.4) -multiply_defined <treatment> Provided a mechanism for warning about symbols defined in multiple dylibs. multiply_defined_unused -Driver RejectNegative Separate +Driver RejectNegative Separate Undocumented (Obsolete after 10.4) -multiply_defined_unused <treatment> Provided a mechanism for warning about symbols defined in the current executable also being defined in linked dylibs. no_dead_strip_inits_and_terms -Driver RejectNegative +Driver RejectNegative Undocumented (Obsolete) Current linkers never dead-strip these items, so the option is not needed. nodefaultexport @@ -246,40 +248,45 @@ Driver RejectNegative Do not add default run paths (for the compiler library directories) to executables, modules or dynamic libraries. nofixprebinding -Driver RejectNegative +Driver RejectNegative Undocumented (Obsolete after 10.3.9) Set MH_NOPREFIXBINDING, in an executable. nomultidefs -Driver RejectNegative +Driver RejectNegative Undocumented (Obsolete after 10.4) Set MH_NOMULTIDEFS in an umbrella framework.
noprebind -Driver RejectNegative Negative(prebind) +Driver RejectNegative Negative(prebind) Undocumented (Obsolete) LD_PREBIND is no longer supported. noseglinkedit -Driver RejectNegative Negative(seglinkedit) +Driver RejectNegative Negative(seglinkedit) Undocumented (Obsolete) This is the default. ObjC Driver RejectNegative +Equivalent to -xobjective-c. ObjC++ Driver RejectNegative +Equivalent to -xobjective-c++. +; This option is only used in STARTFILE_SPEC and has never been +; documented since it was added in 2002, so it appears to be +; intentionally undocumented. object -Driver RejectNegative +Driver RejectNegative Undocumented pagezero_size Driver RejectNegative Separate -pagezero_size <size> Allows setting the page 0 size to 4kb when required. prebind -Driver RejectNegative Negative(noprebind) +Driver RejectNegative Negative(noprebind) Undocumented (Obsolete) LD_PREBIND is no longer supported. prebind_all_twolevel_modules -Driver RejectNegative +Driver RejectNegative Undocumented (Obsolete) LD_PREBIND is no longer supported. preload @@ -287,7 +294,7 @@ Driver RejectNegative Produces a Mach-O file suitable for embedded/ROM use. private_bundle -Driver RejectNegative +Driver RejectNegative Undocumented (Obsolete) Allowed linking to proceed with \"-flat_namespace\" when a linked bundle contained a symbol also exported from the main executable. pthread @@ -310,11 +317,11 @@ Driver RejectNegative Separate Args(3) -sectcreate <segname> <sectname> <file> Create section <sectname> in segment <segname> from the contents of <file>. sectobjectsymbols -Driver RejectNegative Separate Args(2) +Driver RejectNegative Separate Args(2) Undocumented (Obsolete) -sectobjectsymbols <segname> <sectname> Setting a local symbol at the start of a section is no longer supported. sectorder -Driver RejectNegative Separate Args(3) +Driver RejectNegative Separate Args(3) Undocumented (Obsolete) -sectorder <segname> <sectname> <orderfile> Replaced by a more general option \"-order_file\". seg_addr_table @@ -323,7 +330,7 @@ Driver RejectNegative Separate ; This is only usable by the ld_classic linker. seg_addr_table_filename -Driver RejectNegative Separate +Driver RejectNegative Separate Undocumented (Obsolete, ld_classic only) -seg_addr_table_filename <path>. seg1addr @@ -336,11 +343,11 @@ Driver RejectNegative Separate Args(2) ; This is only usable by the ld_classic linker. segcreate -Driver RejectNegative Separate Args(3) +Driver RejectNegative Separate Args(3) Undocumented (Obsolete, ld_classic only) -sectcreate <segname> <sectname> <file> Allowed creation of a section from a file. seglinkedit -Driver RejectNegative Negative(noseglinkedit) +Driver RejectNegative Negative(noseglinkedit) Undocumented (Obsolete) Object files with LINKEDIT sections are no longer supported. segprot @@ -356,7 +363,7 @@ Driver RejectNegative Separate -segs_read_write_addr <address> Specify that <address> is the base address of the read-write segments of a dylib. single_module -Driver RejectNegative +Driver RejectNegative Undocumented (Obsolete) This is the default. sub_library @@ -405,12 +412,12 @@ Logs which symbol(s) caused an object to be loaded. ;(Obsolete, ignored) Strip symbols starting with "L", this is the default. X -Driver RejectNegative +Driver RejectNegative Undocumented y -Driver RejectNegative Joined +Driver RejectNegative Joined Undocumented (Obsolete, ignored) Old support similar to \"-whyload\".
Mach -Driver RejectNegative +Driver RejectNegative Undocumented (Obsolete and unhandled by ld64, ignored) ld should produce an executable (only handled by ld_classic). diff --git a/gcc/config/darwin.opt.urls b/gcc/config/darwin.opt.urls index e83d183..a3da8d2 100644 --- a/gcc/config/darwin.opt.urls +++ b/gcc/config/darwin.opt.urls @@ -3,6 +3,9 @@ dependency-file UrlSuffix(gcc/Darwin-Options.html#index-dependency-file) +fapple-kext +UrlSuffix(gcc/Darwin-Options.html#index-fapple-kext) + fconstant-cfstrings UrlSuffix(gcc/Darwin-Options.html#index-fconstant-cfstrings) @@ -12,7 +15,14 @@ UrlSuffix(gcc/Darwin-Options.html#index-iframework) mconstant-cfstrings UrlSuffix(gcc/Darwin-Options.html#index-mconstant-cfstrings) -; skipping UrlSuffix for 'mdynamic-no-pic' due to finding no URLs +Wnonportable-cfstrings +UrlSuffix(gcc/Darwin-Options.html#index-Wno-nonportable-cfstrings) + +matt-stubs +UrlSuffix(gcc/Darwin-Options.html#index-matt-stubs) + +mdynamic-no-pic +UrlSuffix(gcc/Darwin-Options.html#index-mdynamic-no-pic) mfix-and-continue UrlSuffix(gcc/Darwin-Options.html#index-mfix-and-continue) @@ -26,15 +36,30 @@ UrlSuffix(gcc/Darwin-Options.html#index-mmacosx-version-min) mone-byte-bool UrlSuffix(gcc/Darwin-Options.html#index-mone-byte-bool) +msymbol-stubs +UrlSuffix(gcc/Darwin-Options.html#index-msymbol-stubs) + +mtarget-linker= +UrlSuffix(gcc/Darwin-Options.html#index-mtarget-linker) + +mtarget-linker +UrlSuffix(gcc/Darwin-Options.html#index-mtarget-linker) + all_load UrlSuffix(gcc/Darwin-Options.html#index-all_005fload) allowable_client UrlSuffix(gcc/Darwin-Options.html#index-allowable_005fclient) +arch +UrlSuffix(gcc/Darwin-Options.html#index-arch) + arch_errors_fatal UrlSuffix(gcc/Darwin-Options.html#index-arch_005ferrors_005ffatal) +asm_macosx_version_min= +UrlSuffix(gcc/Darwin-Options.html#index-asm_005fmacosx_005fversion_005fmin) + bind_at_load UrlSuffix(gcc/Darwin-Options.html#index-bind_005fat_005fload) @@ -59,6 +84,9 @@ UrlSuffix(gcc/Darwin-Options.html#index-dead_005fstrip) dylib_file UrlSuffix(gcc/Darwin-Options.html#index-dylib_005ffile) +dylinker +UrlSuffix(gcc/Darwin-Options.html#index-dylinker) + dylinker_install_name UrlSuffix(gcc/Darwin-Options.html#index-dylinker_005finstall_005fname) @@ -83,6 +111,9 @@ UrlSuffix(gcc/Darwin-Options.html#index-force_005fcpusubtype_005fALL) force_flat_namespace UrlSuffix(gcc/Darwin-Options.html#index-force_005fflat_005fnamespace) +framework +UrlSuffix(gcc/Darwin-Options.html#index-framework) + gfull UrlSuffix(gcc/Darwin-Options.html#index-gfull) @@ -104,45 +135,21 @@ UrlSuffix(gcc/Darwin-Options.html#index-install_005fname) keep_private_externs UrlSuffix(gcc/Darwin-Options.html#index-keep_005fprivate_005fexterns) -multi_module -UrlSuffix(gcc/Darwin-Options.html#index-multi_005fmodule) - -multiply_defined -UrlSuffix(gcc/Darwin-Options.html#index-multiply_005fdefined) - -multiply_defined_unused -UrlSuffix(gcc/Darwin-Options.html#index-multiply_005fdefined_005funused) - -no_dead_strip_inits_and_terms -UrlSuffix(gcc/Darwin-Options.html#index-no_005fdead_005fstrip_005finits_005fand_005fterms) +nodefaultexport +UrlSuffix(gcc/Darwin-Options.html#index-nodefaultexport) nodefaultrpaths UrlSuffix(gcc/Darwin-Options.html#index-nodefaultrpaths) -nofixprebinding -UrlSuffix(gcc/Darwin-Options.html#index-nofixprebinding) +ObjC +UrlSuffix(gcc/Darwin-Options.html#index-ObjC) -nomultidefs -UrlSuffix(gcc/Darwin-Options.html#index-nomultidefs) - -noprebind -UrlSuffix(gcc/Darwin-Options.html#index-noprebind) - -noseglinkedit 
-UrlSuffix(gcc/Darwin-Options.html#index-noseglinkedit) +ObjC++ +UrlSuffix(gcc/Darwin-Options.html#index-ObjC_002b_002b) pagezero_size UrlSuffix(gcc/Darwin-Options.html#index-pagezero_005fsize) -prebind -UrlSuffix(gcc/Darwin-Options.html#index-prebind) - -prebind_all_twolevel_modules -UrlSuffix(gcc/Darwin-Options.html#index-prebind_005fall_005ftwolevel_005fmodules) - -private_bundle -UrlSuffix(gcc/Darwin-Options.html#index-private_005fbundle) - ; skipping UrlSuffix for 'pthread' due to multiple URLs: ; duplicate: 'gcc/Link-Options.html#index-pthread-1' ; duplicate: 'gcc/Preprocessor-Options.html#index-pthread' @@ -159,27 +166,15 @@ UrlSuffix(gcc/Darwin-Options.html#index-sectalign) sectcreate UrlSuffix(gcc/Darwin-Options.html#index-sectcreate) -sectobjectsymbols -UrlSuffix(gcc/Darwin-Options.html#index-sectobjectsymbols) - -sectorder -UrlSuffix(gcc/Darwin-Options.html#index-sectorder) - seg_addr_table UrlSuffix(gcc/Darwin-Options.html#index-seg_005faddr_005ftable) -seg_addr_table_filename -UrlSuffix(gcc/Darwin-Options.html#index-seg_005faddr_005ftable_005ffilename) - seg1addr UrlSuffix(gcc/Darwin-Options.html#index-seg1addr) segaddr UrlSuffix(gcc/Darwin-Options.html#index-segaddr) -seglinkedit -UrlSuffix(gcc/Darwin-Options.html#index-seglinkedit) - segprot UrlSuffix(gcc/Darwin-Options.html#index-segprot) @@ -189,9 +184,6 @@ UrlSuffix(gcc/Darwin-Options.html#index-segs_005fread_005fonly_005faddr) segs_read_write_addr UrlSuffix(gcc/Darwin-Options.html#index-segs_005fread_005fwrite_005faddr) -single_module -UrlSuffix(gcc/Darwin-Options.html#index-single_005fmodule) - sub_library UrlSuffix(gcc/Darwin-Options.html#index-sub_005flibrary) @@ -201,6 +193,9 @@ UrlSuffix(gcc/Darwin-Options.html#index-sub_005fumbrella) twolevel_namespace UrlSuffix(gcc/Darwin-Options.html#index-twolevel_005fnamespace) +twolevel_namespace_hints +UrlSuffix(gcc/Darwin-Options.html#index-twolevel_005fnamespace_005fhints) + umbrella UrlSuffix(gcc/Darwin-Options.html#index-umbrella) @@ -210,6 +205,9 @@ UrlSuffix(gcc/Darwin-Options.html#index-undefined) unexported_symbols_list UrlSuffix(gcc/Darwin-Options.html#index-unexported_005fsymbols_005flist) +weak_framework +UrlSuffix(gcc/Darwin-Options.html#index-weak_005fframework) + weak_reference_mismatches UrlSuffix(gcc/Darwin-Options.html#index-weak_005freference_005fmismatches) diff --git a/gcc/config/dragonfly.opt.urls b/gcc/config/dragonfly.opt.urls index 62e2e21..7b31d4e 100644 --- a/gcc/config/dragonfly.opt.urls +++ b/gcc/config/dragonfly.opt.urls @@ -1,5 +1,8 @@ ; Autogenerated by regenerate-opt-urls.py from gcc/config/dragonfly.opt and generated HTML +profile +UrlSuffix(gcc/Instrumentation-Options.html#index-profile) + ; skipping UrlSuffix for 'pthread' due to multiple URLs: ; duplicate: 'gcc/Link-Options.html#index-pthread-1' ; duplicate: 'gcc/Preprocessor-Options.html#index-pthread' diff --git a/gcc/config/epiphany/epiphany.opt b/gcc/config/epiphany/epiphany.opt index 9cd93db..4b89754 100644 --- a/gcc/config/epiphany/epiphany.opt +++ b/gcc/config/epiphany/epiphany.opt @@ -72,7 +72,7 @@ target Mask(ROUND_NEAREST) Assume round to nearest is selected for purposes of scheduling. mlong-calls -Target Mask(LONG_CALLS) +Target InverseMask(SHORT_CALLS) Generate call insns as indirect calls. mshort-calls @@ -103,7 +103,12 @@ Enum(attr_fp_mode) String(truncate) Value(FP_MODE_ROUND_TRUNC) EnumValue Enum(attr_fp_mode) String(int) Value(FP_MODE_INT) +; This option has never been documented under this name, but it's +; been around since 2012. 
may-round-for-trunc +Target RejectNegative Undocumented Alias(mmay-round-for-trunc) + +mmay-round-for-trunc Target Mask(MAY_ROUND_FOR_TRUNC) A floating point to integer truncation may be replaced with rounding to save mode switching. @@ -129,7 +134,7 @@ Split unaligned 8 byte vector moves before post-modify address generation. mfp-iarith Target Mask(FP_IARITH) -Use the floating point unit for integer add/subtract. +Use the floating-point unit for integer add/subtract. m1reg- Target RejectNegative Joined Var(epiphany_m1reg) Enum(m1reg) Init(-1) diff --git a/gcc/config/epiphany/epiphany.opt.urls b/gcc/config/epiphany/epiphany.opt.urls index a8e28c4..0037dd7 100644 --- a/gcc/config/epiphany/epiphany.opt.urls +++ b/gcc/config/epiphany/epiphany.opt.urls @@ -21,6 +21,12 @@ UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-msoft-cmpsf) msplit-lohi UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-msplit-lohi) +mpost-inc +UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-mpost-inc) + +mpost-modify +UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-mpost-modify) + mstack-offset= UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-mstack-offset) @@ -30,7 +36,8 @@ UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-mround-nearest) mlong-calls UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-mlong-calls) -; skipping UrlSuffix for 'mshort-calls' due to finding no URLs +mshort-calls +UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-mshort-calls) msmall16 UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-msmall16) @@ -38,6 +45,9 @@ UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-msmall16) mfp-mode= UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-mfp-mode) +mmay-round-for-trunc +UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-mmay-round-for-trunc) + mvect-double UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-mvect-double) @@ -47,6 +57,9 @@ UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-max-vect-align) msplit-vecmove-early UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-msplit-vecmove-early) +mfp-iarith +UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-mfp-iarith) + m1reg- UrlSuffix(gcc/Adapteva-Epiphany-Options.html#index-m1reg-) diff --git a/gcc/config/freebsd.opt.urls b/gcc/config/freebsd.opt.urls index 3be69e4..64cc06f 100644 --- a/gcc/config/freebsd.opt.urls +++ b/gcc/config/freebsd.opt.urls @@ -1,5 +1,8 @@ ; Autogenerated by regenerate-opt-urls.py from gcc/config/freebsd.opt and generated HTML +profile +UrlSuffix(gcc/Instrumentation-Options.html#index-profile) + ; skipping UrlSuffix for 'pthread' due to multiple URLs: ; duplicate: 'gcc/Link-Options.html#index-pthread-1' ; duplicate: 'gcc/Preprocessor-Options.html#index-pthread' diff --git a/gcc/config/frv/frv.opt b/gcc/config/frv/frv.opt index 1890f91..19f4186 100644 --- a/gcc/config/frv/frv.opt +++ b/gcc/config/frv/frv.opt @@ -45,7 +45,7 @@ Dynamically allocate cc registers. ; generating SCC instructions and or/and-ing them together, and then doing the ; branch on the result, which collectively generate much worse code. mbranch-cost= -Target RejectNegative Joined UInteger Var(frv_branch_cost_int) Init(1) +Target RejectNegative Joined UInteger Var(frv_branch_cost_int) Init(1) Undocumented Set the cost of branches. mcond-exec @@ -53,11 +53,11 @@ Target Mask(COND_EXEC) Enable conditional execution other than moves/scc. 
mcond-exec-insns= -Target RejectNegative Joined UInteger Var(frv_condexec_insns) Init(8) +Target RejectNegative Joined UInteger Var(frv_condexec_insns) Init(8) Undocumented Change the maximum length of conditionally-executed sequences. mcond-exec-temps= -Target RejectNegative Joined UInteger Var(frv_condexec_temps) Init(4) +Target RejectNegative Joined UInteger Var(frv_condexec_temps) Init(4) Undocumented Change the number of temporary registers that are available to conditionally-executed sequences. mcond-move diff --git a/gcc/config/frv/frv.opt.urls b/gcc/config/frv/frv.opt.urls index d3d3757..f8e56cd 100644 --- a/gcc/config/frv/frv.opt.urls +++ b/gcc/config/frv/frv.opt.urls @@ -53,7 +53,7 @@ mgprel-ro UrlSuffix(gcc/FRV-Options.html#index-mgprel-ro) mhard-float -UrlSuffix(gcc/FRV-Options.html#index-mhard-float-1) +UrlSuffix(gcc/FRV-Options.html#index-mhard-float) minline-plt UrlSuffix(gcc/FRV-Options.html#index-minline-plt-1) @@ -65,7 +65,7 @@ mlinked-fp UrlSuffix(gcc/FRV-Options.html#index-mlinked-fp) mlong-calls -UrlSuffix(gcc/FRV-Options.html#index-mlong-calls-4) +UrlSuffix(gcc/FRV-Options.html#index-mlong-calls-5) mmedia UrlSuffix(gcc/FRV-Options.html#index-mmedia) @@ -92,7 +92,7 @@ mscc UrlSuffix(gcc/FRV-Options.html#index-mscc) msoft-float -UrlSuffix(gcc/FRV-Options.html#index-msoft-float-3) +UrlSuffix(gcc/FRV-Options.html#index-msoft-float-2) mTLS UrlSuffix(gcc/FRV-Options.html#index-mTLS) diff --git a/gcc/config/ft32/ft32.opt b/gcc/config/ft32/ft32.opt index 039ca29..def410d 100644 --- a/gcc/config/ft32/ft32.opt +++ b/gcc/config/ft32/ft32.opt @@ -23,7 +23,7 @@ Target Mask(SIM) Target the software simulator. mlra -Target RejectNegative Ignore +Target RejectNegative Ignore Undocumented Ignored, but preserved for backward compatibility. mnodiv diff --git a/gcc/config/ft32/ft32.opt.urls b/gcc/config/ft32/ft32.opt.urls index 707cb83..27857da 100644 --- a/gcc/config/ft32/ft32.opt.urls +++ b/gcc/config/ft32/ft32.opt.urls @@ -3,8 +3,7 @@ msim UrlSuffix(gcc/FT32-Options.html#index-msim-3) -mlra -UrlSuffix(gcc/FT32-Options.html#index-mlra-1) +; skipping UrlSuffix for 'mlra' due to finding no URLs mnodiv UrlSuffix(gcc/FT32-Options.html#index-mnodiv) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 1e04074..54abf8c 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -2940,6 +2940,19 @@ gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */ , if (!caller && cfun->machine->normal_function) gcn_detect_incoming_pointer_arg (fndecl); + static bool warned_xnack = 0; + if (!warned_xnack + && (omp_requires_mask & (OMP_REQUIRES_UNIFIED_SHARED_MEMORY + | OMP_REQUIRES_SELF_MAPS)) + && gcn_devices[gcn_arch].xnack_default != HSACO_ATTR_UNSUPPORTED + && flag_xnack == HSACO_ATTR_OFF) + { + warning_at (UNKNOWN_LOCATION, 0, + "Unified Shared Memory is required, but XNACK is disabled"); + inform (UNKNOWN_LOCATION, "Try -foffload-options=-mxnack=any"); + warned_xnack = 1; + } + reinit_regs (); } diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index 99d6aeb..e877912 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -22,36 +22,46 @@ HeaderInclude config/gcn/gcn-opts.h march= -Target RejectNegative Negative(march=) Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_GFX900) +Target RejectNegative Negative(march=) Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_GFX90A) Specify the name of the target GPU. 
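The gcn.cc hunk above warns when OpenMP Unified Shared Memory is requested while XNACK is forced off; the mkoffload.cc changes further down then emit a startup constructor that sets HSA_XNACK before the ROCr runtime is loaded. A minimal sketch of that environment-variable trick in plain C (illustrative only: the function name and diagnostic wording here are invented, not the exact code mkoffload generates):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Runs before main, and thus before ROCr is loaded, provided this
   object is linked into the program rather than dlopen'ed later.  */
static __attribute__((constructor)) void
sketch_set_hsa_xnack (void)
{
  const char *val = getenv ("HSA_XNACK");
  if (!val || val[0] == '\0')
    /* No user setting: pick the mode the binary was compiled for;
       "1" here stands for the XNACK-on/USM case.  */
    setenv ("HSA_XNACK", "1", 1);
  else if (strcmp (val, "1") != 0)
    /* Respect the user's value, but warn, since ROCr's own failure
       mode is hard to diagnose.  */
    fprintf (stderr, "warning: HSA_XNACK=%s may be incompatible\n", val);
}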
mtune= -Target RejectNegative Negative(mtune=) Joined ToLower Enum(gpu_type) Var(gcn_tune) Init(PROCESSOR_GFX900) +Target RejectNegative Negative(mtune=) Joined ToLower Enum(gpu_type) Var(gcn_tune) Init(PROCESSOR_GFX90A) Specify the name of the target GPU. +; mkoffload passes -m32, -m64, and -mgomp to the offload compiler, but +; nothing in the offload compiler actually uses any of these flags. +; Maybe they're there for compatibility with other offload backends, +; or maybe we can just delete these. In any case, there's no point in +; documenting them for users. m32 -Target RejectNegative InverseMask(ABI64) +Target RejectNegative InverseMask(ABI64) Undocumented Generate code for a 32-bit ABI. m64 -Target RejectNegative Mask(ABI64) +Target RejectNegative Mask(ABI64) Undocumented Generate code for a 64-bit ABI. mgomp -Target RejectNegative +Target RejectNegative Undocumented Enable OpenMP GPU offloading. +; This option seems never to have done anything useful, nor to have +; ever been documented. +Variable bool flag_bypass_init_error = false mbypass-init-error -Target RejectNegative Var(flag_bypass_init_error) +Target RejectNegative Var(flag_bypass_init_error) Undocumented +Variable int stack_size_opt = -1 mstack-size= -Target RejectNegative Joined UInteger Var(stack_size_opt) Init(-1) +Target RejectNegative Joined UInteger Var(stack_size_opt) Init(-1) Undocumented Obsolete; use GCN_STACK_SIZE at runtime. +Variable int gang_private_size_opt = -1 mgang-private-size= diff --git a/gcc/config/gcn/gcn.opt.urls b/gcc/config/gcn/gcn.opt.urls index 7bc0ef4..2f9ce48 100644 --- a/gcc/config/gcn/gcn.opt.urls +++ b/gcc/config/gcn/gcn.opt.urls @@ -12,8 +12,13 @@ UrlSuffix(gcc/AMD-GCN-Options.html#index-mtune-1) ; skipping UrlSuffix for 'mgomp' due to finding no URLs -mstack-size= -UrlSuffix(gcc/AMD-GCN-Options.html#index-mstack-size) +; skipping UrlSuffix for 'mstack-size=' due to finding no URLs + +mgang-private-size= +UrlSuffix(gcc/AMD-GCN-Options.html#index-mgang-private-size) + +Wopenacc-dims +UrlSuffix(gcc/AMD-GCN-Options.html#index-Wno-openacc-dims) mxnack= UrlSuffix(gcc/AMD-GCN-Options.html#index-mxnack) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index b284ff4..ac6aae5 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -38,6 +38,9 @@ #include "configargs.h" /* For configure_default_options. */ #include "multilib.h" /* For multilib_options. */ +#include "tree.h" /* Dependency of omp-general.h. */ +#include "omp-general.h" /* For enum omp_requires. */ + /* These probably won't (all) be in elf.h for a while. */ #undef EM_AMDGPU #define EM_AMDGPU 0xe0 @@ -441,10 +444,12 @@ copy_early_debug_info (const char *infile, const char *outfile) encoded as structured data.
*/ static void -process_asm (FILE *in, FILE *out, FILE *cfile) +process_asm (FILE *in, FILE *out, FILE *cfile, uint32_t omp_requires) { int fn_count = 0, var_count = 0, ind_fn_count = 0; int dims_count = 0, regcount_count = 0; + bool xnack_required = (omp_requires & (OMP_REQUIRES_UNIFIED_SHARED_MEMORY + | OMP_REQUIRES_SELF_MAPS)); struct obstack fns_os, dims_os, regcounts_os; obstack_init (&fns_os); obstack_init (&dims_os); @@ -469,6 +474,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile) fn_count += 2; char buf[1000]; + char dummy; enum { IN_CODE, IN_METADATA, @@ -549,7 +555,6 @@ process_asm (FILE *in, FILE *out, FILE *cfile) } } - char dummy; if (sscanf (buf, " .section .gnu.offload_vars%c", &dummy) > 0) { state = IN_VARS; @@ -615,12 +620,28 @@ process_asm (FILE *in, FILE *out, FILE *cfile) struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *); struct regcount *regcounts = XOBFINISH (&regcounts_os, struct regcount *); - if (gcn_stack_size) + /* If the -mxnack setting has a definite value (not "any" or undefined), or + the program "requires unified_shared_memory" (in which case -mxnack might + be "any"), then we emit code to check the mode at runtime. */ + bool check_xnack = (TEST_XNACK_OFF (elf_flags) + || TEST_XNACK_ON (elf_flags) + || xnack_required); + if (TEST_XNACK_OFF (elf_flags) && xnack_required) { - fprintf (cfile, "#include <stdlib.h>\n"); - fprintf (cfile, "#include <stdbool.h>\n\n"); + warning (input_location, + "conflicting settings; XNACK is forced off but Unified " + "Shared Memory is required"); + xnack_required = 0; } + /* Start generating the C code. */ + if (gcn_stack_size) + fprintf (cfile, "#include <stdbool.h>\n"); + if (check_xnack) + fprintf (cfile, "#include <stdio.h>\n"); + if (gcn_stack_size || check_xnack) + fprintf (cfile, "#include <stdlib.h>\n\n"); + fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count); fprintf (cfile, "static const int gcn_num_ind_funcs = %d;\n\n", ind_fn_count); @@ -661,18 +682,44 @@ process_asm (FILE *in, FILE *out, FILE *cfile) } fprintf (cfile, "\n};\n\n"); + /* Start a mkoffload_setup function to hold zero-or-more setup actions. */ + fprintf (cfile, + "static void\n" + "mkoffload_setup (void)\n" + "{"); + /* Set the stack size if the user configured a value. */ if (gcn_stack_size) fprintf (cfile, - "static __attribute__((constructor))\n" - "void configure_stack_size (void)\n" - "{\n" + "\n" + " /* Pass through the -mstack-size compile-time option. */\n" " const char *val = getenv (\"GCN_STACK_SIZE\");\n" " if (!val || val[0] == '\\0')\n" - " setenv (\"GCN_STACK_SIZE\", \"%d\", true);\n" - "}\n\n", + " setenv (\"GCN_STACK_SIZE\", \"%d\", true);\n", gcn_stack_size); + /* Emit a constructor function to set the HSA_XNACK environment variable. + This must be done before the ROCr runtime library is loaded. + We never override a user value (except the empty string), but we do + emit a useful diagnostic when the mode is wrong (the ROCr message is + not helpful). */ + if (check_xnack) + fprintf (cfile, + "\n" + " const char *xn_var = getenv (\"HSA_XNACK\");\n" + " if (!xn_var || xn_var[0] == '\\0')\n" + " setenv (\"HSA_XNACK\", \"%d\", true);\n" + " else if (%s)\n" + " fprintf (stderr, \"warning: HSA_XNACK=%%s is incompatible; " + "the GPU kernel may revert to host fallback\\n\", " + "xn_var);\n", + xnack_required || TEST_XNACK_ON (elf_flags), + (xnack_required || TEST_XNACK_ON (elf_flags) + ? "xn_var[0] != '1' || xn_var[1] != '\\0'" + : "xn_var[0] != '0' || xn_var[1] != '\\0'")); + + /* End of mkoffload_setup function.
*/ + fprintf (cfile, "}\n\n"); + obstack_free (&fns_os, NULL); for (i = 0; i < dims_count; i++) free (dims[i].name); @@ -737,6 +784,7 @@ process_obj (const char *fname_in, FILE *cfile, uint32_t omp_requires) fprintf (cfile, "static __attribute__((constructor)) void init (void)\n" "{\n" + " mkoffload_setup ();\n" " GOMP_offload_register_ver (%#x, __OFFLOAD_TABLE__," " %d/*GCN*/, &gcn_data);\n" "};\n", @@ -1108,7 +1156,8 @@ main (int argc, char **argv) #define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAM, ...) \ case ELF: XNACK; break; #define HSACO_ATTR_UNSUPPORTED SET_XNACK_UNSET (elf_flags) -#define HSACO_ATTR_OFF SET_XNACK_OFF (elf_flags) +#define HSACO_ATTR_OFF \ + if (TEST_XNACK_UNSET (elf_flags)) SET_XNACK_OFF (elf_flags) #define HSACO_ATTR_ANY \ if (TEST_XNACK_UNSET (elf_flags)) SET_XNACK_ANY (elf_flags) #include "gcn-devices.def" @@ -1340,7 +1389,7 @@ main (int argc, char **argv) if (!out) fatal_error (input_location, "cannot open %qs", gcn_s2_name); - process_asm (in, out, cfile); + process_asm (in, out, cfile, omp_requires); fclose (in); fclose (out); diff --git a/gcc/config/gnu-user.opt.urls b/gcc/config/gnu-user.opt.urls index 42021e1..7707ed5 100644 --- a/gcc/config/gnu-user.opt.urls +++ b/gcc/config/gnu-user.opt.urls @@ -1,5 +1,8 @@ ; Autogenerated by regenerate-opt-urls.py from gcc/config/gnu-user.opt and generated HTML +profile +UrlSuffix(gcc/Instrumentation-Options.html#index-profile) + ; skipping UrlSuffix for 'pthread' due to multiple URLs: ; duplicate: 'gcc/Link-Options.html#index-pthread-1' ; duplicate: 'gcc/Preprocessor-Options.html#index-pthread' diff --git a/gcc/config/i386/amxavx512intrin.h b/gcc/config/i386/amxavx512intrin.h index ab53625..1e28460 100644 --- a/gcc/config/i386/amxavx512intrin.h +++ b/gcc/config/i386/amxavx512intrin.h @@ -39,8 +39,9 @@ ({ \ __m512 dst; \ __asm__ volatile \ - ("{tcvtrowd2ps\t%1, %%tmm"#src", %0|tcvtrowd2ps\t%0, %%tmm"#src", %1}" \ - : "=v" (dst) : "r" ((unsigned) (A))); \ + ("{tcvtrowd2ps\t%1, %%tmm%c[_src], %0 \ + |tcvtrowd2ps\t%0, tmm%c[_src], %1}" \ + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ dst; \ }) @@ -48,8 +49,9 @@ ({ \ __m512 dst; \ __asm__ volatile \ - ("{tcvtrowd2ps\t$"#imm", %%tmm"#src", %0|tcvtrowd2ps\t%0, %%tmm"#src", "#imm"}" \ - : "=v" (dst) :); \ + ("{tcvtrowd2ps\t%[_imm], %%tmm%c[_src], %0 \ + |tcvtrowd2ps\t%0, tmm%c[_src], %[_imm]}" \ + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ dst; \ }) @@ -57,8 +59,9 @@ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2bf16h\t%1, %%tmm"#src", %0|tcvtrowps2bf16h\t%0, %%tmm"#src", %1}" \ - : "=v" (dst) : "r" ((unsigned) (A))); \ + ("{tcvtrowps2bf16h\t%1, %%tmm%c[_src], %0 \ + |tcvtrowps2bf16h\t%0, tmm%c[_src], %1}" \ + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ dst; \ }) @@ -66,8 +69,9 @@ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2bf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2bf16h\t%0, %%tmm"#src", "#imm"}" \ - : "=v" (dst) :); \ + ("{tcvtrowps2bf16h\t%[_imm], %%tmm%c[_src], %0 \ + |tcvtrowps2bf16h\t%0, tmm%c[_src], %[_imm]}" \ + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ dst; \ }) @@ -75,8 +79,9 @@ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2bf16l\t%1, %%tmm"#src", %0|tcvtrowps2bf16l\t%0, %%tmm"#src", %1}" \ - : "=v" (dst) : "r" ((unsigned) (A))); \ + ("{tcvtrowps2bf16l\t%1, %%tmm%c[_src], %0 \ + |tcvtrowps2bf16l\t%0, tmm%c[_src], %1}" \ + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ dst; \ }) @@ -84,8 +89,9 @@ ({ \ __m512bh dst; \ __asm__ volatile \ - ("{tcvtrowps2bf16l\t$"#imm", %%tmm"#src", 
%0|tcvtrowps2bf16l\t%0, %%tmm"#src", "#imm"}" \ - : "=v" (dst) :); \ + ("{tcvtrowps2bf16l\t%[_imm], %%tmm%c[_src], %0 \ + |tcvtrowps2bf16l\t%0, tmm%c[_src], "#imm"}" \ + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ dst; \ }) @@ -93,8 +99,8 @@ ({ \ __m512h dst; \ __asm__ volatile \ - ("{tcvtrowps2phh\t%1, %%tmm"#src", %0|tcvtrowps2phh\t%0, %%tmm"#src", %1}" \ - : "=v" (dst) : "r" ((unsigned) (A))); \ + ("{tcvtrowps2phh\t%1, %%tmm%c[_src], %0|tcvtrowps2phh\t%0, tmm%c[_src], %1}" \ + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ dst; \ }) @@ -102,8 +108,9 @@ ({ \ __m512h dst; \ __asm__ volatile \ - ("{tcvtrowps2phh\t$"#imm", %%tmm"#src", %0|tcvtrowps2phh\t%0, %%tmm"#src", "#imm"}" \ - : "=v" (dst) :); \ + ("{tcvtrowps2phh\t%[_imm], %%tmm%c[_src], %0 \ + |tcvtrowps2phh\t%0, tmm%c[_src], "#imm"}" \ + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ dst; \ }) @@ -111,8 +118,8 @@ ({ \ __m512h dst; \ __asm__ volatile \ - ("{tcvtrowps2phl\t%1, %%tmm"#src", %0|tcvtrowps2phl\t%0, %%tmm"#src", %1}" \ - : "=v" (dst) : "r" ((unsigned) (A))); \ + ("{tcvtrowps2phl\t%1, %%tmm%c[_src], %0|tcvtrowps2phl\t%0, tmm%c[_src], %1}" \ + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ dst; \ }) @@ -120,8 +127,9 @@ ({ \ __m512h dst; \ __asm__ volatile \ - ("{tcvtrowps2phl\t$"#imm", %%tmm"#src", %0|tcvtrowps2phl\t%0, %%tmm"#src", "#imm"}" \ - : "=v" (dst) :); \ + ("{tcvtrowps2phl\t%[_imm], %%tmm%c[_src], %0 \ + |tcvtrowps2phl\t%0, tmm%c[_src], "#imm"}" \ + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ dst; \ }) @@ -129,8 +137,8 @@ ({ \ __m512 dst; \ __asm__ volatile \ - ("{tilemovrow\t%1, %%tmm"#src", %0|tilemovrow\t%0, %%tmm"#src", %1}" \ - : "=v" (dst) : "r" ((unsigned) (A))); \ + ("{tilemovrow\t%1, %%tmm%c[_src], %0|tilemovrow\t%0, tmm%c[_src], %1}" \ + : "=v" (dst) : "r" ((unsigned) (A)), [_src]"i"(src)); \ dst; \ }) @@ -138,8 +146,9 @@ ({ \ __m512 dst; \ __asm__ volatile \ - ("{tilemovrow\t$"#imm", %%tmm"#src", %0|tilemovrow\t%0, %%tmm"#src", "#imm"}" \ - : "=v" (dst) :); \ + ("{tilemovrow\t%[_imm], %%tmm%c[_src], %0 \ + |tilemovrow\t%0, tmm%c[_src], "#imm"}" \ + : "=v" (dst) : [_src]"i"(src), [_imm]"i"(imm)); \ dst; \ }) diff --git a/gcc/config/i386/amxbf16intrin.h b/gcc/config/i386/amxbf16intrin.h index 9f4a9d1..b2792bb 100644 --- a/gcc/config/i386/amxbf16intrin.h +++ b/gcc/config/i386/amxbf16intrin.h @@ -36,8 +36,10 @@ #if defined(__x86_64__) #define _tile_dpbf16ps_internal(dst,src1,src2) \ - __asm__ volatile\ - ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) + __asm__ volatile \ + ("{tdpbf16ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ + |tdpbf16ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) #define _tile_dpbf16ps(dst,src1,src2) \ _tile_dpbf16ps_internal (dst, src1, src2) diff --git a/gcc/config/i386/amxcomplexintrin.h b/gcc/config/i386/amxcomplexintrin.h index fc5964f..55b7d53 100644 --- a/gcc/config/i386/amxcomplexintrin.h +++ b/gcc/config/i386/amxcomplexintrin.h @@ -35,13 +35,17 @@ #endif /* __AMX_COMPLEX__ */ #if defined(__x86_64__) -#define _tile_cmmimfp16ps_internal(src1_dst,src2,src3) \ - __asm__ volatile\ - ("{tcmmimfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tcmmimfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::) - -#define _tile_cmmrlfp16ps_internal(src1_dst,src2,src3) \ - __asm__ volatile\ - ("{tcmmrlfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tcmmrlfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::) +#define 
_tile_cmmimfp16ps_internal(src1_dst,src2,src3) \ + __asm__ volatile \ + ("{tcmmimfp16ps\t%%tmm%c[_src3], %%tmm%c[_src2], %%tmm%c[_src1_dst] \ + |tcmmimfp16ps\ttmm%c[_src1_dst], tmm%c[_src2], tmm%c[_src3]}" \ + :: [_src1_dst]"i"(src1_dst), [_src2]"i"(src2), [_src3]"i"(src3)) + +#define _tile_cmmrlfp16ps_internal(src1_dst,src2,src3) \ + __asm__ volatile \ + ("{tcmmrlfp16ps\t%%tmm%c[_src3], %%tmm%c[_src2], %%tmm%c[_src1_dst] \ + |tcmmrlfp16ps\ttmm%c[_src1_dst], tmm%c[_src2], tmm%c[_src3]}" \ + :: [_src1_dst]"i"(src1_dst), [_src2]"i"(src2), [_src3]"i"(src3)) #define _tile_cmmimfp16ps(src1_dst,src2,src3) \ _tile_cmmimfp16ps_internal (src1_dst, src2, src3) diff --git a/gcc/config/i386/amxfp16intrin.h b/gcc/config/i386/amxfp16intrin.h index 02fd031..1e0ef27 100644 --- a/gcc/config/i386/amxfp16intrin.h +++ b/gcc/config/i386/amxfp16intrin.h @@ -29,9 +29,11 @@ #define _AMXFP16INTRIN_H_INCLUDED #if defined(__x86_64__) -#define _tile_dpfp16ps_internal(dst,src1,src2) \ - __asm__ volatile \ - ("{tdpfp16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpfp16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) +#define _tile_dpfp16ps_internal(dst,src1,src2) \ + __asm__ volatile \ + ("{tdpfp16ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ + |tdpfp16ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) #define _tile_dpfp16ps(dst,src1,src2) \ _tile_dpfp16ps_internal (dst,src1,src2) diff --git a/gcc/config/i386/amxfp8intrin.h b/gcc/config/i386/amxfp8intrin.h index 8952be9..9467f53 100644 --- a/gcc/config/i386/amxfp8intrin.h +++ b/gcc/config/i386/amxfp8intrin.h @@ -29,21 +29,29 @@ #define _AMXFP8INTRIN_H_INCLUDED #if defined(__x86_64__) -#define _tile_dpbf8ps_internal(dst,src1,src2) \ - __asm__ volatile \ - ("{tdpbf8ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) - -#define _tile_dpbhf8ps_internal(dst,src1,src2) \ - __asm__ volatile \ - ("{tdpbhf8ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbhf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) - -#define _tile_dphbf8ps_internal(dst,src1,src2) \ - __asm__ volatile \ - ("{tdphbf8ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdphbf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) - -#define _tile_dphf8ps_internal(dst,src1,src2) \ - __asm__ volatile \ - ("{tdphf8ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdphf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) +#define _tile_dpbf8ps_internal(dst,src1,src2) \ + __asm__ volatile \ + ("{tdpbf8ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ + |tdpbf8ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) + +#define _tile_dpbhf8ps_internal(dst,src1,src2) \ + __asm__ volatile \ + ("{tdpbhf8ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ + |tdpbhf8ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) + +#define _tile_dphbf8ps_internal(dst,src1,src2) \ + __asm__ volatile \ + ("{tdphbf8ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ + |tdphbf8ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) + +#define _tile_dphf8ps_internal(dst,src1,src2) \ + __asm__ volatile \ + ("{tdphf8ps\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ + |tdphf8ps\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ + :: [_dst]"i"(dst), [_src1]"i"(src1), [_src2]"i"(src2)) #define _tile_dpbf8ps(dst,src1,src2) \ _tile_dpbf8ps_internal (dst,src1,src2) diff --git a/gcc/config/i386/amxint8intrin.h 
b/gcc/config/i386/amxint8intrin.h index 332c8db..f7cb36c 100644 --- a/gcc/config/i386/amxint8intrin.h +++ b/gcc/config/i386/amxint8intrin.h @@ -37,7 +37,9 @@ #if defined(__x86_64__) #define _tile_int8_dp_internal(name,dst,src1,src2) \ __asm__ volatile \ - ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) + ("{"#name"\t%%tmm%c[_src2], %%tmm%c[_src1], %%tmm%c[_dst] \ + |"#name"\ttmm%c[_dst], tmm%c[_src1], tmm%c[_src2]}" \ + ::[_dst]"i"(dst),[_src1]"i"(src1),[_src2]"i"(src2)) #define _tile_dpbssd(dst,src1,src2) \ _tile_int8_dp_internal (tdpbssd, dst, src1, src2) diff --git a/gcc/config/i386/amxmovrsintrin.h b/gcc/config/i386/amxmovrsintrin.h index 93a2dbf..9f5d317 100644 --- a/gcc/config/i386/amxmovrsintrin.h +++ b/gcc/config/i386/amxmovrsintrin.h @@ -36,17 +36,17 @@ #define __DISABLE_AMX_MOVRS__ #endif /* __AMX_MOVRS__ */ -#define _tile_loaddrs_internal(tdst, base, stride) \ -__asm__ volatile \ - ("{tileloaddrs\t(%0,%1,1), %%tmm"#tdst \ - "|tileloaddrs\t%%tmm"#tdst", [%0+%1*1]}" \ - :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride))) +#define _tile_loaddrs_internal(tdst, base, stride) \ +__asm__ volatile \ + ("{tileloaddrs\t(%0,%1,1), %%tmm%c[_tdst] \ + |tileloaddrs\ttmm%c[_tdst], [%0+%1*1]}" \ + :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)), [_tdst]"i"(tdst)) -#define _tile_loaddrst1_internal(tdst, base, stride) \ -__asm__ volatile \ - ("{tileloaddrst1\t(%0,%1,1), %%tmm"#tdst \ - "|tileloaddrst1\t%%tmm"#tdst", [%0+%1*1]}" \ - :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride))) +#define _tile_loaddrst1_internal(tdst, base, stride) \ +__asm__ volatile \ + ("{tileloaddrst1\t(%0,%1,1), %%tmm%c[_tdst] \ + |tileloaddrst1\ttmm%c[_tdst], [%0+%1*1]}" \ + :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)), [_tdst]"i"(tdst)) #define _tile_loaddrs(tdst, base, stride) \ _tile_loaddrs_internal(tdst, base, stride) diff --git a/gcc/config/i386/amxtf32intrin.h b/gcc/config/i386/amxtf32intrin.h index 8ed910d..a7a1f4f 100644 --- a/gcc/config/i386/amxtf32intrin.h +++ b/gcc/config/i386/amxtf32intrin.h @@ -31,8 +31,10 @@ #if defined(__x86_64__) #define _tile_mmultf32ps_internal(src1_dst,src2,src3) \ - __asm__ volatile\ - ("{tmmultf32ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tmmultf32ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::) + __asm__ volatile \ + ("{tmmultf32ps\t%%tmm%c[_src3], %%tmm%c[_src2], %%tmm%c[_src1_dst] \ + |tmmultf32ps\ttmm%c[_src1_dst], tmm%c[_src2], tmm%c[_src3]}" \ + :: [_src1_dst]"i"(src1_dst), [_src2]"i"(src2), [_src3]"i"(src3)) #define _tile_mmultf32ps(src1_dst,src2,src3) \ _tile_mmultf32ps_internal (src1_dst, src2, src3) diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h index 8c8e2cd..67c6b53 100644 --- a/gcc/config/i386/amxtileintrin.h +++ b/gcc/config/i386/amxtileintrin.h @@ -61,32 +61,32 @@ _tile_release (void) #define _tile_loadd_internal(dst,base,stride) \ __asm__ volatile \ - ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" \ - :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride))) + ("{tileloadd\t(%0,%1,1), %%tmm%c[_dst]|tileloadd\ttmm%c[_dst], [%0+%1*1]}" \ + :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)), [_dst]"i"(dst)) #define _tile_stream_loadd(dst,base,stride) \ _tile_stream_loadd_internal (dst, base, stride) #define _tile_stream_loadd_internal(dst,base,stride) \ __asm__ volatile \ - ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" \ - :: 
"r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride))) + ("{tileloaddt1\t(%0,%1,1), %%tmm%c[_dst]|tileloaddt1\ttmm%c[_dst], [%0+%1*1]}" \ + :: "r" ((const void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)), [_dst]"i"(dst)) #define _tile_stored(dst,base,stride) \ _tile_stored_internal (dst, base, stride) #define _tile_stored_internal(src,base,stride) \ __asm__ volatile \ - ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" \ - :: "r" ((void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)) \ - : "memory") + ("{tilestored\t%%tmm%c[_src], (%0,%1,1)|tilestored\t[%0+%1*1], tmm%c[_src]}" \ + :: "r" ((void*) (base)), "r" ((__PTRDIFF_TYPE__) (stride)), [_src]"i"(src) \ + : "memory") #define _tile_zero(dst) \ _tile_zero_internal (dst) -#define _tile_zero_internal(dst) \ - __asm__ volatile \ - ("tilezero\t%%tmm"#dst ::) +#define _tile_zero_internal(dst) \ + __asm__ volatile \ + ("{tilezero\t%%tmm%c[_dst]|tilezero\ttmm%c[_dst]}" :: [_dst]"i"(dst)) #endif diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc index 0557df9..b54f0af 100644 --- a/gcc/config/i386/driver-i386.cc +++ b/gcc/config/i386/driver-i386.cc @@ -603,6 +603,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Assume Diamond Rapids. */ if (has_feature (FEATURE_AMX_FP8)) cpu = "diamondrapids"; + /* Assume Nova Lake. */ + else if (has_feature (FEATURE_AVX10_2)) + cpu = "novalake"; /* Assume Granite Rapids D. */ else if (has_feature (FEATURE_AMX_COMPLEX)) cpu = "graniterapids-d"; @@ -643,9 +646,6 @@ const char *host_detect_local_cpu (int argc, const char **argv) /* Assume Clearwater Forest. */ if (has_feature (FEATURE_USER_MSR)) cpu = "clearwaterforest"; - /* Assume Nova Lake. */ - else if (has_feature (FEATURE_PREFETCHI)) - cpu = "novalake"; else if (has_feature (FEATURE_SM3)) { if (has_feature (FEATURE_KL)) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index a1f1b26..438fa4e 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -4159,12 +4159,18 @@ static bool ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, rtx if_true, rtx if_false) { - machine_mode mode; + machine_mode mode = GET_MODE (dest); bool is_min; rtx tmp; if (code == LT) ; + else if (code == LE && !HONOR_NANS (mode)) + { + /* We can swap LE to GE and then invert to LT. */ + std::swap (cmp_op0, cmp_op1); + std::swap (if_true, if_false); + } else if (code == UNGE) std::swap (if_true, if_false); else @@ -4177,7 +4183,6 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, else return false; - mode = GET_MODE (dest); if (immediate_operand (if_false, mode)) if_false = force_reg (mode, if_false); if (immediate_operand (if_true, mode)) @@ -9995,6 +10000,754 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, return true; } +/* Fully unroll memmove of known size with up to 8 registers. */ + +static bool +ix86_expand_unroll_movmem (rtx dst, rtx src, rtx destreg, rtx srcreg, + unsigned HOST_WIDE_INT count, + machine_mode mode) +{ + /* If 8 registers registers can cover all memory, load them into + registers and store them together to avoid possible address + overlap between source and destination. */ + unsigned HOST_WIDE_INT moves = count / GET_MODE_SIZE (mode); + if (moves == 0) + { + mode = smallest_int_mode_for_size + (count * BITS_PER_UNIT).require (); + if (count == GET_MODE_SIZE (mode)) + moves = 1; + else + { + /* Reduce the smallest move size by half so that MOVES == 1. 
*/ + mode = smallest_int_mode_for_size + (GET_MODE_BITSIZE (mode) / 2).require (); + moves = count / GET_MODE_SIZE (mode); + gcc_assert (moves == 1); + } + } + else if (moves > 8) + return false; + + unsigned int i; + rtx tmp[9]; + + for (i = 0; i < moves; i++) + tmp[i] = gen_reg_rtx (mode); + + rtx srcmem = change_address (src, mode, srcreg); + for (i = 0; i < moves; i++) + { + emit_move_insn (tmp[i], srcmem); + srcmem = offset_address (srcmem, + GEN_INT (GET_MODE_SIZE (mode)), + GET_MODE_SIZE (mode)); + } + + unsigned int epilogue_size = count & (GET_MODE_SIZE (mode) - 1); + machine_mode epilogue_mode = VOIDmode; + if (epilogue_size) + { + /* Handle the remaining bytes with overlapping move. */ + epilogue_mode = smallest_int_mode_for_size + (epilogue_size * BITS_PER_UNIT).require (); + tmp[8] = gen_reg_rtx (epilogue_mode); + srcmem = adjust_address (srcmem, epilogue_mode, 0); + srcmem = offset_address (srcmem, GEN_INT (epilogue_size), 1); + srcmem = offset_address (srcmem, + GEN_INT (-GET_MODE_SIZE (epilogue_mode)), + GET_MODE_SIZE (epilogue_mode)); + emit_move_insn (tmp[8], srcmem); + } + + rtx destmem = change_address (dst, mode, destreg); + for (i = 0; i < moves; i++) + { + emit_move_insn (destmem, tmp[i]); + destmem = offset_address (destmem, + GEN_INT (GET_MODE_SIZE (mode)), + GET_MODE_SIZE (mode)); + } + + if (epilogue_size) + { + /* Use overlapping move. */ + destmem = adjust_address (destmem, epilogue_mode, 0); + destmem = offset_address (destmem, GEN_INT (epilogue_size), 1); + destmem = offset_address (destmem, + GEN_INT (-GET_MODE_SIZE (epilogue_mode)), + GET_MODE_SIZE (epilogue_mode)); + emit_move_insn (destmem, tmp[8]); + } + + return true; +} + +/* Expand memmove of MOVES * mode size bytes, with MOVES <= 4. If + FORWARD is true, copy forward. Otherwise copy backward. */ + +static void +ix86_expand_n_move_movmem (rtx destmem, rtx srcmem, machine_mode mode, + unsigned int moves, bool forward) +{ + gcc_assert (moves <= 4); + + unsigned int i; + rtx tmp[8]; + + for (i = 0; i < moves; i++) + tmp[i] = gen_reg_rtx (mode); + + rtx step; + if (forward) + step = GEN_INT (GET_MODE_SIZE (mode)); + else + step = GEN_INT (-GET_MODE_SIZE (mode)); + + /* Load the MOVES values. */ + for (i = 0; i < moves - 1; i++) + { + emit_move_insn (tmp[i], srcmem); + srcmem = offset_address (srcmem, step, GET_MODE_SIZE (mode)); + } + emit_move_insn (tmp[i], srcmem); + + /* Store the MOVES values. */ + for (i = 0; i < moves - 1; i++) + { + emit_move_insn (destmem, tmp[i]); + destmem = offset_address (destmem, step, GET_MODE_SIZE (mode)); + } + emit_move_insn (destmem, tmp[i]); +} + +/* Load MOVES of mode size into REGS. If LAST is true, load the + last MOVES. Otherwise, load the first MOVES. */ + +static void +ix86_expand_load_movmem (rtx src, rtx srcreg, rtx count_exp, + machine_mode mode, unsigned int moves, + rtx regs[], bool last) +{ + unsigned int i; + + for (i = 0; i < moves; i++) + regs[i] = gen_reg_rtx (mode); + + rtx srcmem = change_address (src, mode, srcreg); + rtx step; + if (last) + { + srcmem = offset_address (srcmem, count_exp, 1); + step = GEN_INT (-GET_MODE_SIZE (mode)); + srcmem = offset_address (srcmem, step, GET_MODE_SIZE (mode)); + } + else + step = GEN_INT (GET_MODE_SIZE (mode)); + + for (i = 0; i < moves - 1; i++) + { + emit_move_insn (regs[i], srcmem); + srcmem = offset_address (srcmem, step, GET_MODE_SIZE (mode)); + } + emit_move_insn (regs[i], srcmem); +} +
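All of these helpers lean on the same overlapping-move idea. As a plain-C model (illustrative only, not GCC source): to copy COUNT bytes with 8 <= COUNT <= 16, issue two 8-byte moves, anchoring the second at COUNT - 8, and complete both loads before either store so the copy remains correct when destination and source overlap:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
copy_8_to_16 (char *dst, const char *src, size_t count)
{
  uint64_t head, tail;
  /* Both loads first...  */
  memcpy (&head, src, 8);
  memcpy (&tail, src + count - 8, 8);	/* may overlap the first load */
  /* ...then both stores; the two ranges may overlap each other and
     the source, as in ix86_expand_n_overlapping_move_movmem with
     MOVES == 2.  */
  memcpy (dst, &head, 8);
  memcpy (dst + count - 8, &tail, 8);
}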
+/* Store MOVES of mode size from REGS. If LAST is true, store the + last MOVES. Otherwise, store the first MOVES. */ + +static void +ix86_expand_store_movmem (rtx dst, rtx destreg, rtx count_exp, + machine_mode mode, unsigned int moves, + rtx regs[], bool last) +{ + unsigned int i; + + rtx destmem = change_address (dst, mode, destreg); + rtx step; + if (last) + { + destmem = offset_address (destmem, count_exp, 1); + step = GEN_INT (-GET_MODE_SIZE (mode)); + destmem = offset_address (destmem, step, GET_MODE_SIZE (mode)); + } + else + step = GEN_INT (GET_MODE_SIZE (mode)); + + for (i = 0; i < moves - 1; i++) + { + emit_move_insn (destmem, regs[i]); + destmem = offset_address (destmem, step, GET_MODE_SIZE (mode)); + } + emit_move_insn (destmem, regs[i]); +} + +/* Expand memmove of size between (MOVES / 2) * mode size and + MOVES * mode size with overlapping load and store. MOVES is even. + MOVES >= 2 and MOVES <= 8. */ + +static void +ix86_expand_n_overlapping_move_movmem (rtx dst, rtx src, rtx destreg, + rtx srcreg, rtx count_exp, + machine_mode mode, + unsigned int moves) +{ + gcc_assert (moves >= 2 && moves <= 8 && (moves & 1) == 0); + + unsigned int half_moves = moves / 2; + unsigned int i, j; + rtx tmp[8]; + + for (i = 0; i < moves; i++) + tmp[i] = gen_reg_rtx (mode); + + rtx base_srcmem = change_address (src, mode, srcreg); + + /* Load the first half. */ + rtx srcmem = base_srcmem; + for (i = 0; i < half_moves - 1; i++) + { + emit_move_insn (tmp[i], srcmem); + srcmem = offset_address (srcmem, + GEN_INT (GET_MODE_SIZE (mode)), + GET_MODE_SIZE (mode)); + } + emit_move_insn (tmp[i], srcmem); + + /* Load the second half. */ + srcmem = offset_address (base_srcmem, count_exp, 1); + srcmem = offset_address (srcmem, + GEN_INT (-GET_MODE_SIZE (mode)), + GET_MODE_SIZE (mode)); + for (j = half_moves, i = 0; i < half_moves - 1; i++, j++) + { + emit_move_insn (tmp[j], srcmem); + srcmem = offset_address (srcmem, + GEN_INT (-GET_MODE_SIZE (mode)), + GET_MODE_SIZE (mode)); + } + emit_move_insn (tmp[j], srcmem); + + rtx base_destmem = change_address (dst, mode, destreg); + + /* Store the first half. */ + rtx destmem = base_destmem; + for (i = 0; i < half_moves - 1; i++) + { + emit_move_insn (destmem, tmp[i]); + destmem = offset_address (destmem, + GEN_INT (GET_MODE_SIZE (mode)), + GET_MODE_SIZE (mode)); + } + emit_move_insn (destmem, tmp[i]); + + /* Store the second half. */ + destmem = offset_address (base_destmem, count_exp, 1); + destmem = offset_address (destmem, GEN_INT (-GET_MODE_SIZE (mode)), + GET_MODE_SIZE (mode)); + for (j = half_moves, i = 0; i < half_moves - 1; i++, j++) + { + emit_move_insn (destmem, tmp[j]); + destmem = offset_address (destmem, GEN_INT (-GET_MODE_SIZE (mode)), + GET_MODE_SIZE (mode)); + } + emit_move_insn (destmem, tmp[j]); +} + +/* Expand memmove of size less than the mode size, which is at most 64. */ + +static void +ix86_expand_less_move_movmem (rtx dst, rtx src, rtx destreg, + rtx srcreg, rtx count_exp, + unsigned HOST_WIDE_INT min_size, + machine_mode mode, + rtx_code_label *done_label) +{ + bool skip = false; + machine_mode count_mode = counter_mode (count_exp); + + rtx_code_label *between_32_63_label + = GET_MODE_SIZE (mode) > 32 ? gen_label_rtx () : nullptr; + /* Jump to BETWEEN_32_63_LABEL if size >= 32 and size < 64. */ + if (between_32_63_label) + { + if (min_size && min_size >= 32) + { + emit_jump_insn (gen_jump (between_32_63_label)); + emit_barrier (); + skip = true; + } + else + emit_cmp_and_jump_insns (count_exp, GEN_INT (32), GEU, + nullptr, count_mode, 1, + between_32_63_label); + } + + rtx_code_label *between_16_31_label + = (!skip && GET_MODE_SIZE (mode) > 16) ?
gen_label_rtx () : nullptr; + /* Jump to BETWEEN_16_31_LABEL if size >= 16 and size < 32. */ + if (between_16_31_label) + { + if (min_size && min_size >= 16) + { + emit_jump_insn (gen_jump (between_16_31_label)); + emit_barrier (); + skip = true; + } + else + emit_cmp_and_jump_insns (count_exp, GEN_INT (16), GEU, + nullptr, count_mode, 1, + between_16_31_label); + } + + rtx_code_label *between_8_15_label + = (!skip && GET_MODE_SIZE (mode) > 8) ? gen_label_rtx () : nullptr; + /* Jump to BETWEEN_8_15_LABEL if size >= 8 and size < 16. */ + if (between_8_15_label) + { + if (min_size && min_size >= 8) + { + emit_jump_insn (gen_jump (between_8_15_label)); + emit_barrier (); + skip = true; + } + else + emit_cmp_and_jump_insns (count_exp, GEN_INT (8), GEU, + nullptr, count_mode, 1, + between_8_15_label); + } + + rtx_code_label *between_4_7_label + = (!skip && GET_MODE_SIZE (mode) > 4) ? gen_label_rtx () : nullptr; + /* Jump to BETWEEN_4_7_LABEL if size >= 4 and size < 8. */ + if (between_4_7_label) + { + if (min_size && min_size >= 4) + { + emit_jump_insn (gen_jump (between_4_7_label)); + emit_barrier (); + skip = true; + } + else + emit_cmp_and_jump_insns (count_exp, GEN_INT (4), GEU, + nullptr, count_mode, 1, + between_4_7_label); + } + + rtx_code_label *between_2_3_label + = (!skip && GET_MODE_SIZE (mode) > 2) ? gen_label_rtx () : nullptr; + /* Jump to BETWEEN_2_3_LABEL if size >= 2 and size < 4. */ + if (between_2_3_label) + { + if (min_size && min_size >= 2) + { + emit_jump_insn (gen_jump (between_2_3_label)); + emit_barrier (); + skip = true; + } + else + emit_cmp_and_jump_insns (count_exp, GEN_INT (1), GT, + nullptr, count_mode, 1, + between_2_3_label); + } + + if (!skip) + { + rtx_code_label *zero_label + = min_size == 0 ? gen_label_rtx () : nullptr; + /* Skip if size == 0. */ + if (zero_label) + emit_cmp_and_jump_insns (count_exp, GEN_INT (1), LT, + nullptr, count_mode, 1, + zero_label, + profile_probability::unlikely ()); + + /* Move 1 byte. */ + rtx tmp0 = gen_reg_rtx (QImode); + rtx srcmem = change_address (src, QImode, srcreg); + emit_move_insn (tmp0, srcmem); + rtx destmem = change_address (dst, QImode, destreg); + emit_move_insn (destmem, tmp0); + + if (zero_label) + emit_label (zero_label); + + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + } + + if (between_32_63_label) + { + emit_label (between_32_63_label); + ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg, + count_exp, OImode, 2); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + } + + if (between_16_31_label) + { + emit_label (between_16_31_label); + ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg, + count_exp, TImode, 2); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + } + + if (between_8_15_label) + { + emit_label (between_8_15_label); + ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg, + count_exp, DImode, 2); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + } + + if (between_4_7_label) + { + emit_label (between_4_7_label); + ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg, + count_exp, SImode, 2); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + } + + if (between_2_3_label) + { + emit_label (between_2_3_label); + ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg, + count_exp, HImode, 2); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + } +} +
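The numbered strategy comment that follows is easier to check against a small C model. The sketch below is illustrative only; VEC stands in for MOVE_MAX, memcpy stands in for register moves, and a chunk-size search replaces the explicit label ladder of ix86_expand_less_move_movmem above:

#include <stddef.h>
#include <string.h>

#define VEC 16			/* stand-in for MOVE_MAX */

/* Copy N bytes, C <= N <= 2*C, as two possibly overlapping C-byte
   moves; all loads happen before any store.  */
static void
copy_overlap (char *dst, const char *src, size_t n, size_t c)
{
  char head[VEC], tail[VEC];
  memcpy (head, src, c);
  memcpy (tail, src + n - c, c);
  memcpy (dst, head, c);
  memcpy (dst + n - c, tail, c);
}

static void
model_movmem (char *dst, const char *src, size_t n)
{
  if (n == 0)
    return;
  if (n <= 2 * VEC)
    {
      /* Steps 1/3 and the small-size ladder: the largest power-of-two
         chunk c with c <= n (capped at VEC) satisfies c <= n <= 2*c.  */
      size_t c = 1;
      while (c * 2 <= n && c < VEC)
        c *= 2;
      copy_overlap (dst, src, n, c);
    }
  else if (dst < src)
    /* Step 6b (simplified to byte granularity): copying forward is
       safe when the destination precedes the source.  */
    for (size_t i = 0; i < n; i++)
      dst[i] = src[i];
  else
    /* Step 6a: otherwise copy backward.  */
    for (size_t i = n; i-- > 0; )
      dst[i] = src[i];
}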
+/* Expand movmem with overlapping unaligned loads and stores: + 1. Load all sources into registers and store them together to avoid + possible address overlap between source and destination. + 2. For known size, first try to fully unroll with 8 registers. + 3. For size <= 2 * MOVE_MAX, load all sources into 2 registers first + and then store them together. + 4. For size > 2 * MOVE_MAX and size <= 4 * MOVE_MAX, load all sources + into 4 registers first and then store them together. + 5. For size > 4 * MOVE_MAX and size <= 8 * MOVE_MAX, load all sources + into 8 registers first and then store them together. + 6. For size > 8 * MOVE_MAX, + a. If address of destination > address of source, copy backward + with a 4 * MOVE_MAX loop with unaligned loads and stores. Load + the first 4 * MOVE_MAX into 4 registers before the loop and + store them after the loop to support overlapping addresses. + b. Otherwise, copy forward with a 4 * MOVE_MAX loop with unaligned + loads and stores. Load the last 4 * MOVE_MAX into 4 registers + before the loop and store them after the loop to support + overlapping addresses. + */ + +bool +ix86_expand_movmem (rtx operands[]) +{ + /* Since far fewer registers are available in 32-bit mode, don't + inline movmem in 32-bit mode. */ + if (!TARGET_64BIT) + return false; + + rtx dst = operands[0]; + rtx src = operands[1]; + rtx count_exp = operands[2]; + rtx expected_size_exp = operands[5]; + rtx min_size_exp = operands[6]; + rtx probable_max_size_exp = operands[8]; + unsigned HOST_WIDE_INT count = HOST_WIDE_INT_0U; + HOST_WIDE_INT expected_size = HOST_WIDE_INT_M1U; + unsigned HOST_WIDE_INT min_size = HOST_WIDE_INT_0U; + unsigned HOST_WIDE_INT probable_max_size = HOST_WIDE_INT_M1U; + + if (CONST_INT_P (count_exp)) + { + min_size = probable_max_size = count = expected_size + = INTVAL (count_exp); + /* When COUNT is 0, there is nothing to do. */ + if (!count) + return true; + } + else + { + if (min_size_exp) + min_size = INTVAL (min_size_exp); + if (probable_max_size_exp) + probable_max_size = INTVAL (probable_max_size_exp); + if (CONST_INT_P (expected_size_exp)) + expected_size = INTVAL (expected_size_exp); + } + + /* Make sure we don't need to care about overflow later on. */ + if (count > (HOST_WIDE_INT_1U << 30)) + return false; + + addr_space_t dst_as = MEM_ADDR_SPACE (dst); + addr_space_t src_as = MEM_ADDR_SPACE (src); + int dynamic_check; + bool noalign; + enum stringop_alg alg = decide_alg (count, expected_size, min_size, + probable_max_size, false, false, + dst_as, src_as, &dynamic_check, + &noalign, false); + if (alg == libcall) + return false; + + rtx destreg = ix86_copy_addr_to_reg (XEXP (dst, 0)); + rtx srcreg = ix86_copy_addr_to_reg (XEXP (src, 0)); + + unsigned int move_max = MOVE_MAX; + machine_mode mode = smallest_int_mode_for_size + (move_max * BITS_PER_UNIT).require (); + if (probable_max_size && probable_max_size < move_max) + { + /* Get a usable MOVE_MAX. */ + mode = smallest_int_mode_for_size + (probable_max_size * BITS_PER_UNIT).require (); + /* Reduce MOVE_MAX by half so that MOVE_MAX can be used. */ + if (GET_MODE_SIZE (mode) > probable_max_size) + mode = smallest_int_mode_for_size + (GET_MODE_BITSIZE (mode) / 2).require (); + move_max = GET_MODE_SIZE (mode); + } + + /* Try to fully unroll memmove of known size first.
*/ + if (count + && ix86_expand_unroll_movmem (dst, src, destreg, srcreg, count, + mode)) + return true; + + rtx_code_label *done_label = gen_label_rtx (); + + rtx_code_label *less_vec_label = nullptr; + if (min_size == 0 || min_size < move_max) + less_vec_label = gen_label_rtx (); + + machine_mode count_mode = counter_mode (count_exp); + + /* Jump to LESS_VEC_LABEL if size < MOVE_MAX. */ + if (less_vec_label) + emit_cmp_and_jump_insns (count_exp, GEN_INT (move_max), LTU, + nullptr, count_mode, 1, + less_vec_label); + + rtx_code_label *more_2x_vec_label = nullptr; + if (probable_max_size == 0 || probable_max_size > 2 * move_max) + more_2x_vec_label = gen_label_rtx (); + + /* Jump to MORE_2X_VEC_LABEL if size > 2 * MOVE_MAX. */ + if (more_2x_vec_label) + emit_cmp_and_jump_insns (count_exp, GEN_INT (2 * move_max), GTU, + nullptr, count_mode, 1, + more_2x_vec_label); + + if (min_size == 0 || min_size <= 2 * move_max) + { + /* Size >= MOVE_MAX and size <= 2 * MOVE_MAX. */ + ix86_expand_n_overlapping_move_movmem (dst, src, destreg, srcreg, + count_exp, mode, 2); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + } + + if (less_vec_label) + { + /* Size < MOVE_MAX. */ + emit_label (less_vec_label); + ix86_expand_less_move_movmem (dst, src, destreg, srcreg, + count_exp, min_size, mode, + done_label); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + } + + if (more_2x_vec_label) + { + /* Size > 2 * MOVE_MAX and destination may overlap with source. */ + emit_label (more_2x_vec_label); + + rtx_code_label *more_8x_vec_label = nullptr; + if (probable_max_size == 0 || probable_max_size > 8 * move_max) + more_8x_vec_label = gen_label_rtx (); + + /* Jump to MORE_8X_VEC_LABEL if size > 8 * MOVE_MAX. */ + if (more_8x_vec_label) + emit_cmp_and_jump_insns (count_exp, GEN_INT (8 * move_max), GTU, + nullptr, count_mode, 1, + more_8x_vec_label); + + rtx_code_label *last_4x_vec_label = nullptr; + if (min_size == 0 || min_size < 4 * move_max) + last_4x_vec_label = gen_label_rtx (); + + /* Jump to LAST_4X_VEC_LABEL if size < 4 * MOVE_MAX. */ + if (last_4x_vec_label) + emit_cmp_and_jump_insns (count_exp, GEN_INT (4 * move_max), LTU, + nullptr, count_mode, 1, + last_4x_vec_label); + + if (probable_max_size == 0 || probable_max_size > 4 * move_max) + { + /* Size > 4 * MOVE_MAX and size <= 8 * MOVE_MAX. */ + ix86_expand_n_overlapping_move_movmem (dst, src, destreg, + srcreg, count_exp, + mode, 8); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + } + + if (last_4x_vec_label) + { + /* Size > 2 * MOVE_MAX and size <= 4 * MOVE_MAX. */ + emit_label (last_4x_vec_label); + ix86_expand_n_overlapping_move_movmem (dst, src, destreg, + srcreg, count_exp, + mode, 4); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + } + + if (more_8x_vec_label) + { + /* Size > 8 * MOVE_MAX. */ + emit_label (more_8x_vec_label); + + rtx loop_count = gen_reg_rtx (count_mode); + emit_move_insn (loop_count, count_exp); + + /* Jump to MORE_8X_VEC_BACKWARD_LABEL if source address is + lower than destination address. */ + rtx_code_label *more_8x_vec_backward_label = gen_label_rtx (); + emit_cmp_and_jump_insns (srcreg, destreg, LTU, nullptr, + GET_MODE (destreg), 1, + more_8x_vec_backward_label); + + /* Skip if source == destination, which is less common.
*/ + emit_cmp_and_jump_insns (srcreg, destreg, EQ, nullptr, + GET_MODE (destreg), 1, done_label, + profile_probability::unlikely ()); + + rtx base_destreg = gen_reg_rtx (GET_MODE (destreg)); + emit_move_insn (base_destreg, destreg); + + /* Load the last 4 * MOVE_MAX. */ + rtx regs[4]; + ix86_expand_load_movmem (src, srcreg, count_exp, mode, + ARRAY_SIZE (regs), regs, true); + + rtx srcmem = change_address (src, mode, srcreg); + rtx destmem = change_address (dst, mode, destreg); + + /* Copy forward with a 4 * MOVE_MAX loop. */ + rtx_code_label *loop_4x_vec_forward_label = gen_label_rtx (); + emit_label (loop_4x_vec_forward_label); + + ix86_expand_n_move_movmem (destmem, srcmem, mode, 4, true); + + rtx tmp; + rtx delta = GEN_INT (4 * MOVE_MAX); + + /* Decrement LOOP_COUNT by 4 * MOVE_MAX. */ + tmp = expand_simple_binop (GET_MODE (loop_count), MINUS, + loop_count, delta, nullptr, 1, + OPTAB_DIRECT); + if (tmp != loop_count) + emit_move_insn (loop_count, tmp); + + /* Increment DESTREG and SRCREG by 4 * MOVE_MAX. */ + tmp = expand_simple_binop (GET_MODE (destreg), PLUS, + destreg, delta, nullptr, 1, + OPTAB_DIRECT); + if (tmp != destreg) + emit_move_insn (destreg, tmp); + tmp = expand_simple_binop (GET_MODE (srcreg), PLUS, srcreg, + delta, nullptr, 1, OPTAB_DIRECT); + if (tmp != srcreg) + emit_move_insn (srcreg, tmp); + + /* Stop if LOOP_COUNT <= 4 * MOVE_MAX. */ + emit_cmp_and_jump_insns (loop_count, delta, GTU, nullptr, + GET_MODE (loop_count), 1, + loop_4x_vec_forward_label); + + /* Store the last 4 * MOVE_MAX. */ + ix86_expand_store_movmem (dst, base_destreg, count_exp, mode, + ARRAY_SIZE (regs), regs, true); + + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + + /* Copy backward with a 4 * MOVE_MAX loop. */ + emit_label (more_8x_vec_backward_label); + + base_destreg = gen_reg_rtx (GET_MODE (destreg)); + emit_move_insn (base_destreg, destreg); + + /* Load the first 4 * MOVE_MAX. */ + ix86_expand_load_movmem (src, srcreg, count_exp, mode, + ARRAY_SIZE (regs), regs, false); + + /* Increment DESTREG and SRCREG by COUNT_EXP. */ + tmp = expand_simple_binop (GET_MODE (destreg), PLUS, + destreg, count_exp, nullptr, 1, + OPTAB_DIRECT); + if (tmp != destreg) + emit_move_insn (destreg, tmp); + tmp = expand_simple_binop (GET_MODE (srcreg), PLUS, srcreg, + count_exp, nullptr, 1, OPTAB_DIRECT); + if (tmp != srcreg) + emit_move_insn (srcreg, tmp); + + srcmem = change_address (src, mode, srcreg); + destmem = change_address (dst, mode, destreg); + rtx step = GEN_INT (-GET_MODE_SIZE (mode)); + srcmem = offset_address (srcmem, step, GET_MODE_SIZE (mode)); + destmem = offset_address (destmem, step, GET_MODE_SIZE (mode)); + + rtx_code_label *loop_4x_vec_backward_label = gen_label_rtx (); + emit_label (loop_4x_vec_backward_label); + + ix86_expand_n_move_movmem (destmem, srcmem, mode, 4, false); + + /* Decrement LOOP_COUNT by 4 * MOVE_MAX. */ + tmp = expand_simple_binop (GET_MODE (loop_count), MINUS, + loop_count, delta, nullptr, 1, + OPTAB_DIRECT); + if (tmp != loop_count) + emit_move_insn (loop_count, tmp); + + /* Decrement DESTREG and SRCREG by 4 * MOVE_MAX. */ + tmp = expand_simple_binop (GET_MODE (destreg), MINUS, + destreg, delta, nullptr, 1, + OPTAB_DIRECT); + if (tmp != destreg) + emit_move_insn (destreg, tmp); + tmp = expand_simple_binop (GET_MODE (srcreg), MINUS, srcreg, + delta, nullptr, 1, OPTAB_DIRECT); + if (tmp != srcreg) + emit_move_insn (srcreg, tmp); + + /* Stop if LOOP_COUNT <= 4 * MOVE_MAX.
*/ + emit_cmp_and_jump_insns (loop_count, delta, GTU, nullptr, + GET_MODE (loop_count), 1, + loop_4x_vec_backward_label); + + /* Store the first 4 * MOVE_MAX. */ + ix86_expand_store_movmem (dst, base_destreg, count_exp, mode, + ARRAY_SIZE (regs), regs, false); + + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + } + } + + emit_label (done_label); + + return true; +} + /* Expand cmpstrn or memcmp. */ bool @@ -26377,17 +27130,15 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev, struct expand_operand ops[5]; int dfv; - push_to_sequence (*prep_seq); - expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); - - cmp_mode = op_mode = GET_MODE (op0); + /* Exit early for non-integer modes to avoid the O(n^2) part of expand_operands. */ + cmp_mode = op_mode = TYPE_MODE (TREE_TYPE (treeop0)); if (!(op_mode == DImode || op_mode == SImode || op_mode == HImode || op_mode == QImode)) - { - end_sequence (); - return NULL_RTX; - } + return NULL_RTX; + + push_to_sequence (*prep_seq); + expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL); icode = code_for_ccmp (op_mode); diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 8e27784..ce6f40b 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3947,12 +3947,20 @@ ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb, (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG) (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil)) + or a basic block with only deleted instructions: + + (code_label 348 23 349 45 3 (nil) [0 uses]) + (note 349 348 436 45 [bb 45] NOTE_INSN_BASIC_BLOCK) + (note 436 349 362 45 NOTE_INSN_DELETED) + */ gcc_assert (DEBUG_INSN_P (insn) || (NOTE_P (insn) && ((NOTE_KIND (insn) == NOTE_INSN_FUNCTION_BEG) || (NOTE_KIND (insn) + == NOTE_INSN_DELETED) + || (NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)))); insn = NULL; break; @@ -4810,6 +4818,9 @@ pass_x86_cse::x86_cse (void) df_process_deferred_rescans (); } + FOR_EACH_VEC_ELT (loads, i, load) + delete load; + df_clear_flags (DF_DEFER_INSN_RESCAN); timevar_pop (TV_MACH_DEP); diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index ba598a8..35064d8 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -1837,6 +1837,21 @@ set_ix86_tune_features (struct gcc_options *opts, } parse_mtune_ctrl_str (opts, dump); + + /* The -mgather/-mscatter options override the corresponding + -mtune-ctrl settings.
*/ + if (OPTION_SET_P (ix86_use_gather)) + { + ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = ix86_use_gather; + ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = ix86_use_gather; + ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = ix86_use_gather; + } + + if (OPTION_SET_P (ix86_use_scatter)) + { + ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = ix86_use_scatter; + ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = ix86_use_scatter; + ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = ix86_use_scatter; + } } diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index bdb8bb9..5ff414a 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -78,6 +78,7 @@ extern void substitute_vpternlog_operands (rtx[]); extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx); extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, bool); +extern bool ix86_expand_movmem (rtx[]); extern bool ix86_expand_cmpstrn_or_cmpmem (rtx, rtx, rtx, rtx, rtx, bool); extern enum reg_class ix86_insn_base_reg_class (rtx_insn *); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 587b2bd..75a9cb6 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -598,6 +598,20 @@ ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1, } } + /* SUB (a, b) underflows precisely when a < b. Convert + (compare (minus a b) a) to (compare a b) + to match the *sub<mode>_3 pattern. */ + if (!op0_preserve_value + && (*code == GTU || *code == LEU) + && GET_CODE (*op0) == MINUS + && rtx_equal_p (XEXP (*op0, 0), *op1)) + { + *op1 = XEXP (*op0, 1); + *op0 = XEXP (*op0, 0); + *code = (int) swap_condition ((enum rtx_code) *code); + return; + } + /* Swap operands of GTU comparison to canonicalize addcarry/subborrow comparison. */ if (!op0_preserve_value @@ -23753,9 +23767,15 @@ x86_print_call_or_nop (FILE *file, const char *target, const char *label) { if (flag_nop_mcount || !strcmp (target, "nop")) - /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ - fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n", - label); + { + if (TARGET_16BIT) + /* 3 byte nop: lea 0(%si), %si */ + fprintf (file, "%s" ASM_BYTE "0x8d, 0x74, 0x00\n", label); + else + /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ + fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n", + label); + } else if (!TARGET_PECOFF && flag_pic) { gcc_assert (flag_plt); @@ -25089,7 +25109,7 @@ i386_solaris_elf_named_section (const char *name, unsigned int flags, return; } -#ifndef USE_GAS +#if !HAVE_GNU_AS if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE) { solaris_elf_asm_comdat_section (name, flags, decl); @@ -26377,7 +26397,20 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, (TREE_OPERAND (gimple_assign_rhs1 (def), 0)))))) { if (fp) - m_num_sse_needed[where]++; + { + /* Scalar FP values residing in x87 registers need to be + spilled and reloaded. */ + auto mode2 = TYPE_MODE (TREE_TYPE (op)); + if (IS_STACK_MODE (mode2)) + { + int cost + = (ix86_cost->hard_register.fp_store[mode2 == SFmode + ?
0 : 1] + + ix86_cost->sse_load[sse_store_index (mode2)]); + stmt_cost += COSTS_N_INSNS (cost) / 2; + } + m_num_sse_needed[where]++; + } else { m_num_gpr_needed[where]++; @@ -26595,6 +26628,11 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs) if (loop_vinfo && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2 + /* Avoid a masked epilog if cascaded epilogues eventually get us + to one with VF 1 as that means no scalar epilog at all. */ + && !((GET_MODE_SIZE (loop_vinfo->vector_mode) + / LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () == 16) + && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES]) && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES] && !OPTION_SET_P (param_vect_partial_vector_usage)) { diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 94f335f..b934117 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2488,7 +2488,11 @@ constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_GRANITERAPIDS_D | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_MOVRS | PTA_AMX_MOVRS; -constexpr wide_int_bitmask PTA_NOVALAKE = PTA_PANTHERLAKE | PTA_PREFETCHI; +constexpr wide_int_bitmask PTA_NOVALAKE = PTA_PANTHERLAKE | PTA_PREFETCHI + | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ + | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_AVX512VNNI | PTA_AVX512VBMI2 + | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ | PTA_AVX512FP16 | PTA_AVX512BF16 + | PTA_AVX10_1 | PTA_AVX10_2 | PTA_APX_F | PTA_MOVRS; constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_POPCNT | PTA_LZCNT diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b812d8b..df7135f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -8642,7 +8642,7 @@ [(set (reg FLAGS_REG) (compare (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r") (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>"))) - (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>i,r,r") + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r") (minus:SWI (match_dup 1) (match_dup 2)))] "ix86_match_ccmode (insn, CCmode) && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)" @@ -8860,6 +8860,35 @@ (match_dup 0))) (clobber (reg:CC FLAGS_REG))])]) +(define_insn "*add<mode>3_carry_2" + [(set (reg FLAGS_REG) + (compare + (plus:SWI + (plus:SWI + (match_operator:SWI 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI 1 "nonimmediate_operand" "%0,0,rm,r")) + (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r") + (plus:SWI + (plus:SWI + (match_op_dup 4 [(match_dup 3) (const_int 0)]) + (match_dup 1)) + (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (PLUS, <MODE>mode, operands, TARGET_APX_NDD)" + "@ + adc{<imodesuffix>}\t{%2, %0|%0, %2} + adc{<imodesuffix>}\t{%2, %0|%0, %2} + adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + adc{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "<MODE>")]) + (define_insn "*add<mode>3_carry_0" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") (plus:SWI @@ -8874,6 +8903,26 @@ (set_attr "pent_pair" "pu") 
(set_attr "mode" "<MODE>")]) +(define_insn "*add<mode>3_carry_0_cc" + [(set (reg FLAGS_REG) + (compare + (plus:SWI + (match_operator:SWI 2 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") + (plus:SWI + (match_op_dup 2 [(match_dup 3) (const_int 0)]) + (match_dup 1)))] + "ix86_match_ccmode (insn, CCGOCmode) + && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1]))" + "adc{<imodesuffix>}\t{$0, %0|%0, 0}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "<MODE>")]) + (define_insn "*add<mode>3_carry_0r" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") (plus:SWI @@ -8888,6 +8937,26 @@ (set_attr "pent_pair" "pu") (set_attr "mode" "<MODE>")]) +(define_insn "*add<mode>3_carry_0r_cc" + [(set (reg FLAGS_REG) + (compare + (plus:SWI + (match_operator:SWI 2 "ix86_carry_flag_unset_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)]) + (match_operand:SWI 1 "nonimmediate_operand" "0")) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") + (plus:SWI + (match_op_dup 2 [(match_dup 3) (const_int 0)]) + (match_dup 1)))] + "ix86_match_ccmode (insn, CCGOCmode) + && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1]))" + "sbb{<imodesuffix>}\t{$-1, %0|%0, -1}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "<MODE>")]) + (define_insn "*addqi3_carry_zext<mode>" [(set (match_operand:SWI248x 0 "register_operand" "=r,r") (zero_extend:SWI248x @@ -9456,6 +9525,35 @@ (match_dup 0))) (clobber (reg:CC FLAGS_REG))])]) +(define_insn "*sub<mode>3_carry_2" + [(set (reg FLAGS_REG) + (compare + (minus:SWI + (minus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0,0,rm,r") + (match_operator:SWI 4 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)])) + (match_operand:SWI 2 "<general_operand>" "<r><i>,<m>,r<i>,<m>")) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>,r,r") + (minus:SWI + (minus:SWI + (match_dup 1) + (match_op_dup 4 [(match_dup 3) (const_int 0)])) + (match_dup 2)))] + "ix86_match_ccmode (insn, CCGOCmode) + && ix86_binary_operator_ok (MINUS, <MODE>mode, operands, TARGET_APX_NDD)" + "@ + sbb{<imodesuffix>}\t{%2, %0|%0, %2} + sbb{<imodesuffix>}\t{%2, %0|%0, %2} + sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2} + sbb{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,*,apx_ndd,apx_ndd") + (set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "<MODE>")]) + (define_insn "*sub<mode>3_carry_0" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") (minus:SWI @@ -9470,6 +9568,26 @@ (set_attr "pent_pair" "pu") (set_attr "mode" "<MODE>")]) +(define_insn "*sub<mode>3_carry_0_cc" + [(set (reg FLAGS_REG) + (compare + (minus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0") + (match_operator:SWI 2 "ix86_carry_flag_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)])) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") + (minus:SWI + (match_dup 1) + (match_op_dup 2 [(match_dup 3) (const_int 0)])))] + "ix86_match_ccmode (insn, CCGOCmode) + && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1]))" + "sbb{<imodesuffix>}\t{$0, %0|%0, 0}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" 
"pu") + (set_attr "mode" "<MODE>")]) + (define_insn "*sub<mode>3_carry_0r" [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") (minus:SWI @@ -9484,6 +9602,26 @@ (set_attr "pent_pair" "pu") (set_attr "mode" "<MODE>")]) +(define_insn "*sub<mode>3_carry_0r_cc" + [(set (reg FLAGS_REG) + (compare + (minus:SWI + (match_operand:SWI 1 "nonimmediate_operand" "0") + (match_operator:SWI 2 "ix86_carry_flag_unset_operator" + [(match_operand 3 "flags_reg_operand") (const_int 0)])) + (const_int 0))) + (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m") + (minus:SWI + (match_dup 1) + (match_op_dup 2 [(match_dup 3) (const_int 0)])))] + "ix86_match_ccmode (insn, CCGOCmode) + && (!MEM_P (operands[0]) || rtx_equal_p (operands[0], operands[1]))" + "adc{<imodesuffix>}\t{$-1, %0|%0, -1}" + [(set_attr "type" "alu") + (set_attr "use_carry" "1") + (set_attr "pent_pair" "pu") + (set_attr "mode" "<MODE>")]) + (define_insn "*subqi3_carry_zext<mode>" [(set (match_operand:SWI248x 0 "register_operand" "=r,r") (zero_extend:SWI248x @@ -12213,7 +12351,7 @@ (compare:CCNO (and:SWI48 (match_operand:SWI48 0 "nonimmediate_operand") - (match_operand:SWI48 1 "<nonmemory_szext_operand>")) + (match_operand:SWI48 1 "<general_szext_operand>")) (const_int 0)))]) (define_expand "testqi_ccz_1" @@ -12221,7 +12359,7 @@ (compare:CCZ (and:QI (match_operand:QI 0 "nonimmediate_operand") - (match_operand:QI 1 "nonmemory_operand")) + (match_operand:QI 1 "general_operand")) (const_int 0)))]) (define_insn "*testdi_1" @@ -12229,7 +12367,7 @@ (compare (and:DI (match_operand:DI 0 "nonimmediate_operand" "%r,rm") - (match_operand:DI 1 "x86_64_szext_nonmemory_operand" "Z,re")) + (match_operand:DI 1 "x86_64_szext_general_operand" "Z,re")) (const_int 0)))] "TARGET_64BIT && ix86_match_ccmode @@ -12242,7 +12380,8 @@ (satisfies_constraint_Z (operands[1]) && (!CONST_INT_P (operands[1]) || val_signbit_known_set_p (SImode, INTVAL (operands[1])))) - ? CCZmode : CCNOmode)" + ? CCZmode : CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "@ test{l}\t{%k1, %k0|%k0, %k1} test{q}\t{%1, %0|%0, %1}" @@ -12253,12 +12392,13 @@ [(set (reg FLAGS_REG) (compare (and:QI - (match_operand:QI 0 "nonimmediate_operand" "%qm,qm,r") - (match_operand:QI 1 "nonmemory_operand" "q,n,n")) + (match_operand:QI 0 "nonimmediate_operand" "%qm,*a,qm,r") + (match_operand:QI 1 "general_operand" "q,n,n,n")) (const_int 0)))] "ix86_match_ccmode (insn, CONST_INT_P (operands[1]) - && INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)" + && INTVAL (operands[1]) >= 0 ? 
CCNOmode : CCZmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" { if (get_attr_mode (insn) == MODE_SI) { @@ -12270,7 +12410,7 @@ } [(set_attr "type" "test") (set (attr "mode") - (cond [(eq_attr "alternative" "2") + (cond [(eq_attr "alternative" "3") (const_string "SI") (and (match_test "optimize_insn_for_size_p ()") (and (match_operand 0 "ext_QIreg_operand") @@ -12278,16 +12418,17 @@ (const_string "SI") ] (const_string "QI"))) - (set_attr "pent_pair" "uv,np,np")]) + (set_attr "pent_pair" "uv,uv,np,np")]) (define_insn "*test<mode>_1" [(set (reg FLAGS_REG) (compare (and:SWI124 (match_operand:SWI124 0 "nonimmediate_operand" "%<r>m,*a,<r>m") - (match_operand:SWI124 1 "<nonmemory_szext_operand>" "<r>,<i>,<i>")) - (const_int 0)))] - "ix86_match_ccmode (insn, CCNOmode)" + (match_operand:SWI124 1 "<general_operand>" "<r>,<i>,<i>")) + (const_int 0)))] + "ix86_match_ccmode (insn, CCNOmode) + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "test{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "test") (set_attr "mode" "<MODE>") @@ -14062,6 +14203,22 @@ (set_attr "isa" "*,apx_ndd") (set_attr "mode" "SI")]) +;; This pattern must come before the *<code><mode>_3 pattern below. +(define_insn "*ior<mode>_ccz_1" + [(set (reg:CCZ FLAGS_REG) + (compare:CCZ + (ior:SWI1248_AVX512BWDQ_64 + (match_operand:SWI1248_AVX512BWDQ_64 1 "nonimmediate_operand" "%0,?k") + (match_operand:SWI1248_AVX512BWDQ_64 2 "<general_operand>" "<g>, k")) + (const_int 0))) + (clobber (match_scratch:SWI1248_AVX512BWDQ_64 0 "=<r>, X"))] + "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + or{<imodesuffix>}\t{%2, %0|%0, %2} + kortest<mskmodesuffix>\t{%1, %2|%2, %1}" + [(set_attr "type" "alu,msklog") + (set_attr "mode" "<MODE>")]) + (define_insn "*<code><mode>_3" [(set (reg FLAGS_REG) (compare (any_or:SWI @@ -25708,6 +25865,23 @@ (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) +(define_expand "movmem<mode>" + [(use (match_operand:BLK 0 "memory_operand")) + (use (match_operand:BLK 1 "memory_operand")) + (use (match_operand:SWI48 2 "nonmemory_operand")) + (use (match_operand:SWI48 3 "const_int_operand")) + (use (match_operand:SI 4 "const_int_operand")) + (use (match_operand:SI 5 "const_int_operand")) + (use (match_operand:SI 6 "")) + (use (match_operand:SI 7 "")) + (use (match_operand:SI 8 ""))] + "" +{ + if (ix86_expand_movmem (operands)) + DONE; + FAIL; +}) + (define_expand "cpymem<mode>" [(use (match_operand:BLK 0 "memory_operand")) (use (match_operand:BLK 1 "memory_operand")) @@ -29537,7 +29711,7 @@ [(match_operand:SI 0 "register_operand") (match_operand:SI 1 "register_operand") (match_operand:SWI124 2 "nonimmediate_operand") - (match_operand:SI 3)] + (match_operand:SI 3 "const_int_operand")] "TARGET_CRC32" { /* crc32 uses iSCSI polynomial */ diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 8449450..c0093ef 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1290,11 +1290,11 @@ Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and SM4 built-in functions and code generation. mgather -Target Alias(mtune-ctrl=, use_gather, ^use_gather) +Target Var(ix86_use_gather) Init(0) Optimization Enable vectorization for gather instruction. mscatter -Target Alias(mtune-ctrl=, use_scatter, ^use_scatter) +Target Var(ix86_use_scatter) Init(0) Optimization Enable vectorization for scatter instruction. 
mapxf diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls index a9bbac0..129d91f 100644 --- a/gcc/config/i386/i386.opt.urls +++ b/gcc/config/i386/i386.opt.urls @@ -13,10 +13,10 @@ mlong-double-80 UrlSuffix(gcc/x86-Options.html#index-mlong-double-80) mlong-double-64 -UrlSuffix(gcc/x86-Options.html#index-mlong-double-64-1) +UrlSuffix(gcc/x86-Options.html#index-mlong-double-64-2) mlong-double-128 -UrlSuffix(gcc/x86-Options.html#index-mlong-double-128-1) +UrlSuffix(gcc/x86-Options.html#index-mlong-double-128-2) maccumulate-outgoing-args UrlSuffix(gcc/x86-Options.html#index-maccumulate-outgoing-args-1) @@ -57,7 +57,7 @@ UrlSuffix(gcc/x86-Options.html#index-mfp-ret-in-387) ; duplicate: 'gcc/x86-Options.html#index-mfpmath-1' mhard-float -UrlSuffix(gcc/x86-Options.html#index-mhard-float-11) +UrlSuffix(gcc/x86-Options.html#index-mhard-float-10) mieee-fp UrlSuffix(gcc/x86-Options.html#index-mieee-fp) @@ -120,7 +120,7 @@ mrtd UrlSuffix(gcc/x86-Options.html#index-mrtd-1) msoft-float -UrlSuffix(gcc/x86-Options.html#index-msoft-float-16) +UrlSuffix(gcc/x86-Options.html#index-msoft-float-15) msseregparm UrlSuffix(gcc/x86-Options.html#index-msseregparm) @@ -438,7 +438,7 @@ mpku UrlSuffix(gcc/x86-Options.html#index-mpku) mstack-protector-guard= -UrlSuffix(gcc/x86-Options.html#index-mstack-protector-guard-4) +UrlSuffix(gcc/x86-Options.html#index-mstack-protector-guard-5) mstack-protector-guard-reg= UrlSuffix(gcc/x86-Options.html#index-mstack-protector-guard-reg-3) diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 57950d3..2863b3e 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1319,9 +1319,6 @@ (ior (match_operand 0 "nonimmediate_operand") (match_test "const_vec_duplicate_p (op)"))) -(define_predicate "const_vec_dup_operand" - (match_test "const_vec_duplicate_p (op)")) - ;; Return true when OP is either register operand, or any ;; CONST_VECTOR. (define_predicate "reg_or_const_vector_operand" @@ -1587,6 +1584,9 @@ (define_predicate "add_comparison_operator" (match_code "geu,ltu")) +(define_predicate "ieee_maxmin_comparison_operator" + (match_code "lt,gt")) + ;; Return true if OP is a valid comparison operator in valid mode. (define_predicate "ix86_comparison_operator" (match_operand 0 "comparison_operator") diff --git a/gcc/config/i386/sol2.h b/gcc/config/i386/sol2.h index 013e87f..3720424 100644 --- a/gcc/config/i386/sol2.h +++ b/gcc/config/i386/sol2.h @@ -60,7 +60,7 @@ along with GCC; see the file COPYING3. If not see /* GNU as understands --32 and --64, but the native Solaris assembler requires -xarch=generic or -xarch=generic64 instead. */ -#ifdef USE_GAS +#if HAVE_GNU_AS #define ASM_CPU32_DEFAULT_SPEC "--32" #define ASM_CPU64_DEFAULT_SPEC "--64" #else @@ -90,16 +90,9 @@ along with GCC; see the file COPYING3. If not see #define ARCH64_SUBDIR "amd64" -#ifdef USE_GLD -/* Since binutils 2.21, GNU ld supports new *_sol2 emulations to strictly - follow the Solaris 2 ABI. Prefer them if present. */ -#ifdef HAVE_LD_SOL2_EMULATION +#if HAVE_GNU_LD #define ARCH32_EMULATION "elf_i386_sol2" #define ARCH64_EMULATION "elf_x86_64_sol2" -#else -#define ARCH32_EMULATION "elf_i386" -#define ARCH64_EMULATION "elf_x86_64" -#endif #endif #define ENDFILE_ARCH_SPEC \ @@ -156,7 +149,7 @@ along with GCC; see the file COPYING3. If not see } \ } while (0) -#ifndef USE_GAS +#if !HAVE_GNU_AS /* The Sun assembler uses .tcomm for TLS common sections. */ #define TLS_COMMON_ASM_OP ".tcomm" @@ -186,7 +179,7 @@ along with GCC; see the file COPYING3. 
If not see ASM_OUTPUT_LABEL (FILE, NAME); \ } \ while (0) -#endif /* !USE_GAS */ +#endif /* !HAVE_GNU_AS */ /* As in sparc/sol2.h, override the default from i386/x86-64.h to work around Sun as TLS bug. */ @@ -217,13 +210,13 @@ along with GCC; see the file COPYING3. If not see /* Sun as requires "h" flag for large sections, GNU as can do without, but accepts "l". */ -#ifdef USE_GAS +#if HAVE_GNU_AS #define MACH_DEP_SECTION_ASM_FLAG 'l' #else #define MACH_DEP_SECTION_ASM_FLAG 'h' #endif -#ifndef USE_GAS +#if !HAVE_GNU_AS /* Emit COMDAT group signature symbols for Sun as. */ #undef TARGET_ASM_FILE_END #define TARGET_ASM_FILE_END solaris_file_end @@ -231,12 +224,12 @@ along with GCC; see the file COPYING3. If not see /* Unlike GNU ld, Sun ld doesn't coalesce .ctors.N/.dtors.N sections, so inhibit their creation. Also cf. sparc/sysv4.h. */ -#ifndef USE_GLD +#if !HAVE_GNU_LD #define CTORS_SECTION_ASM_OP "\t.section\t.ctors, \"aw\"" #define DTORS_SECTION_ASM_OP "\t.section\t.dtors, \"aw\"" #endif -#ifndef USE_GAS +#if !HAVE_GNU_AS #define LARGECOMM_SECTION_ASM_OP "\t.lbcomm\t" #endif diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 7d91585..fb79b2e 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3338,10 +3338,10 @@ [(set (match_operand:VFH 0 "register_operand") (vec_merge:VFH (match_operand:VFH 1 "nonimmediate_operand") - (match_operand:VFH 2 "nonimmediate_operand") + (match_operand:VFH 2 "general_operand") (unspec:<avx512fmaskmode> [(match_operand:VFH 3 "nonimmediate_operand") - (match_operand:VFH 4 "nonimmediate_operand") + (match_operand:VFH 4 "general_operand") (match_operand:SI 5 "const_0_to_31_operand")] UNSPEC_PCMP)))] "TARGET_SSE && ix86_pre_reload_split () @@ -3352,19 +3352,21 @@ && (INTVAL (operands[5]) == 1 || INTVAL (operands[5]) == 14)" "#" "&& 1" - [(const_int 0)] + [(set (match_dup 0) (match_dup 6))] { int u = UNSPEC_IEEE_MIN; + rtx tmp = operands[2]; if ((INTVAL (operands[5]) == 1 && rtx_equal_p (operands[1], operands[4])) || (INTVAL (operands[5]) == 14 && rtx_equal_p (operands[1], operands[3]))) u = UNSPEC_IEEE_MAX; if (MEM_P (operands[1])) operands[1] = force_reg (<MODE>mode, operands[1]); - rtvec v = gen_rtvec (2, operands[1], operands[2]); - rtx tmp = gen_rtx_UNSPEC (<MODE>mode, v, u); - emit_move_insn (operands[0], tmp); - DONE; + + if (immediate_operand (operands[2], <MODE>mode)) + tmp = force_reg (<MODE>mode, operands[2]); + rtvec v = gen_rtvec (2, operands[1], tmp); + operands[6] = gen_rtx_UNSPEC (<MODE>mode, v, u); }) (define_insn_and_split "*minmax<mode>3_2" @@ -3383,7 +3385,7 @@ && rtx_equal_p (operands[2], operands[3])))" "#" "&& 1" - [(const_int 0)] + [(set (match_dup 0) (match_dup 5))] { int u = UNSPEC_IEEE_MIN; if (rtx_equal_p (operands[1], operands[3])) @@ -3392,9 +3394,53 @@ if (MEM_P (operands[2])) operands[2] = force_reg (<MODE>mode, operands[2]); rtvec v = gen_rtvec (2, operands[2], operands[1]); - rtx tmp = gen_rtx_UNSPEC (<MODE>mode, v, u); - emit_move_insn (operands[0], tmp); - DONE; + operands[5] = gen_rtx_UNSPEC (<MODE>mode, v, u); + }) + + +(define_insn_and_split "*minmax<mode>3_3" + [(set (match_operand:VF_128_256 0 "register_operand") + (and:VF_128_256 + (not:VF_128_256 + (match_operator:VF_128_256 1 "ieee_maxmin_comparison_operator" + [(match_operand:VF_128_256 2 "nonimmediate_operand") + (match_operand:VF_128_256 3 "const0_operand")])) + (match_operand:VF_128_256 4 "nonimmediate_operand")))] + "TARGET_SSE && ix86_pre_reload_split () + && rtx_equal_p (operands[2], operands[4])" + "#" + "&& 1" + [(set (match_dup 0) 
(match_dup 5))] + { + int u = UNSPEC_IEEE_MIN; + if (GET_CODE (operands[1]) == LT) + u = UNSPEC_IEEE_MAX; + + rtx tmp = force_reg (<MODE>mode, operands[3]); + rtvec v = gen_rtvec (2, tmp, operands[2]); + operands[5] = gen_rtx_UNSPEC (<MODE>mode, v, u); + }) + +(define_insn_and_split "*minmax<mode>3_4" + [(set (match_operand:VF_128_256 0 "register_operand") + (and:VF_128_256 + (match_operator:VF_128_256 1 "ieee_maxmin_comparison_operator" + [(match_operand:VF_128_256 2 "nonimmediate_operand") + (match_operand:VF_128_256 3 "const0_operand")]) + (match_operand:VF_128_256 4 "nonimmediate_operand")))] + "TARGET_SSE && ix86_pre_reload_split () + && rtx_equal_p (operands[2], operands[4])" + "#" + "&& 1" + [(set (match_dup 0) (match_dup 5))] + { + int u = UNSPEC_IEEE_MIN; + if (GET_CODE (operands[1]) == GT) + u = UNSPEC_IEEE_MAX; + + rtx tmp = force_reg (<MODE>mode, operands[3]); + rtvec v = gen_rtvec (2, operands[2], tmp); + operands[5] = gen_rtx_UNSPEC (<MODE>mode, v, u); }) ;; These versions of the min/max patterns implement exactly the operations @@ -4653,6 +4699,9 @@ UNSPEC_PCMP))] "operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);") +(define_int_iterator UNSPEC_PCMP_ITER + [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP]) + (define_insn "*<avx512>_cmp<mode>3_and15" [(set (match_operand:QI 0 "register_operand" "=k") (and:QI @@ -4685,6 +4734,23 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn "*<avx512>_eq<mode>3_and15" + [(set (match_operand:QI 0 "register_operand" "=k, k") + (and:QI + (unspec:QI + [(match_operand:VI48_AVX512VL_4 1 "nonimm_or_0_operand" "%v, v") + (match_operand:VI48_AVX512VL_4 2 "nonimm_or_0_operand" "vm, C") + (const_int 0)] + UNSPEC_PCMP_ITER) + (const_int 15)))] + "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} + vptestnm<ssemodesuffix>\t{%1, %1, %0|%0, %1, %1}" + [(set_attr "type" "ssecmp") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + (define_insn "*<avx512>_cmp<mode>3_and3" [(set (match_operand:QI 0 "register_operand" "=k") (and:QI @@ -4717,6 +4783,23 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn "*avx512vl_eqv2di_and3" + [(set (match_operand:QI 0 "register_operand" "=k, k") + (and:QI + (unspec:QI + [(match_operand:V2DI 1 "nonimm_or_0_operand" "%v, v") + (match_operand:V2DI 2 "nonimm_or_0_operand" "vm, C") + (const_int 0)] + UNSPEC_PCMP_ITER) + (const_int 3)))] + "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + "@ + vpcmpeqq\t{%2, %1, %0|%0, %1, %2} + vptestnmq\t{%1, %1, %0|%0, %1, %1}" + [(set_attr "type" "ssecmp") + (set_attr "prefix" "evex") + (set_attr "mode" "TI")]) + (define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> @@ -4790,9 +4873,6 @@ (set_attr "prefix" "evex") (set_attr "mode" "<VI12_AVX512VL:sseinsnmode>")]) -(define_int_iterator UNSPEC_PCMP_ITER - [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP]) - (define_insn_and_split "*<avx512>_cmp<mode>3" [(set (match_operand:<avx512fmaskmode> 0 "register_operand") (not:<avx512fmaskmode> @@ -4820,7 +4900,8 @@ (match_operand:SI 3 "<cmp_imm_predicate>")] UNSPEC_PCMP_ITER))] "TARGET_AVX512F && ix86_pre_reload_split () - && rtx_equal_p (operands[1], operands[2])" + && rtx_equal_p (operands[1], operands[2]) + && (!MEM_P (operands[1]) || !MEM_VOLATILE_P (operands[1]))" "#" "&& 1" [(set (match_dup 0) (match_dup 4))] @@ -27259,24 +27340,6 @@ DONE; }) -(define_expand "cond_<insn><mode>" - 
[(set (match_operand:VI1_AVX512VL 0 "register_operand") - (vec_merge:VI1_AVX512VL - (any_shift:VI1_AVX512VL - (match_operand:VI1_AVX512VL 2 "register_operand") - (match_operand:VI1_AVX512VL 3 "const_vec_dup_operand")) - (match_operand:VI1_AVX512VL 4 "nonimm_or_0_operand") - (match_operand:<avx512fmaskmode> 1 "register_operand")))] - "TARGET_GFNI && TARGET_AVX512F" -{ - rtx count = XVECEXP (operands[3], 0, 0); - rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], count, <CODE>); - emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[2], matrix, - const0_rtx, operands[4], - operands[1])); - DONE; -}) - (define_expand "<insn><mode>3" [(set (match_operand:VI1_AVX512_3264 0 "register_operand") (any_rotate:VI1_AVX512_3264 diff --git a/gcc/config/i386/x-mingw32 b/gcc/config/i386/x-mingw32 index 8900bfc..5ebe088 100644 --- a/gcc/config/i386/x-mingw32 +++ b/gcc/config/i386/x-mingw32 @@ -21,6 +21,9 @@ # local_includedir=$(libsubdir)/$(unlibsubdir)/..`echo $(exec_prefix) | sed -e 's|^$(prefix)||' -e 's|/[^/]*|/..|g'`/include +# Add Windows socket library. +LIBS += -lws2_32 + # On MinGW, we use "%IA64d" to print 64-bit integers, and the format-checking # code does not handle that, so we have to disable checking here. WERROR_FLAGS += -Wno-format diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc index ff9c268..11b3338 100644 --- a/gcc/config/i386/x86-tune-sched.cc +++ b/gcc/config/i386/x86-tune-sched.cc @@ -110,6 +110,9 @@ ix86_issue_rate (void) case PROCESSOR_PANTHERLAKE: return 6; + case PROCESSOR_NOVALAKE: + return 8; + default: return 1; } diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 3627312..dcd26d5 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -602,7 +602,7 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2 /* X86_TUNE_AVX256_OPTIMAL: Use 256-bit AVX instructions instead of 512-bit AVX instructions in the auto-vectorizer. */ -DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512) +DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512 | m_NOVALAKE) /* X86_TUNE_AVX256_AVOID_VEC_PERM: Avoid using 256-bit cross-lane vector permutation instructions in the auto-vectorizer. */ diff --git a/gcc/config/ia64/hpux.h b/gcc/config/ia64/hpux.h index 13c7900..a1e05f6 100644 --- a/gcc/config/ia64/hpux.h +++ b/gcc/config/ia64/hpux.h @@ -62,7 +62,7 @@ do { \ #undef ASM_EXTRA_SPEC #define ASM_EXTRA_SPEC "%{milp32:-milp32} %{mlp64:-mlp64}" -#ifndef USE_GAS +#if !HAVE_GNU_AS #define AS_NEEDS_DASH_FOR_PIPED_INPUT #endif diff --git a/gcc/config/ia64/ia64.opt.urls b/gcc/config/ia64/ia64.opt.urls index 1e1d063..cc228d7 100644 --- a/gcc/config/ia64/ia64.opt.urls +++ b/gcc/config/ia64/ia64.opt.urls @@ -70,7 +70,7 @@ mfixed-range= UrlSuffix(gcc/IA-64-Options.html#index-mfixed-range-1) mtls-size= -UrlSuffix(gcc/IA-64-Options.html#index-mtls-size-1) +UrlSuffix(gcc/IA-64-Options.html#index-mtls-size-2) mtune= UrlSuffix(gcc/IA-64-Options.html#index-mtune-7) diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md index 97a4e4e..82bf1d8 100644 --- a/gcc/config/loongarch/constraints.md +++ b/gcc/config/loongarch/constraints.md @@ -298,10 +298,9 @@ (define_constraint "YI" "@internal - A replicated vector const in which the replicated value is in the range - [-512,511]." + A vector constant that can be generated by the vldi or xvldi instruction." 
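+ ;; This widens the old check, which only accepted replicated constants + ;; in [-512,511], to everything the vldi/xvldi immediate encodings can + ;; materialize.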
(and (match_code "const_vector") - (match_test "loongarch_const_vector_vrepli (op, mode)"))) + (match_test "loongarch_const_vector_vldi (op, mode)"))) (define_constraint "YC" "@internal diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk index 507063b..789be80 100644 --- a/gcc/config/loongarch/genopts/gen-evolution.awk +++ b/gcc/config/loongarch/genopts/gen-evolution.awk @@ -34,6 +34,7 @@ BEGIN { cpucfg_word[NR] = $1 cpucfg_bit_in_word[NR] = $2 name[NR] = $3 + orig_name[NR] = $3 gsub("-", "_", name[NR]) name_capitalized[NR] = toupper(name[NR]) split($4, isa_ver, "\\.") @@ -47,8 +48,12 @@ BEGIN { function copyright_header(from_year,to_year) { - print " Copyright (C) " from_year "-" to_year \ - " Free Software Foundation, Inc." + if (to_year == "") + print " Copyright (C) " from_year \ + " Free Software Foundation, Inc." + else + print " Copyright (C) " from_year "-" to_year \ + " Free Software Foundation, Inc." print "" print "This file is part of GCC." print "" @@ -233,9 +238,33 @@ function gen_full_source() print "};" } +function gen_full_def() +{ + print "/* Generated automatically by \"genstr\" from \"isa-evolution.in\"." + print " Please do not edit this file directly." + print "" + + copyright_header(2025) + + print "*/" + print "" + + print "#ifndef LARCH_ATTR_BOOL" + print "#define LARCH_ATTR_BOOL" + print "#endif" + print "" + for (i = 1; i <= NR; i++) + printf (" LARCH_ATTR_BOOL (\"%s\", OPT_m%s, OPTION_MASK_ISA_%s, FEAT_%s," \ + " ARCH_LA64V%d_%d, LA_PRIO_%s)\n", + orig_name[i], name[i], name_capitalized[i], name_capitalized[i], + isa_version_major[i], isa_version_minor[i], name_capitalized[i]) +} + END { - if (header_p) + if (header_p == 1) gen_full_header() - else + else if (header_p == 0) gen_full_source() + else if (header_p == 2) + gen_full_def() } diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh index 97517da..fc11a70 100755 --- a/gcc/config/loongarch/genopts/genstr.sh +++ b/gcc/config/loongarch/genopts/genstr.sh @@ -125,6 +125,9 @@ main() { evolution_c) awk -v header_p=0 -f gen-evolution.awk isa-evolution.in ;; + evolution_def) + awk -v header_p=2 -f gen-evolution.awk isa-evolution.in + ;; header) gen_defines ;; diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index 7a91473..c8749d1 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -44,8 +44,6 @@ UNSPEC_LASX_XVREPL128VEI UNSPEC_LASX_XVSRAR UNSPEC_LASX_XVSRLR - UNSPEC_LASX_XVSHUF - UNSPEC_LASX_XVSHUF_B UNSPEC_LASX_BRANCH UNSPEC_LASX_BRANCH_V @@ -130,16 +128,11 @@ ;; Only used for splitting insert_d and copy_{u,s}.d. (define_mode_iterator LASX_WD [V4DI V4DF V8SI V8SF]) +(define_mode_iterator LASX_PART [V4DI V4DF V8SF]) ;; Only used for copy256_{u,s}.w. (define_mode_iterator LASX_W [V8SI V8SF]) -;; As ILASX but excludes V32QI. -(define_mode_iterator ILASX_DWH [V4DI V8SI V16HI]) - -;; As LASX but excludes V32QI. -(define_mode_iterator LASX_DWH [V4DF V8SF V4DI V8SI V16HI]) - ;; As ILASX but excludes V4DI. 
(define_mode_iterator ILASX_WHB [V8SI V16HI V32QI]) @@ -468,8 +461,8 @@ [(set_attr "type" "simd_splat") (set_attr "mode" "<MODE>")]) -;; xshuf.w -(define_insn "lasx_xvperm_<lasxfmt_f_wd>" +;; xvperm.w +(define_insn "@lasx_xvperm_<lasxfmt_f_wd>" [(set (match_operand:LASX_W 0 "register_operand" "=f") (unspec:LASX_W [(match_operand:LASX_W 1 "nonimmediate_operand" "f") @@ -481,7 +474,7 @@ (set_attr "mode" "<MODE>")]) ;; xvpermi.d -(define_insn "lasx_xvpermi_d_<LASX:mode>" +(define_insn "@lasx_xvpermi_d_<LASX:mode>" [(set (match_operand:LASX 0 "register_operand" "=f") (unspec:LASX [(match_operand:LASX 1 "register_operand" "f") @@ -514,7 +507,7 @@ (set_attr "mode" "<MODE>")]) ;; xvpermi.q -(define_insn "lasx_xvpermi_q_<LASX:mode>" +(define_insn_and_split "lasx_xvpermi_q_<LASX:mode>" [(set (match_operand:LASX 0 "register_operand" "=f") (unspec:LASX [(match_operand:LASX 1 "register_operand" "0") @@ -525,6 +518,37 @@ { return "xvpermi.q\t%u0,%u2,%3"; } + "&& ((INTVAL (operands[3]) & 0xee) == 0x0 + || (INTVAL (operands[3]) & 0xee) == 0x22)" + [(const_int 0)] +{ + HOST_WIDE_INT selector = INTVAL (operands[3]); + /* Reduce the dependency caused by using output operands[0] as input. */ + switch (INTVAL (operands[3])) + { + case 0x22: + case 0x23: + case 0x33: + selector -= 0x22; + operands[2] = operands[1]; + /* FALLTHRU. */ + case 0x0: + case 0x1: + case 0x11: + emit_insn (gen_lasx_xvpermi_d_<mode> (operands[0], operands[2], + GEN_INT (selector * 0xa + 0x44))); + break; + case 0x10: + emit_move_insn (operands[0], operands[2]); + break; + case 0x32: + emit_move_insn (operands[0], operands[1]); + break; + default: + gcc_unreachable (); + } + DONE; +} [(set_attr "type" "simd_splat") (set_attr "mode" "<MODE>")]) @@ -672,6 +696,41 @@ [(set_attr "move_type" "fmove") (set_attr "mode" "<MODE>")]) +;; vr0 -> low xr0 +;; +(define_insn "vec_cast<mode>" + [(set (match_operand:LASX_PART 0 "register_operand" "=f") + (subreg:LASX_PART + (match_operand:<VHMODE256_ALL> 1 "register_operand" "0") 0))] + "ISA_HAS_LASX" + "" + [(set_attr "type" "simd_splat") + (set_attr "mode" "<MODE>")]) + +(define_insn "vec_insert_lo_<mode>" + [(set (match_operand:LASX_PART 0 "register_operand" "=f") + (vec_concat:LASX_PART + (match_operand:<VHMODE256_ALL> 2 "register_operand" "f") + (vec_select:<VHMODE256_ALL> + (match_operand:LASX_PART 1 "register_operand" "0") + (match_operand:LASX_PART 3 "vect_par_cnst_high_half"))))] + "ISA_HAS_LASX" + "xvpermi.q\t%u0,%u2,0x30" + [(set_attr "type" "simd_splat") + (set_attr "mode" "<MODE>")]) + +(define_insn "vec_insert_hi_<mode>" + [(set (match_operand:LASX_PART 0 "register_operand" "=f") + (vec_concat:LASX_PART + (vec_select:<VHMODE256_ALL> + (match_operand:LASX_PART 1 "register_operand" "0") + (match_operand:LASX_PART 3 "vect_par_cnst_low_half")) + (match_operand:<VHMODE256_ALL> 2 "register_operand" "f")))] + "ISA_HAS_LASX" + "xvpermi.q\t%u0,%u2,0x02" + [(set_attr "type" "simd_splat") + (set_attr "mode" "<MODE>")]) + (define_expand "vec_perm<mode>" [(match_operand:LASX 0 "register_operand") (match_operand:LASX 1 "register_operand") @@ -2036,28 +2095,6 @@ [(set_attr "type" "simd_int_arith") (set_attr "mode" "<MODE>")]) -(define_insn "@lasx_xvshuf_<lasxfmt_f>" - [(set (match_operand:LASX_DWH 0 "register_operand" "=f") - (unspec:LASX_DWH [(match_operand:<VIMODE> 1 "register_operand" "0") - (match_operand:LASX_DWH 2 "register_operand" "f") - (match_operand:LASX_DWH 3 "register_operand" "f")] - UNSPEC_LASX_XVSHUF))] - "ISA_HAS_LASX" - "xvshuf.<lasxfmt>\t%u0,%u2,%u3" - [(set_attr "type" "simd_sld") - 
(set_attr "mode" "<MODE>")]) - -(define_insn "lasx_xvshuf_b" - [(set (match_operand:V32QI 0 "register_operand" "=f") - (unspec:V32QI [(match_operand:V32QI 1 "register_operand" "f") - (match_operand:V32QI 2 "register_operand" "f") - (match_operand:V32QI 3 "register_operand" "f")] - UNSPEC_LASX_XVSHUF_B))] - "ISA_HAS_LASX" - "xvshuf.b\t%u0,%u1,%u2,%u3" - [(set_attr "type" "simd_sld") - (set_attr "mode" "V32QI")]) - (define_insn "lasx_xvreplve0_<lasxfmt_f>" [(set (match_operand:LASX 0 "register_operand" "=f") (vec_duplicate:LASX @@ -2635,6 +2672,16 @@ [(set_attr "type" "simd_shift") (set_attr "mode" "<MODE>")]) +(define_insn "lasx_xvbsrl_d_f" + [(set (match_operand:V4DF 0 "register_operand" "=f") + (unspec:V4DF [(match_operand:V4DF 1 "register_operand" "f") + (match_operand 2 "const_uimm5_operand" "")] + UNSPEC_LASX_XVBSRL_V))] + "ISA_HAS_LASX" + "xvbsrl.v\t%u0,%u1,%2" + [(set_attr "type" "simd_shift") + (set_attr "mode" "V4DF")]) + (define_insn "lasx_xvbsll_<lasxfmt>" [(set (match_operand:ILASX 0 "register_operand" "=f") (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f") diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h index 6bcffc2..6c34ede 100644 --- a/gcc/config/loongarch/lasxintrin.h +++ b/gcc/config/loongarch/lasxintrin.h @@ -23,6 +23,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ +#include <lsxintrin.h> + #ifndef _GCC_LOONGSON_ASXINTRIN_H #define _GCC_LOONGSON_ASXINTRIN_H 1 @@ -5368,5 +5370,159 @@ __m256i __lasx_xvfcmp_sun_s (__m256 _1, __m256 _2) #define __lasx_xvrepli_w(/*si10*/ _1) \ ((__m256i)__builtin_lasx_xvrepli_w ((_1))) +#if defined (__loongarch_asx_sx_conv) +/* Add builtin interfaces for 128 and 256 vector conversions. + For the assembly instruction format of some functions of the following vector + conversion, it is not described exactly in accordance with the format of the + generated assembly instruction. + In the front end of the Rust language, different built-in functions are called + by analyzing the format of assembly instructions. The data types of instructions + are all defined based on the interfaces of the defined functions, in the + following order: output, input... . */ +/* Assembly instruction format: xd, vj. */ +/* Data types in instruction templates: V8SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_cast_128_s (__m128 _1) +{ + return (__m256)__builtin_lasx_cast_128_s ((v4f32)_1); +} + +/* Assembly instruction format: xd, vj. */ +/* Data types in instruction templates: V4DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_cast_128_d (__m128d _1) +{ + return (__m256d)__builtin_lasx_cast_128_d ((v2f64)_1); +} + +/* Assembly instruction format: xd, vj. */ +/* Data types in instruction templates: V4DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_cast_128 (__m128i _1) +{ + return (__m256i)__builtin_lasx_cast_128 ((v2i64)_1); +} + +/* Assembly instruction format: xd, vj, vk. */ +/* Data types in instruction templates: V8SF, V4SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_concat_128_s (__m128 _1, __m128 _2) +{ + return (__m256)__builtin_lasx_concat_128_s ((v4f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: xd, vj, vk. */ +/* Data types in instruction templates: V4DF, V2DF, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_concat_128_d (__m128d _1, __m128d _2) +{ + return (__m256d)__builtin_lasx_concat_128_d ((v2f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: xd, vj, vk. */ +/* Data types in instruction templates: V4DI, V2DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_concat_128 (__m128i _1, __m128i _2) +{ + return (__m256i)__builtin_lasx_concat_128 ((v2i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V4SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lasx_extract_128_lo_s (__m256 _1) +{ + return (__m128)__builtin_lasx_extract_128_lo_s ((v8f32)_1); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V4SF, V8SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128 __lasx_extract_128_hi_s (__m256 _1) +{ + return (__m128)__builtin_lasx_extract_128_hi_s ((v8f32)_1); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V2DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lasx_extract_128_lo_d (__m256d _1) +{ + return (__m128d)__builtin_lasx_extract_128_lo_d ((v4f64)_1); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V2DF, V4DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128d __lasx_extract_128_hi_d (__m256d _1) +{ + return (__m128d)__builtin_lasx_extract_128_hi_d ((v4f64)_1); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V2DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lasx_extract_128_lo (__m256i _1) +{ + return (__m128i)__builtin_lasx_extract_128_lo ((v4i64)_1); +} + +/* Assembly instruction format: vd, xj. */ +/* Data types in instruction templates: V2DI, V4DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m128i __lasx_extract_128_hi (__m256i _1) +{ + return (__m128i)__builtin_lasx_extract_128_hi ((v4i64)_1); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V8SF, V8SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_insert_128_lo_s (__m256 _1, __m128 _2) +{ + return (__m256)__builtin_lasx_insert_128_lo_s ((v8f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V8SF, V8SF, V4SF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256 __lasx_insert_128_hi_s (__m256 _1, __m128 _2) +{ + return (__m256)__builtin_lasx_insert_128_hi_s ((v8f32)_1, (v4f32)_2); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V4DF, V4DF, V2DF. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_insert_128_lo_d (__m256d _1, __m128d _2) +{ + return (__m256d)__builtin_lasx_insert_128_lo_d ((v4f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V4DF, V4DF, V2DF. 
*/ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256d __lasx_insert_128_hi_d (__m256d _1, __m128d _2) +{ + return (__m256d)__builtin_lasx_insert_128_hi_d ((v4f64)_1, (v2f64)_2); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V4DI, V4DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_insert_128_lo (__m256i _1, __m128i _2) +{ + return (__m256i)__builtin_lasx_insert_128_lo ((v4i64)_1, (v2i64)_2); +} + +/* Assembly instruction format: xd, xj, vk. */ +/* Data types in instruction templates: V4DI, V4DI, V2DI. */ +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__m256i __lasx_insert_128_hi (__m256i _1, __m128i _2) +{ + return (__m256i)__builtin_lasx_insert_128_hi ((v4i64)_1, (v2i64)_2); +} + +#endif /* defined(__loongarch_asx_sx_conv). */ #endif /* defined(__loongarch_asx). */ #endif /* _GCC_LOONGSON_ASXINTRIN_H. */ diff --git a/gcc/config/loongarch/linux.h b/gcc/config/loongarch/linux.h index b95a11f..e312f54 100644 --- a/gcc/config/loongarch/linux.h +++ b/gcc/config/loongarch/linux.h @@ -53,3 +53,7 @@ along with GCC; see the file COPYING3. If not see /* The stack pointer needs to be moved while checking the stack. */ #define STACK_CHECK_MOVING_SP 1 + +/* Depend on glibc because the libatomic ifunc resolver needs glibc + ifunc resolver interface. */ +#define HAVE_IFUNC_FOR_LIBATOMIC_16B (HAVE_AS_16B_ATOMIC && OPTION_GLIBC) diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc index 9493ded..6c914c0 100644 --- a/gcc/config/loongarch/loongarch-builtins.cc +++ b/gcc/config/loongarch/loongarch-builtins.cc @@ -865,6 +865,27 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) #define CODE_FOR_lasx_xvmaddwod_q_du CODE_FOR_lasx_maddwod_q_du_punned #define CODE_FOR_lasx_xvmaddwod_q_du_d CODE_FOR_lasx_maddwod_q_du_d_punned + +/* Add mutual conversion between 128 and 256 vectors. 
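+ Each conversion builtin below is punned onto an existing + vec_extract/vec_insert/vec_concat/vec_cast insn pattern through these + CODE_FOR_* aliases, so no new insn patterns are needed for them.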
*/ +#define CODE_FOR_lasx_extract_128_lo_s CODE_FOR_vec_extract_lo_v8sf +#define CODE_FOR_lasx_extract_128_hi_s CODE_FOR_vec_extract_hi_v8sf +#define CODE_FOR_lasx_extract_128_lo_d CODE_FOR_vec_extract_lo_v4df +#define CODE_FOR_lasx_extract_128_hi_d CODE_FOR_vec_extract_hi_v4df +#define CODE_FOR_lasx_extract_128_lo CODE_FOR_vec_extract_lo_v4di +#define CODE_FOR_lasx_extract_128_hi CODE_FOR_vec_extract_hi_v4di +#define CODE_FOR_lasx_insert_128_lo_s CODE_FOR_vec_insert_lo_v8sf +#define CODE_FOR_lasx_insert_128_hi_s CODE_FOR_vec_insert_hi_v8sf +#define CODE_FOR_lasx_insert_128_lo_d CODE_FOR_vec_insert_lo_v4df +#define CODE_FOR_lasx_insert_128_hi_d CODE_FOR_vec_insert_hi_v4df +#define CODE_FOR_lasx_insert_128_lo CODE_FOR_vec_insert_lo_v4di +#define CODE_FOR_lasx_insert_128_hi CODE_FOR_vec_insert_hi_v4di +#define CODE_FOR_lasx_concat_128_s CODE_FOR_vec_concatv8sf +#define CODE_FOR_lasx_concat_128_d CODE_FOR_vec_concatv4df +#define CODE_FOR_lasx_concat_128 CODE_FOR_vec_concatv4di +#define CODE_FOR_lasx_cast_128_s CODE_FOR_vec_castv8sf +#define CODE_FOR_lasx_cast_128_d CODE_FOR_vec_castv4df +#define CODE_FOR_lasx_cast_128 CODE_FOR_vec_castv4di + static const struct loongarch_builtin_description loongarch_builtins[] = { #define LARCH_MOVFCSR2GR 0 DIRECT_BUILTIN (movfcsr2gr, LARCH_USI_FTYPE_UQI, hard_float), @@ -2407,7 +2428,25 @@ static const struct loongarch_builtin_description loongarch_builtins[] = { LASX_BUILTIN (xvssrarni_bu_h, LARCH_UV32QI_FTYPE_UV32QI_V32QI_USI), LASX_BUILTIN (xvssrarni_hu_w, LARCH_UV16HI_FTYPE_UV16HI_V16HI_USI), LASX_BUILTIN (xvssrarni_wu_d, LARCH_UV8SI_FTYPE_UV8SI_V8SI_USI), - LASX_BUILTIN (xvssrarni_du_q, LARCH_UV4DI_FTYPE_UV4DI_V4DI_USI) + LASX_BUILTIN (xvssrarni_du_q, LARCH_UV4DI_FTYPE_UV4DI_V4DI_USI), + LASX_BUILTIN (extract_128_lo_s, LARCH_V4SF_FTYPE_V8SF), + LASX_BUILTIN (extract_128_hi_s, LARCH_V4SF_FTYPE_V8SF), + LASX_BUILTIN (extract_128_lo_d, LARCH_V2DF_FTYPE_V4DF), + LASX_BUILTIN (extract_128_hi_d, LARCH_V2DF_FTYPE_V4DF), + LASX_BUILTIN (extract_128_lo, LARCH_V2DI_FTYPE_V4DI), + LASX_BUILTIN (extract_128_hi, LARCH_V2DI_FTYPE_V4DI), + LASX_BUILTIN (insert_128_lo_s, LARCH_V8SF_FTYPE_V8SF_V4SF), + LASX_BUILTIN (insert_128_hi_s, LARCH_V8SF_FTYPE_V8SF_V4SF), + LASX_BUILTIN (insert_128_lo_d, LARCH_V4DF_FTYPE_V4DF_V2DF), + LASX_BUILTIN (insert_128_hi_d, LARCH_V4DF_FTYPE_V4DF_V2DF), + LASX_BUILTIN (insert_128_lo, LARCH_V4DI_FTYPE_V4DI_V2DI), + LASX_BUILTIN (insert_128_hi, LARCH_V4DI_FTYPE_V4DI_V2DI), + LASX_BUILTIN (concat_128_s, LARCH_V8SF_FTYPE_V4SF_V4SF), + LASX_BUILTIN (concat_128_d, LARCH_V4DF_FTYPE_V2DF_V2DF), + LASX_BUILTIN (concat_128, LARCH_V4DI_FTYPE_V2DI_V2DI), + LASX_BUILTIN (cast_128_s, LARCH_V8SF_FTYPE_V4SF), + LASX_BUILTIN (cast_128_d, LARCH_V4DF_FTYPE_V2DF), + LASX_BUILTIN (cast_128, LARCH_V4DI_FTYPE_V2DI) }; /* Index I is the function declaration for loongarch_builtins[I], or null if @@ -3001,6 +3040,10 @@ loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp, { struct expand_operand ops[MAX_RECOG_OPERANDS]; int opno, argno; + /* For vector extraction/insertion operations, sel_high_p being true + indicates that the high of the data is selected/retained from the + vector register. */ + bool sel_high_p = true; /* Map any target to operand 0. 
*/ opno = 0; @@ -3019,6 +3062,51 @@ loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp, create_input_operand (&ops[1], CONST1_RTX (ops[0].mode), ops[0].mode); return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p); + case CODE_FOR_vec_extract_lo_v8sf: + case CODE_FOR_vec_extract_lo_v4df: + case CODE_FOR_vec_extract_lo_v4di: + sel_high_p = false; + /* Fall through. */ + case CODE_FOR_vec_extract_hi_v8sf: + case CODE_FOR_vec_extract_hi_v4df: + case CODE_FOR_vec_extract_hi_v4di: + { + /* The selection method for constructing the high/low half. */ + loongarch_prepare_builtin_arg (&ops[1], exp, 0); + int nelts = GET_MODE_NUNITS (GET_MODE (ops[1].value)); + int half_nelts = nelts / 2; + int base = sel_high_p ? half_nelts : 0; + + rtx pat_rtx + = loongarch_gen_stepped_int_parallel (half_nelts, base, 1); + create_input_operand (&ops[2], pat_rtx, ops[1].mode); + + return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p); + } + + case CODE_FOR_vec_insert_hi_v8sf: + case CODE_FOR_vec_insert_hi_v4df: + case CODE_FOR_vec_insert_hi_v4di: + sel_high_p = false; + /* Fall through. */ + case CODE_FOR_vec_insert_lo_v8sf: + case CODE_FOR_vec_insert_lo_v4df: + case CODE_FOR_vec_insert_lo_v4di: + { + /* The selection method for constructing the high/low half. */ + loongarch_prepare_builtin_arg (&ops[1], exp, 0); + loongarch_prepare_builtin_arg (&ops[2], exp, 1); + int nelts = GET_MODE_NUNITS (GET_MODE (ops[1].value)); + int half_nelts = nelts / 2; + int base = sel_high_p ? half_nelts : 0; + + rtx pat_rtx + = loongarch_gen_stepped_int_parallel (half_nelts, base, 1); + create_input_operand (&ops[3], pat_rtx, ops[1].mode); + + return loongarch_expand_builtin_insn (icode, 4, ops, has_target_p); + } + default: break; } @@ -3171,3 +3259,5 @@ loongarch_build_builtin_va_list (void) { return ptr_type_node; } + +#include "gt-loongarch-builtins.h" diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc index effdcf0..fc031a6 100644 --- a/gcc/config/loongarch/loongarch-c.cc +++ b/gcc/config/loongarch/loongarch-c.cc @@ -132,6 +132,7 @@ loongarch_update_cpp_builtins (cpp_reader *pfile) loongarch_def_or_undef (ISA_HAS_LSX, "__loongarch_simd", pfile); loongarch_def_or_undef (ISA_HAS_LSX, "__loongarch_sx", pfile); loongarch_def_or_undef (ISA_HAS_LASX, "__loongarch_asx", pfile); + loongarch_def_or_undef (ISA_HAS_LASX, "__loongarch_asx_sx_conv", pfile); builtin_undef ("__loongarch_simd_width"); if (ISA_HAS_LSX) diff --git a/gcc/config/loongarch/loongarch-evol-attr.def b/gcc/config/loongarch/loongarch-evol-attr.def new file mode 100644 index 0000000..796d355 --- /dev/null +++ b/gcc/config/loongarch/loongarch-evol-attr.def @@ -0,0 +1,32 @@ +/* Generated automatically by "genstr" from "isa-evolution.in". + Please do not edit this file directly. + + Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
+*/ + +#ifndef LARCH_ATTR_BOOL +#define LARCH_ATTR_BOOL +#endif + + LARCH_ATTR_BOOL ("frecipe", OPT_mfrecipe, OPTION_MASK_ISA_FRECIPE, FEAT_FRECIPE, ARCH_LA64V1_1, LA_PRIO_FRECIPE) + LARCH_ATTR_BOOL ("div32", OPT_mdiv32, OPTION_MASK_ISA_DIV32, FEAT_DIV32, ARCH_LA64V1_1, LA_PRIO_DIV32) + LARCH_ATTR_BOOL ("lam-bh", OPT_mlam_bh, OPTION_MASK_ISA_LAM_BH, FEAT_LAM_BH, ARCH_LA64V1_1, LA_PRIO_LAM_BH) + LARCH_ATTR_BOOL ("lamcas", OPT_mlamcas, OPTION_MASK_ISA_LAMCAS, FEAT_LAMCAS, ARCH_LA64V1_1, LA_PRIO_LAMCAS) + LARCH_ATTR_BOOL ("scq", OPT_mscq, OPTION_MASK_ISA_SCQ, FEAT_SCQ, ARCH_LA64V1_1, LA_PRIO_SCQ) + LARCH_ATTR_BOOL ("ld-seq-sa", OPT_mld_seq_sa, OPTION_MASK_ISA_LD_SEQ_SA, FEAT_LD_SEQ_SA, ARCH_LA64V1_1, LA_PRIO_LD_SEQ_SA) diff --git a/gcc/config/loongarch/loongarch-ftypes.def b/gcc/config/loongarch/loongarch-ftypes.def index 337f2c2..68b1b44 100644 --- a/gcc/config/loongarch/loongarch-ftypes.def +++ b/gcc/config/loongarch/loongarch-ftypes.def @@ -42,6 +42,12 @@ DEF_LARCH_FTYPE (1, (USI, USI)) DEF_LARCH_FTYPE (1, (UDI, USI)) DEF_LARCH_FTYPE (1, (USI, UQI)) DEF_LARCH_FTYPE (1, (VOID, USI)) +DEF_LARCH_FTYPE (1, (V4SF, V8SF)) +DEF_LARCH_FTYPE (1, (V2DF, V4DF)) +DEF_LARCH_FTYPE (1, (V2DI, V4DI)) +DEF_LARCH_FTYPE (1, (V8SF, V4SF)) +DEF_LARCH_FTYPE (1, (V4DF, V2DF)) +DEF_LARCH_FTYPE (1, (V4DI, V2DI)) DEF_LARCH_FTYPE (2, (VOID, UQI, USI)) DEF_LARCH_FTYPE (2, (VOID, UHI, USI)) @@ -58,6 +64,12 @@ DEF_LARCH_FTYPE (2, (SI, SI, SI)) DEF_LARCH_FTYPE (2, (SI, DI, SI)) DEF_LARCH_FTYPE (2, (USI, USI, USI)) DEF_LARCH_FTYPE (2, (UDI, UDI, USI)) +DEF_LARCH_FTYPE (2, (V8SF, V4SF, V4SF)) +DEF_LARCH_FTYPE (2, (V4DF, V2DF, V2DF)) +DEF_LARCH_FTYPE (2, (V4DI, V2DI, V2DI)) +DEF_LARCH_FTYPE (2, (V8SF, V8SF, V4SF)) +DEF_LARCH_FTYPE (2, (V4DF, V4DF, V2DF)) +DEF_LARCH_FTYPE (2, (V4DI, V4DI, V2DI)) DEF_LARCH_FTYPE (3, (VOID, USI, USI, SI)) DEF_LARCH_FTYPE (3, (VOID, USI, UDI, SI)) diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc index 6e72084..cacfe37 100644 --- a/gcc/config/loongarch/loongarch-opts.cc +++ b/gcc/config/loongarch/loongarch-opts.cc @@ -540,7 +540,7 @@ fallback: /* 5. Target code model */ - t.cmodel = constrained.cmodel ? target->cmodel : CMODEL_NORMAL; + t.cmodel = constrained.cmodel ? target->cmodel : TARGET_DEFAULT_CMODEL; switch (t.cmodel) { diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h index 1b397b1..0289209 100644 --- a/gcc/config/loongarch/loongarch-opts.h +++ b/gcc/config/loongarch/loongarch-opts.h @@ -147,4 +147,8 @@ struct loongarch_flags { #define HAVE_AS_TLS_LE_RELAXATION 0 #endif +#ifndef HAVE_AS_16B_ATOMIC +#define HAVE_AS_16B_ATOMIC 0 +#endif + #endif /* LOONGARCH_OPTS_H */ diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index bec4368..69c7605 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -22,6 +22,8 @@ along with GCC; see the file COPYING3. If not see #ifndef GCC_LOONGARCH_PROTOS_H #define GCC_LOONGARCH_PROTOS_H +#include "common/config/loongarch/cpu-features.h" + /* Classifies a SYMBOL_REF, LABEL_REF or UNSPEC address. 
SYMBOL_GOT_DISP @@ -122,7 +124,7 @@ extern bool loongarch_const_vector_shuffle_set_p (rtx, machine_mode); extern bool loongarch_const_vector_bitimm_set_p (rtx, machine_mode); extern bool loongarch_const_vector_bitimm_clr_p (rtx, machine_mode); extern bool loongarch_check_vect_par_cnst_half (rtx, machine_mode, bool); -extern rtx loongarch_const_vector_vrepli (rtx, machine_mode); +extern rtx loongarch_const_vector_vldi (rtx, machine_mode); extern rtx loongarch_lsx_vec_parallel_const_half (machine_mode, bool); extern rtx loongarch_gen_const_int_vector (machine_mode, HOST_WIDE_INT); extern enum reg_class loongarch_secondary_reload_class (enum reg_class, @@ -159,6 +161,7 @@ extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT); extern bool loongarch_check_zero_div_p (void); extern bool loongarch_pre_reload_split (void); extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *); +extern bool loongarch_use_bstrins_bstrpick_for_and (rtx, machine_mode); extern rtx loongarch_rewrite_mem_for_simple_ldst (rtx); union loongarch_gen_fn_ptrs @@ -176,7 +179,6 @@ extern void loongarch_expand_atomic_qihi (union loongarch_gen_fn_ptrs, extern void loongarch_expand_vector_group_init (rtx, rtx); extern void loongarch_expand_vector_init (rtx, rtx); extern void loongarch_expand_vec_unpack (rtx op[2], bool); -extern void loongarch_expand_vec_perm (rtx, rtx, rtx, rtx); extern void loongarch_expand_vec_perm_1 (rtx[]); extern void loongarch_expand_vector_extract (rtx, rtx, int); extern void loongarch_expand_vector_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx); @@ -200,6 +202,7 @@ extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode, rtx *); extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool, rtx (*)(rtx, rtx, rtx), rtx (*)(rtx, rtx, rtx)); +extern bool loongarch_16b_atomic_lock_free_p (void); /* Routines implemented in loongarch-c.c. */ void loongarch_cpu_cpp_builtins (cpp_reader *); @@ -226,4 +229,9 @@ extern void loongarch_register_pragmas (void); extern bool loongarch_process_target_attr (tree args, tree fndecl); extern rtx loongarch_gen_stepped_int_parallel (unsigned int nelts, int base, int step); +extern bool loongarch_parse_fmv_features (tree, string_slice, loongarch_fmv_feature_mask *, + auto_vec<unsigned int> *); +extern void get_feature_mask_for_version (tree, loongarch_fmv_feature_mask *, + auto_vec<unsigned int> *); +extern int loongarch_compare_version_priority (tree, tree); #endif /* ! GCC_LOONGARCH_PROTOS_H */ diff --git a/gcc/config/loongarch/loongarch-target-attr.cc b/gcc/config/loongarch/loongarch-target-attr.cc index cb53744..c690d7a 100644 --- a/gcc/config/loongarch/loongarch-target-attr.cc +++ b/gcc/config/loongarch/loongarch-target-attr.cc @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see #define IN_TARGET_CODE 1 +#define INCLUDE_STRING #include "config.h" #include "system.h" #include "coretypes.h" @@ -40,6 +41,32 @@ enum loongarch_attr_opt_type loongarch_attr_bool /* Attribute sets or unsets a boolean variable. */ }; +/* Describes the priority of each feature. The larger the value, the higher + the priority. Priorities are assigned so that vector features rank + highest. + + The highest priority currently is "-mlasx". + The second highest is "-march=la64v1.1" (lsx plus the la64v1.1 + instructions). + The third highest is "-mlsx". 
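+ All la64v1.1 scalar evolution features (frecipe, div32, lam-bh, + lamcas, scq and ld-seq-sa) share a single priority level, as the + LA_PRIO_* aliases below make explicit.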
+ */ +enum features_prio +{ + LA_PRIO_NONE = 0, + LA_PRIO_LOONGARCH64, + LA_PRIO_STRICT_ALIGN, + LA_PRIO_FRECIPE, + LA_PRIO_DIV32 = LA_PRIO_FRECIPE, + LA_PRIO_LAM_BH = LA_PRIO_FRECIPE, + LA_PRIO_LAMCAS = LA_PRIO_FRECIPE, + LA_PRIO_SCQ = LA_PRIO_FRECIPE, + LA_PRIO_LD_SEQ_SA = LA_PRIO_FRECIPE, + LA_PRIO_LSX, + LA_PRIO_LA64V1_0, + LA_PRIO_LA64V1_1, + LA_PRIO_LASX, + LA_PRIO_MAX +}; + /* All the information needed to handle a target attribute. NAME is the name of the attribute. ATTR_TYPE specifies the type of behavior of the attribute as described @@ -52,28 +79,65 @@ enum loongarch_attr_opt_type struct loongarch_attribute_info { const char *name; + unsigned int opt_mask; enum loongarch_attr_opt_type attr_type; - bool allow_neg; enum opt_code opt_num; + bool allow_neg; + const loongarch_fmv_feature_mask feat_mask; + const unsigned int arch_ver; + enum features_prio priority; }; + +/* Construct a loongarch_attribute_info entry from the given arguments. + + OPTS is the name of the compilation option after the "-m" string. + + OPTNUM is the opt_code corresponding to the compilation option. + + OPTMASK is the mask corresponding to the compilation option. If the + compilation option does not have a corresponding mask, pass 0. + */ +#define LARCH_ATTR_MASK(OPTS, OPTNUM, OPTMASK, FEATMASK, PRIO) \ +{ \ + OPTS, OPTMASK, loongarch_attr_mask, OPTNUM, true, 1ULL << FEATMASK, \ + N_ARCH_TYPES, PRIO \ +}, + +#define LARCH_ATTR_ENUM(OPTS, OPTNUM, PRIO) \ +{ \ + OPTS, 0, loongarch_attr_enum, OPTNUM, false, 0, N_ARCH_TYPES, PRIO \ +}, + +#define LARCH_ATTR_BOOL(OPTS, OPTNUM, OPTMASK, FEATMASK, ARCH_V, PRIO) \ +{ \ + OPTS, OPTMASK, loongarch_attr_bool, OPTNUM, true, 1ULL << FEATMASK, ARCH_V, \ + PRIO \ +}, + /* The target attributes that we support. */ static const struct loongarch_attribute_info loongarch_attributes[] = { - { "strict-align", loongarch_attr_mask, true, OPT_mstrict_align }, - { "cmodel", loongarch_attr_enum, false, OPT_mcmodel_ }, - { "arch", loongarch_attr_enum, false, OPT_march_ }, - { "tune", loongarch_attr_enum, false, OPT_mtune_ }, - { "lsx", loongarch_attr_bool, true, OPT_mlsx }, - { "lasx", loongarch_attr_bool, true, OPT_mlasx }, - { NULL, loongarch_attr_bool, false, OPT____ } + LARCH_ATTR_MASK ("strict-align", OPT_mstrict_align, MASK_STRICT_ALIGN, + FEAT_UAL, LA_PRIO_STRICT_ALIGN) + LARCH_ATTR_ENUM ("cmodel", OPT_mcmodel_, LA_PRIO_NONE) + LARCH_ATTR_ENUM ("arch", OPT_march_, LA_PRIO_NONE) + LARCH_ATTR_ENUM ("tune", OPT_mtune_, LA_PRIO_NONE) + LARCH_ATTR_BOOL ("lsx", OPT_mlsx, 0, FEAT_LSX, ARCH_LA64V1_0, LA_PRIO_LSX) + LARCH_ATTR_BOOL ("lasx", OPT_mlasx, 0, FEAT_LASX | FEAT_LSX, 0, LA_PRIO_LASX) +#include "loongarch-evol-attr.def" + { NULL, 0, loongarch_attr_bool, OPT____, false, 0, N_ARCH_TYPES, LA_PRIO_NONE } }; +#undef LARCH_ATTR_MASK +#undef LARCH_ATTR_ENUM +#undef LARCH_ATTR_BOOL -bool +static void loongarch_handle_option (struct gcc_options *opts, struct gcc_options *opts_set ATTRIBUTE_UNUSED, const struct cl_decoded_option *decoded, - location_t loc ATTRIBUTE_UNUSED) + location_t loc ATTRIBUTE_UNUSED, + unsigned int opt_mask ATTRIBUTE_UNUSED) { size_t code = decoded->opt_index; int val = decoded->value; @@ -82,14 +146,14 @@ loongarch_handle_option (struct gcc_options *opts, { case OPT_mstrict_align: if (val) - opts->x_target_flags |= MASK_STRICT_ALIGN; + opts->x_target_flags |= opt_mask; else - opts->x_target_flags &= ~MASK_STRICT_ALIGN; - return true; + opts->x_target_flags &= ~opt_mask; + break; case OPT_mcmodel_: opts->x_la_opt_cmodel = val; - return true; + break; case OPT_march_: 
opts->x_la_opt_cpu_arch = val; @@ -100,7 +164,7 @@ loongarch_handle_option (struct gcc_options *opts, opts->x_la_opt_simd = M_OPT_UNSET; opts->x_la_opt_fpu = M_OPT_UNSET; opts->x_la_isa_evolution = 0; - return true; + break; case OPT_mtune_: opts->x_la_opt_cpu_tune = val; @@ -111,21 +175,10 @@ loongarch_handle_option (struct gcc_options *opts, opts->x_str_align_functions = NULL; opts->x_str_align_loops = NULL; opts->x_str_align_jumps = NULL; - return true; - - case OPT_mlsx: - opts->x_la_opt_simd = val ? (la_opt_simd == ISA_EXT_SIMD_LASX - ? ISA_EXT_SIMD_LASX : ISA_EXT_SIMD_LSX) : ISA_EXT_NONE; - return true; - - case OPT_mlasx: - opts->x_la_opt_simd = val ? ISA_EXT_SIMD_LASX - : (la_opt_simd == ISA_EXT_SIMD_LASX || la_opt_simd == ISA_EXT_SIMD_LSX - ? ISA_EXT_SIMD_LSX : ISA_EXT_NONE); - return true; + break; default: - return true; + gcc_unreachable (); } } @@ -147,7 +200,6 @@ loongarch_process_one_target_attr (char *arg_str, location_t loc) char *str_to_check = (char *) alloca (len + 1); strcpy (str_to_check, arg_str); - if (len > 3 && startswith (str_to_check, "no-")) { invert = true; @@ -196,14 +248,21 @@ loongarch_process_one_target_attr (char *arg_str, location_t loc) decoded.value = !invert; loongarch_handle_option (&global_options, &global_options_set, - &decoded, input_location); + &decoded, input_location, + p_attr->opt_mask); break; } /* Use the option setting machinery to set an option to an enum. */ case loongarch_attr_enum: { - gcc_assert (arg); + if (!arg) + { + error_at (loc, "the value of pragma or attribute " + "%<target(\"%s\")%> not be empty", str_to_check); + return false; + } + bool valid; int value; struct cl_decoded_option decoded; @@ -216,7 +275,8 @@ loongarch_process_one_target_attr (char *arg_str, location_t loc) if (valid) loongarch_handle_option (&global_options, &global_options_set, - &decoded, input_location); + &decoded, input_location, + p_attr->opt_mask); else error_at (loc, "pragma or attribute %<target(\"%s=%s\")%> is " "not valid", str_to_check, arg); @@ -230,8 +290,34 @@ loongarch_process_one_target_attr (char *arg_str, location_t loc) generate_option (p_attr->opt_num, NULL, !invert, CL_TARGET, &decoded); - loongarch_handle_option (&global_options, &global_options_set, - &decoded, input_location); + switch (decoded.opt_index) + { + case OPT_mlsx: + global_options.x_la_opt_simd + = decoded.value + ? (la_opt_simd == ISA_EXT_SIMD_LASX + ? ISA_EXT_SIMD_LASX : ISA_EXT_SIMD_LSX) + : ISA_EXT_NONE; + break; + + case OPT_mlasx: + global_options.x_la_opt_simd + = decoded.value + ? ISA_EXT_SIMD_LASX + : (la_opt_simd == ISA_EXT_SIMD_LASX + || la_opt_simd == ISA_EXT_SIMD_LSX + ? ISA_EXT_SIMD_LSX : ISA_EXT_NONE); + break; + + default: + { + if (decoded.value) + global_options.x_la_isa_evolution |= p_attr->opt_mask; + else + global_options.x_la_isa_evolution &= ~p_attr->opt_mask; + global_options_set.x_la_isa_evolution |= p_attr->opt_mask; + } + } break; } default: @@ -244,7 +330,7 @@ loongarch_process_one_target_attr (char *arg_str, location_t loc) were malformed we will have returned false already. */ if (!found) error_at (loc, "attribute %<target%> argument %qs is unknown", - str_to_check); + arg_str); return found; } @@ -422,3 +508,247 @@ loongarch_option_valid_attribute_p (tree fndecl, tree, tree args, int) return ret; } +/* Parse a function multiversioning feature string STR, as found in a + target_version or target_clones attribute. 
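+   For example (illustrative strings, matching the grammar accepted
+   below), STR may be a single feature such as "lsx", an architecture
+   level such as "arch=la64v1.1", a feature plus an explicit priority
+   such as "lsx;priority=3", or the literal "default".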
+
+   If FEATURE_MASK is nonnull, then assign to it a bitmask representing
+   the set of features explicitly specified in the feature string.
+
+   If FEATURE_PRIORITY is nonnull, push one or two unsigned integer
+   values representing the priority of the feature string.  When the
+   priority is given explicitly in the attribute string,
+   feature_priority has two members: feature_priority[0] is the
+   priority given in the attribute string, and feature_priority[1] is
+   the priority computed from the feature itself.  When no priority is
+   given in the attribute string, feature_priority has a single member,
+   the priority computed from the feature.  */
+
+bool
+loongarch_parse_fmv_features (tree decl, string_slice str,
+			      loongarch_fmv_feature_mask *feature_mask,
+			      auto_vec<unsigned int> *feature_priority)
+{
+  location_t loc
+    = decl == NULL ? UNKNOWN_LOCATION : DECL_SOURCE_LOCATION (decl);
+
+  if (feature_mask)
+    *feature_mask = 0;
+
+  string_slice attr_str = string_slice::tokenize (&str, ";");
+  attr_str = attr_str.strip ();
+
+  if (attr_str == "default")
+    {
+      if (str.is_valid ())
+	{
+	  error_at (loc, "\"default\" cannot be set together with other "
+		    "features in %qs", attr_str.begin ());
+	  return false;
+	}
+
+      if (feature_priority)
+	feature_priority->safe_push (LA_PRIO_NONE);
+      return true;
+    }
+
+  if (attr_str.empty ())
+    {
+      error_at (loc, "the feature before %<;%> in attribute %qs cannot "
+		"be empty", attr_str.begin ());
+      return false;
+    }
+
+  /* At this point STR holds whatever followed the first %<;%>; the only
+     valid content there is an explicit priority.  */
+  if (str.is_valid ())
+    {
+      if (str.empty ())
+	{
+	  error_at (loc, "in attribute %qs the priority cannot be empty",
+		    attr_str.begin ());
+	  return false;
+	}
+
+      string_slice prio_str = string_slice::tokenize (&str, ";");
+
+      if (str.is_valid ())
+	{
+	  error_at (loc, "in attribute %qs the number of features "
+		    "cannot exceed two", attr_str.begin ());
+	  return false;
+	}
+
+      prio_str = prio_str.strip ();
+      string_slice name = string_slice::tokenize (&prio_str, "=");
+
+      if (name == "priority" && prio_str.is_valid ())
+	{
+	  unsigned int tmp_prio = 0;
+	  unsigned int len = 0;
+
+	  for (char c : prio_str)
+	    {
+	      if (ISDIGIT (c))
+		len++;
+	      else
+		break;
+	    }
+
+	  if (len != prio_str.size ()
+	      || sscanf (prio_str.begin (), "%u", &tmp_prio) != 1)
+	    {
+	      error_at (loc, "setting the priority value to %qs is "
+			"illegal in attribute %qs", prio_str.begin (),
+			attr_str.begin ());
+	      return false;
+	    }
+
+	  if (feature_priority)
+	    feature_priority->safe_push (tmp_prio + LA_PRIO_MAX);
+	}
+      else
+	{
+	  error_at (loc, "in attribute %qs, the second feature should be "
+		    "\"priority=%<num%>\" instead of %qs", attr_str.begin (),
+		    name.begin ());
+	  return false;
+	}
+    }
+
+  if (attr_str.is_valid ())
+    {
+      int num_features = ARRAY_SIZE (loongarch_attributes);
+
+      /* Handle arch= if specified.  For priority, use a value one above
+	 the best instruction set the architecture level includes.
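+	 For example (illustrative): "arch=la64v1.1" yields FEAT_LA64
+	 plus every feature tagged ARCH_LA64V1_0 or ARCH_LA64V1_1 in
+	 loongarch_attributes, with priority LA_PRIO_LA64V1_1, while
+	 "arch=loongarch64" yields just FEAT_LA64 with priority
+	 LA_PRIO_LOONGARCH64.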
*/ + if (strstr (attr_str.begin (), "arch=") != NULL) + { + string_slice arch_name = attr_str; + string_slice::tokenize (&arch_name, "="); + if (!arch_name.is_valid ()) + { + error_at (loc, "in attribute %qs you need to set a legal value " + "for \"arch\"", attr_str.begin ()); + return false; + } + + loongarch_fmv_feature_mask tmp_mask = 0ULL; + unsigned int tmp_prio = 0; + + if (arch_name == "loongarch64") + { + tmp_mask = 1UL << FEAT_LA64; + tmp_prio = LA_PRIO_LOONGARCH64; + } + else if (arch_name == "la64v1.0") + { + tmp_mask = 1ULL << FEAT_LA64; + for (int i = 0; i < num_features; i++) + if (loongarch_attributes[i].arch_ver == ARCH_LA64V1_0) + tmp_mask |= loongarch_attributes[i].feat_mask; + tmp_prio = LA_PRIO_LA64V1_0; + } + else if (arch_name == "la64v1.1") + { + tmp_mask = 1ULL << FEAT_LA64; + for (int i = 0; i < num_features; i++) + if (loongarch_attributes[i].arch_ver == ARCH_LA64V1_0 + || loongarch_attributes[i].arch_ver == ARCH_LA64V1_1) + tmp_mask |= loongarch_attributes[i].feat_mask; + tmp_prio = LA_PRIO_LA64V1_1; + } + else + { + error_at (loc, "in attribute %qs you need to set a legal value " + "for \"arch\"", attr_str.begin ()); + return false; + } + + if (feature_mask) + *feature_mask = tmp_mask; + + if (feature_priority) + feature_priority->safe_push (tmp_prio); + } + else + { + int i; + for (i = 0; i < num_features - 1; i++) + { + if (loongarch_attributes[i].name == attr_str + || strstr (attr_str.begin (), + loongarch_attributes[i].name) != NULL) + { + if (loongarch_attributes[i].feat_mask == 0) + { + error_at (loc, "attribute %qs is not supported in " + "%<target_version%> or %<target_clones%>", + attr_str.begin ()); + return false; + } + + if (feature_mask) + *feature_mask = loongarch_attributes[i].feat_mask; + + if (feature_priority) + feature_priority->safe_push (loongarch_attributes[i].priority); + break; + } + } + + if (i == num_features - 1) + { + error_at (loc, "%qs is not supported in target attribute", + attr_str.begin ()); + return false; + } + } + } + + if (feature_priority) + gcc_assert (feature_priority->length () == 1 + || feature_priority->length () == 2); + + return true; +} + +/* Compare priorities of two version decls. Return: + 1: decl1 has a higher priority + -1: decl2 has a higher priority + 0: decl1 and decl2 have the same priority. +*/ + +int +loongarch_compare_version_priority (tree decl1, tree decl2) +{ + auto_vec<unsigned int> prio1, prio2; + + get_feature_mask_for_version (decl1, NULL, &prio1); + get_feature_mask_for_version (decl2, NULL, &prio2); + + unsigned int max_prio1 + = prio1.length () == 2 ? MAX (prio1[0], prio1[1]) : prio1[0]; + unsigned int max_prio2 + = prio2.length () == 2 ? MAX (prio2[0], prio2[1]) : prio2[0]; + + if (max_prio1 != max_prio2) + return max_prio1 > max_prio2 ? 1 : -1; + + /* If max_prio1 == max_prio2, and max_prio1 >= LA_PRIO_MAX, + it means that the attribute strings of decl1 and decl2 are both + set with priorities, and the priority values are the same. + So next we use the priority calculated by the attribute string to + compare. */ + if (max_prio1 >= LA_PRIO_MAX) + { + unsigned int min_prio1 + = prio1.length () == 2 ? MIN (prio1[0], prio1[1]) : prio1[0]; + unsigned int min_prio2 + = prio2.length () == 2 ? MIN (prio2[0], prio2[1]) : prio2[0]; + + if (min_prio1 != min_prio2) + return min_prio1 > min_prio2 ? 
1 : -1; + } + + return 0; +} diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index f7ce3aa..053f77c 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see #define IN_TARGET_CODE 1 +#define INCLUDE_STRING #include "config.h" #include "system.h" #include "coretypes.h" @@ -1393,6 +1394,42 @@ loongarch_can_use_return_insn (void) return reload_completed && cfun->machine->frame.total_size == 0; } +/* If we want to support lock-free 16B atomic, we must support at least + lock-free atomic load, store, and CAS (other operations can be emulated + with CAS even if not supported directly). Otherwise, for example if + store is lock-free but CAS is not, the store may happen when the CAS + operation is holding the lock, breaking the atomicity of CAS. + + We need LSX for load/store and SCQ for CAS, so require both for + lock-free 16B atomic. + + If we link a TU (1) compiled with -mlsx -mscq and the TU (2) not, for + the same reason we need to ensure the libatomic call invoked by TU (2) + always use the lock-free sequence. Thus libatomic must contain the + ifuncs built with -mlsx -mscq. Since the ifunc resolver interface is + glibc-specific and the hwcap bits are Linux-specific, the resolver + implementation in libatomic assumes GNU/Linux and + HAVE_IFUNC_FOR_LIBATOMIC_16B is only enabled for it. To support + another OS, add the correct ifunc resolver implementation into + libatomic/config/loongarch/host-config.h and then define + HAVE_IFUNC_FOR_LIBATOMIC_16B for it. + + FIXME: when ifunc is not supported but libatomic is entirely built with + -mlsx -mscq, we don't really need ifunc. But we don't have a way to + get CFLAGS_FOR_TARGET here... */ +bool +loongarch_16b_atomic_lock_free_p (void) +{ +#ifdef HAVE_IFUNC_FOR_LIBATOMIC_16B + bool ok_p = HAVE_IFUNC_FOR_LIBATOMIC_16B; +#else + bool ok_p = false; +#endif + + return (ok_p && targetm.has_ifunc_p () + && TARGET_64BIT && ISA_HAS_LSX && ISA_HAS_SCQ); +} + /* Expand function epilogue using the following insn patterns: "epilogue" (style == NORMAL_RETURN) "sibcall_epilogue" (style == SIBCALL_RETURN) @@ -1835,7 +1872,27 @@ loongarch_const_vector_same_bytes_p (rtx op, machine_mode mode) first = CONST_VECTOR_ELT (op, 0); bytes = GET_MODE_UNIT_SIZE (mode); - val = INTVAL (first); + + if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) + { + rtx val_s = CONST_VECTOR_ELT (op, 0); + const REAL_VALUE_TYPE *x = CONST_DOUBLE_REAL_VALUE (val_s); + if (GET_MODE (val_s) == DFmode) + { + long tmp[2]; + REAL_VALUE_TO_TARGET_DOUBLE (*x, tmp); + val = (unsigned HOST_WIDE_INT) tmp[1] << 32 | tmp[0]; + } + else + { + long tmp; + REAL_VALUE_TO_TARGET_SINGLE (*x, tmp); + val = (unsigned HOST_WIDE_INT) tmp; + } + } + else + val = UINTVAL (first); + first_byte = val & 0xff; for (i = 1; i < bytes; i++) { @@ -1920,8 +1977,170 @@ loongarch_check_vect_par_cnst_half (rtx op, machine_mode mode, bool high_p) return true; } +/* VLDI or XVLDI instruction could have 13 bits imm part, this mask is used to + indicate the highest bit is 1. */ +#define VLDI_NEG_MASK HOST_WIDE_INT_UC(0xFFFFFFFFFFFFF000) + +/* Return true if repeated value in vector for machine mode can be set by VLDI + or XVLDI instruction, the immediate value for VLDI or XVLDI will be put into + res. 
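+   For instance (worked from the encodings below): a V8HI splat of -3
+   lies in [-512, 511] and encodes directly as 0x400 | (-3 & 0x3FF),
+   i.e. 0x7fd; a V4SI splat of 0x4500 matches the 4'b0001 pattern
+   (data={2{16'b0,x[7:0],8'b0}}) and encodes as
+   VLDI_NEG_MASK | 0x100 | 0x45.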
*/ +static bool +loongarch_parse_vldi_const (rtx op, machine_mode mode, + unsigned HOST_WIDE_INT *res) +{ + if (!loongarch_const_vector_same_val_p (op, mode)) + return false; + + rtx elem0 = CONST_VECTOR_ELT (op, 0); + if (!CONST_INT_P (elem0)) + return false; + + HOST_WIDE_INT value = INTVAL (elem0); + switch (mode) + { + case E_V16QImode: + case E_V32QImode: + { + *res = value & 0xFF; + return true; + } + case E_V8HImode: + case E_V16HImode: + { + if (value >= -512 && value <= 511) + { + *res = 0x400 | (value & 0x3FF); + return true; + } + + uint16_t num = value & 0xFFFF; + /* 4'b0101:data={4{x[7:0],8'b0}}. */ + if ((num & 0xFF) == 0) + { + *res = VLDI_NEG_MASK | 0x500 | (num >> 8); + return true; + } + break; + } + case E_V4SImode: + case E_V8SImode: + { + if (value >= -512 && value <= 511) + { + *res = 0x800 | (value & 0x3FF); + return true; + } + uint32_t num = value & 0xFFFFFFFF; + /* 4'b0001:data={2{16'b0,x[7:0],8'b0}}. */ + if ((num & 0xFFFF00FF) == 0) + { + *res = VLDI_NEG_MASK | 0x100 | ((num >> 8) & 0xFF); + return true; + } + + /* 4'b0010:data={2{8'b0,x[7:0],16'b0}}. */ + if ((num & 0xFF00FFFF) == 0) + { + *res = VLDI_NEG_MASK | 0x200 | ((num >> 16) & 0xFF); + return true; + } + + /* 4'b0011:data={2{x[7:0],24'b0}}. */ + if ((num & 0xFFFFFF) == 0) + { + *res = VLDI_NEG_MASK | 0x300 | ((num >> 24) & 0xFF); + return true; + } + + /* 4'b0110:data={2{16'b0,x[7:0],8'hFF}}. */ + if (num >> 16 == 0 && (num & 0xFF) == 0xFF) + { + *res = VLDI_NEG_MASK | 0x600 | ((num >> 8) & 0xFF); + return true; + } + + /* 4'b0111:data={2{8'b0,x[7:0],16'hFFFF}}. */ + if (num >> 24 == 0 && (num & 0xFFFF) == 0xFFFF) + { + *res = VLDI_NEG_MASK | 0x700 | ((num >> 16) & 0xFF); + return true; + } + + /* 4'b1010:data={2{x[7],~x[6],{5{x[6]}},x[5:0],19'b0}}. */ + uint32_t temp = (num >> 25) & 0x3F; + /* x[6] == 0, then ~x[6],{5{x[6]}} should be 0b10 0000, + x[6] == 1, then ~x[6],{5{x[6]}} should be 0b01 1111. */ + if ((temp == 0x20 || temp == 0x1F) && (num & 0x7FFFF) == 0) + { + temp = ((num >> 19) & 0x7F) | ((num >> 24) & 0x80); + *res = VLDI_NEG_MASK | 0xa00 | temp; + return true; + } + break; + } + case E_V2DImode: + case E_V4DImode: + { + if (value >= -512 && value <= 511) + { + *res = 0xC00 | (value & 0x3FF); + return true; + } + + uint64_t num = value; + /* 4'b1001:data={{8{x[7]}},{8{x[6]}},{8{x[5]}},{8{x[4]}},{8{x[3]}}, + {8{x[2]}},{8{x[1]}},{8{x[0]}}}. */ + bool same_bit = true; + uint64_t temp = 0; + for (int i = 0; i < 8; i++) + { + uint8_t n = (num >> (i * 8)) & 0xFF; + if (n != 0 && n != 0xFF) + { + same_bit = false; + break; + } + + if (n == 0xFF) + temp = (1 << i) | temp; + } + if (same_bit) + { + *res = VLDI_NEG_MASK | 0x900 | temp; + return true; + } + + /* 4'b1011:data={32'b0,x[7],~x[6],{5{x[6]}},x[5:0],19'b0}. */ + temp = (num >> 25) & 0x3F; + if ((num & 0xFFFFFFFF) == num + && (temp == 0x20 || temp == 0x1F) + && (num & 0x7FFFF) == 0) + { + temp = ((num >> 19) & 0x7F) | ((num >> 24) & 0x80); + *res = VLDI_NEG_MASK | 0xB00 | temp; + return true; + } + + /* 4'b1100:data={x[7],~x[6],{8{x[6]}},x[5:0],48'b0}. 
*/ + temp = (num >> 54) & 0x1FF; + if ((num & HOST_WIDE_INT_UC(0xFFFF000000000000)) == num + && (temp == 0xFF || temp == 0x100)) + { + temp = ((num >> 48) & 0x7F) | ((num >> 56) & 0x80); + *res = VLDI_NEG_MASK | 0xC00 | temp; + return true; + } + break; + } + default: + break; + } + + return false; +} + rtx -loongarch_const_vector_vrepli (rtx x, machine_mode mode) +loongarch_const_vector_vldi (rtx x, machine_mode mode) { int size = GET_MODE_SIZE (mode); @@ -1935,8 +2154,11 @@ loongarch_const_vector_vrepli (rtx x, machine_mode mode) mode_for_vector (elem_mode, size / GET_MODE_SIZE (elem_mode)) .require (); rtx op = lowpart_subreg (new_mode, x, mode); - if (loongarch_const_vector_same_int_p (op, new_mode, -512, 511)) - return op; + + HOST_WIDE_INT res = 0; + if (loongarch_parse_vldi_const (op, new_mode, + (unsigned HOST_WIDE_INT *)&res)) + return GEN_INT (res); } return NULL_RTX; @@ -2604,7 +2826,7 @@ loongarch_const_insns (rtx x) case CONST_VECTOR: if ((LSX_SUPPORTED_MODE_P (GET_MODE (x)) || LASX_SUPPORTED_MODE_P (GET_MODE (x))) - && loongarch_const_vector_vrepli (x, GET_MODE (x))) + && loongarch_const_vector_vldi (x, GET_MODE (x))) return 1; /* Fall through. */ case CONST_DOUBLE: @@ -2781,7 +3003,7 @@ loongarch_unspec_address_offset (rtx base, rtx offset, enum loongarch_symbol_type symbol_type) { base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base), - UNSPEC_ADDRESS_FIRST + symbol_type); + UNSPEC_ADDRESS_FIRST + (int) symbol_type); if (offset != const0_rtx) base = gen_rtx_PLUS (Pmode, base, offset); return gen_rtx_CONST (Pmode, base); @@ -3001,7 +3223,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) rtx sum = gen_rtx_UNSPEC ( Pmode, gen_rtvec (1, loongarch_tls_symbol), UNSPEC_ADDRESS_FIRST - + loongarch_classify_symbol (loongarch_tls_symbol)); + + (int) loongarch_classify_symbol (loongarch_tls_symbol)); set_unique_reg_note (get_last_insn (), REG_EQUAL, sum); } else @@ -3439,13 +3661,8 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value) x = GEN_INT (codes[0].value); for (i = 1; i < num_ops; i++) { - if (!can_create_pseudo_p ()) - { - emit_insn (gen_rtx_SET (temp, x)); - x = temp; - } - else - x = force_reg (mode, x); + emit_insn (gen_rtx_SET (temp, x)); + x = temp; set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (codes[i-1].curr_value)); @@ -3539,7 +3756,20 @@ loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) { if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode)) { - loongarch_emit_move (dest, force_reg (mode, src)); + /* When loading fixed-point scalar data, if the size of the mode + is smaller than the size of `word_mode`, the immediate value + is first loaded into a register of type `word_mode`. + This facilitates the elimination of common self-expressions. + This reduces redundant immediate value loading instructions. */ + rtx tmp; + if (GET_MODE_CLASS (mode) == MODE_INT + && GET_CODE (src) == CONST_INT + && GET_MODE_SIZE (mode) < UNITS_PER_WORD) + tmp = gen_lowpart (mode, force_reg (word_mode, src)); + else + tmp = force_reg (mode, src); + + loongarch_emit_move (dest, tmp); return true; } @@ -3803,6 +4033,34 @@ loongarch_use_bstrins_for_ior_with_mask_1 (machine_mode mode, return 0; } +/* Check if it is possible to optimize AND operation with an immediate: + a. immediate is loaded by more than 1 instruction + b. can use bstrpick.d + bstrins.d. 
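+
+   For example (illustrative): AND with 0x00000fff00000fff needs several
+   instructions just to materialize the constant, but with its leading
+   zeros filled in, 0xffffffff00000fff is an ins_zero_bitmask_operand,
+   so the AND can be emitted as bstrpick.d (keep bits [43:0], clearing
+   the leading zeros) followed by bstrins.d from $zero (clearing the
+   middle zero field, bits [31:12]).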
*/ + +bool +loongarch_use_bstrins_bstrpick_for_and (rtx op, machine_mode mode) +{ + if (!TARGET_64BIT) + return false; + + /* Avoid aggressive optimization of combine before reload. */ + if (!reload_completed) + return false; + + /* It's meaningless if the OP is not splittable + and skip the cases already supported in AND operation. */ + if (!splittable_const_int_operand (op, mode) || and_operand (op, mode)) + return false; + + int leading_zero_bit = __builtin_clzll (UINTVAL (op)); + unsigned HOST_WIDE_INT mask = (~0ULL) << (64 - leading_zero_bit); + + if (ins_zero_bitmask_operand (GEN_INT (UINTVAL (op) | mask), mode)) + return true; + + return false; +} + /* Return the cost of moving between two registers of mode MODE. */ static int @@ -3922,14 +4180,24 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, return false; case AND: - /* Check for a *clear_upper32 pattern and treat it like a zero - extension. See the pattern's comment for details. */ - if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1)) - && UINTVAL (XEXP (x, 1)) == 0xffffffff) + if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1))) { - *total = (loongarch_zero_extend_cost (XEXP (x, 0)) - + set_src_cost (XEXP (x, 0), mode, speed)); - return true; + /* Check for a *clear_upper32 pattern and treat it like a zero + extension. See the pattern's comment for details. */ + if (UINTVAL (XEXP (x, 1)) == 0xffffffff) + { + *total = (loongarch_zero_extend_cost (XEXP (x, 0)) + + set_src_cost (XEXP (x, 0), mode, speed)); + return true; + } + /* Check if it can be done by bstrpick.d and bstrins.d. */ + else if (loongarch_use_bstrins_bstrpick_for_and (XEXP (x, 1), mode)) + { + /* The pattern will be split into 2 insns. */ + *total = (COSTS_N_INSNS (2) + + set_src_cost (XEXP (x, 0), mode, speed)); + return true; + } } /* (AND (NOT op0) (NOT op1) is a nor operation that can be done in a single instruction. */ @@ -3983,9 +4251,34 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, speed); return true; + case LSHIFTRT: + /* Correct the cost of mulh.{w[u]/d[u]}. */ + if (outer_code == TRUNCATE && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) == (GET_MODE_BITSIZE (mode) / 2) + && GET_CODE (XEXP (x, 0)) == MULT + && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND + && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND) + || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND + && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)) + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == REG + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 0)) == REG) + { + if (GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SImode + && GET_MODE (XEXP (XEXP (XEXP (x, 0), 1), 0)) == SImode) + { + *total = loongarch_cost->int_mult_si; + return true; + } + if (GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode + && GET_MODE (XEXP (XEXP (XEXP (x, 0), 1), 0)) == DImode) + { + *total = loongarch_cost->int_mult_di; + return true; + } + } + /* Fall through. */ case ASHIFT: case ASHIFTRT: - case LSHIFTRT: case ROTATE: case ROTATERT: if (CONSTANT_P (XEXP (x, 1))) @@ -4234,7 +4527,8 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, machine_mode loongarch_split_reduction (machine_mode mode) { - if (LSX_SUPPORTED_MODE_P (mode)) + if (!VECTOR_MODE_P (mode) + || LSX_SUPPORTED_MODE_P (mode)) return mode; return mode_for_vector (as_a <scalar_mode> (GET_MODE_INNER (mode)), @@ -4791,7 +5085,7 @@ loongarch_split_vector_move_p (rtx dest, rtx src) /* Check for vector set to an immediate const vector with valid replicated element. 
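+     (Illustrative: a V4SI splat of 200 lies in the [-512, 511] vldi
+     range, so the move stays a single vldi and no split is needed.)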
*/ if (FP_REG_RTX_P (dest) - && loongarch_const_vector_vrepli (src, GET_MODE (src))) + && loongarch_const_vector_vldi (src, GET_MODE (src))) return false; /* Check for vector load zero immediate. */ @@ -4927,15 +5221,15 @@ loongarch_output_move (rtx *operands) && src_code == CONST_VECTOR && CONST_INT_P (CONST_VECTOR_ELT (src, 0))) { - operands[1] = loongarch_const_vector_vrepli (src, mode); + operands[1] = loongarch_const_vector_vldi (src, mode); gcc_assert (operands[1]); switch (GET_MODE_SIZE (mode)) { case 16: - return "vrepli.%v1\t%w0,%E1"; + return "vldi\t%w0,%1"; case 32: - return "xvrepli.%v1\t%u0,%E1"; + return "xvldi\t%u0,%1"; default: gcc_unreachable (); } } @@ -5217,29 +5511,41 @@ loongarch_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1, if (loongarch_int_order_operand_ok_p (*code, *cmp1)) return true; - if (CONST_INT_P (*cmp1)) switch (*code) { case LE: - plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode); - if (INTVAL (*cmp1) < plus_one) + if (CONST_INT_P (*cmp1)) { - *code = LT; - *cmp1 = force_reg (mode, GEN_INT (plus_one)); - return true; + plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode); + if (INTVAL (*cmp1) < plus_one) + { + *code = LT; + *cmp1 = force_reg (mode, GEN_INT (plus_one)); + return true; + } } break; case LEU: - plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode); - if (plus_one != 0) + if (CONST_INT_P (*cmp1)) { - *code = LTU; - *cmp1 = force_reg (mode, GEN_INT (plus_one)); - return true; + plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode); + if (plus_one != 0) + { + *code = LTU; + *cmp1 = force_reg (mode, GEN_INT (plus_one)); + return true; + } } break; + case GT: + case GTU: + case LT: + case LTU: + *cmp1 = force_reg (mode, *cmp1); + break; + default: break; } @@ -5304,6 +5610,26 @@ loongarch_zero_if_equal (rtx cmp0, rtx cmp1) OPTAB_DIRECT); } +/* Helper function for loongarch_extend_comparands to Sign-extend the OP. + However if the OP is SI subreg promoted with an inner DI, such as + (subreg/s/v:SI (reg/v:DI) 0) + just peel off the SUBREG to get DI, avoiding extraneous extension. + This modification refers to riscv's commit r14-5506. */ + +static void +loongarch_sign_extend_if_subreg_prom_p (rtx *op) +{ + if (SUBREG_P (*op) + && SUBREG_PROMOTED_VAR_P (*op) + && SUBREG_PROMOTED_SIGNED_P (*op) + && REG_P (XEXP (*op, 0)) + && (GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))) + == GET_MODE_SIZE (word_mode))) + *op = XEXP (*op, 0); + else + *op = gen_rtx_SIGN_EXTEND (word_mode, *op); +} + /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ static void @@ -5333,14 +5659,16 @@ loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1) } else { - *op0 = gen_rtx_SIGN_EXTEND (word_mode, *op0); - if (*op1 != const0_rtx) - *op1 = gen_rtx_SIGN_EXTEND (word_mode, *op1); + loongarch_sign_extend_if_subreg_prom_p (op0); + /* Regardless of whether *op1 is any immediate number, it is not + loaded into the register, in order to facilitate the generation + of slt{u}i. */ + if (!CONST_INT_P (*op1)) + loongarch_sign_extend_if_subreg_prom_p (op1); } } } - /* Convert a comparison into something that can be used in a branch. On entry, *OP0 and *OP1 are the values being compared and *CODE is the code used to compare them. Update them to describe the final comparison. 
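+
+   For example (illustrative): for a 32-bit (lt a 100) on LA64, A is
+   sign-extended to word_mode (or, when it is already a sign-promoted
+   subreg of a DImode register, the subreg is simply peeled off), while
+   the constant 100 stays an immediate so the comparison can match slti.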
*/ @@ -5481,11 +5809,8 @@ loongarch_expand_conditional_move (rtx *operands) enum rtx_code code = GET_CODE (operands[1]); rtx op0 = XEXP (operands[1], 0); rtx op1 = XEXP (operands[1], 1); - rtx op0_extend = op0; - rtx op1_extend = op1; - /* Record whether operands[2] and operands[3] modes are promoted to word_mode. */ - bool promote_op[2] = {false, false}; + /* Record whether operands[0] is extended by SImode. */ bool promote_p = false; machine_mode mode = GET_MODE (operands[0]); @@ -5588,25 +5913,10 @@ loongarch_expand_conditional_move (rtx *operands) } } - if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD) - { - promote_op[0] = (REG_P (op0) && REG_P (operands[2]) && - REGNO (op0) == REGNO (operands[2])); - promote_op[1] = (REG_P (op1) && REG_P (operands[3]) && - REGNO (op1) == REGNO (operands[3])); - } - - if (promote_op[0] || promote_op[1]) - { - mode = word_mode; - promote_p = true; - } - loongarch_extend_comparands (code, &op0, &op1); op0 = force_reg (word_mode, op0); - op0_extend = op0; - op1_extend = force_reg (word_mode, op1); + op1 = CONST_INT_P (op1) ? op1 : force_reg (word_mode, op1); rtx target = gen_reg_rtx (GET_MODE (op0)); @@ -5652,65 +5962,65 @@ loongarch_expand_conditional_move (rtx *operands) } } + /* If the target of the mov<mode> is SImode, then the two operands are + extended to display symbols. */ + if (TARGET_64BIT && mode == SImode) + { + loongarch_extend_comparands (code, &operands[2], &operands[3]); + operands[2] = force_reg (word_mode, operands[2]); + operands[3] = CONST_INT_P (operands[3]) ? operands[3] + : force_reg (word_mode, operands[3]); + + promote_p = true; + mode = DImode; + } + rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1); /* There is no direct support for general conditional GP move involving two registers using SEL. */ - if (INTEGRAL_MODE_P (GET_MODE (operands[2])) - && register_operand (operands[2], VOIDmode) - && register_operand (operands[3], VOIDmode)) + if (INTEGRAL_MODE_P (GET_MODE (operands[0]))) { - rtx op2 = operands[2]; - rtx op3 = operands[3]; + rtx pdest = promote_p ? gen_reg_rtx (mode) : operands[0]; - if (promote_p) + if (register_operand (operands[2], VOIDmode) + && register_operand (operands[3], VOIDmode)) { - if (promote_op[0]) - op2 = op0_extend; - else - { - loongarch_extend_comparands (code, &op2, &const0_rtx); - op2 = force_reg (mode, op2); - } - - if (promote_op[1]) - op3 = op1_extend; - else - { - loongarch_extend_comparands (code, &op3, &const0_rtx); - op3 = force_reg (mode, op3); - } - } + rtx sel1 = gen_reg_rtx (mode); + rtx sel2 = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (sel1, + gen_rtx_IF_THEN_ELSE (mode, cond, + operands[2], + const0_rtx))); + /* Flip the test for the second operand. */ + cond = gen_rtx_fmt_ee ((code == EQ) ? NE + : EQ, GET_MODE (op0), + op0, op1); - rtx temp = gen_reg_rtx (mode); - rtx temp2 = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (sel2, + gen_rtx_IF_THEN_ELSE (mode, cond, + operands[3], + const0_rtx))); - emit_insn (gen_rtx_SET (temp, - gen_rtx_IF_THEN_ELSE (mode, cond, - op2, const0_rtx))); - - /* Flip the test for the second operand. */ - cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1); - - emit_insn (gen_rtx_SET (temp2, - gen_rtx_IF_THEN_ELSE (mode, cond, - op3, const0_rtx))); + /* Merge the two results, at least one is guaranteed to be zero. 
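+	     As an illustration (the exact instruction selection is
+	     assumed here), for d = (a == b) ? x : y this expands to a
+	     sequence along the lines of:
+
+	       xor      $t, $a, $b
+	       sltui    $t, $t, 1       # t = (a == b)
+	       maskeqz  $s1, $x, $t     # s1 = t ? x : 0
+	       masknez  $s2, $y, $t     # s2 = t ? 0 : y
+	       or       $d, $s1, $s2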
*/ + emit_insn (gen_rtx_SET (pdest, gen_rtx_IOR (mode, sel1, sel2))); + } + else + emit_insn (gen_rtx_SET (pdest, + gen_rtx_IF_THEN_ELSE (mode, cond, + operands[2], + operands[3]))); - /* Merge the two results, at least one is guaranteed to be zero. */ if (promote_p) { - rtx temp3 = gen_reg_rtx (mode); - emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2))); - temp3 = gen_lowpart (GET_MODE (operands[0]), temp3); + pdest = gen_lowpart (GET_MODE (operands[0]), pdest); /* Nonzero in a subreg if it was made when accessing an object that was promoted to a wider mode in accord with the PROMOTED_MODE machine description macro. */ - SUBREG_PROMOTED_VAR_P (temp3) = 1; + SUBREG_PROMOTED_VAR_P (pdest) = 1; /* Sets promoted mode for SUBREG_PROMOTED_VAR_P. */ - SUBREG_PROMOTED_SET (temp3, SRP_SIGNED); - loongarch_emit_move (operands[0], temp3); + SUBREG_PROMOTED_SET (pdest, SRP_SIGNED); + loongarch_emit_move (operands[0], pdest); } - else - emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2))); } else emit_insn (gen_rtx_SET (operands[0], @@ -5777,6 +6087,16 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length, /* Allocate a buffer for the temporary registers. */ regs = XALLOCAVEC (rtx, num_reg); + /* Extract the base address what plus operation to promote the combine of + RTX. */ + if (GET_CODE (XEXP (dest, 0)) == PLUS) + { + unsigned int dest_align = MEM_ALIGN (dest); + rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0)); + dest = change_address (dest, BLKmode, dest_reg); + set_mem_align (dest, dest_align); + } + for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2) { mode = loongarch_mode_for_move_size (delta_cur); @@ -7087,6 +7407,10 @@ static bool loongarch_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t rclass) { + if ((INTEGRAL_MODE_P (from) && FLOAT_MODE_P (to)) + || (INTEGRAL_MODE_P (to) && FLOAT_MODE_P (from))) + return true; + /* Allow conversions between different LSX/LASX vector modes. */ if (LASX_SUPPORTED_MODE_P (from) && LASX_SUPPORTED_MODE_P (to)) return true; @@ -7133,7 +7457,11 @@ loongarch_modes_tieable_p (machine_mode mode1, machine_mode mode2) prefer to put one of them in FPRs. */ return (mode1 == mode2 || (!loongarch_mode_ok_for_mov_fmt_p (mode1) - && !loongarch_mode_ok_for_mov_fmt_p (mode2))); + && !loongarch_mode_ok_for_mov_fmt_p (mode2)) + || (GET_MODE_CLASS(mode1) == MODE_FLOAT + && GET_MODE_CLASS(mode2) == MODE_INT) + || (GET_MODE_CLASS(mode2) == MODE_FLOAT + && GET_MODE_CLASS(mode1) == MODE_INT)); } /* Implement TARGET_PREFERRED_RELOAD_CLASS. */ @@ -8559,10 +8887,7 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d) rtx sel = force_reg (sel_mode, gen_rtx_CONST_VECTOR (sel_mode, sel_v)); - if (d->vmode == E_V16QImode) - emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel)); - else - emit_insn (gen_lsx_vshuf (d->vmode, target, sel, op1, op0)); + emit_insn (gen_simd_vshuf (d->vmode, target, op1, op0, sel)); return true; } @@ -8937,101 +9262,44 @@ loongarch_expand_vec_perm_1 (rtx operands[]) rtx t1 = NULL; rtx t2 = NULL; rtx t3, t4, t5, t6, vt = NULL; - rtx vec[32] = {NULL}; machine_mode mode = GET_MODE (op0); machine_mode maskmode = GET_MODE (mask); - int w, i; + int w; /* Number of elements in the vector. 
*/ w = GET_MODE_NUNITS (mode); - rtx round_data[MAX_VECT_LEN]; - rtx round_reg, round_data_rtx; - - if (mode != E_V32QImode) + /* If we are using xvshuf.*, clamp the selector to avoid unpredictable + output; if we need to blend two shuf results for the final result, + also clamp it so we can use xvslei to generate the bitmask for + the blending. */ + if ((maskmode != V8SImode && maskmode != V4DImode) + || !one_operand_shuffle) { - for (int i = 0; i < w; i += 1) - { - round_data[i] = GEN_INT (0x1f); - } - - if (mode == E_V4DFmode) - { - round_data_rtx = gen_rtx_CONST_VECTOR (E_V4DImode, - gen_rtvec_v (w, round_data)); - round_reg = gen_reg_rtx (E_V4DImode); - } - else if (mode == E_V8SFmode) - { - - round_data_rtx = gen_rtx_CONST_VECTOR (E_V8SImode, - gen_rtvec_v (w, round_data)); - round_reg = gen_reg_rtx (E_V8SImode); - } - else - { - round_data_rtx = gen_rtx_CONST_VECTOR (mode, - gen_rtvec_v (w, round_data)); - round_reg = gen_reg_rtx (mode); - } - - emit_move_insn (round_reg, round_data_rtx); - switch (mode) - { - case E_V32QImode: - emit_insn (gen_andv32qi3 (mask, mask, round_reg)); - break; - case E_V16HImode: - emit_insn (gen_andv16hi3 (mask, mask, round_reg)); - break; - case E_V8SImode: - case E_V8SFmode: - emit_insn (gen_andv8si3 (mask, mask, round_reg)); - break; - case E_V4DImode: - case E_V4DFmode: - emit_insn (gen_andv4di3 (mask, mask, round_reg)); - break; - default: - gcc_unreachable (); - break; - } + rtx t = gen_const_vec_duplicate (maskmode, GEN_INT (2 * w - 1)); + mask = expand_binop (maskmode, and_optab, mask, t, NULL_RTX, false, + OPTAB_DIRECT); } if (mode == V4DImode || mode == V4DFmode) { maskmode = mode = V8SImode; w = 8; - t1 = gen_reg_rtx (maskmode); /* Replicate the low bits of the V4DImode mask into V8SImode: - mask = { A B C D } - t1 = { A A B B C C D D }. */ - for (i = 0; i < w / 2; ++i) - vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2); - vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); - vt = force_reg (maskmode, vt); - mask = gen_lowpart (maskmode, mask); - emit_insn (gen_lasx_xvperm_w (t1, mask, vt)); - - /* Multiply the shuffle indicies by two. */ - t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1, - OPTAB_DIRECT); - - /* Add one to the odd shuffle indicies: - t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */ - for (i = 0; i < w / 2; ++i) - { - vec[i * 2] = const0_rtx; - vec[i * 2 + 1] = const1_rtx; - } - vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); - vt = validize_mem (force_const_mem (maskmode, vt)); - t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1, - OPTAB_DIRECT); + mask = lasx_xvpackev_w (mask * 2, mask * 2 + 1) */ + t1 = expand_binop (V4DImode, add_optab, mask, mask, NULL_RTX, + false, OPTAB_DIRECT); + t2 = gen_const_vec_duplicate (V4DImode, CONST1_RTX (DImode)); + t2 = expand_binop (V4DImode, add_optab, t1, t2, NULL_RTX, + true, OPTAB_DIRECT); + t1 = gen_lowpart (mode, t1); + t2 = gen_lowpart (mode, t2); + t3 = gen_reg_rtx (mode); + emit_insn (gen_lasx_xvpackev_w (t3, t1, t2)); /* Continue as if V8SImode (resp. V32QImode) was used initially. 
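+	 (Illustration: a V4DI selector {a,b,c,d} becomes the V8SI
+	 selector whose 64-bit lanes hold the index pairs {2a,2a+1},
+	 {2b,2b+1}, ..., i.e. each doubleword index is expanded into the
+	 indices of its two word halves.)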
*/ - operands[3] = mask = t1; + operands[3] = mask = t3; target = gen_reg_rtx (mode); op0 = gen_lowpart (mode, op0); op1 = gen_lowpart (mode, op1); @@ -9040,112 +9308,68 @@ loongarch_expand_vec_perm_1 (rtx operands[]) switch (mode) { case E_V8SImode: + case E_V8SFmode: if (one_operand_shuffle) { - emit_insn (gen_lasx_xvperm_w (target, op0, mask)); + emit_insn (gen_lasx_xvperm (mode, target, op0, mask)); if (target != operands[0]) emit_move_insn (operands[0], gen_lowpart (GET_MODE (operands[0]), target)); } else { - t1 = gen_reg_rtx (V8SImode); - t2 = gen_reg_rtx (V8SImode); - emit_insn (gen_lasx_xvperm_w (t1, op0, mask)); - emit_insn (gen_lasx_xvperm_w (t2, op1, mask)); - goto merge_two; - } - return; - - case E_V8SFmode: - mask = gen_lowpart (V8SImode, mask); - if (one_operand_shuffle) - emit_insn (gen_lasx_xvperm_w_f (target, op0, mask)); - else - { - t1 = gen_reg_rtx (V8SFmode); - t2 = gen_reg_rtx (V8SFmode); - emit_insn (gen_lasx_xvperm_w_f (t1, op0, mask)); - emit_insn (gen_lasx_xvperm_w_f (t2, op1, mask)); - goto merge_two; + t1 = gen_reg_rtx (mode); + t2 = gen_reg_rtx (mode); + emit_insn (gen_lasx_xvperm (mode, t1, op0, mask)); + emit_insn (gen_lasx_xvperm (mode, t2, op1, mask)); } - return; + break; case E_V16HImode: - if (one_operand_shuffle) - { - t1 = gen_reg_rtx (V16HImode); - t2 = gen_reg_rtx (V16HImode); - emit_insn (gen_lasx_xvpermi_d_v16hi (t1, op0, GEN_INT (0x44))); - emit_insn (gen_lasx_xvpermi_d_v16hi (t2, op0, GEN_INT (0xee))); - emit_insn (gen_lasx_xvshuf_h (target, mask, t2, t1)); - } - else - { - t1 = gen_reg_rtx (V16HImode); - t2 = gen_reg_rtx (V16HImode); - t3 = gen_reg_rtx (V16HImode); - t4 = gen_reg_rtx (V16HImode); - t5 = gen_reg_rtx (V16HImode); - t6 = gen_reg_rtx (V16HImode); - emit_insn (gen_lasx_xvpermi_d_v16hi (t3, op0, GEN_INT (0x44))); - emit_insn (gen_lasx_xvpermi_d_v16hi (t4, op0, GEN_INT (0xee))); - emit_insn (gen_lasx_xvshuf_h (t1, mask, t4, t3)); - emit_insn (gen_lasx_xvpermi_d_v16hi (t5, op1, GEN_INT (0x44))); - emit_insn (gen_lasx_xvpermi_d_v16hi (t6, op1, GEN_INT (0xee))); - emit_insn (gen_lasx_xvshuf_h (t2, mask, t6, t5)); - goto merge_two; - } - return; - case E_V32QImode: if (one_operand_shuffle) { - t1 = gen_reg_rtx (V32QImode); - t2 = gen_reg_rtx (V32QImode); - emit_insn (gen_lasx_xvpermi_d_v32qi (t1, op0, GEN_INT (0x44))); - emit_insn (gen_lasx_xvpermi_d_v32qi (t2, op0, GEN_INT (0xee))); - emit_insn (gen_lasx_xvshuf_b (target, t2, t1, mask)); + t1 = gen_reg_rtx (mode); + t2 = gen_reg_rtx (mode); + emit_insn (gen_lasx_xvpermi_d (mode, t1, op0, GEN_INT (0x44))); + emit_insn (gen_lasx_xvpermi_d (mode, t2, op0, GEN_INT (0xee))); + emit_insn (gen_simd_vshuf (mode, target, t2, t1, mask)); } else { - t1 = gen_reg_rtx (V32QImode); - t2 = gen_reg_rtx (V32QImode); - t3 = gen_reg_rtx (V32QImode); - t4 = gen_reg_rtx (V32QImode); - t5 = gen_reg_rtx (V32QImode); - t6 = gen_reg_rtx (V32QImode); - emit_insn (gen_lasx_xvpermi_d_v32qi (t3, op0, GEN_INT (0x44))); - emit_insn (gen_lasx_xvpermi_d_v32qi (t4, op0, GEN_INT (0xee))); - emit_insn (gen_lasx_xvshuf_b (t1, t4, t3, mask)); - emit_insn (gen_lasx_xvpermi_d_v32qi (t5, op1, GEN_INT (0x44))); - emit_insn (gen_lasx_xvpermi_d_v32qi (t6, op1, GEN_INT (0xee))); - emit_insn (gen_lasx_xvshuf_b (t2, t6, t5, mask)); - goto merge_two; + t1 = gen_reg_rtx (mode); + t2 = gen_reg_rtx (mode); + t3 = gen_reg_rtx (mode); + t4 = gen_reg_rtx (mode); + t5 = gen_reg_rtx (mode); + t6 = gen_reg_rtx (mode); + emit_insn (gen_lasx_xvpermi_d (mode, t3, op0, GEN_INT (0x44))); + emit_insn (gen_lasx_xvpermi_d (mode, t4, op0, GEN_INT 
(0xee))); + emit_insn (gen_simd_vshuf (mode, t1, t4, t3, mask)); + emit_insn (gen_lasx_xvpermi_d (mode, t5, op1, GEN_INT (0x44))); + emit_insn (gen_lasx_xvpermi_d (mode, t6, op1, GEN_INT (0xee))); + emit_insn (gen_simd_vshuf (mode, t2, t6, t5, mask)); } - return; + break; default: - gcc_assert (GET_MODE_SIZE (mode) == 32); + gcc_unreachable (); break; } -merge_two: + if (one_operand_shuffle) + return; + /* Then merge them together. The key is whether any given control element contained a bit set that indicates the second word. */ rtx xops[6]; - mask = operands[3]; - vt = GEN_INT (w); - vt = gen_const_vec_duplicate (maskmode, vt); - vt = force_reg (maskmode, vt); - mask = expand_simple_binop (maskmode, AND, mask, vt, - NULL_RTX, 0, OPTAB_DIRECT); + vt = gen_const_vec_duplicate (maskmode, GEN_INT (w - 1)); if (GET_MODE (target) != mode) target = gen_reg_rtx (mode); xops[0] = target; - xops[1] = gen_lowpart (mode, t2); - xops[2] = gen_lowpart (mode, t1); - xops[3] = gen_rtx_EQ (maskmode, mask, vt); + xops[1] = gen_lowpart (mode, t1); + xops[2] = gen_lowpart (mode, t2); + xops[3] = gen_rtx_LEU (maskmode, mask, vt); xops[4] = mask; xops[5] = vt; @@ -9155,55 +9379,6 @@ merge_two: gen_lowpart (GET_MODE (operands[0]), target)); } -void -loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) -{ - machine_mode vmode = GET_MODE (target); - machine_mode vimode = GET_MODE (sel); - auto nelt = GET_MODE_NUNITS (vmode); - auto round_reg = gen_reg_rtx (vimode); - rtx round_data[MAX_VECT_LEN]; - - for (int i = 0; i < nelt; i += 1) - { - round_data[i] = GEN_INT (0x1f); - } - - rtx round_data_rtx = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (nelt, round_data)); - emit_move_insn (round_reg, round_data_rtx); - - if (vmode != vimode) - { - target = lowpart_subreg (vimode, target, vmode); - op0 = lowpart_subreg (vimode, op0, vmode); - op1 = lowpart_subreg (vimode, op1, vmode); - } - - switch (vmode) - { - case E_V16QImode: - emit_insn (gen_andv16qi3 (sel, sel, round_reg)); - emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel)); - break; - case E_V2DFmode: - case E_V2DImode: - emit_insn (gen_andv2di3 (sel, sel, round_reg)); - emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0)); - break; - case E_V4SFmode: - case E_V4SImode: - emit_insn (gen_andv4si3 (sel, sel, round_reg)); - emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0)); - break; - case E_V8HImode: - emit_insn (gen_andv8hi3 (sel, sel, round_reg)); - emit_insn (gen_lsx_vshuf_h (target, sel, op1, op0)); - break; - default: - break; - } -} - /* Following are the assist function for const vector permutation support. */ static bool loongarch_is_quad_duplicate (struct expand_vec_perm_d *d) @@ -9668,11 +9843,7 @@ expand_perm_const_end: rtx sel = force_reg (sel_mode, gen_rtx_CONST_VECTOR (sel_mode, sel_v)); - if (d->vmode == E_V32QImode) - emit_insn (gen_lasx_xvshuf_b (target, op1, op0, sel)); - else - emit_insn (gen_lasx_xvshuf (d->vmode, target, sel, op1, op0)); - + emit_insn (gen_simd_vshuf (d->vmode, target, op1, op0, sel)); return true; } } @@ -9939,7 +10110,7 @@ emit_reduc_half (rtx dest, rtx src, int i) if (i == 256) tem = gen_lasx_xvpermi_d_v4df (dest, src, GEN_INT (0xe)); else - tem = gen_lasx_xvpermi_d_v4df (dest, src, const1_rtx); + tem = gen_lasx_xvbsrl_d_f (dest, src, GEN_INT (0x8)); break; case E_V32QImode: case E_V16HImode: @@ -11266,6 +11437,10 @@ loongarch_compute_pressure_classes (reg_class *classes) static bool loongarch_can_inline_p (tree caller, tree callee) { + /* Do not inline when callee is versioned but caller is not. 
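+
+     E.g. (hypothetical source):
+
+       __attribute__ ((target_version ("lsx")))     int f (void);
+       __attribute__ ((target_version ("default"))) int f (void);
+       int g (void) { return f (); }
+
+     here g must keep calling the dispatcher rather than inline one
+     particular body of f.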
*/ + if (DECL_FUNCTION_VERSIONED (callee) && ! DECL_FUNCTION_VERSIONED (caller)) + return false; + tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); @@ -11320,6 +11495,627 @@ loongarch_can_inline_p (tree caller, tree callee) return true; } +/* Parse the tree in ARGS that contains the target_version attribute + information and update the global target options space. If LOC is nonnull, + report diagnostics against *LOC, otherwise remain silent. */ + +bool +loongarch_process_target_version_attr (tree args, tree fndecl) +{ + location_t loc = DECL_SOURCE_LOCATION (fndecl); + + if (TREE_CODE (args) == TREE_LIST) + { + if (TREE_CHAIN (args)) + { + if (loc) + error_at (loc, "attribute %<target_version%> " + "has multiple values"); + return false; + } + args = TREE_VALUE (args); + } + + if (!args || TREE_CODE (args) != STRING_CST) + { + if (loc) + error_at (loc, "attribute %<target_version%> argument not a string"); + return false; + } + + string_slice str = TREE_STRING_POINTER (args); + + if (str == "default") + return true; + + if (loongarch_parse_fmv_features (fndecl, str, NULL, NULL) == false) + return false; + + /* Get the attribute string and take out only the option part. + eg: + "arch=la64v1.0;priority=1" + The attr_string is "arch=la64v1.0". + */ + string_slice attr_string = string_slice::tokenize (&str, ";"); + attr_string = attr_string.strip (); + + args = build_string (attr_string.size (), attr_string.begin ()); + + return loongarch_process_target_attr (args, fndecl); +} + + +/* Implement TARGET_OPTION_VALID_VERSION_ATTRIBUTE_P. This is used to + process attribute ((target_version ("..."))). */ + +static bool +loongarch_option_valid_version_attribute_p (tree fndecl, tree, tree args, int) +{ + struct cl_target_option cur_target; + bool ret; + tree new_target; + tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); + + /* Save the current target options to restore at the end. */ + cl_target_option_save (&cur_target, &global_options, &global_options_set); + + /* If fndecl already has some target attributes applied to it, unpack + them so that we add this attribute on top of them, rather than + overwriting them. */ + if (existing_target) + { + struct cl_target_option *existing_options + = TREE_TARGET_OPTION (existing_target); + + if (existing_options) + cl_target_option_restore (&global_options, &global_options_set, + existing_options); + } + else + cl_target_option_restore (&global_options, &global_options_set, + TREE_TARGET_OPTION (target_option_current_node)); + + ret = loongarch_process_target_version_attr (args, fndecl); + + /* Set up any additional state. */ + if (ret) + { + loongarch_option_override_internal (&la_target, + &global_options, + &global_options_set); + new_target = build_target_option_node (&global_options, + &global_options_set); + } + else + new_target = NULL; + + if (fndecl && ret) + DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; + + cl_target_option_restore (&global_options, &global_options_set, &cur_target); + + return ret; +} + +/* Make a dispatcher declaration for the multi-versioned function DECL. + Calls to DECL function will be replaced with calls to the dispatcher + by the front-end. Returns the decl of the dispatcher function. 
*/ + +tree +loongarch_get_function_versions_dispatcher (void *decl) +{ + tree fn = (tree) decl; + struct cgraph_node *node = NULL; + struct cgraph_node *default_node = NULL; + struct cgraph_function_version_info *node_v = NULL; + + tree dispatch_decl = NULL; + + struct cgraph_function_version_info *default_version_info = NULL; + + gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); + + node = cgraph_node::get (fn); + gcc_assert (node != NULL); + + node_v = node->function_version (); + gcc_assert (node_v != NULL); + + if (node_v->dispatcher_resolver != NULL) + return node_v->dispatcher_resolver; + + /* The default node is always the beginning of the chain. */ + default_version_info = node_v; + while (default_version_info->prev) + default_version_info = default_version_info->prev; + default_node = default_version_info->this_node; + + /* If there is no default node, just return NULL. */ + if (!is_function_default_version (default_node->decl)) + return NULL; + + if (targetm.has_ifunc_p ()) + { + struct cgraph_function_version_info *it_v = NULL; + struct cgraph_node *dispatcher_node = NULL; + struct cgraph_function_version_info *dispatcher_version_info = NULL; + + /* Right now, the dispatching is done via ifunc. */ + dispatch_decl = make_dispatcher_decl (default_node->decl); + TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn); + + dispatcher_node = cgraph_node::get_create (dispatch_decl); + gcc_assert (dispatcher_node != NULL); + dispatcher_node->dispatcher_function = 1; + dispatcher_version_info + = dispatcher_node->insert_new_function_version (); + dispatcher_version_info->next = default_version_info; + dispatcher_node->definition = 1; + + /* Set the dispatcher for all the versions. */ + it_v = default_version_info; + while (it_v != NULL) + { + it_v->dispatcher_resolver = dispatch_decl; + it_v = it_v->next; + } + } + else + { + error_at (DECL_SOURCE_LOCATION (default_node->decl), + "multiversioning needs %<ifunc%> which is not supported " + "on this target"); + } + + return dispatch_decl; +} + +/* Implement TARGET_MANGLE_DECL_ASSEMBLER_NAME, to add function multiversioning + suffixes. */ + +tree +loongarch_mangle_decl_assembler_name (tree decl, tree id) +{ + /* For function version, add the target suffix to the assembler name. */ + if (TREE_CODE (decl) == FUNCTION_DECL + && DECL_FUNCTION_VERSIONED (decl)) + { + std::string name = IDENTIFIER_POINTER (id) + std::string ("."); + tree target_attr = lookup_attribute ("target_version", + DECL_ATTRIBUTES (decl)); + + if (target_attr == NULL_TREE) + { + name += "default"; + return get_identifier (name.c_str ()); + } + + const char *version_string + = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (target_attr))); + + /* Replace non-alphanumeric characters with underscores as the suffix. */ + for (const char *c = version_string; *c; c++) + name += ISALNUM (*c) == 0 ? '_' : *c; + + if (DECL_ASSEMBLER_NAME_SET_P (decl)) + SET_DECL_RTL (decl, NULL); + + id = get_identifier (name.c_str ()); + } + return id; +} + +/* Return an identifier for the base assembler name of a versioned function. + This is computed by taking the default version's assembler name, and + stripping off the ".default" suffix if it's already been appended. 
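+   E.g. "foo.default" with SUFFIX ".resolver" yields "foo.resolver", and
+   with an empty SUFFIX it recovers the base name "foo".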
*/
+
+static tree
+get_suffixed_assembler_name (tree default_decl, const char *suffix)
+{
+  std::string name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (default_decl));
+
+  auto size = name.size ();
+  if (size >= 8 && name.compare (size - 8, 8, ".default") == 0)
+    name.resize (size - 8);
+  name += suffix;
+  return get_identifier (name.c_str ());
+}
+
+/* Get the FMV feature mask and priority for the versioned function DECL.  */
+void
+get_feature_mask_for_version (tree decl,
+			      loongarch_fmv_feature_mask *feature_mask,
+			      auto_vec<unsigned int> *feature_priority)
+{
+  tree version_attr = lookup_attribute ("target_version",
+					DECL_ATTRIBUTES (decl));
+
+  /* When a function is given multiple versions with
+     "__attribute__((target_version("...")))", it may be defined without
+     an explicit "__attribute__((target_version("default")))"; such a
+     definition is compiled as the default version.  So if version_attr
+     is null, DECL is the default version: set its feature_mask to 0 and
+     its priority to LA_PRIO_NONE.  */
+  if (version_attr == NULL)
+    {
+      if (feature_mask)
+	*feature_mask = 0ULL;
+
+      if (feature_priority)
+	feature_priority->safe_push (LA_PRIO_NONE);
+      return;
+    }
+
+  string_slice version_string
+    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
+  loongarch_parse_fmv_features (decl, version_string, feature_mask,
+				feature_priority);
+}
+
+/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
+   to return a pointer to VERSION_DECL if all feature bits specified in
+   FEATURE_MASK are not set in MASK_VAR.  This function will be called during
+   version dispatch to decide which function version to execute.  It returns
+   the basic block at the end, to which more conditions can be added.  */
+static basic_block
+add_condition_to_bb (tree function_decl, tree version_decl,
+		     loongarch_fmv_feature_mask feature_mask,
+		     tree mask_var, basic_block new_bb)
+{
+  gimple *return_stmt;
+  tree convert_expr, result_var;
+  gimple *convert_stmt;
+  gimple *if_else_stmt;
+
+  basic_block bb1, bb2, bb3;
+  edge e12, e23;
+
+  gimple_seq gseq;
+
+  push_cfun (DECL_STRUCT_FUNCTION (function_decl));
+
+  gcc_assert (new_bb != NULL);
+  gseq = bb_seq (new_bb);
+
+  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
+			 build_fold_addr_expr (version_decl));
+  result_var = create_tmp_var (ptr_type_node);
+  convert_stmt = gimple_build_assign (result_var, convert_expr);
+  return_stmt = gimple_build_return (result_var);
+
+  if (feature_mask == 0ULL)
+    {
+      /* Default version.
*/
+      gimple_seq_add_stmt (&gseq, convert_stmt);
+      gimple_seq_add_stmt (&gseq, return_stmt);
+      set_bb_seq (new_bb, gseq);
+      gimple_set_bb (convert_stmt, new_bb);
+      gimple_set_bb (return_stmt, new_bb);
+      pop_cfun ();
+      return new_bb;
+    }
+
+  tree and_expr_var = create_tmp_var (unsigned_type_node);
+  tree and_expr = build2 (BIT_AND_EXPR,
+			  long_long_unsigned_type_node,
+			  mask_var,
+			  build_int_cst (unsigned_type_node,
+					 feature_mask));
+  gimple *and_stmt = gimple_build_assign (and_expr_var, and_expr);
+  gimple_set_block (and_stmt, DECL_INITIAL (function_decl));
+  gimple_set_bb (and_stmt, new_bb);
+  gimple_seq_add_stmt (&gseq, and_stmt);
+
+  tree zero_llu = build_int_cst (unsigned_type_node, 0);
+  if_else_stmt = gimple_build_cond (EQ_EXPR, and_expr_var, zero_llu,
+				    NULL_TREE, NULL_TREE);
+  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
+  gimple_set_bb (if_else_stmt, new_bb);
+  gimple_seq_add_stmt (&gseq, if_else_stmt);
+
+  gimple_seq_add_stmt (&gseq, convert_stmt);
+  gimple_seq_add_stmt (&gseq, return_stmt);
+  set_bb_seq (new_bb, gseq);
+
+  bb1 = new_bb;
+  e12 = split_block (bb1, if_else_stmt);
+  bb2 = e12->dest;
+  e12->flags &= ~EDGE_FALLTHRU;
+  e12->flags |= EDGE_TRUE_VALUE;
+
+  e23 = split_block (bb2, return_stmt);
+
+  gimple_set_bb (convert_stmt, bb2);
+  gimple_set_bb (return_stmt, bb2);
+
+  bb3 = e23->dest;
+  make_edge (bb1, bb3, EDGE_FALSE_VALUE);
+
+  remove_edge (e23);
+  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
+
+  pop_cfun ();
+
+  return bb3;
+}
+
+/* This function generates the dispatch function for
+   multi-versioned functions.  DISPATCH_DECL is the function which will
+   contain the dispatch logic.  FNDECLS_P points to the vector of function
+   choices for dispatch.  EMPTY_BB is the basic block pointer in
+   DISPATCH_DECL in which the dispatch code is generated.  */
+
+static int
+dispatch_function_versions (tree dispatch_decl,
+			    void *fndecls_p,
+			    basic_block *empty_bb)
+{
+  gimple *ifunc_cpu_init_stmt;
+  gimple_seq gseq;
+  vec<tree> *fndecls;
+
+  gcc_assert (dispatch_decl != NULL
+	      && fndecls_p != NULL
+	      && empty_bb != NULL);
+
+  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
+
+  gseq = bb_seq (*empty_bb);
+  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
+     constructors, so explicitly call __init_loongarch_features_resolver
+     here.  */
+  tree init_fn_type = build_function_type_list (void_type_node,
+						void_type_node,
+						NULL);
+  tree init_fn_id = get_identifier ("__init_loongarch_features_resolver");
+  tree init_fn_decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL,
+				  init_fn_id, init_fn_type);
+  DECL_EXTERNAL (init_fn_decl) = 1;
+  TREE_PUBLIC (init_fn_decl) = 1;
+  DECL_VISIBILITY (init_fn_decl) = VISIBILITY_HIDDEN;
+  DECL_VISIBILITY_SPECIFIED (init_fn_decl) = 1;
+  ifunc_cpu_init_stmt = gimple_build_call (init_fn_decl, 0);
+  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
+  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
+
+  /* Build the struct type for __loongarch_feature_bits.
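+     This is roughly the IR equivalent of declaring (the layout is
+     assumed here to be provided by the runtime support code):
+
+       extern struct { unsigned int features; } __loongarch_feature_bits
+	 __attribute__ ((visibility ("hidden")));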
*/ + tree global_type = lang_hooks.types.make_type (RECORD_TYPE); + tree field1 = build_decl (UNKNOWN_LOCATION, FIELD_DECL, + get_identifier ("features"), + unsigned_type_node); + DECL_FIELD_CONTEXT (field1) = global_type; + TYPE_FIELDS (global_type) = field1; + layout_type (global_type); + + tree global_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier ("__loongarch_feature_bits"), + global_type); + DECL_EXTERNAL (global_var) = 1; + TREE_PUBLIC (global_var) = 1; + DECL_VISIBILITY (global_var) = VISIBILITY_HIDDEN; + DECL_VISIBILITY_SPECIFIED (global_var) = 1; + tree mask_var = create_tmp_var (unsigned_type_node); + + tree component_expr = build3 (COMPONENT_REF, unsigned_type_node, + global_var, field1, NULL_TREE); + gimple *component_stmt = gimple_build_assign (mask_var, component_expr); + gimple_set_block (component_stmt, DECL_INITIAL (dispatch_decl)); + gimple_set_bb (component_stmt, *empty_bb); + gimple_seq_add_stmt (&gseq, component_stmt); + + tree not_expr = build1 (BIT_NOT_EXPR, unsigned_type_node, mask_var); + gimple *not_stmt = gimple_build_assign (mask_var, not_expr); + gimple_set_block (not_stmt, DECL_INITIAL (dispatch_decl)); + gimple_set_bb (not_stmt, *empty_bb); + gimple_seq_add_stmt (&gseq, not_stmt); + + set_bb_seq (*empty_bb, gseq); + + pop_cfun (); + + /* fndecls_p is actually a vector. */ + fndecls = static_cast<vec<tree> *> (fndecls_p); + + /* At least one more version other than the default. */ + unsigned int num_versions = fndecls->length (); + gcc_assert (num_versions >= 2); + + int i; + tree version_decl; + FOR_EACH_VEC_ELT_REVERSE (*fndecls, i, version_decl) + { + loongarch_fmv_feature_mask feature_mask = 0; + /* Get attribute string, parse it and find the right features. */ + get_feature_mask_for_version (version_decl, &feature_mask, + NULL); + *empty_bb = add_condition_to_bb (dispatch_decl, + version_decl, + feature_mask, + mask_var, + *empty_bb); + } + + return 0; +} + +/* Make the resolver function decl to dispatch the versions of + a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is + ifunc alias that will point to the created resolver. Create an + empty basic block in the resolver and store the pointer in + EMPTY_BB. Return the decl of the resolver function. */ + +static tree +make_resolver_func (const tree default_decl, + const tree ifunc_alias_decl, + basic_block *empty_bb) +{ + tree decl, type, t; + + /* Create resolver function name based on default_decl. We need to remove an + existing ".default" suffix if this has already been appended. */ + tree decl_name = get_suffixed_assembler_name (default_decl, ".resolver"); + const char *resolver_name = IDENTIFIER_POINTER (decl_name); + + /* The resolver function should return a (void *). */ + type = build_function_type_list (ptr_type_node, NULL_TREE); + + decl = build_fn_decl (resolver_name, type); + SET_DECL_ASSEMBLER_NAME (decl, decl_name); + + DECL_NAME (decl) = decl_name; + TREE_USED (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; + DECL_IGNORED_P (decl) = 1; + TREE_PUBLIC (decl) = 0; + DECL_UNINLINABLE (decl) = 1; + + /* Resolver is not external, body is generated. */ + DECL_EXTERNAL (decl) = 0; + DECL_EXTERNAL (ifunc_alias_decl) = 0; + + DECL_CONTEXT (decl) = NULL_TREE; + DECL_INITIAL (decl) = make_node (BLOCK); + DECL_STATIC_CONSTRUCTOR (decl) = 0; + + if (DECL_COMDAT_GROUP (default_decl) + || TREE_PUBLIC (default_decl)) + { + /* In this case, each translation unit with a call to this + versioned function will put out a resolver. Ensure it + is comdat to keep just one copy. 
*/ + DECL_COMDAT (decl) = 1; + make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); + } + else + TREE_PUBLIC (ifunc_alias_decl) = 0; + + /* Build result decl and add to function_decl. */ + t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node); + DECL_CONTEXT (t) = decl; + DECL_ARTIFICIAL (t) = 1; + DECL_IGNORED_P (t) = 1; + DECL_RESULT (decl) = t; + + gimplify_function_tree (decl); + push_cfun (DECL_STRUCT_FUNCTION (decl)); + *empty_bb = init_lowered_empty_function (decl, false, + profile_count::uninitialized ()); + + cgraph_node::add_new_function (decl, true); + symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl)); + + pop_cfun (); + + gcc_assert (ifunc_alias_decl != NULL); + /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */ + DECL_ATTRIBUTES (ifunc_alias_decl) + = make_attribute ("ifunc", resolver_name, + DECL_ATTRIBUTES (ifunc_alias_decl)); + + /* Create the alias for dispatch to resolver here. */ + cgraph_node::create_same_body_alias (ifunc_alias_decl, decl); + return decl; +} + +/* Implement TARGET_GENERATE_VERSION_DISPATCHER_BODY. */ + +tree +loongarch_generate_version_dispatcher_body (void *node_p) +{ + tree resolver_decl; + basic_block empty_bb; + tree default_ver_decl; + struct cgraph_node *versn; + struct cgraph_node *node; + + struct cgraph_function_version_info *node_version_info = NULL; + struct cgraph_function_version_info *versn_info = NULL; + + node = (cgraph_node *)node_p; + + node_version_info = node->function_version (); + gcc_assert (node->dispatcher_function + && node_version_info != NULL); + + if (node_version_info->dispatcher_resolver) + return node_version_info->dispatcher_resolver; + + /* The first version in the chain corresponds to the default version. */ + default_ver_decl = node_version_info->next->this_node->decl; + + /* node is going to be an alias, so remove the finalized bit. */ + node->definition = false; + + resolver_decl = make_resolver_func (default_ver_decl, + node->decl, &empty_bb); + + node_version_info->dispatcher_resolver = resolver_decl; + + push_cfun (DECL_STRUCT_FUNCTION (resolver_decl)); + + auto_vec<tree, 2> fn_ver_vec; + + for (versn_info = node_version_info->next; versn_info; + versn_info = versn_info->next) + { + versn = versn_info->this_node; + /* Check for virtual functions here again, as by this time it should + have been determined if this function needs a vtable index or + not. This happens for methods in derived classes that override + virtual methods in base classes but are not explicitly marked as + virtual. */ + if (DECL_VINDEX (versn->decl)) + sorry ("virtual function multiversioning not supported"); + + fn_ver_vec.safe_push (versn->decl); + } + + dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); + cgraph_edge::rebuild_edges (); + pop_cfun (); + + /* Fix up symbol names. First we need to obtain the base name, which may + have already been mangled. */ + tree base_name = get_suffixed_assembler_name (default_ver_decl, ""); + + /* We need to redo the version mangling on the non-default versions for the + target_clones case. Redoing the mangling for the target_version case is + redundant but does no harm. We need to skip the default version, because + expand_clones will append ".default" later; fortunately that suffix is the + one we want anyway. 
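+   As an illustration (the exact feature suffix comes from
+   loongarch_mangle_decl_assembler_name, so treat it as assumed here):
+   for a function foo, the ifunc keeps the base name "foo", the resolver
+   is emitted as "foo.resolver", the default version becomes
+   "foo.default", and each remaining version gets a feature-derived
+   suffix appended to the base name.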
*/
+  for (versn_info = node_version_info->next->next; versn_info;
+       versn_info = versn_info->next)
+    {
+      tree version_decl = versn_info->this_node->decl;
+      tree name = loongarch_mangle_decl_assembler_name (version_decl,
+							base_name);
+      symtab->change_decl_assembler_name (version_decl, name);
+    }
+
+  /* We also need to use the base name for the ifunc declaration.  */
+  symtab->change_decl_assembler_name (node->decl, base_name);
+
+  return resolver_decl;
+}
+
+/* This function returns true if STR1 and STR2 name the same function
+   version, that is, if the two target_version attribute strings parse
+   to identical feature masks.  The corresponding function decls are
+   assumed to have the same signature.  */
+
+bool
+loongarch_option_same_function_versions (string_slice str1, string_slice str2)
+{
+  loongarch_fmv_feature_mask feature_mask1;
+  loongarch_fmv_feature_mask feature_mask2;
+  loongarch_parse_fmv_features (NULL, str1,
+				&feature_mask1, NULL);
+  loongarch_parse_fmv_features (NULL, str2,
+				&feature_mask2, NULL);
+
+  return feature_mask1 == feature_mask2;
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -11607,6 +12403,30 @@ loongarch_can_inline_p (tree caller, tree callee)
 #undef TARGET_CAN_INLINE_P
 #define TARGET_CAN_INLINE_P loongarch_can_inline_p
 
+#undef TARGET_OPTION_VALID_VERSION_ATTRIBUTE_P
+#define TARGET_OPTION_VALID_VERSION_ATTRIBUTE_P \
+  loongarch_option_valid_version_attribute_p
+
+#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
+#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
+  loongarch_get_function_versions_dispatcher
+
+#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
+#define TARGET_MANGLE_DECL_ASSEMBLER_NAME \
+  loongarch_mangle_decl_assembler_name
+
+#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
+#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
+  loongarch_generate_version_dispatcher_body
+
+#undef TARGET_COMPARE_VERSION_PRIORITY
+#define TARGET_COMPARE_VERSION_PRIORITY \
+  loongarch_compare_version_priority
+
+#undef TARGET_OPTION_SAME_FUNCTION_VERSIONS
+#define TARGET_OPTION_SAME_FUNCTION_VERSIONS \
+  loongarch_option_same_function_versions
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-loongarch.h"
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index e8819bf..b3fb482 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -1293,3 +1293,5 @@ struct GTY (()) machine_function
 
 #define TARGET_EXPLICIT_RELOCS \
   (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
+
+#define TARGET_HAS_FMV_TARGET_ATTRIBUTE 0
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 625f30c..763d514 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -518,6 +518,7 @@
 
 ;; These code iterators allow the signed and unsigned scc operations to use
 ;; the same template.
+(define_code_iterator any_ge [ge geu])
 (define_code_iterator any_gt [gt gtu])
 (define_code_iterator any_lt [lt ltu])
 (define_code_iterator any_le [le leu])
@@ -1636,6 +1637,80 @@
     operands[3] = tmp;
 })
 
+;; Optimize (a << imm1) | (b & imm2) to use the bstrins.w instruction;
+;; both a and b should be 32 bits, and the value of imm2 should equal
+;; (1LL << imm1) - 1.
+;; For example: (a << 1) | (b & 1)
+;;	slli.w	$r12,$r12,1
+;;	andi	$r13,$r13,1
+;;	or	$r12,$r12,$r13
+;; Optimized to use the bstrins.w instruction as below:
+;;	bstrins.w	$r13,$r12,31,1
+(define_insn_and_split "*bstrins_w_for_ior_ashift_and_extend"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(any_or_plus:DI
+	  (and:DI (match_operand:DI 1 "register_operand" "r")
+		  (match_operand:SI 2 "const_int_operand" "i"))
+	  (ashift:DI
+	    (sign_extract:DI
+	      (match_operand:DI 3 "register_operand" "r")
+	      (match_operand:SI 4 "const_uimm5_operand")
+	      (const_int 0))
+	    (match_operand:SI 5 "const_uimm5_operand"))))]
+  "TARGET_64BIT && loongarch_pre_reload_split ()
+   && !reg_overlap_mentioned_p (operands[0], operands[3])
+   && INTVAL (operands[2]) != 0 && INTVAL (operands[5]) != 0
+   && INTVAL (operands[2]) == (1LL << INTVAL (operands[5])) - 1
+   && INTVAL (operands[4]) + INTVAL (operands[5]) == 0x20"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+    emit_move_insn (operands[0], operands[1]);
+    rtx len = GEN_INT (32 - INTVAL (operands[5]));
+    rtx dest = gen_lowpart (SImode, operands[0]);
+    rtx op = gen_lowpart (SImode, operands[3]);
+    emit_insn (gen_insvsi (dest, len, operands[5], op));
+  })
+
+;; Optimize (a << imm1) | (b & imm2) to use the bstrins.d instruction; the
+;; sizes of a and b are 8 bits, 16 bits or 64 bits, and the value of imm2
+;; should equal (1LL << imm1) - 1.  (A C-level illustration follows at the
+;; end of this hunk.)
+(define_insn_and_split "*bstrins_d_for_ior_ashift_and"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(any_or_plus:DI
+	  (and:DI (match_operand:DI 1 "register_operand" "r")
+		  (match_operand:DI 2 "const_int_operand" "i"))
+	  (ashift:DI
+	    (match_operand:DI 3 "register_operand" "r")
+	    (match_operand:DI 4 "const_uimm63_operand"))))]
+  "TARGET_64BIT && loongarch_pre_reload_split ()
+   && !reg_overlap_mentioned_p (operands[0], operands[3])
+   && INTVAL (operands[2]) != 0 && INTVAL (operands[4]) != 0
+   && INTVAL (operands[2]) == (1LL << INTVAL (operands[4])) - 1"
+  "#"
+  "&& true"
+  [(set (match_dup 0) (match_dup 1))
+   (set (zero_extract:DI (match_dup 0) (match_dup 2) (match_dup 4))
+	(match_dup 3))]
+  {
+    operands[2] = GEN_INT (64 - INTVAL (operands[4]));
+  })
+
+(define_insn "and_load_zero_extend<mode>"
+  [(set (match_operand:X 0 "register_operand" "=r,r,r,r,r,r")
+	(and:X (match_operand:X 1 "memory_operand" "%m,m,m,k,k,k")
+	       (match_operand:X 2 "mask_operand" "Yb,Yh,Yw,Yb,Yh,Yw")))]
+  ""
+  "@
+   ld.bu\t%0,%1
+   ld.hu\t%0,%1
+   ld.wu\t%0,%1
+   ldx.bu\t%0,%1
+   ldx.hu\t%0,%1
+   ldx.wu\t%0,%1"
+  [(set_attr "move_type" "load,load,load,load,load,load")
+   (set_attr "mode" "<MODE>")])
+
 ;; We always avoid the shift operation in bstrins_<mode>_for_ior_mask
 ;; if possible, but the result may be sub-optimal when one of the masks
 ;; is (1 << N) - 1 and one of the src registers is the dest register.
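;; A hedged C-level illustration of the bstrins.d pattern above
;; (identifiers are ours, not from this patch): for DImode values,
;;
;;   unsigned long
;;   pack (unsigned long a, unsigned long b)
;;   {
;;     return (a << 8) | (b & 0xff);  /* imm2 == (1LL << 8) - 1, so this
;;					 compiles to a single bstrins.d
;;					 instead of slli.d + andi + or.  */
;;   }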
@@ -1670,6 +1745,24 @@
   DONE;
 })
 
+(define_insn_and_split "bstrins_bstrpick_for_and_imm<mode>"
+  [(set (match_operand:X 0 "register_operand" "=r")
+	(and:X (match_operand:X 1 "register_operand" "r")
+	       (match_operand:X 2 "const_int_operand" "i")))]
+  "loongarch_use_bstrins_bstrpick_for_and (operands[2], <MODE>mode)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+{
+  unsigned HOST_WIDE_INT op2 = INTVAL (operands[2]);
+  int leading_zero_bit = __builtin_clzll (op2);
+  unsigned HOST_WIDE_INT mask = (~0ULL) << (64 - leading_zero_bit);
+  emit_insn (gen_extzv<mode> (operands[0], operands[1],
+			      GEN_INT (64 - leading_zero_bit), const0_rtx));
+  emit_insn (gen_and<mode>3 (operands[0], operands[0], GEN_INT (op2 | mask)));
+}
+  [(set_attr "length" "8")])
+
 (define_insn "*iorhi3"
   [(set (match_operand:HI 0 "register_operand" "=r,r")
 	(ior:HI (match_operand:HI 1 "register_operand" "%r,r")
@@ -1740,21 +1833,23 @@
 
 ;; This attribute used for get connection of scalar mode and corresponding
 ;; vector mode.
-(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
+(define_mode_attr cntmap [(SI "V4SI") (DI "V2DI")])
 
-(define_expand "popcount<mode>2"
-  [(set (match_operand:GPR 0 "register_operand")
-	(popcount:GPR (match_operand:GPR 1 "register_operand")))]
+(define_insn_and_split "popcount<mode>2"
+  [(set (match_operand:GPR 0 "register_operand" "=f")
+	(popcount:GPR (match_operand:GPR 1 "register_operand" "f")))]
   "ISA_HAS_LSX"
+  "#"
+  ;; Do the split very late to work around the unneeded zero-initialization
+  ;; emitted by the init-regs pass.  See PR61810 and all the referenced
+  ;; issues.
+  "&& reload_completed"
+  [(set (match_operand:<cntmap> 0 "register_operand" "=f")
+	(popcount:<cntmap>
+	  (match_operand:<cntmap> 1 "register_operand" "f")))]
 {
-  rtx in = operands[1];
-  rtx out = operands[0];
-  rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
-				    gen_reg_rtx (V2DImode);
-  emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
-  emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
-  emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
-  DONE;
+  operands[0] = gen_rtx_REG (<cntmap>mode, REGNO (operands[0]));
+  operands[1] = gen_rtx_REG (<cntmap>mode, REGNO (operands[1]));
 })
 
 ;;
@@ -2306,8 +2401,8 @@
 })
 
 (define_insn_and_split "*movsi_internal"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,f,*r,*m")
-	(match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,m,*f,*f"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,f,f,r,*m")
+	(match_operand:SI 1 "move_operand" "r,Yd,w,rJ,rJ,m,f,*f"))]
   "(register_operand (operands[0], SImode)
     || reg_or_0_operand (operands[1], SImode))"
   { return loongarch_output_move (operands); }
@@ -3495,6 +3590,15 @@
   [(set_attr "type" "slt")
    (set_attr "mode" "<X:MODE>")])
 
+(define_insn "*sge<u>_<X:mode><GPR:mode>"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+	(any_ge:GPR (match_operand:X 1 "register_operand" " r")
+		    (const_int 1)))]
+  ""
+  "slt<u>\t%0,zero,%1"
+  [(set_attr "type" "slt")
+   (set_attr "mode" "<X:MODE>")])
+
 (define_insn "*sgt<u>_<X:mode><GPR:mode>"
   [(set (match_operand:GPR 0 "register_operand" "=r")
 	(any_gt:GPR (match_operand:X 1 "register_operand" "r")
diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls
index c93f046..a72075d 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -4,7 +4,7 @@ mfpu=
 UrlSuffix(gcc/LoongArch-Options.html#index-mfpu-2)
 
 msoft-float
-UrlSuffix(gcc/LoongArch-Options.html#index-msoft-float-5)
+UrlSuffix(gcc/LoongArch-Options.html#index-msoft-float-4)
 
 msingle-float
 UrlSuffix(gcc/LoongArch-Options.html#index-msingle-float)
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index cd87757..3b06d2e 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -45,7 +45,6 @@
   UNSPEC_LSX_VSAT_U
   UNSPEC_LSX_VSRAR
   UNSPEC_LSX_VSRLR
-  UNSPEC_LSX_VSHUF
   UNSPEC_LSX_VEXTW_S
   UNSPEC_LSX_VEXTW_U
   UNSPEC_LSX_VSLLWIL_S
@@ -86,7 +85,6 @@
   UNSPEC_LSX_VSSRLN
   UNSPEC_LSX_VSSRLRN
   UNSPEC_LSX_VLDI
-  UNSPEC_LSX_VSHUF_B
   UNSPEC_LSX_VSTX
   UNSPEC_LSX_VEXTL_QU_DU
   UNSPEC_LSX_VSETEQZ_V
@@ -133,12 +131,6 @@
 ;; Only used for copy_{u,s}.w and vilvh.
 (define_mode_iterator LSX_W [V4SI V4SF])
 
-;; As ILSX but excludes V16QI.
-(define_mode_iterator ILSX_DWH [V2DI V4SI V8HI])
-
-;; As LSX but excludes V16QI.
-(define_mode_iterator LSX_DWH [V2DF V4SF V2DI V4SI V8HI])
-
 ;; As ILSX but excludes V2DI.
 (define_mode_iterator ILSX_WHB [V4SI V8HI V16QI])
@@ -521,28 +513,19 @@
 })
 
 (define_expand "vec_perm<mode>"
-  [(match_operand:LSX 0 "register_operand")
-   (match_operand:LSX 1 "register_operand")
-   (match_operand:LSX 2 "register_operand")
-   (match_operand:<VIMODE> 3 "register_operand")]
-  "ISA_HAS_LSX"
-{
-  loongarch_expand_vec_perm (operands[0], operands[1],
-			     operands[2], operands[3]);
-  DONE;
-})
-
-(define_insn "@lsx_vshuf_<lsxfmt_f>"
-  [(set (match_operand:LSX_DWH 0 "register_operand" "=f")
-	(unspec:LSX_DWH [(match_operand:<VIMODE> 1 "register_operand" "0")
-			 (match_operand:LSX_DWH 2 "register_operand" "f")
-			 (match_operand:LSX_DWH 3 "register_operand" "f")]
-			UNSPEC_LSX_VSHUF))]
-  "ISA_HAS_LSX"
-  "vshuf.<lsxfmt>\t%w0,%w2,%w3"
-  [(set_attr "type" "simd_sld")
-   (set_attr "mode" "<MODE>")])
-
+  [(set (match_dup 4)
+	(and:<VIMODE> (match_operand:<VIMODE> 3 "register_operand")
+		      (match_dup 5)))
+   (set (match_operand:LSX 0 "register_operand")
+	(unspec:LSX [(match_operand:LSX 2 "register_operand")
+		     (match_operand:LSX 1 "register_operand")
+		     (match_dup 4)]
+		    UNSPEC_SIMD_VSHUF))]
+  "ISA_HAS_LSX"
+  {
+    operands[4] = gen_reg_rtx (<VIMODE>mode);
+    operands[5] = gen_const_vec_duplicate (<VIMODE>mode, GEN_INT (0x1f));
+  })
 
 ;; Integer operations
 
 (define_insn "add<mode>3"
@@ -1631,6 +1614,39 @@
   [(set_attr "type" "simd_shf")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "lsx_vshuf4i_mem_w_0"
+  [(set (match_operand:V4SI 0 "register_operand" "=f")
+	(vec_merge:V4SI
+	  (vec_duplicate:V4SI
+	    (mem:SI (match_operand:DI 1 "register_operand" "r")))
+	  (vec_duplicate:V4SI
+	    (mem:SI (plus:DI (match_dup 1) (const_int 4))))
+	  (match_operand 2 "const_uimm4_operand" "")))]
+  "ISA_HAS_LSX"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  operands[0] = gen_rtx_REG (V2DImode, REGNO (operands[0]));
+  emit_insn (gen_lsx_vldrepl_d_insn_0 (operands[0], operands[1]));
+
+  operands[0] = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+  rtx sel[4];
+  int op2 = INTVAL (operands[2]);
+  int mask = 1;
+
+  /* Convert imm to a selection.  */
+  for (int i = 0; i < 4; ++i)
+    {
+      sel[i] = (op2 & mask) ? const0_rtx : const1_rtx;
+      mask = mask << 1;
+    }
+
+  rtx shuf4i_mask = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, sel));
+  emit_insn (gen_lsx_vshuf4i_w (operands[0], operands[0], shuf4i_mask));
+  DONE;
+})
+
 (define_insn "lsx_vsrar_<lsxfmt>"
   [(set (match_operand:ILSX 0 "register_operand" "=f")
 	(unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f")
@@ -1680,11 +1696,15 @@
   [(set_attr "type" "simd_splat")
    (set_attr "mode" "<MODE>")])
 
+;; UNSPEC_LSX_VREPLVEI_MIRROR describes the mirror operation that copies
+;; the lower 64 bits of a 128-bit register to the upper 64 bits.  It is
+;; only used when the high half is the same as the low.
+
 (define_insn "lsx_vreplvei_mirror_<lsxfmt_f>"
   [(set (match_operand:LSX 0 "register_operand" "=f")
 	(unspec: LSX [(match_operand:LSX 1 "register_operand" "f")
-		      (match_operand 2 "const_<indeximm>_operand" "")]
-		     UNSPEC_LSX_VREPLVEI_MIRROR))]
+		      (match_operand 2 "const_0_or_1_operand" "")]
+		     UNSPEC_LSX_VREPLVEI_MIRROR))]
   "ISA_HAS_LSX"
   "vreplvei.d\t%w0,%w1,%2"
  [(set_attr "type" "simd_splat")
@@ -2550,6 +2570,35 @@
    (set_attr "mode" "<MODE>")
    (set_attr "length" "4")])
 
+;; In a 128-bit register, this template implements the load of identical
+;; consecutive SImode data into both the upper 64 bits and the lower
+;; 64 bits.  Operand[2] performs a vec_merge operation on two SImode data
+;; items at consecutive addresses, and places the result in either the
+;; lower 64 bits or the upper 64 bits. 
When operand[3] is 0, the lower 64 bits are copied +;; to the upper 64 bits; when operand[3] is 1, the upper 64 bits are copied +;; to the lower 64 bits. + +(define_insn "lsx_vldrepl_merge_w_0" + [(set (match_operand:V4SI 0 "register_operand" "=f") + (unspec:V4SI + [(vec_merge:V4SI + (vec_duplicate:V4SI + (mem:SI (match_operand:DI 1 "register_operand" "r"))) + (vec_duplicate:V4SI + (mem:SI (plus:DI (match_dup 1) (const_int 4)))) + (match_operand 2 "const_uimm4_operand" "")) + (match_operand 3 "const_0_or_1_operand" "")] + UNSPEC_LSX_VREPLVEI_MIRROR))] + "ISA_HAS_LSX + && (INTVAL (operands[3]) ? (INTVAL (operands[2]) & 0xc) == 0x4 + : (INTVAL (operands[2]) & 0x3) == 0x1)" +{ + return "vldrepl.d\t%w0,%1,0"; +} + [(set_attr "type" "simd_load") + (set_attr "mode" "V4SI") + (set_attr "length" "4")]) + ;; Offset store by sel (define_expand "lsx_vstelm_<lsxfmt_f>" [(match_operand:LSX 0 "register_operand") @@ -2668,17 +2717,6 @@ [(set_attr "type" "simd_load") (set_attr "mode" "V2DI")]) -(define_insn "lsx_vshuf_b" - [(set (match_operand:V16QI 0 "register_operand" "=f") - (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "f") - (match_operand:V16QI 2 "register_operand" "f") - (match_operand:V16QI 3 "register_operand" "f")] - UNSPEC_LSX_VSHUF_B))] - "ISA_HAS_LSX" - "vshuf.b\t%w0,%w1,%w2,%w3" - [(set_attr "type" "simd_shf") - (set_attr "mode" "V16QI")]) - (define_insn "lsx_vstx" [(set (mem:V16QI (plus:DI (match_operand:DI 1 "register_operand" "r") (match_operand:DI 2 "reg_or_0_operand" "rJ"))) diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md index 34cf74d..7e4d8ab 100644 --- a/gcc/config/loongarch/predicates.md +++ b/gcc/config/loongarch/predicates.md @@ -135,6 +135,10 @@ (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), -16, 15)"))) +(define_predicate "const_uimm63_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 63)"))) + (define_predicate "const_imm10_operand" (and (match_code "const_int") (match_test "IMM10_OPERAND (INTVAL (op))"))) @@ -413,6 +417,11 @@ (match_operand 0 "low_bitmask_operand") (match_operand 0 "ins_zero_bitmask_operand"))) +(define_predicate "mask_operand" + (ior (match_operand 0 "qi_mask_operand") + (match_operand 0 "hi_mask_operand") + (match_operand 0 "si_mask_operand"))) + (define_predicate "const_call_insn_operand" (match_code "const,symbol_ref,label_ref") { @@ -570,8 +579,8 @@ (define_predicate "symbolic_pcrel_offset_operand" (and (match_code "plus") - (match_operand 0 "symbolic_pcrel_operand") - (match_operand 1 "const_int_operand"))) + (match_test "symbolic_pcrel_operand (XEXP (op, 0), mode)") + (match_test "const_int_operand (XEXP (op, 1), mode)"))) (define_predicate "mem_simple_ldst_operand" (match_code "mem") diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md index b73f65a..0ad1068 100644 --- a/gcc/config/loongarch/simd.md +++ b/gcc/config/loongarch/simd.md @@ -18,10 +18,14 @@ ;; <http://www.gnu.org/licenses/>. ;; Integer modes supported by LSX. -(define_mode_iterator ILSX [V2DI V4SI V8HI V16QI]) +(define_mode_iterator ILSX_DWH [V2DI V4SI V8HI]) +(define_mode_iterator ILSX [ILSX_DWH V16QI]) +(define_mode_iterator LSX_DWH [ILSX_DWH V2DF V4SF]) ;; Integer modes supported by LASX. -(define_mode_iterator ILASX [V4DI V8SI V16HI V32QI]) +(define_mode_iterator ILASX_DWH [V4DI V8SI V16HI]) +(define_mode_iterator ILASX [ILASX_DWH V32QI]) +(define_mode_iterator LASX_DWH [ILASX_DWH V4DF V8SF]) ;; Only integer modes smaller than a word. 
(define_mode_iterator ILSX_HB [V8HI V16QI]) @@ -46,6 +50,10 @@ (define_mode_iterator IVEC_HB [(ILSX_HB "ISA_HAS_LSX") (ILASX_HB "ISA_HAS_LASX")]) +;; All modes longer than a byte +(define_mode_iterator VEC_DWH [(LSX_DWH "ISA_HAS_LSX") + (LASX_DWH "ISA_HAS_LASX")]) + ;; All FP modes available (define_mode_iterator FVEC [(FLSX "ISA_HAS_LSX") (FLASX "ISA_HAS_LASX")]) @@ -255,7 +263,8 @@ UNSPEC_SIMD_FRINTRZ UNSPEC_SIMD_FRINT UNSPEC_SIMD_FRINTRM - UNSPEC_SIMD_FRINTRNE]) + UNSPEC_SIMD_FRINTRNE + UNSPEC_SIMD_VSHUF]) (define_int_iterator SIMD_FRINT [UNSPEC_SIMD_FRINTRP @@ -1107,6 +1116,66 @@ [(set_attr "type" "simd_logic,simd_bit,simd_logic") (set_attr "mode" "<MODE>")]) +(define_insn "@simd_vshuf_<mode>" + [(set (match_operand:QIVEC 0 "register_operand" "=f") + (unspec:QIVEC [(match_operand:QIVEC 1 "register_operand" "f") + (match_operand:QIVEC 2 "register_operand" "f") + (match_operand:QIVEC 3 "register_operand" "f")] + UNSPEC_SIMD_VSHUF))] + "" + { + return "<x>vshuf.b\t%<wu>0,%<wu>1,%<wu>2,%<wu>3"; + } + [(set_attr "type" "simd_sld") + (set_attr "mode" "<MODE>")]) + +(define_insn "@simd_vshuf_<mode>" + [(set (match_operand:VEC_DWH 0 "register_operand" "=f") + (unspec:VEC_DWH [(match_operand:VEC_DWH 1 "register_operand" "f") + (match_operand:VEC_DWH 2 "register_operand" "f") + (match_operand:<VIMODE> 3 "register_operand" "0")] + UNSPEC_SIMD_VSHUF))] + "" + { + return "<x>vshuf.<simdfmt_as_i>\t%<wu>0,%<wu>1,%<wu>2"; + } + [(set_attr "type" "simd_sld") + (set_attr "mode" "<MODE>")]) + +;; Backward compatibility wrapper. New code should use simd_vshuf +;; directly instead: gen_simd_vshuf (mode, ...) can often significantly +;; simplify the logic. +(define_expand "<simd_isa>_<x>vshuf_<simdfmt><_f>" + [(match_operand:ALLVEC 0 "register_operand") + (match_operand 1 "register_operand") + (match_operand 2 "register_operand") + (match_operand 3 "register_operand")] + "" + { + rtx op0 = operands[0], op1, op2, op3; + + switch (<MODE>mode) + { + case V32QImode: + case V16QImode: + op1 = operands[1]; + op2 = operands[2]; + op3 = operands[3]; + break; + default: + op3 = operands[1]; + op1 = operands[2]; + op2 = operands[3]; + } + + gcc_assert (GET_MODE (op1) == <MODE>mode); + gcc_assert (GET_MODE (op2) == <MODE>mode); + gcc_assert (GET_MODE (op3) == <VIMODE>mode); + + emit_insn (gen_simd_vshuf (<MODE>mode, op0, op1, op2, op3)); + DONE; + }) + ; The LoongArch SX Instructions. 
(include "lsx.md") diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index 2ef8e88..5784dab 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -222,7 +222,7 @@ [(match_operand:V2DI 1 "register_operand" "f") (match_operand:SI 2 "const_int_operand")] ;; model UNSPEC_ATOMIC_STORE))] - "ISA_HAS_LSX && TARGET_64BIT" + "loongarch_16b_atomic_lock_free_p ()" { enum memmodel model = memmodel_base (INTVAL (operands[2])); @@ -243,28 +243,12 @@ } [(set (attr "length") (const_int 12))]) -(define_insn "atomic_storeti_scq" - [(set (match_operand:TI 0 "memory_operand" "=m") - (unspec_volatile:TI - [(match_operand:TI 1 "register_operand" "r")] - UNSPEC_ATOMIC_STORE)) - (clobber (match_scratch:DI 2 "=&r"))] - "TARGET_64BIT && ISA_HAS_SCQ" - "1:\\n\\tll.d\t$r0,%0\n\tmove\t%2,%1\n\tsc.q\t%2,%t1,%0\n\tbeqz\t%2,1b" - [(set (attr "length") (const_int 16))]) - (define_expand "atomic_storeti" [(match_operand:TI 0 "memory_operand" "=m") (match_operand:TI 1 "reg_or_0_operand" "rJ") (match_operand:SI 2 "const_int_operand")] - "TARGET_64BIT && (ISA_HAS_LSX || ISA_HAS_SCQ)" + "loongarch_16b_atomic_lock_free_p ()" { - if (!ISA_HAS_LSX) - { - emit_insn (gen_atomic_storeti_scq (operands[0], operands[1])); - DONE; - } - rtx vr = gen_reg_rtx (V2DImode), op1 = operands[1]; rtvec v = rtvec_alloc (2); @@ -330,7 +314,7 @@ } [(set (attr "length") (const_int 16))]) -(define_mode_iterator ALL_SC [GPR (TI "TARGET_64BIT && ISA_HAS_SCQ")]) +(define_mode_iterator ALL_SC [GPR (TI "loongarch_16b_atomic_lock_free_p ()")]) (define_mode_attr _scq [(SI "") (DI "") (TI "_scq")]) (define_expand "atomic_fetch_nand<mode>" [(match_operand:ALL_SC 0 "register_operand") @@ -374,7 +358,7 @@ (set (match_dup 1) (match_operand:TI 2 "register_operand" "rJ")) (clobber (match_scratch:DI 3 "=&r"))] - "TARGET_64BIT && ISA_HAS_SCQ" + "loongarch_16b_atomic_lock_free_p ()" { output_asm_insn ("1:", operands); output_asm_insn ("ll.d\t%0,%1", operands); @@ -394,7 +378,7 @@ (match_operand:TI 1 "memory_operand" "+ZB") (match_operand:TI 2 "register_operand" "rJ") (match_operand:SI 3 "const_int_operand")] ;; model - "TARGET_64BIT && ISA_HAS_SCQ" + "loongarch_16b_atomic_lock_free_p ()" { emit_insn (gen_atomic_exchangeti_scq (operands[0], operands[1], operands[2])); @@ -694,7 +678,7 @@ (ne:FCC (match_dup 1) (match_dup 2))) (clobber (match_scratch:V2DI 6 "=&f")) (clobber (match_scratch:DI 7 "=&r"))] - "TARGET_64BIT && ISA_HAS_SCQ && ISA_HAS_LSX" + "loongarch_16b_atomic_lock_free_p ()" { output_asm_insn ("1:", operands); @@ -755,7 +739,7 @@ (match_operand:SI 5 "const_int_operand" "") ;; is_weak (match_operand:SI 6 "const_int_operand" "") ;; mod_s (match_operand:SI 7 "const_int_operand" "")] ;; mod_f - "TARGET_64BIT && ISA_HAS_SCQ && ISA_HAS_LSX" + "loongarch_16b_atomic_lock_free_p ()" { rtx fcc = gen_reg_rtx (FCCmode); rtx gpr = gen_reg_rtx (DImode); @@ -945,7 +929,7 @@ UNSPEC_TI_FETCH)) (clobber (match_scratch:DI 3 "=&r")) (clobber (match_scratch:DI 4 "=&r"))] - "TARGET_64BIT && ISA_HAS_SCQ" + "loongarch_16b_atomic_lock_free_p ()" { output_asm_insn ("1:", operands); output_asm_insn ("ll.d\t%0,%1", operands); @@ -998,7 +982,7 @@ (match_operand:TI 2 "reg_or_0_operand" "rJ")] UNSPEC_TI_FETCH_DIRECT)) (match_operand:SI 3 "const_int_operand")] ;; model - "TARGET_64BIT && ISA_HAS_SCQ" + "loongarch_16b_atomic_lock_free_p ()" { /* Model is ignored as sc.q implies a full barrier. 
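+     (So even a relaxed-order __atomic_fetch_add on a 16-byte object is
+     assumed to end up with full-barrier semantics here.)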
*/ emit_insn (gen_atomic_fetch_<amop_ti_fetch>ti_scq (operands[0], diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch index b7dbb4b..462ad51 100644 --- a/gcc/config/loongarch/t-loongarch +++ b/gcc/config/loongarch/t-loongarch @@ -103,8 +103,11 @@ s-loongarch-evolution: $(srcdir)/config/loongarch/genopts/genstr.sh \ $(srcdir)/config/loongarch/genopts/gen-evolution.awk $(SHELL) $< evolution_h > tmp-isa-evo.h $(SHELL) $< evolution_c > tmp-isa-evo.cc + $(SHELL) $< evolution_def > tmp-isa-evo.def $(SHELL) $(srcdir)/../move-if-change tmp-isa-evo.h \ $(srcdir)/config/loongarch/loongarch-evolution.h $(SHELL) $(srcdir)/../move-if-change tmp-isa-evo.cc \ $(srcdir)/config/loongarch/loongarch-evolution.cc + $(SHELL) $(srcdir)/../move-if-change tmp-isa-evo.def \ + $(srcdir)/config/loongarch/loongarch-evol-attr.def $(STAMP) $@ diff --git a/gcc/config/m68k/m68k.opt.urls b/gcc/config/m68k/m68k.opt.urls index 1f1ac88..bb5e1a0 100644 --- a/gcc/config/m68k/m68k.opt.urls +++ b/gcc/config/m68k/m68k.opt.urls @@ -70,7 +70,7 @@ mdiv UrlSuffix(gcc/M680x0-Options.html#index-mdiv-1) mhard-float -UrlSuffix(gcc/M680x0-Options.html#index-mhard-float-2) +UrlSuffix(gcc/M680x0-Options.html#index-mhard-float-1) ; skipping UrlSuffix for 'mid-shared-library' due to finding no URLs @@ -96,7 +96,7 @@ mshort UrlSuffix(gcc/M680x0-Options.html#index-mshort) msoft-float -UrlSuffix(gcc/M680x0-Options.html#index-msoft-float-6) +UrlSuffix(gcc/M680x0-Options.html#index-msoft-float-5) mstrict-align UrlSuffix(gcc/M680x0-Options.html#index-mstrict-align-2) diff --git a/gcc/config/microblaze/microblaze.opt.urls b/gcc/config/microblaze/microblaze.opt.urls index 33b13b8..be42305 100644 --- a/gcc/config/microblaze/microblaze.opt.urls +++ b/gcc/config/microblaze/microblaze.opt.urls @@ -1,10 +1,10 @@ ; Autogenerated by regenerate-opt-urls.py from gcc/config/microblaze/microblaze.opt and generated HTML msoft-float -UrlSuffix(gcc/MicroBlaze-Options.html#index-msoft-float-7) +UrlSuffix(gcc/MicroBlaze-Options.html#index-msoft-float-6) mhard-float -UrlSuffix(gcc/MicroBlaze-Options.html#index-mhard-float-3) +UrlSuffix(gcc/MicroBlaze-Options.html#index-mhard-float-2) msmall-divides UrlSuffix(gcc/MicroBlaze-Options.html#index-msmall-divides) diff --git a/gcc/config/mingw/winnt.cc b/gcc/config/mingw/winnt.cc index f224966..fe2fb4c 100644 --- a/gcc/config/mingw/winnt.cc +++ b/gcc/config/mingw/winnt.cc @@ -339,6 +339,28 @@ mingw_pe_encode_section_info (tree decl, rtx rtl, int first) SYMBOL_REF_FLAGS (symbol) = flags; } +/* Handle a "ms_abi" attribute; arguments as in struct + attribute_spec.handler. */ + +tree +aarch64_handle_ms_abi_attribute (tree *node, tree name, tree, int, + bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + + return NULL_TREE; + } + + return NULL_TREE; +} + bool i386_pe_binds_local_p (const_tree exp) @@ -424,8 +446,11 @@ mingw_pe_unique_section (tree decl, int reloc) prefix = ".text$"; else if (decl_readonly_section (decl, reloc)) prefix = ".rdata$"; + /* Note that we need two dollar signs for TLS sections + because they need to be ASCII-sorted before .tls$ZZZ + to be properly laid out by the GNU linker. 
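+	 For instance, a section named ".tls$$foo" ASCII-sorts after
+	 ".tls$" and before ".tls$ZZZ" because '$' precedes every digit
+	 and letter, which is assumed to keep its data inside the
+	 runtime's TLS start/end markers.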
*/ else if (DECL_THREAD_LOCAL_P (decl)) - prefix = ".tls$"; + prefix = ".tls$$"; else prefix = ".data$"; len = strlen (name) + strlen (prefix); @@ -500,9 +525,6 @@ mingw_pe_asm_named_section (const char *name, unsigned int flags, *f++ = 'e'; #endif - if (strcmp (name, ".tls$") == 0) - *f++ = 'd'; - if ((flags & (SECTION_CODE | SECTION_WRITE)) == 0) /* readonly data */ { @@ -511,6 +533,8 @@ mingw_pe_asm_named_section (const char *name, unsigned int flags, } else { + if (startswith (name, ".tls$")) + *f++ = 'd'; if (flags & SECTION_CODE) *f++ = 'x'; if (flags & SECTION_WRITE) diff --git a/gcc/config/mingw/winnt.h b/gcc/config/mingw/winnt.h index 23f4dc9..ccb5e58 100644 --- a/gcc/config/mingw/winnt.h +++ b/gcc/config/mingw/winnt.h @@ -20,6 +20,7 @@ http://www.gnu.org/licenses/. */ #ifndef USED_FOR_TARGET +extern tree aarch64_handle_ms_abi_attribute (tree *, tree, tree, int, bool *); extern tree mingw_handle_selectany_attribute (tree *, tree, tree, int, bool *); extern void mingw_pe_asm_named_section (const char *, unsigned int, tree); diff --git a/gcc/config/mips/mips-cpus.def b/gcc/config/mips/mips-cpus.def index dfc4116..c45da84 100644 --- a/gcc/config/mips/mips-cpus.def +++ b/gcc/config/mips/mips-cpus.def @@ -62,6 +62,7 @@ MIPS_CPU ("r3900", PROCESSOR_R3900, MIPS_ISA_MIPS1, 0) /* MIPS II processors. */ MIPS_CPU ("r6000", PROCESSOR_R6000, MIPS_ISA_MIPS2, 0) +MIPS_CPU ("allegrex", PROCESSOR_ALLEGREX, MIPS_ISA_MIPS2, 0) /* MIPS III processors. */ MIPS_CPU ("r4000", PROCESSOR_R4000, MIPS_ISA_MIPS3, 0) diff --git a/gcc/config/mips/mips-tables.opt b/gcc/config/mips/mips-tables.opt index 027abd7..5fccd98 100644 --- a/gcc/config/mips/mips-tables.opt +++ b/gcc/config/mips/mips-tables.opt @@ -160,554 +160,557 @@ EnumValue Enum(mips_arch_opt_value) String(6k) Value(17) EnumValue -Enum(mips_arch_opt_value) String(r4000) Value(18) Canonical +Enum(mips_arch_opt_value) String(allegrex) Value(18) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4k) Value(18) +Enum(mips_arch_opt_value) String(r4000) Value(19) Canonical EnumValue -Enum(mips_arch_opt_value) String(4000) Value(18) +Enum(mips_arch_opt_value) String(r4k) Value(19) EnumValue -Enum(mips_arch_opt_value) String(4k) Value(18) +Enum(mips_arch_opt_value) String(4000) Value(19) EnumValue -Enum(mips_arch_opt_value) String(vr4100) Value(19) Canonical +Enum(mips_arch_opt_value) String(4k) Value(19) EnumValue -Enum(mips_arch_opt_value) String(4100) Value(19) +Enum(mips_arch_opt_value) String(vr4100) Value(20) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4100) Value(19) +Enum(mips_arch_opt_value) String(4100) Value(20) EnumValue -Enum(mips_arch_opt_value) String(vr4111) Value(20) Canonical +Enum(mips_arch_opt_value) String(r4100) Value(20) EnumValue -Enum(mips_arch_opt_value) String(4111) Value(20) +Enum(mips_arch_opt_value) String(vr4111) Value(21) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4111) Value(20) +Enum(mips_arch_opt_value) String(4111) Value(21) EnumValue -Enum(mips_arch_opt_value) String(vr4120) Value(21) Canonical +Enum(mips_arch_opt_value) String(r4111) Value(21) EnumValue -Enum(mips_arch_opt_value) String(4120) Value(21) +Enum(mips_arch_opt_value) String(vr4120) Value(22) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4120) Value(21) +Enum(mips_arch_opt_value) String(4120) Value(22) EnumValue -Enum(mips_arch_opt_value) String(vr4130) Value(22) Canonical +Enum(mips_arch_opt_value) String(r4120) Value(22) EnumValue -Enum(mips_arch_opt_value) String(4130) Value(22) +Enum(mips_arch_opt_value) String(vr4130) 
Value(23) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4130) Value(22) +Enum(mips_arch_opt_value) String(4130) Value(23) EnumValue -Enum(mips_arch_opt_value) String(vr4300) Value(23) Canonical +Enum(mips_arch_opt_value) String(r4130) Value(23) EnumValue -Enum(mips_arch_opt_value) String(4300) Value(23) +Enum(mips_arch_opt_value) String(vr4300) Value(24) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4300) Value(23) +Enum(mips_arch_opt_value) String(4300) Value(24) EnumValue -Enum(mips_arch_opt_value) String(r4400) Value(24) Canonical +Enum(mips_arch_opt_value) String(r4300) Value(24) EnumValue -Enum(mips_arch_opt_value) String(4400) Value(24) +Enum(mips_arch_opt_value) String(r4400) Value(25) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4600) Value(25) Canonical +Enum(mips_arch_opt_value) String(4400) Value(25) EnumValue -Enum(mips_arch_opt_value) String(4600) Value(25) +Enum(mips_arch_opt_value) String(r4600) Value(26) Canonical EnumValue -Enum(mips_arch_opt_value) String(orion) Value(26) Canonical +Enum(mips_arch_opt_value) String(4600) Value(26) EnumValue -Enum(mips_arch_opt_value) String(r4650) Value(27) Canonical +Enum(mips_arch_opt_value) String(orion) Value(27) Canonical EnumValue -Enum(mips_arch_opt_value) String(4650) Value(27) +Enum(mips_arch_opt_value) String(r4650) Value(28) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4700) Value(28) Canonical +Enum(mips_arch_opt_value) String(4650) Value(28) EnumValue -Enum(mips_arch_opt_value) String(4700) Value(28) +Enum(mips_arch_opt_value) String(r4700) Value(29) Canonical EnumValue -Enum(mips_arch_opt_value) String(r5900) Value(29) Canonical +Enum(mips_arch_opt_value) String(4700) Value(29) EnumValue -Enum(mips_arch_opt_value) String(5900) Value(29) +Enum(mips_arch_opt_value) String(r5900) Value(30) Canonical EnumValue -Enum(mips_arch_opt_value) String(loongson2e) Value(30) Canonical +Enum(mips_arch_opt_value) String(5900) Value(30) EnumValue -Enum(mips_arch_opt_value) String(loongson2f) Value(31) Canonical +Enum(mips_arch_opt_value) String(loongson2e) Value(31) Canonical EnumValue -Enum(mips_arch_opt_value) String(r8000) Value(32) Canonical +Enum(mips_arch_opt_value) String(loongson2f) Value(32) Canonical EnumValue -Enum(mips_arch_opt_value) String(r8k) Value(32) +Enum(mips_arch_opt_value) String(r8000) Value(33) Canonical EnumValue -Enum(mips_arch_opt_value) String(8000) Value(32) +Enum(mips_arch_opt_value) String(r8k) Value(33) EnumValue -Enum(mips_arch_opt_value) String(8k) Value(32) +Enum(mips_arch_opt_value) String(8000) Value(33) EnumValue -Enum(mips_arch_opt_value) String(r10000) Value(33) Canonical +Enum(mips_arch_opt_value) String(8k) Value(33) EnumValue -Enum(mips_arch_opt_value) String(r10k) Value(33) +Enum(mips_arch_opt_value) String(r10000) Value(34) Canonical EnumValue -Enum(mips_arch_opt_value) String(10000) Value(33) +Enum(mips_arch_opt_value) String(r10k) Value(34) EnumValue -Enum(mips_arch_opt_value) String(10k) Value(33) +Enum(mips_arch_opt_value) String(10000) Value(34) EnumValue -Enum(mips_arch_opt_value) String(r12000) Value(34) Canonical +Enum(mips_arch_opt_value) String(10k) Value(34) EnumValue -Enum(mips_arch_opt_value) String(r12k) Value(34) +Enum(mips_arch_opt_value) String(r12000) Value(35) Canonical EnumValue -Enum(mips_arch_opt_value) String(12000) Value(34) +Enum(mips_arch_opt_value) String(r12k) Value(35) EnumValue -Enum(mips_arch_opt_value) String(12k) Value(34) +Enum(mips_arch_opt_value) String(12000) Value(35) EnumValue -Enum(mips_arch_opt_value) String(r14000) 
Value(35) Canonical +Enum(mips_arch_opt_value) String(12k) Value(35) EnumValue -Enum(mips_arch_opt_value) String(r14k) Value(35) +Enum(mips_arch_opt_value) String(r14000) Value(36) Canonical EnumValue -Enum(mips_arch_opt_value) String(14000) Value(35) +Enum(mips_arch_opt_value) String(r14k) Value(36) EnumValue -Enum(mips_arch_opt_value) String(14k) Value(35) +Enum(mips_arch_opt_value) String(14000) Value(36) EnumValue -Enum(mips_arch_opt_value) String(r16000) Value(36) Canonical +Enum(mips_arch_opt_value) String(14k) Value(36) EnumValue -Enum(mips_arch_opt_value) String(r16k) Value(36) +Enum(mips_arch_opt_value) String(r16000) Value(37) Canonical EnumValue -Enum(mips_arch_opt_value) String(16000) Value(36) +Enum(mips_arch_opt_value) String(r16k) Value(37) EnumValue -Enum(mips_arch_opt_value) String(16k) Value(36) +Enum(mips_arch_opt_value) String(16000) Value(37) EnumValue -Enum(mips_arch_opt_value) String(vr5000) Value(37) Canonical +Enum(mips_arch_opt_value) String(16k) Value(37) EnumValue -Enum(mips_arch_opt_value) String(vr5k) Value(37) +Enum(mips_arch_opt_value) String(vr5000) Value(38) Canonical EnumValue -Enum(mips_arch_opt_value) String(5000) Value(37) +Enum(mips_arch_opt_value) String(vr5k) Value(38) EnumValue -Enum(mips_arch_opt_value) String(5k) Value(37) +Enum(mips_arch_opt_value) String(5000) Value(38) EnumValue -Enum(mips_arch_opt_value) String(r5000) Value(37) +Enum(mips_arch_opt_value) String(5k) Value(38) EnumValue -Enum(mips_arch_opt_value) String(r5k) Value(37) +Enum(mips_arch_opt_value) String(r5000) Value(38) EnumValue -Enum(mips_arch_opt_value) String(vr5400) Value(38) Canonical +Enum(mips_arch_opt_value) String(r5k) Value(38) EnumValue -Enum(mips_arch_opt_value) String(5400) Value(38) +Enum(mips_arch_opt_value) String(vr5400) Value(39) Canonical EnumValue -Enum(mips_arch_opt_value) String(r5400) Value(38) +Enum(mips_arch_opt_value) String(5400) Value(39) EnumValue -Enum(mips_arch_opt_value) String(vr5500) Value(39) Canonical +Enum(mips_arch_opt_value) String(r5400) Value(39) EnumValue -Enum(mips_arch_opt_value) String(5500) Value(39) +Enum(mips_arch_opt_value) String(vr5500) Value(40) Canonical EnumValue -Enum(mips_arch_opt_value) String(r5500) Value(39) +Enum(mips_arch_opt_value) String(5500) Value(40) EnumValue -Enum(mips_arch_opt_value) String(rm7000) Value(40) Canonical +Enum(mips_arch_opt_value) String(r5500) Value(40) EnumValue -Enum(mips_arch_opt_value) String(rm7k) Value(40) +Enum(mips_arch_opt_value) String(rm7000) Value(41) Canonical EnumValue -Enum(mips_arch_opt_value) String(7000) Value(40) +Enum(mips_arch_opt_value) String(rm7k) Value(41) EnumValue -Enum(mips_arch_opt_value) String(7k) Value(40) +Enum(mips_arch_opt_value) String(7000) Value(41) EnumValue -Enum(mips_arch_opt_value) String(r7000) Value(40) +Enum(mips_arch_opt_value) String(7k) Value(41) EnumValue -Enum(mips_arch_opt_value) String(r7k) Value(40) +Enum(mips_arch_opt_value) String(r7000) Value(41) EnumValue -Enum(mips_arch_opt_value) String(rm9000) Value(41) Canonical +Enum(mips_arch_opt_value) String(r7k) Value(41) EnumValue -Enum(mips_arch_opt_value) String(rm9k) Value(41) +Enum(mips_arch_opt_value) String(rm9000) Value(42) Canonical EnumValue -Enum(mips_arch_opt_value) String(9000) Value(41) +Enum(mips_arch_opt_value) String(rm9k) Value(42) EnumValue -Enum(mips_arch_opt_value) String(9k) Value(41) +Enum(mips_arch_opt_value) String(9000) Value(42) EnumValue -Enum(mips_arch_opt_value) String(r9000) Value(41) +Enum(mips_arch_opt_value) String(9k) Value(42) EnumValue -Enum(mips_arch_opt_value) 
String(r9k) Value(41) +Enum(mips_arch_opt_value) String(r9000) Value(42) EnumValue -Enum(mips_arch_opt_value) String(4kc) Value(42) Canonical +Enum(mips_arch_opt_value) String(r9k) Value(42) EnumValue -Enum(mips_arch_opt_value) String(r4kc) Value(42) +Enum(mips_arch_opt_value) String(4kc) Value(43) Canonical EnumValue -Enum(mips_arch_opt_value) String(4km) Value(43) Canonical +Enum(mips_arch_opt_value) String(r4kc) Value(43) EnumValue -Enum(mips_arch_opt_value) String(r4km) Value(43) +Enum(mips_arch_opt_value) String(4km) Value(44) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kp) Value(44) Canonical +Enum(mips_arch_opt_value) String(r4km) Value(44) EnumValue -Enum(mips_arch_opt_value) String(r4kp) Value(44) +Enum(mips_arch_opt_value) String(4kp) Value(45) Canonical EnumValue -Enum(mips_arch_opt_value) String(4ksc) Value(45) Canonical +Enum(mips_arch_opt_value) String(r4kp) Value(45) EnumValue -Enum(mips_arch_opt_value) String(r4ksc) Value(45) +Enum(mips_arch_opt_value) String(4ksc) Value(46) Canonical EnumValue -Enum(mips_arch_opt_value) String(m4k) Value(46) Canonical +Enum(mips_arch_opt_value) String(r4ksc) Value(46) EnumValue -Enum(mips_arch_opt_value) String(m14kc) Value(47) Canonical +Enum(mips_arch_opt_value) String(m4k) Value(47) Canonical EnumValue -Enum(mips_arch_opt_value) String(m14k) Value(48) Canonical +Enum(mips_arch_opt_value) String(m14kc) Value(48) Canonical EnumValue -Enum(mips_arch_opt_value) String(m14ke) Value(49) Canonical +Enum(mips_arch_opt_value) String(m14k) Value(49) Canonical EnumValue -Enum(mips_arch_opt_value) String(m14kec) Value(50) Canonical +Enum(mips_arch_opt_value) String(m14ke) Value(50) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kec) Value(51) Canonical +Enum(mips_arch_opt_value) String(m14kec) Value(51) Canonical EnumValue -Enum(mips_arch_opt_value) String(r4kec) Value(51) +Enum(mips_arch_opt_value) String(4kec) Value(52) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kem) Value(52) Canonical +Enum(mips_arch_opt_value) String(r4kec) Value(52) EnumValue -Enum(mips_arch_opt_value) String(r4kem) Value(52) +Enum(mips_arch_opt_value) String(4kem) Value(53) Canonical EnumValue -Enum(mips_arch_opt_value) String(4kep) Value(53) Canonical +Enum(mips_arch_opt_value) String(r4kem) Value(53) EnumValue -Enum(mips_arch_opt_value) String(r4kep) Value(53) +Enum(mips_arch_opt_value) String(4kep) Value(54) Canonical EnumValue -Enum(mips_arch_opt_value) String(4ksd) Value(54) Canonical +Enum(mips_arch_opt_value) String(r4kep) Value(54) EnumValue -Enum(mips_arch_opt_value) String(r4ksd) Value(54) +Enum(mips_arch_opt_value) String(4ksd) Value(55) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kc) Value(55) Canonical +Enum(mips_arch_opt_value) String(r4ksd) Value(55) EnumValue -Enum(mips_arch_opt_value) String(r24kc) Value(55) +Enum(mips_arch_opt_value) String(24kc) Value(56) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kf2_1) Value(56) Canonical +Enum(mips_arch_opt_value) String(r24kc) Value(56) EnumValue -Enum(mips_arch_opt_value) String(r24kf2_1) Value(56) +Enum(mips_arch_opt_value) String(24kf2_1) Value(57) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kf) Value(57) Canonical +Enum(mips_arch_opt_value) String(r24kf2_1) Value(57) EnumValue -Enum(mips_arch_opt_value) String(r24kf) Value(57) +Enum(mips_arch_opt_value) String(24kf) Value(58) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kf1_1) Value(58) Canonical +Enum(mips_arch_opt_value) String(r24kf) Value(58) EnumValue -Enum(mips_arch_opt_value) 
String(r24kf1_1) Value(58) +Enum(mips_arch_opt_value) String(24kf1_1) Value(59) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kfx) Value(59) Canonical +Enum(mips_arch_opt_value) String(r24kf1_1) Value(59) EnumValue -Enum(mips_arch_opt_value) String(r24kfx) Value(59) +Enum(mips_arch_opt_value) String(24kfx) Value(60) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kx) Value(60) Canonical +Enum(mips_arch_opt_value) String(r24kfx) Value(60) EnumValue -Enum(mips_arch_opt_value) String(r24kx) Value(60) +Enum(mips_arch_opt_value) String(24kx) Value(61) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kec) Value(61) Canonical +Enum(mips_arch_opt_value) String(r24kx) Value(61) EnumValue -Enum(mips_arch_opt_value) String(r24kec) Value(61) +Enum(mips_arch_opt_value) String(24kec) Value(62) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kef2_1) Value(62) Canonical +Enum(mips_arch_opt_value) String(r24kec) Value(62) EnumValue -Enum(mips_arch_opt_value) String(r24kef2_1) Value(62) +Enum(mips_arch_opt_value) String(24kef2_1) Value(63) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kef) Value(63) Canonical +Enum(mips_arch_opt_value) String(r24kef2_1) Value(63) EnumValue -Enum(mips_arch_opt_value) String(r24kef) Value(63) +Enum(mips_arch_opt_value) String(24kef) Value(64) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kef1_1) Value(64) Canonical +Enum(mips_arch_opt_value) String(r24kef) Value(64) EnumValue -Enum(mips_arch_opt_value) String(r24kef1_1) Value(64) +Enum(mips_arch_opt_value) String(24kef1_1) Value(65) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kefx) Value(65) Canonical +Enum(mips_arch_opt_value) String(r24kef1_1) Value(65) EnumValue -Enum(mips_arch_opt_value) String(r24kefx) Value(65) +Enum(mips_arch_opt_value) String(24kefx) Value(66) Canonical EnumValue -Enum(mips_arch_opt_value) String(24kex) Value(66) Canonical +Enum(mips_arch_opt_value) String(r24kefx) Value(66) EnumValue -Enum(mips_arch_opt_value) String(r24kex) Value(66) +Enum(mips_arch_opt_value) String(24kex) Value(67) Canonical EnumValue -Enum(mips_arch_opt_value) String(34kc) Value(67) Canonical +Enum(mips_arch_opt_value) String(r24kex) Value(67) EnumValue -Enum(mips_arch_opt_value) String(r34kc) Value(67) +Enum(mips_arch_opt_value) String(34kc) Value(68) Canonical EnumValue -Enum(mips_arch_opt_value) String(34kf2_1) Value(68) Canonical +Enum(mips_arch_opt_value) String(r34kc) Value(68) EnumValue -Enum(mips_arch_opt_value) String(r34kf2_1) Value(68) +Enum(mips_arch_opt_value) String(34kf2_1) Value(69) Canonical EnumValue -Enum(mips_arch_opt_value) String(34kf) Value(69) Canonical +Enum(mips_arch_opt_value) String(r34kf2_1) Value(69) EnumValue -Enum(mips_arch_opt_value) String(r34kf) Value(69) +Enum(mips_arch_opt_value) String(34kf) Value(70) Canonical EnumValue -Enum(mips_arch_opt_value) String(34kf1_1) Value(70) Canonical +Enum(mips_arch_opt_value) String(r34kf) Value(70) EnumValue -Enum(mips_arch_opt_value) String(r34kf1_1) Value(70) +Enum(mips_arch_opt_value) String(34kf1_1) Value(71) Canonical EnumValue -Enum(mips_arch_opt_value) String(34kfx) Value(71) Canonical +Enum(mips_arch_opt_value) String(r34kf1_1) Value(71) EnumValue -Enum(mips_arch_opt_value) String(r34kfx) Value(71) +Enum(mips_arch_opt_value) String(34kfx) Value(72) Canonical EnumValue -Enum(mips_arch_opt_value) String(34kx) Value(72) Canonical +Enum(mips_arch_opt_value) String(r34kfx) Value(72) EnumValue -Enum(mips_arch_opt_value) String(r34kx) Value(72) +Enum(mips_arch_opt_value) String(34kx) Value(73) 
Canonical EnumValue -Enum(mips_arch_opt_value) String(34kn) Value(73) Canonical +Enum(mips_arch_opt_value) String(r34kx) Value(73) EnumValue -Enum(mips_arch_opt_value) String(r34kn) Value(73) +Enum(mips_arch_opt_value) String(34kn) Value(74) Canonical EnumValue -Enum(mips_arch_opt_value) String(74kc) Value(74) Canonical +Enum(mips_arch_opt_value) String(r34kn) Value(74) EnumValue -Enum(mips_arch_opt_value) String(r74kc) Value(74) +Enum(mips_arch_opt_value) String(74kc) Value(75) Canonical EnumValue -Enum(mips_arch_opt_value) String(74kf2_1) Value(75) Canonical +Enum(mips_arch_opt_value) String(r74kc) Value(75) EnumValue -Enum(mips_arch_opt_value) String(r74kf2_1) Value(75) +Enum(mips_arch_opt_value) String(74kf2_1) Value(76) Canonical EnumValue -Enum(mips_arch_opt_value) String(74kf) Value(76) Canonical +Enum(mips_arch_opt_value) String(r74kf2_1) Value(76) EnumValue -Enum(mips_arch_opt_value) String(r74kf) Value(76) +Enum(mips_arch_opt_value) String(74kf) Value(77) Canonical EnumValue -Enum(mips_arch_opt_value) String(74kf1_1) Value(77) Canonical +Enum(mips_arch_opt_value) String(r74kf) Value(77) EnumValue -Enum(mips_arch_opt_value) String(r74kf1_1) Value(77) +Enum(mips_arch_opt_value) String(74kf1_1) Value(78) Canonical EnumValue -Enum(mips_arch_opt_value) String(74kfx) Value(78) Canonical +Enum(mips_arch_opt_value) String(r74kf1_1) Value(78) EnumValue -Enum(mips_arch_opt_value) String(r74kfx) Value(78) +Enum(mips_arch_opt_value) String(74kfx) Value(79) Canonical EnumValue -Enum(mips_arch_opt_value) String(74kx) Value(79) Canonical +Enum(mips_arch_opt_value) String(r74kfx) Value(79) EnumValue -Enum(mips_arch_opt_value) String(r74kx) Value(79) +Enum(mips_arch_opt_value) String(74kx) Value(80) Canonical EnumValue -Enum(mips_arch_opt_value) String(74kf3_2) Value(80) Canonical +Enum(mips_arch_opt_value) String(r74kx) Value(80) EnumValue -Enum(mips_arch_opt_value) String(r74kf3_2) Value(80) +Enum(mips_arch_opt_value) String(74kf3_2) Value(81) Canonical EnumValue -Enum(mips_arch_opt_value) String(1004kc) Value(81) Canonical +Enum(mips_arch_opt_value) String(r74kf3_2) Value(81) EnumValue -Enum(mips_arch_opt_value) String(r1004kc) Value(81) +Enum(mips_arch_opt_value) String(1004kc) Value(82) Canonical EnumValue -Enum(mips_arch_opt_value) String(1004kf2_1) Value(82) Canonical +Enum(mips_arch_opt_value) String(r1004kc) Value(82) EnumValue -Enum(mips_arch_opt_value) String(r1004kf2_1) Value(82) +Enum(mips_arch_opt_value) String(1004kf2_1) Value(83) Canonical EnumValue -Enum(mips_arch_opt_value) String(1004kf) Value(83) Canonical +Enum(mips_arch_opt_value) String(r1004kf2_1) Value(83) EnumValue -Enum(mips_arch_opt_value) String(r1004kf) Value(83) +Enum(mips_arch_opt_value) String(1004kf) Value(84) Canonical EnumValue -Enum(mips_arch_opt_value) String(1004kf1_1) Value(84) Canonical +Enum(mips_arch_opt_value) String(r1004kf) Value(84) EnumValue -Enum(mips_arch_opt_value) String(r1004kf1_1) Value(84) +Enum(mips_arch_opt_value) String(1004kf1_1) Value(85) Canonical EnumValue -Enum(mips_arch_opt_value) String(interaptiv) Value(85) Canonical +Enum(mips_arch_opt_value) String(r1004kf1_1) Value(85) EnumValue -Enum(mips_arch_opt_value) String(p5600) Value(86) Canonical +Enum(mips_arch_opt_value) String(interaptiv) Value(86) Canonical EnumValue -Enum(mips_arch_opt_value) String(m5100) Value(87) Canonical +Enum(mips_arch_opt_value) String(p5600) Value(87) Canonical EnumValue -Enum(mips_arch_opt_value) String(m5101) Value(88) Canonical +Enum(mips_arch_opt_value) String(m5100) Value(88) Canonical EnumValue 
-Enum(mips_arch_opt_value) String(5kc) Value(89) Canonical +Enum(mips_arch_opt_value) String(m5101) Value(89) Canonical EnumValue -Enum(mips_arch_opt_value) String(r5kc) Value(89) +Enum(mips_arch_opt_value) String(5kc) Value(90) Canonical EnumValue -Enum(mips_arch_opt_value) String(5kf) Value(90) Canonical +Enum(mips_arch_opt_value) String(r5kc) Value(90) EnumValue -Enum(mips_arch_opt_value) String(r5kf) Value(90) +Enum(mips_arch_opt_value) String(5kf) Value(91) Canonical EnumValue -Enum(mips_arch_opt_value) String(20kc) Value(91) Canonical +Enum(mips_arch_opt_value) String(r5kf) Value(91) EnumValue -Enum(mips_arch_opt_value) String(r20kc) Value(91) +Enum(mips_arch_opt_value) String(20kc) Value(92) Canonical EnumValue -Enum(mips_arch_opt_value) String(sb1) Value(92) Canonical +Enum(mips_arch_opt_value) String(r20kc) Value(92) EnumValue -Enum(mips_arch_opt_value) String(sb1a) Value(93) Canonical +Enum(mips_arch_opt_value) String(sb1) Value(93) Canonical EnumValue -Enum(mips_arch_opt_value) String(sr71000) Value(94) Canonical +Enum(mips_arch_opt_value) String(sb1a) Value(94) Canonical EnumValue -Enum(mips_arch_opt_value) String(sr71k) Value(94) +Enum(mips_arch_opt_value) String(sr71000) Value(95) Canonical EnumValue -Enum(mips_arch_opt_value) String(xlr) Value(95) Canonical +Enum(mips_arch_opt_value) String(sr71k) Value(95) EnumValue -Enum(mips_arch_opt_value) String(loongson3a) Value(96) Canonical +Enum(mips_arch_opt_value) String(xlr) Value(96) Canonical EnumValue -Enum(mips_arch_opt_value) String(gs464) Value(97) Canonical +Enum(mips_arch_opt_value) String(loongson3a) Value(97) Canonical EnumValue -Enum(mips_arch_opt_value) String(gs464e) Value(98) Canonical +Enum(mips_arch_opt_value) String(gs464) Value(98) Canonical EnumValue -Enum(mips_arch_opt_value) String(gs264e) Value(99) Canonical +Enum(mips_arch_opt_value) String(gs464e) Value(99) Canonical EnumValue -Enum(mips_arch_opt_value) String(octeon) Value(100) Canonical +Enum(mips_arch_opt_value) String(gs264e) Value(100) Canonical EnumValue -Enum(mips_arch_opt_value) String(octeon+) Value(101) Canonical +Enum(mips_arch_opt_value) String(octeon) Value(101) Canonical EnumValue -Enum(mips_arch_opt_value) String(octeon2) Value(102) Canonical +Enum(mips_arch_opt_value) String(octeon+) Value(102) Canonical EnumValue -Enum(mips_arch_opt_value) String(octeon3) Value(103) Canonical +Enum(mips_arch_opt_value) String(octeon2) Value(103) Canonical EnumValue -Enum(mips_arch_opt_value) String(xlp) Value(104) Canonical +Enum(mips_arch_opt_value) String(octeon3) Value(104) Canonical EnumValue -Enum(mips_arch_opt_value) String(i6400) Value(105) Canonical +Enum(mips_arch_opt_value) String(xlp) Value(105) Canonical EnumValue -Enum(mips_arch_opt_value) String(i6500) Value(106) Canonical +Enum(mips_arch_opt_value) String(i6400) Value(106) Canonical EnumValue -Enum(mips_arch_opt_value) String(p6600) Value(107) Canonical +Enum(mips_arch_opt_value) String(i6500) Value(107) Canonical + +EnumValue +Enum(mips_arch_opt_value) String(p6600) Value(108) Canonical diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc index 42dfc3b..92b3470 100644 --- a/gcc/config/mips/mips.cc +++ b/gcc/config/mips/mips.cc @@ -1163,6 +1163,20 @@ static const struct mips_rtx_cost_data COSTS_N_INSNS (8), /* int_div_di */ 2, /* branch_cost */ 4 /* memory_latency */ + }, + { /* Allegrex */ + /* Has hard-float support for single precision only. 
 */
+    COSTS_N_INSNS (5),            /* fp_add */
+    COSTS_N_INSNS (5),            /* fp_mult_sf */
+    COSTS_N_INSNS (256),          /* fp_mult_df */
+    COSTS_N_INSNS (30),           /* fp_div_sf */
+    COSTS_N_INSNS (256),          /* fp_div_df */
+    COSTS_N_INSNS (7),            /* int_mult_si */
+    COSTS_N_INSNS (27),           /* int_mult_di */
+    COSTS_N_INSNS (21),           /* int_div_si */
+    COSTS_N_INSNS (256),          /* int_div_di */
+    2,                            /* branch_cost */
+    4                             /* memory_latency */
   }
 };
 
@@ -3315,7 +3329,7 @@ mips_unspec_address_offset (rtx base, rtx offset,
 			    enum mips_symbol_type symbol_type)
 {
   base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
-			 UNSPEC_ADDRESS_FIRST + symbol_type);
+			 UNSPEC_ADDRESS_FIRST + (int) symbol_type);
   if (offset != const0_rtx)
     base = gen_rtx_PLUS (Pmode, base, offset);
   return gen_rtx_CONST (Pmode, base);
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 494f14c..4d65bbf 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -291,6 +291,7 @@ struct mips_cpu_info {
 #define ISA_MIPS64R6		(mips_isa == MIPS_ISA_MIPS64R6)
 
 /* Architecture target defines.  */
+#define TARGET_ALLEGREX		(mips_arch == PROCESSOR_ALLEGREX)
 #define TARGET_LOONGSON_2E	(mips_arch == PROCESSOR_LOONGSON_2E)
 #define TARGET_LOONGSON_2F	(mips_arch == PROCESSOR_LOONGSON_2F)
 #define TARGET_LOONGSON_2EF	(TARGET_LOONGSON_2E || TARGET_LOONGSON_2F)
@@ -326,6 +327,7 @@ struct mips_cpu_info {
				     || mips_tune == PROCESSOR_74KF2_1	\
				     || mips_tune == PROCESSOR_74KF1_1	\
				     || mips_tune == PROCESSOR_74KF3_2)
+#define TUNE_ALLEGREX		(mips_tune == PROCESSOR_ALLEGREX)
 #define TUNE_LOONGSON_2EF	(mips_tune == PROCESSOR_LOONGSON_2E	\
				 || mips_tune == PROCESSOR_LOONGSON_2F)
 #define TUNE_GS464		(mips_tune == PROCESSOR_GS464)
@@ -1091,12 +1093,14 @@ struct mips_cpu_info {
 /* ISA has the integer conditional move instructions introduced in mips4 and
    ST Loongson 2E/2F.  */
 #define ISA_HAS_CONDMOVE	(ISA_HAS_FP_CONDMOVE			\
+				 || TARGET_ALLEGREX			\
				 || TARGET_MIPS5900			\
				 || ISA_HAS_MIPS16E2			\
				 || TARGET_LOONGSON_2EF)
 
 /* ISA has LDC1 and SDC1.  */
 #define ISA_HAS_LDC1_SDC1	(!ISA_MIPS1				\
+				 && !TARGET_ALLEGREX			\
				 && !TARGET_MIPS5900			\
				 && !TARGET_MIPS16)
 
@@ -1135,16 +1139,19 @@ struct mips_cpu_info {
 
 /* ISA has conditional trap instructions.  */
 #define ISA_HAS_COND_TRAP	(!ISA_MIPS1				\
+				 && !TARGET_ALLEGREX			\
				 && !TARGET_MIPS16)
 
 /* ISA has conditional trap with immediate instructions.  */
 #define ISA_HAS_COND_TRAPI	(!ISA_MIPS1				\
				 && mips_isa_rev <= 5			\
+				 && !TARGET_ALLEGREX			\
				 && !TARGET_MIPS16)
 
 /* ISA has integer multiply-accumulate instructions, madd and msub.  */
-#define ISA_HAS_MADD_MSUB	(mips_isa_rev >= 1			\
-				 && mips_isa_rev <= 5)
+#define ISA_HAS_MADD_MSUB	((mips_isa_rev >= 1			\
+				  && mips_isa_rev <= 5)			\
+				 || TARGET_ALLEGREX)
 
 /* Integer multiply-accumulate instructions should be generated.  */
 #define GENERATE_MADD_MSUB	(TARGET_IMADD && !TARGET_MIPS16)
 
@@ -1199,7 +1206,8 @@ struct mips_cpu_info {
 #define ISA_HAS_IEEE_754_2008	(mips_isa_rev >= 2)
 
 /* ISA has count leading zeroes/ones instruction (not implemented).  */
-#define ISA_HAS_CLZ_CLO		(mips_isa_rev >= 1 && !TARGET_MIPS16)
+#define ISA_HAS_CLZ_CLO		((mips_isa_rev >= 1 && !TARGET_MIPS16)	\
+				 || TARGET_ALLEGREX)
 
 /* ISA has count trailing zeroes/ones instruction.  */
 #define ISA_HAS_CTZ_CTO		(TARGET_LOONGSON_EXT2)
@@ -1241,15 +1249,23 @@ struct mips_cpu_info {
 
 /* ISA has the "ror" (rotate right) instructions.  */
 #define ISA_HAS_ROR		((mips_isa_rev >= 2			\
+				  || TARGET_ALLEGREX			\
				  || TARGET_MIPS5400			\
				  || TARGET_MIPS5500			\
				  || TARGET_SR71K			\
				  || TARGET_SMARTMIPS)			\
				 && !TARGET_MIPS16)
 
+/* ISA has the "min" and "max" instructions (signed min/max).
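+   These are Allegrex-specific and enable the sminsi3 and smaxsi3
+   patterns added to mips.md below.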
*/ +#define ISA_HAS_MIN_MAX (TARGET_ALLEGREX) + /* ISA has the WSBH (word swap bytes within halfwords) instruction. 64-bit targets also provide DSBH and DSHD. */ -#define ISA_HAS_WSBH (mips_isa_rev >= 2 && !TARGET_MIPS16) +#define ISA_HAS_WSBH ((mips_isa_rev >= 2 && !TARGET_MIPS16) \ + || TARGET_ALLEGREX) + +/* Similar to WSBH but for 32 bit words (byte swap within a word). */ +#define ISA_HAS_WSBW (TARGET_ALLEGREX) /* ISA has data prefetch instructions. This controls use of 'pref'. */ #define ISA_HAS_PREFETCH ((ISA_MIPS4 \ @@ -1282,11 +1298,13 @@ struct mips_cpu_info { #define ISA_HAS_TRUNC_W (!ISA_MIPS1) /* ISA includes the MIPS32r2 seb and seh instructions. */ -#define ISA_HAS_SEB_SEH (mips_isa_rev >= 2 && !TARGET_MIPS16) +#define ISA_HAS_SEB_SEH ((mips_isa_rev >= 2 && !TARGET_MIPS16) \ + || TARGET_ALLEGREX) /* ISA includes the MIPS32/64 rev 2 ext and ins instructions. */ #define ISA_HAS_EXT_INS ((mips_isa_rev >= 2 && !TARGET_MIPS16) \ - || ISA_HAS_MIPS16E2) + || ISA_HAS_MIPS16E2 \ + || TARGET_ALLEGREX) /* ISA has instructions for accessing top part of 64-bit fp regs. */ #define ISA_HAS_MXHC1 (!TARGET_FLOAT32 \ @@ -1330,6 +1348,7 @@ struct mips_cpu_info { /* Likewise mtc1 and mfc1. */ #define ISA_HAS_XFER_DELAY (mips_isa <= MIPS_ISA_MIPS3 \ + && !TARGET_ALLEGREX \ && !TARGET_MIPS5900 \ && !TARGET_LOONGSON_2EF) @@ -1351,6 +1370,7 @@ struct mips_cpu_info { earlier-ISA CPUs for which CPU documentation declares that the instructions are really interlocked. */ #define ISA_HAS_HILO_INTERLOCKS (mips_isa_rev >= 1 \ + || TARGET_ALLEGREX \ || TARGET_MIPS5500 \ || TARGET_MIPS5900 \ || TARGET_LOONGSON_2EF) diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md index 99654cc..5117dfd 100644 --- a/gcc/config/mips/mips.md +++ b/gcc/config/mips/mips.md @@ -72,6 +72,7 @@ m5100 i6400 p6600 + allegrex ]) (define_c_enum "unspec" [ @@ -835,9 +836,11 @@ ;; conditional-move-type condition is needed. (define_mode_iterator MOVECC [SI (DI "TARGET_64BIT") (CC "TARGET_HARD_FLOAT + && !TARGET_ALLEGREX && !TARGET_LOONGSON_2EF && !TARGET_MIPS5900") (CCE "TARGET_HARD_FLOAT + && !TARGET_ALLEGREX && !TARGET_LOONGSON_2EF && !TARGET_MIPS5900")]) @@ -1792,9 +1795,12 @@ (set_attr "mode" "SI") (set_attr "insn_count" "1,1,2") (set (attr "enabled") - (cond [(eq_attr "alternative" "1,2") - (const_string "yes")] - (const_string "no")))]) + (cond [(eq_attr "alternative" "1") + (const_string "yes") + (and (eq_attr "alternative" "2") + (match_test "ISA_HAS_MUL3")) + (const_string "yes")] + (const_string "no")))]) ;; The same idea applies here. The middle alternative needs one less ;; clobber than the final alternative, so we add "*?" as a counterweight. @@ -2038,9 +2044,12 @@ (set_attr "mode" "SI") (set_attr "insn_count" "1,1,2") (set (attr "enabled") - (cond [(eq_attr "alternative" "1,2") - (const_string "yes")] - (const_string "no")))]) + (cond [(eq_attr "alternative" "1") + (const_string "yes") + (and (eq_attr "alternative" "2") + (match_test "ISA_HAS_MUL3")) + (const_string "yes")] + (const_string "no")))]) ;; Split *mul_sub_si if both the source and destination accumulator ;; values are GPRs. 
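A note on the ISA_HAS_WSBW / ISA_HAS_ROR gating above, which feeds the bswapsi2 rewrite in the next hunk: when the Allegrex wsbw instruction is not available, a full 32-bit byte swap can still be composed from wsbh plus a 16-bit rotate. A minimal, self-contained C++ sketch of that equivalence follows; the wsbh and rotr helpers merely model the MIPS instructions and are not GCC APIs.

#include <cassert>
#include <cstdint>

/* Model of MIPS wsbh: swap the two bytes inside each 16-bit half.  */
static std::uint32_t wsbh (std::uint32_t x)
{
  return ((x & 0x00ff00ffu) << 8) | ((x & 0xff00ff00u) >> 8);
}

/* Model of MIPS rotr: rotate right by N bits, 0 < N < 32.  */
static std::uint32_t rotr (std::uint32_t x, unsigned n)
{
  return (x >> n) | (x << (32 - n));
}

int main ()
{
  std::uint32_t x = 0x11223344u;
  /* wsbh yields 0x22114433; rotating by 16 then swaps the halves,
     producing the full byte reversal 0x44332211.  */
  assert (rotr (wsbh (x), 16) == 0x44332211u);
  return 0;
}

This is why the bswapsi2 expander in the hunk below can fall back to the two-instruction sequence whenever ISA_HAS_WSBW is false but ISA_HAS_WSBH and ISA_HAS_ROR both hold.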
@@ -6038,16 +6047,29 @@ "wsbh\t%0,%1" [(set_attr "type" "shift")]) -(define_insn_and_split "bswapsi2" +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "=d") + (bswap:SI (match_operand:SI 1 "register_operand" "d")))] + "ISA_HAS_WSBW || (ISA_HAS_WSBH && ISA_HAS_ROR)" +{ + if (ISA_HAS_WSBW) { + emit_insn (gen_wsbwsi2 (operands[0], operands[1])); + } + else + { + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_wsbh (tmp, operands[1])); + emit_insn (gen_rotrsi3 (operands[0], tmp, GEN_INT(16))); + } + DONE; +}) + +(define_insn "wsbwsi2" [(set (match_operand:SI 0 "register_operand" "=d") (bswap:SI (match_operand:SI 1 "register_operand" "d")))] - "ISA_HAS_WSBH && ISA_HAS_ROR" - "#" - "&& 1" - [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_WSBH)) - (set (match_dup 0) (rotatert:SI (match_dup 0) (const_int 16)))] - "" - [(set_attr "insn_count" "2")]) + "ISA_HAS_WSBW" + "wsbw\t%0,%1" + [(set_attr "type" "shift")]) (define_insn_and_split "bswapdi2" [(set (match_operand:DI 0 "register_operand" "=d") @@ -7648,6 +7670,27 @@ DONE; }) +;; Min and max. + +(define_insn "sminsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (smin:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "ISA_HAS_MIN_MAX" + "min\t%0,%1,%2" + [(set_attr "type" "arith") + (set_attr "mode" "SI")]) + +(define_insn "smaxsi3" + [(set (match_operand:SI 0 "register_operand" "=d") + (smax:SI (match_operand:SI 1 "register_operand" "d") + (match_operand:SI 2 "register_operand" "d")))] + "ISA_HAS_MIN_MAX" + "max\t%0,%1,%2" + [(set_attr "type" "arith") + (set_attr "mode" "SI")]) + + (define_expand "speculation_barrier" [(unspec_volatile [(const_int 0)] VUNSPEC_SPECULATION_BARRIER)] "" diff --git a/gcc/config/mips/mips.opt.urls b/gcc/config/mips/mips.opt.urls index 5921d69..a85ea03 100644 --- a/gcc/config/mips/mips.opt.urls +++ b/gcc/config/mips/mips.opt.urls @@ -1,10 +1,10 @@ ; Autogenerated by regenerate-opt-urls.py from gcc/config/mips/mips.opt and generated HTML EB -UrlSuffix(gcc/MIPS-Options.html#index-EB-2) +UrlSuffix(gcc/MIPS-Options.html#index-EB-1) EL -UrlSuffix(gcc/MIPS-Options.html#index-EL-2) +UrlSuffix(gcc/MIPS-Options.html#index-EL-1) mabi= UrlSuffix(gcc/MIPS-Options.html#index-mabi-3) @@ -133,7 +133,7 @@ mplt UrlSuffix(gcc/MIPS-Options.html#index-mplt) mhard-float -UrlSuffix(gcc/MIPS-Options.html#index-mhard-float-4) +UrlSuffix(gcc/MIPS-Options.html#index-mhard-float-3) minterlink-compressed UrlSuffix(gcc/MIPS-Options.html#index-minterlink-compressed) @@ -154,7 +154,7 @@ mlocal-sdata UrlSuffix(gcc/MIPS-Options.html#index-mlocal-sdata) mlong-calls -UrlSuffix(gcc/MIPS-Options.html#index-mlong-calls-6) +UrlSuffix(gcc/MIPS-Options.html#index-mlong-calls-7) mlong32 UrlSuffix(gcc/MIPS-Options.html#index-mlong32) @@ -208,7 +208,7 @@ msmartmips UrlSuffix(gcc/MIPS-Options.html#index-msmartmips) msoft-float -UrlSuffix(gcc/MIPS-Options.html#index-msoft-float-8) +UrlSuffix(gcc/MIPS-Options.html#index-msoft-float-7) msplit-addresses UrlSuffix(gcc/MIPS-Options.html#index-msplit-addresses) diff --git a/gcc/config/mmix/mmix.opt.urls b/gcc/config/mmix/mmix.opt.urls index 6722f9e..dda41ca 100644 --- a/gcc/config/mmix/mmix.opt.urls +++ b/gcc/config/mmix/mmix.opt.urls @@ -22,7 +22,7 @@ mtoplevel-symbols UrlSuffix(gcc/MMIX-Options.html#index-mtoplevel-symbols) melf -UrlSuffix(gcc/MMIX-Options.html#index-melf-1) +UrlSuffix(gcc/MMIX-Options.html#index-melf) mbranch-predict UrlSuffix(gcc/MMIX-Options.html#index-mbranch-predict) diff --git 
a/gcc/config/or1k/or1k.opt.urls b/gcc/config/or1k/or1k.opt.urls index b3ba2df..0927d6a 100644 --- a/gcc/config/or1k/or1k.opt.urls +++ b/gcc/config/or1k/or1k.opt.urls @@ -13,10 +13,10 @@ msoft-mul UrlSuffix(gcc/OpenRISC-Options.html#index-msoft-mul) msoft-float -UrlSuffix(gcc/OpenRISC-Options.html#index-msoft-float-9) +UrlSuffix(gcc/OpenRISC-Options.html#index-msoft-float-8) mhard-float -UrlSuffix(gcc/OpenRISC-Options.html#index-mhard-float-5) +UrlSuffix(gcc/OpenRISC-Options.html#index-mhard-float-4) mdouble-float UrlSuffix(gcc/OpenRISC-Options.html#index-mdouble-float-3) diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc index b63ccf1..fb42a5c 100644 --- a/gcc/config/pa/pa.cc +++ b/gcc/config/pa/pa.cc @@ -1932,31 +1932,36 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg) /* We can only handle indexed addresses in the destination operand of floating point stores. Thus, we need to break out indexed - addresses from the destination operand. */ - if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0))) + addresses from the destination operand. We also need to break + out REG+D addresses with large offsets. */ + if (MEM_P (operand0) + && (IS_INDEX_ADDR_P (XEXP (operand0, 0)) + || (GET_CODE (XEXP (operand0, 0)) == PLUS + && REG_P (XEXP (XEXP (operand0, 0), 0)) + && CONST_INT_P (XEXP (XEXP (operand0, 0), 1)) + && !INT_14_BITS (XEXP (XEXP (operand0, 0), 1))))) { - gcc_assert (can_create_pseudo_p ()); - tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0)); operand0 = replace_equiv_address (operand0, tem); } /* On targets with non-equivalent space registers, break out unscaled - indexed addresses from the source operand before the final CSE. + indexed addresses from the source operand before reload is completed. We have to do this because the REG_POINTER flag is not correctly - carried through various optimization passes and CSE may substitute - a pseudo without the pointer set for one with the pointer set. As - a result, we loose various opportunities to create insns with - unscaled indexed addresses. */ - if (!TARGET_NO_SPACE_REGS - && !cse_not_expected - && GET_CODE (operand1) == MEM + carried through various optimization passes. We also need to break + out REG+D addresses with large offsets. */ + if (MEM_P (operand1) && GET_CODE (XEXP (operand1, 0)) == PLUS && REG_P (XEXP (XEXP (operand1, 0), 0)) - && REG_P (XEXP (XEXP (operand1, 0), 1))) - operand1 - = replace_equiv_address (operand1, - copy_to_mode_reg (Pmode, XEXP (operand1, 0))); + && ((!TARGET_NO_SPACE_REGS + && !reload_completed + && REG_P (XEXP (XEXP (operand1, 0), 1))) + || (CONST_INT_P (XEXP (XEXP (operand1, 0), 1)) + && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))))) + { + tem = copy_to_mode_reg (Pmode, XEXP (operand1, 0)); + operand1 = replace_equiv_address (operand1, tem); + } if (scratch_reg && reload_in_progress @@ -5764,11 +5769,22 @@ pa_print_operand (FILE *file, rtx x, int code) && GET_CODE (XEXP (XEXP (x, 0), 1)) == REG) { /* Because the REG_POINTER flag can get lost during reload, - pa_legitimate_address_p canonicalizes the order of the - index and base registers in the combined move patterns. */ + we now defer creation of instructions with scaled and + unscaled index addresses until after reload. We require + that the flag be set in the base register on targets + that use space registers. */ rtx base = XEXP (XEXP (x, 0), 1); rtx index = XEXP (XEXP (x, 0), 0); + /* Accept non-canonical register order. 
*/ + if (!TARGET_NO_SPACE_REGS && !REG_POINTER (base)) + { + rtx tmp = base; + base = index; + index = tmp; + gcc_assert (REG_POINTER (base)); + } + fprintf (file, "%s(%s)", reg_names [REGNO (index)], reg_names [REGNO (base)]); } @@ -11001,12 +11017,15 @@ pa_legitimate_address_p (machine_mode mode, rtx x, bool strict, code_helper) if (!TARGET_DISABLE_INDEXING /* Currently, the REG_POINTER flag is not set in a variety - of situations (e.g., call arguments and pointer arithmetic). - As a result, we can't reliably determine when unscaled - addresses are legitimate on targets that need space register - selection. */ - && TARGET_NO_SPACE_REGS + of situations (e.g., call arguments and pointer arithmetic) + and the flag can be lost during reload. So, we only allow + unscaled index addresses after reload. We can accept either + register order. */ && REG_P (index) + && (TARGET_NO_SPACE_REGS + || (reload_completed + && ((REG_POINTER (base) && !REG_POINTER (index)) + || (!REG_POINTER (base) && REG_POINTER (index))))) && MODE_OK_FOR_UNSCALED_INDEXING_P (mode) && (strict ? STRICT_REG_OK_FOR_INDEX_P (index) : REG_OK_FOR_INDEX_P (index)) @@ -11015,13 +11034,10 @@ pa_legitimate_address_p (machine_mode mode, rtx x, bool strict, code_helper) return true; if (!TARGET_DISABLE_INDEXING - /* Only accept base operands with the REG_POINTER flag prior to + /* Only accept base operands with the REG_POINTER flag after reload on targets with non-equivalent space registers. */ && (TARGET_NO_SPACE_REGS - || reload_completed - || ((lra_in_progress || reload_in_progress) - && HARD_REGISTER_P (base)) - || REG_POINTER (base)) + || (reload_completed && REG_POINTER (base))) && GET_CODE (index) == MULT && REG_P (XEXP (index, 0)) && GET_MODE (XEXP (index, 0)) == Pmode diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h index 6972faa..b8756df 100644 --- a/gcc/config/pa/pa.h +++ b/gcc/config/pa/pa.h @@ -860,6 +860,16 @@ extern int may_call_alloca; || REGNO (X) == FRAME_POINTER_REGNUM \ || REGNO (X) >= FIRST_PSEUDO_REGISTER)) +/* Nonzero if X and Y are hard regs that can be used as base + and index regs in an unscaled index address. This is only + used after reload. */ +#define REGS_OK_FOR_BASE_INDEX(X,Y) \ + (REGNO (X) && REGNO (X) < 32 \ + && REGNO (Y) && REGNO (Y) < 32 \ + && (TARGET_NO_SPACE_REGS \ + || (REG_POINTER (X) && !REG_POINTER (Y)) \ + || (!REG_POINTER (X) && REG_POINTER (Y)))) + /* Nonzero if X is a hard reg that can be used as an index. 
*/ #define STRICT_REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X)) diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index 2312994..99ab06e 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -2346,6 +2346,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && REG_OK_FOR_BASE_P (operands[2]) + && (TARGET_NO_SPACE_REGS || REG_POINTER (operands[2])) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SI (plus:SI (mult:SI (match_dup 1) (const_int 4)) (match_dup 2))) (match_dup 3)) @@ -2364,6 +2365,7 @@ && !TARGET_DISABLE_INDEXING && TARGET_64BIT && REG_OK_FOR_BASE_P (operands[2]) + && (TARGET_NO_SPACE_REGS || REG_POINTER (operands[2])) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SI (plus:DI (mult:DI (match_dup 1) (const_int 4)) (match_dup 2))) (match_dup 3)) @@ -2379,9 +2381,7 @@ (match_operand:SI 3 "register_operand" ""))] "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_INDEX_P (operands[1]) - && REG_OK_FOR_BASE_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SI (plus:SI (match_dup 1) (match_dup 2))) (match_dup 3)) @@ -2396,9 +2396,7 @@ (match_operand:SI 3 "register_operand" ""))] "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_BASE_P (operands[1]) - && REG_OK_FOR_INDEX_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SI (plus:SI (match_dup 2) (match_dup 1))) (match_dup 3)) @@ -2414,9 +2412,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && TARGET_64BIT - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_INDEX_P (operands[1]) - && REG_OK_FOR_BASE_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SI (plus:DI (match_dup 1) (match_dup 2))) (match_dup 3)) @@ -2432,9 +2428,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && TARGET_64BIT - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_BASE_P (operands[1]) - && REG_OK_FOR_INDEX_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SI (plus:DI (match_dup 2) (match_dup 1))) (match_dup 3)) @@ -3961,6 +3955,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && REG_OK_FOR_BASE_P (operands[2]) + && (TARGET_NO_SPACE_REGS || REG_POINTER (operands[2])) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:DF (plus:SI (mult:SI (match_dup 1) (const_int 8)) (match_dup 2))) (match_dup 3)) @@ -3978,6 +3973,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && REG_OK_FOR_BASE_P (operands[2]) + && (TARGET_NO_SPACE_REGS || REG_POINTER (operands[2])) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:DF (plus:SI (mult:SI (match_dup 1) (const_int 8)) (match_dup 2))) (match_dup 3)) @@ -3996,6 +3992,7 @@ && !TARGET_DISABLE_INDEXING && TARGET_64BIT && REG_OK_FOR_BASE_P (operands[2]) + && (TARGET_NO_SPACE_REGS || REG_POINTER (operands[2])) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:DF (plus:DI (mult:DI (match_dup 1) (const_int 8)) (match_dup 2))) (match_dup 3)) @@ -4014,6 +4011,7 @@ && !TARGET_DISABLE_INDEXING && TARGET_64BIT && REG_OK_FOR_BASE_P (operands[2]) + && (TARGET_NO_SPACE_REGS || REG_POINTER (operands[2])) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:DF (plus:DI (mult:DI (match_dup 1) (const_int 8)) (match_dup 2))) (match_dup 3)) @@ -4029,9 +4027,7 @@ (match_operand:DF 3 "register_operand" ""))] "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_INDEX_P 
(operands[1]) - && REG_OK_FOR_BASE_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:DF (plus:SI (match_dup 1) (match_dup 2))) (match_dup 3)) @@ -4046,9 +4042,7 @@ (match_operand:DF 3 "register_operand" ""))] "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_BASE_P (operands[1]) - && REG_OK_FOR_INDEX_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:DF (plus:SI (match_dup 2) (match_dup 1))) (match_dup 3)) @@ -4064,9 +4058,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && TARGET_64BIT - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_INDEX_P (operands[1]) - && REG_OK_FOR_BASE_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:DF (plus:DI (match_dup 1) (match_dup 2))) (match_dup 3)) @@ -4082,9 +4074,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && TARGET_64BIT - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_BASE_P (operands[1]) - && REG_OK_FOR_INDEX_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:DF (plus:DI (match_dup 2) (match_dup 1))) (match_dup 3)) @@ -4355,6 +4345,7 @@ && !TARGET_DISABLE_INDEXING && TARGET_64BIT && REG_OK_FOR_BASE_P (operands[2]) + && (TARGET_NO_SPACE_REGS || REG_POINTER (operands[2])) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:DI (plus:DI (mult:DI (match_dup 1) (const_int 8)) (match_dup 2))) (match_dup 3)) @@ -4371,9 +4362,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && TARGET_64BIT - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_INDEX_P (operands[1]) - && REG_OK_FOR_BASE_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:DI (plus:DI (match_dup 1) (match_dup 2))) (match_dup 3)) @@ -4389,9 +4378,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && TARGET_64BIT - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_BASE_P (operands[1]) - && REG_OK_FOR_INDEX_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:DI (plus:DI (match_dup 2) (match_dup 1))) (match_dup 3)) @@ -4625,6 +4612,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && REG_OK_FOR_BASE_P (operands[2]) + && (TARGET_NO_SPACE_REGS || REG_POINTER (operands[2])) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SF (plus:SI (mult:SI (match_dup 1) (const_int 4)) (match_dup 2))) (match_dup 3)) @@ -4643,6 +4631,7 @@ && !TARGET_DISABLE_INDEXING && TARGET_64BIT && REG_OK_FOR_BASE_P (operands[2]) + && (TARGET_NO_SPACE_REGS || REG_POINTER (operands[2])) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SF (plus:DI (mult:DI (match_dup 1) (const_int 4)) (match_dup 2))) (match_dup 3)) @@ -4658,9 +4647,7 @@ (match_operand:SF 3 "register_operand" ""))] "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_INDEX_P (operands[1]) - && REG_OK_FOR_BASE_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SF (plus:SI (match_dup 1) (match_dup 2))) (match_dup 3)) @@ -4675,9 +4662,7 @@ (match_operand:SF 3 "register_operand" ""))] "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_BASE_P (operands[1]) - && REG_OK_FOR_INDEX_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SF (plus:SI 
(match_dup 2) (match_dup 1))) (match_dup 3)) @@ -4693,9 +4678,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && TARGET_64BIT - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_INDEX_P (operands[1]) - && REG_OK_FOR_BASE_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SF (plus:DI (match_dup 1) (match_dup 2))) (match_dup 3)) @@ -4711,9 +4694,7 @@ "!TARGET_SOFT_FLOAT && !TARGET_DISABLE_INDEXING && TARGET_64BIT - && TARGET_NO_SPACE_REGS - && REG_OK_FOR_BASE_P (operands[1]) - && REG_OK_FOR_INDEX_P (operands[2]) + && REGS_OK_FOR_BASE_INDEX (operands[1], operands[2]) && FP_REGNO_P (REGNO (operands[3]))" [(set (mem:SF (plus:DI (match_dup 2) (match_dup 1))) (match_dup 3)) diff --git a/gcc/config/pa/pa.opt b/gcc/config/pa/pa.opt index fae4247..e90c4ca 100644 --- a/gcc/config/pa/pa.opt +++ b/gcc/config/pa/pa.opt @@ -87,7 +87,7 @@ Target Mask(LONG_CALLS) Always generate long calls. mlra -Target Var(pa_lra_p) Init(0) +Target Var(pa_lra_p) Init(1) Use LRA instead of reload (transitional). mlong-load-store diff --git a/gcc/config/pa/pa.opt.urls b/gcc/config/pa/pa.opt.urls index 5516332..29489f9 100644 --- a/gcc/config/pa/pa.opt.urls +++ b/gcc/config/pa/pa.opt.urls @@ -34,7 +34,7 @@ mlinker-opt UrlSuffix(gcc/HPPA-Options.html#index-mlinker-opt) mlong-calls -UrlSuffix(gcc/HPPA-Options.html#index-mlong-calls-5) +UrlSuffix(gcc/HPPA-Options.html#index-mlong-calls-6) ; skipping UrlSuffix for 'mlra' due to finding no URLs @@ -63,7 +63,7 @@ mschedule= UrlSuffix(gcc/HPPA-Options.html#index-mschedule) msoft-float -UrlSuffix(gcc/HPPA-Options.html#index-msoft-float-4) +UrlSuffix(gcc/HPPA-Options.html#index-msoft-float-3) msoft-mult UrlSuffix(gcc/HPPA-Options.html#index-msoft-mult) diff --git a/gcc/config/pa/pa64-linux.h b/gcc/config/pa/pa64-linux.h index 9b9c1f4..5af8253 100644 --- a/gcc/config/pa/pa64-linux.h +++ b/gcc/config/pa/pa64-linux.h @@ -21,6 +21,12 @@ along with GCC; see the file COPYING3. If not see #undef TARGET_ELF64 #define TARGET_ELF64 1 +/* Define this for shared library support because it isn't in the main + linux.h file. */ + +#undef GLIBC_DYNAMIC_LINKER +#define GLIBC_DYNAMIC_LINKER "/lib64/ld64.so.1" + #if 0 /* needs some work :-( */ /* If defined, this macro specifies a table of register pairs used to eliminate unneeded registers that point into the stack frame. */ diff --git a/gcc/config/pa/predicates.md b/gcc/config/pa/predicates.md index b0f8274..ecd2f25 100644 --- a/gcc/config/pa/predicates.md +++ b/gcc/config/pa/predicates.md @@ -472,16 +472,6 @@ if (! MEM_P (op)) return false; - /* Until problems with management of the REG_POINTER flag are resolved, - we need to delay creating move insns with unscaled indexed addresses - until CSE is not expected. */ - if (!TARGET_NO_SPACE_REGS - && !cse_not_expected - && GET_CODE (XEXP (op, 0)) == PLUS - && REG_P (XEXP (XEXP (op, 0), 0)) - && REG_P (XEXP (XEXP (op, 0), 1))) - return false; - return memory_address_p (mode, XEXP (op, 0)); }) @@ -496,16 +486,6 @@ if (! MEM_P (op)) return false; - /* Until problems with management of the REG_POINTER flag are resolved, - we need to delay creating move insns with unscaled indexed addresses - until CSE is not expected. 
*/ - if (!TARGET_NO_SPACE_REGS - && !cse_not_expected - && GET_CODE (XEXP (op, 0)) == PLUS - && REG_P (XEXP (XEXP (op, 0), 0)) - && REG_P (XEXP (XEXP (op, 0), 1))) - return false; - return (memory_address_p (mode, XEXP (op, 0))); }) diff --git a/gcc/config/pdp11/pdp11.opt.urls b/gcc/config/pdp11/pdp11.opt.urls index f0544ca..2e8c7b1 100644 --- a/gcc/config/pdp11/pdp11.opt.urls +++ b/gcc/config/pdp11/pdp11.opt.urls @@ -28,7 +28,7 @@ mint32 UrlSuffix(gcc/PDP-11-Options.html#index-mint32-1) msoft-float -UrlSuffix(gcc/PDP-11-Options.html#index-msoft-float-10) +UrlSuffix(gcc/PDP-11-Options.html#index-msoft-float-9) msplit UrlSuffix(gcc/PDP-11-Options.html#index-msplit) @@ -37,5 +37,5 @@ munix-asm UrlSuffix(gcc/PDP-11-Options.html#index-munix-asm) mlra -UrlSuffix(gcc/PDP-11-Options.html#index-mlra-2) +UrlSuffix(gcc/PDP-11-Options.html#index-mlra-1) diff --git a/gcc/config/pru/pru-protos.h b/gcc/config/pru/pru-protos.h index d2e51ea..12eed90 100644 --- a/gcc/config/pru/pru-protos.h +++ b/gcc/config/pru/pru-protos.h @@ -72,6 +72,8 @@ extern int pru_get_ctable_base_offset (HOST_WIDE_INT caddr); extern int pru_symref2ioregno (rtx op); +extern rtx pru_fixup_jump_address_operand (rtx op); + /* Forward declarations to avoid unnecessarily including headers. */ class simple_ipa_opt_pass; class gimple_opt_pass; diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc index 3fdc56e..d7e8309 100644 --- a/gcc/config/pru/pru.cc +++ b/gcc/config/pru/pru.cc @@ -1534,6 +1534,23 @@ int pru_symref2ioregno (rtx op) return -1; } +/* TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS has no arguments to help discern + which insn is using the address. But PRU load/store instructions support + offsets, while call instructions do not. + So call this when expanding call patterns to revert the effect of + TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS. */ +rtx +pru_fixup_jump_address_operand (rtx op) +{ + if (MEM_P (op) + && GET_CODE (XEXP (op, 0)) == PLUS) + { + rtx tmpval = force_reg (SImode, XEXP (op, 0)); + op = gen_rtx_MEM (SImode, tmpval); + } + return op; +} + /* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P. */ static bool pru_addr_space_legitimate_address_p (machine_mode mode, rtx operand, @@ -1582,6 +1599,29 @@ pru_addr_space_legitimate_address_p (machine_mode mode, rtx operand, } return false; } + +/* Implement TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS. */ +rtx +pru_addr_space_legitimize_address (rtx x, rtx, machine_mode, addr_space_t) +{ + if (CONST_INT_P (x) && optimize > 0) + { + HOST_WIDE_INT mask, base, index; + rtx base_reg; + + /* Load/store with UBYTE offset is practically free for PRU. + If there are two or more operations with addresses in the same UBYTE + address range, they will all share the base constant load operation. + Clearing the lower 8 bits is a good heuristic to + choose a common constant base address. */ + mask = 0xff; + base = INTVAL (x) & ~mask; + index = INTVAL (x) & mask; + base_reg = force_reg (SImode, GEN_INT (base)); + x = plus_constant (Pmode, base_reg, index); + } + return x; +} /* Output assembly language related definitions.
*/ @@ -3246,6 +3286,10 @@ pru_unwind_word_mode (void) #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \ pru_addr_space_legitimate_address_p +#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS +#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS \ + pru_addr_space_legitimize_address + #undef TARGET_INIT_LIBFUNCS #define TARGET_INIT_LIBFUNCS pru_init_libfuncs #undef TARGET_LIBFUNC_GNU_PREFIX diff --git a/gcc/config/pru/pru.h b/gcc/config/pru/pru.h index 9d547ed..967bb79 100644 --- a/gcc/config/pru/pru.h +++ b/gcc/config/pru/pru.h @@ -239,14 +239,14 @@ enum reg_class #define REG_CLASS_CONTENTS \ { \ /* NO_REGS */ { 0, 0, 0, 0, 0}, \ - /* SIB_REGS */ { 0xf, 0xff000000, ~0, 0xffffff, 0}, \ + /* SIB_REGS */ { 0xf, 0xff000000u, ~0u, 0xffffffu, 0},\ /* LOOPCNTR_REGS */ { 0, 0, 0, 0, 0xf}, \ - /* MULDST_REGS */ { 0, 0, 0, 0x00000f00, 0}, \ - /* MULSRC0_REGS */ { 0, 0, 0, 0x000f0000, 0}, \ - /* MULSRC1_REGS */ { 0, 0, 0, 0x00f00000, 0}, \ - /* REGIO_REGS */ { 0, 0, 0, 0xff000000, 0}, \ - /* GP_REGS */ { ~0, ~0, ~0, ~0, 0}, \ - /* ALL_REGS */ { ~0,~0, ~0, ~0, ~0} \ + /* MULDST_REGS */ { 0, 0, 0, 0x00000f00u, 0}, \ + /* MULSRC0_REGS */ { 0, 0, 0, 0x000f0000u, 0}, \ + /* MULSRC1_REGS */ { 0, 0, 0, 0x00f00000u, 0}, \ + /* REGIO_REGS */ { 0, 0, 0, 0xff000000u, 0}, \ + /* GP_REGS */ { ~0u, ~0u, ~0u, ~0u, 0}, \ + /* ALL_REGS */ { ~0u, ~0u, ~0u, ~0u, ~0u} \ } diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md index b8ef55b..20f5807 100644 --- a/gcc/config/pru/pru.md +++ b/gcc/config/pru/pru.md @@ -1289,7 +1289,9 @@ (match_operand 1 "")) (clobber (reg:HI RA_REGNUM))])] "" - "") +{ + operands[0] = pru_fixup_jump_address_operand (operands[0]); +}) (define_expand "call_value" [(parallel [(set (match_operand 0 "") @@ -1297,7 +1299,9 @@ (match_operand 2 ""))) (clobber (reg:HI RA_REGNUM))])] "" - "") +{ + operands[1] = pru_fixup_jump_address_operand (operands[1]); +}) (define_insn "*call" [(call (mem:SI (match_operand:SI 0 "call_operand" "i,r")) @@ -1325,7 +1329,9 @@ (match_operand 1 "")) (return)])] "" - "") +{ + operands[0] = pru_fixup_jump_address_operand (operands[0]); +}) (define_expand "sibcall_value" [(parallel [(set (match_operand 0 "") @@ -1333,7 +1339,9 @@ (match_operand 2 ""))) (return)])] "" - "") +{ + operands[1] = pru_fixup_jump_address_operand (operands[1]); +}) (define_insn "*sibcall" [(call (mem:SI (match_operand:SI 0 "call_operand" "i,Rsib")) diff --git a/gcc/config/riscv/andes-23-series.md b/gcc/config/riscv/andes-23-series.md new file mode 100644 index 0000000..8e19e05 --- /dev/null +++ b/gcc/config/riscv/andes-23-series.md @@ -0,0 +1,190 @@ +;; DFA-based pipeline description for Andes 23 series. +;; +;; Copyright (C) 2025 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
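Stepping back to the PRU legitimize-address hunk a little earlier: the heuristic splits a constant address into a 256-byte-aligned base, loaded once into a register, and a residual offset that fits the free UBYTE field of PRU load/store instructions. A short sketch of just that arithmetic, assuming nothing beyond the comment in the patch; split_addr is an illustrative helper, not part of the patch.

#include <cassert>

struct split_addr { long base; long index; };

/* Mirror the heuristic: clear the low 8 bits to pick a shared base;
   the remainder becomes the UBYTE offset of the load/store insn.  */
static split_addr split (long addr)
{
  const long mask = 0xff;
  return { addr & ~mask, addr & mask };
}

int main ()
{
  /* Two accesses in the same 256-byte window share one base load.  */
  split_addr a = split (0x12345);
  split_addr b = split (0x123a0);
  assert (a.base == 0x12300 && a.index == 0x45);
  assert (b.base == 0x12300 && b.index == 0xa0);
  return 0;
}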
+ +(define_automaton "andes_23_arch") + +(define_cpu_unit + "andes_23_alu0, andes_23_alu1, andes_23_lsu0, + andes_23_lsu1, andes_23_lsu2" + "andes_23_arch") + +(define_cpu_unit "andes_23_mdu" "andes_23_arch") +(define_cpu_unit "andes_23_fpu" "andes_23_arch") + +;; andes 23 unsupported insns are mapped to dummies reservations +(define_reservation "andes_23_dummies" + "andes_23_alu0 | andes_23_alu1 | andes_23_lsu0 | andes_23_lsu1 | + andes_23_lsu2 | andes_23_mdu | andes_23_fpu") + +(define_reservation "andes_23_alu" + "andes_23_alu0 | andes_23_alu1") + +(define_reservation "andes_23_lsu" + "andes_23_lsu0 | andes_23_lsu1 | andes_23_lsu2") + +(define_reservation "andes_23_pipe_unify" + "andes_23_alu0 + andes_23_alu1") + +(define_insn_reservation "andes_23_alu_insn" 1 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "unknown,const,arith,slt,multi,nop,move, + shift,logical,mvpair,auipc")) + "andes_23_alu") + +(define_insn_reservation "andes_23_load" 3 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "load")) + "andes_23_pipe_unify, andes_23_lsu*3") + +(define_insn_reservation "andes_23_store" 0 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "store")) + "andes_23_pipe_unify,andes_23_lsu*3") + +(define_insn_reservation "andes_23_branch" 0 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "branch,jump,call,jalr,ret,trap")) + "andes_23_pipe_unify") + +(define_insn_reservation "andes_23_imul" 2 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "imul")) + "andes_23_alu0, andes_23_mdu") + +(define_insn_reservation "andes_23_idivsi" 35 + (and (eq_attr "tune" "andes_23_series") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "SI"))) + "andes_23_pipe_unify, andes_23_mdu* 34") + +(define_insn_reservation "andes_23_idivdi" 35 + (and (eq_attr "tune" "andes_23_series") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "DI"))) + "andes_23_pipe_unify, andes_23_mdu* 34") + +(define_insn_reservation "andes_23_xfer" 1 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "mfc,mtc")) + "andes_23_alu") + +(define_insn_reservation "andes_23_fpu_alu" 4 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "fadd")) + "andes_23_pipe_unify, andes_23_fpu") + +(define_insn_reservation "andes_23_fpu_mul" 4 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "fmul")) + "andes_23_pipe_unify, andes_23_fpu") + +(define_insn_reservation "andes_23_fpu_mac" 4 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "fmadd")) + "andes_23_pipe_unify, andes_23_fpu") + +(define_insn_reservation "andes_23_fpu_div" 33 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "fdiv")) + "andes_23_pipe_unify, andes_23_fpu*33") + +(define_insn_reservation "andes_23_fpu_sqrt" 33 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "fsqrt")) + "andes_23_pipe_unify, andes_23_fpu*33") + +(define_insn_reservation "andes_23_fpu_move" 2 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "fmove,mtc,mfc")) + "andes_23_pipe_unify, andes_23_fpu") + +(define_insn_reservation "andes_23_fpu_cmp" 3 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "fcmp")) + "andes_23_pipe_unify, andes_23_fpu") + +(define_insn_reservation "andes_23_fpu_cvt" 3 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i")) + "andes_23_pipe_unify, andes_23_fpu") + +(define_insn_reservation "andes_23_fpu_load" 3 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "fpload")) + "andes_23_pipe_unify, andes_23_lsu*3") + 
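Each define_insn_reservation above pairs a result latency (the leading integer) with a unit-occupation pattern: the latency drives data-hazard stalls between dependent instructions, while the reservation string drives structural stalls on the functional units. Below is a deliberately simplified model of how the scheduler reads andes_23_load (latency 3, LSU held for 3 cycles); a toy under those assumptions, not the real DFA machinery.

#include <cassert>

struct reservation
{
  int latency;    /* cycles until a consumer may read the result */
  int unit_busy;  /* cycles the functional unit stays reserved */
};

int main ()
{
  /* andes_23_load: latency 3, reservation "... andes_23_lsu*3".  */
  reservation load = { 3, 3 };
  int issue_cycle = 0;
  int consumer_ready = issue_cycle + load.latency;   /* data hazard */
  int next_lsu_free = issue_cycle + load.unit_busy;  /* structural hazard */
  assert (consumer_ready == 3 && next_lsu_free == 3);
  return 0;
}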
+(define_insn_reservation "andes_23_fpu_store" 0 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "fpstore")) + "andes_23_pipe_unify, andes_23_lsu*3") + +(define_insn_reservation "andes_23_bitmanip" 1 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "bitmanip,minu,maxu,min,max,clmul,rotate,cpop,clz,ctz")) + "andes_23_alu0") + +(define_insn_reservation "andes_23_crypto" 1 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "crypto")) + "andes_23_alu0") + +(define_bypass 3 + "andes_23_fpu_mul" + "andes_23_fpu_alu,andes_23_fpu_mac, + andes_23_fpu_div,andes_23_fpu_sqrt") + +(define_bypass 3 + "andes_23_fpu_alu" + "andes_23_fpu_mul,andes_23_fpu_alu,andes_23_fpu_mac, + andes_23_fpu_div,andes_23_fpu_sqrt") + +(define_bypass 3 + "andes_23_fpu_mac" + "andes_23_fpu_mul,andes_23_fpu_alu,andes_23_fpu_mac, + andes_23_fpu_div,andes_23_fpu_sqrt") + +(define_bypass 2 + "andes_23_fpu_load" + "andes_23_fpu_div,andes_23_fpu_sqrt") + +(define_insn_reservation "andes_23_unknown" 1 + (and (eq_attr "tune" "andes_23_series") + (eq_attr "type" "ghost,zicond,mvpair,sfb_alu,condmove,atomic, + vclz,vror,vsha2ch,vsm4k,vaesef,vghsh,vsm4r,vsm3c, + vaeskf1,vandn,vaesdm,vclmul,vclmulh,vrol,vcpop,vbrev8, + vsm3me,vbrev,vctz,vgmul,vsha2ms,vaesz,vrev8, + vaeskf2,vsha2cl,vwsll,vaesdf,vaesem,vfwmaccbf16, + sf_vqmacc,sf_vc,sf_vc_se,sf_vfnrclip,vmsfs,vfwalu, + vnshift,vldm,vslidedown,vicmp,vfcvtftoi,vmffs,vlsegdux, + vfredo,vstux,vsshift,vfwcvtbf16,vmpop,vicalu,vldff, + vislide1down,vstox,vfwcvtftof,vfmov,vislide1up,vldr, + vfmul,vfrecp,vfncvtitof,vfwcvtftoi,vsts,viminmax,vext, + vaalu,vfdiv,vidiv,viwalu,vssegte,wrvxrm,vfmovvf,vlde, + vfclass,vshift,vimovxv,vssegtox,vfsqrt,vector,vmalu, + vfcvtitof,vlsegdff,vfslide1down,vimov,vialu,vmidx, + vsalu,vfmerge,rdvl,vlds,vfmuladd,vfsgnj,vslideup, + vfcmp,vfmovfv,vfwcvtitof,vfwmuladd,vfwredo,vlsegdox, + viwmul,vldox,vsmul,vstm,vfminmax,vmov,vfalu,vfncvtbf16, + vnclip,vimerge,vfwmul,vimovvx,vfncvtftoi,viwred,rdvlenb, + vfslide1up,vfncvtftof,vsetvl,viwmuladd,vfredu,vfwredu, + vlsegde,vmiota,vstr,vgather,vssegts,vldux,vlsegds,vimul, + vste,vsetvl_pre,vimuladd,vcompress,vssegtux,wrfrm,rdfrm, + vired")) + "andes_23_dummies") diff --git a/gcc/config/riscv/andes-25-series.md b/gcc/config/riscv/andes-25-series.md new file mode 100644 index 0000000..ef1a926 --- /dev/null +++ b/gcc/config/riscv/andes-25-series.md @@ -0,0 +1,322 @@ +;; DFA-based pipeline description for Andes 25 series. +;; +;; Copyright (C) 2025 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "andes_25_arch, andes_25_vector") + +;; Integer pipeline +(define_cpu_unit "andes_25_pipe" "andes_25_arch") +;; Division operation unit +(define_cpu_unit "andes_25_mdu" "andes_25_arch") +;; Floating point units +(define_cpu_unit "andes_25_fpu, andes_25_fpu_eu" "andes_25_arch") + +;; Vector execution unit. 
+(define_cpu_unit "andes_25_vpu_lsu, andes_25_vpu_alu, andes_25_vpu_mac, + andes_25_vpu_msk, andes_25_vpu_div, andes_25_vpu_fmac, + andes_25_vpu_fmis, andes_25_vpu_perm, andes_25_vpu_pipe" + "andes_25_vector") + +;; andes 25 series unsupported insns are mapped to dummies reservations +(define_reservation "andes_25_dummies" + "andes_25_pipe | andes_25_mdu | andes_25_fpu" +) + +;; andes 25 series vector unsupported insns are mapped to dummies reservations +(define_reservation "andes_25_vector_dummies" + "andes_25_vpu_lsu | andes_25_vpu_alu | andes_25_vpu_mac | andes_25_vpu_msk | + andes_25_vpu_div | andes_25_vpu_fmac | andes_25_vpu_fmis | + andes_25_vpu_perm | andes_25_vpu_pipe" +) + +(define_reservation "andes_25_fpu_arith" + "(andes_25_pipe + andes_25_fpu), andes_25_fpu_eu * 2") + +(define_reservation "andes_25_fpu_pipe" + "andes_25_pipe + andes_25_fpu") + +(define_insn_reservation "andes_25_alu_insn" 1 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "unknown,const,arith,shift,slt,multi,nop,logical,move, + auipc,atomic")) + "andes_25_pipe") + +(define_insn_reservation "andes_25_load_wd" 2 + (and (eq_attr "tune" "andes_25_series") + (and (eq_attr "type" "load") + (not (eq_attr "mode" "QI,HI")))) + "andes_25_pipe") + +(define_insn_reservation "andes_25_load_bh" 3 + (and (eq_attr "tune" "andes_25_series") + (and (eq_attr "type" "load") + (eq_attr "mode" "QI,HI"))) + "andes_25_pipe") + +(define_insn_reservation "andes_25_store" 0 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "store")) + "andes_25_pipe") + +(define_insn_reservation "andes_25_branch" 0 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "branch,jump,call,jalr,trap,ret")) + "andes_25_pipe") + +(define_insn_reservation "andes_25_imul" 3 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "imul")) + "andes_25_pipe") + +(define_insn_reservation "andes_25_idivsi" 38 + (and (eq_attr "tune" "andes_25_series") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "SI"))) + "andes_25_pipe, andes_25_mdu * 34") + +(define_insn_reservation "andes_25_idivdi" 70 + (and (eq_attr "tune" "andes_25_series") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "DI"))) + "andes_25_pipe, andes_25_mdu * 66") + +(define_insn_reservation "andes_25_xfer" 1 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "mfc,mtc")) + "andes_25_pipe") + +(define_insn_reservation "andes_25_fpu_alu" 5 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "fadd")) + "andes_25_fpu_arith") + +(define_insn_reservation "andes_25_fpu_mul" 5 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "fmul")) + "andes_25_fpu_arith") + +(define_insn_reservation "andes_25_fpu_mac" 5 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "fmadd")) + "andes_25_fpu_arith") + +(define_insn_reservation "andes_25_fpu_div" 33 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "fdiv")) + "andes_25_fpu_arith, andes_25_fpu_eu * 27") + +(define_insn_reservation "andes_25_fpu_sqrt" 33 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "fsqrt")) + "andes_25_fpu_arith, andes_25_fpu_eu * 27") + +(define_insn_reservation "andes_25_fpu_move" 3 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "fmove,mtc,mfc")) + "andes_25_fpu_pipe") + +(define_insn_reservation "andes_25_fpu_cmp" 3 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "fcmp")) + "andes_25_fpu_pipe") + +(define_insn_reservation "andes_25_fpu_cvt" 6 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i")) + 
"andes_25_fpu_arith, andes_25_fpu_eu") + +(define_insn_reservation "andes_25_fpu_load" 3 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "fpload")) + "andes_25_fpu_pipe") + +(define_insn_reservation "andes_25_fpu_store" 0 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "fpstore")) + "andes_25_fpu_pipe") + +(define_insn_reservation "andes_25_bitmanip" 1 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "bitmanip")) + "andes_25_pipe") + +;; Vector pipeline. + +(define_insn_reservation "andes_25_vload" 5 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vlde,vldm,vlds,vldff,vldr")) + "(andes_25_vpu_pipe + andes_25_vpu_lsu)*3") + +(define_insn_reservation "andes_25_index_vload" 8 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vldux,vldox")) + "(andes_25_vpu_pipe + andes_25_vpu_lsu)*3") + +(define_insn_reservation "andes_25_seg_vload" 16 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff")) + "(andes_25_vpu_pipe + andes_25_vpu_lsu)*3") + +(define_insn_reservation "andes_25_vstore" 0 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr,vssegte,\ + vssegts,vssegtux,vssegtox")) + "(andes_25_vpu_pipe + andes_25_vpu_lsu)*3") + +(define_insn_reservation "andes_25_vialu" 1 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vialu,vicalu,vshift,viminmax,vicmp,vimov,\ + vsalu,vaalu,vmov,vector,vimerge")) + "andes_25_vpu_pipe + andes_25_vpu_alu") + +(define_insn_reservation "andes_25_widen_vialu" 2 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "viwalu, vext, vsshift")) + "andes_25_vpu_pipe + andes_25_vpu_alu") + +(define_insn_reservation "andes_25_narrow_vialu" 3 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vnshift,vnclip")) + "andes_25_vpu_pipe + andes_25_vpu_alu") + +(define_insn_reservation "andes_25_vimul" 2 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vimul,vimuladd,vsmul")) + "andes_25_vpu_pipe + andes_25_vpu_mac") + +(define_insn_reservation "andes_25_widen_vimul" 3 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "viwmul,viwmuladd")) + "andes_25_vpu_pipe + andes_25_vpu_mac") + +(define_insn_reservation "andes_25_vperm" 3 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vslideup,vslidedown,vislide1up,vislide1down,\ + vfslide1up,vfslide1down,vgather")) + "andes_25_vpu_pipe + andes_25_vpu_perm") + +(define_insn_reservation "andes_25_vcompress" 4 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vcompress")) + "andes_25_vpu_pipe + andes_25_vpu_perm") + +(define_insn_reservation "andes_25_vmovv" 7 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vimovvx,vfmovvf")) + "(andes_25_vpu_pipe + andes_25_vpu_perm)*5") + +(define_insn_reservation "andes_25_vmovx" 3 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vimovxv,vfmovfv,vfmov")) + "andes_25_vpu_pipe + andes_25_vpu_perm") + +(define_insn_reservation "andes_25_vreduction" 3 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vired,viwred")) + "andes_25_vpu_pipe + andes_25_vpu_alu*5") + +(define_insn_reservation "andes_25_vidiv" 35 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vidiv")) + "andes_25_vpu_pipe + andes_25_vpu_div*34") + +(define_insn_reservation "andes_25_vmask_2" 2 + (eq_attr "type" "vmalu,vmsfs") + "andes_25_vpu_pipe + andes_25_vpu_msk") + +(define_insn_reservation "andes_25_vmask_3" 3 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" 
"vmiota,vmidx")) + "andes_25_vpu_pipe + andes_25_vpu_msk") + +(define_insn_reservation "andes_25_vpopc" 6 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vmpop")) + "andes_25_vpu_pipe + andes_25_vpu_msk") + +(define_insn_reservation "andes_25_vffs" 7 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vmffs")) + "andes_25_vpu_pipe + andes_25_vpu_msk") + +(define_insn_reservation "andes_25_vfadd" 4 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vfalu,vfwalu,vfmul,vfwmul,vfmuladd,\ + vfwmuladd")) + "andes_25_vpu_pipe + andes_25_vpu_fmac") + +(define_insn_reservation "andes_25_vfdiv" 39 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vfdiv,vfsqrt")) + "andes_25_vpu_pipe + andes_25_vpu_div*19") + +(define_insn_reservation "andes_25_vfmis" 2 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vfminmax,vfcmp,vfsgnj,vfclass,vfmerge")) + "andes_25_vpu_pipe + andes_25_vpu_fmis") + +(define_insn_reservation "andes_25_vfrecp" 3 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vfrecp")) + "andes_25_vpu_pipe + andes_25_vpu_div") + +(define_insn_reservation "andes_25_vfcvt" 2 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vfcvtitof,vfcvtftoi")) + "andes_25_vpu_pipe + andes_25_vpu_fmis") + +(define_insn_reservation "andes_25_widen_vfcvt" 5 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfwcvtbf16")) + "andes_25_vpu_pipe + andes_25_vpu_fmis") + +(define_insn_reservation "andes_25_narrow_vfcvt" 4 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vfncvtitof,vfncvtftoi,vfncvtftof,vfncvtbf16")) + "andes_25_vpu_pipe + andes_25_vpu_fmis") + +(define_insn_reservation "andes_25_vfreduction" 6 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vfredu,vfwredu,vfredo,vfwredo")) + "andes_25_vpu_pipe + andes_25_vpu_fmac*24") + +(define_insn_reservation "andes_25_vesetvl" 1 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vsetvl,vsetvl_pre")) + "andes_25_vpu_pipe") + +(define_insn_reservation "andes_25_vcsr" 1 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "wrvxrm,wrfrm,rdvlenb,rdvl")) + "andes_25_vpu_pipe") + +(define_insn_reservation "andes_25_unknown" 1 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "ghost,cpop,clz,ctz,zicond,mvpair,sfb_alu,minu,maxu, + min,max,clmul,rotate,crypto,condmove,rdfrm")) + "andes_25_dummies") + +(define_insn_reservation "andes_25_vector_unknown" 1 + (and (eq_attr "tune" "andes_25_series") + (eq_attr "type" "vclz,vror,vsha2ch,vsm4k,vaesef,vghsh,vsm4r,vsm3c, + vaeskf1,vandn,vaesdm,vclmul,vclmulh,vrol,vcpop,vbrev8, + vsm3me,vbrev,vctz,vgmul,vsha2ms,vaesz,vrev8, + vaeskf2,vsha2cl,vwsll,vaesdf,vaesem,vfwmaccbf16, + sf_vqmacc,sf_vc,sf_vc_se,sf_vfnrclip")) + "andes_25_vector_dummies") diff --git a/gcc/config/riscv/andes-45-series.md b/gcc/config/riscv/andes-45-series.md new file mode 100644 index 0000000..7693db8 --- /dev/null +++ b/gcc/config/riscv/andes-45-series.md @@ -0,0 +1,379 @@ +;; DFA-based pipeline description for Andes 45 series. +;; +;; Copyright (C) 2025 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. 
+ +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "andes_45_arch, andes_45_vector") + +(define_cpu_unit "andes_45_pipe0" "andes_45_arch") +(define_cpu_unit "andes_45_pipe1" "andes_45_arch") +(define_cpu_unit "andes_45_vpu_pipe0" "andes_45_vector") +(define_cpu_unit "andes_45_vpu_pipe1" "andes_45_vector") + +(define_reservation "andes_45_vpu_pipe" "(andes_45_vpu_pipe0 + andes_45_pipe0 | andes_45_vpu_pipe1 + andes_45_pipe1)") + +(define_cpu_unit "andes_45_mdu,andes_45_alu0,andes_45_alu1,andes_45_bru0,andes_45_bru1,andes_45_lsu" "andes_45_arch") +(define_cpu_unit "andes_45_fpu_fmac,andes_45_fpu_fdiv,andes_45_fpu_fmis,andes_45_fpu_fmv" "andes_45_arch") +(define_cpu_unit "andes_45_vpu_alu,andes_45_vpu_mac,andes_45_vpu_fmis,andes_45_vpu_permut, + andes_45_vpu_div,andes_45_vpu_fdiv,andes_45_vpu_mask,andes_45_vpu_lsu" "andes_45_vector") + +(define_reservation "andes_45_fpu_arith" + "andes_45_pipe0 + andes_45_fpu_fmac | andes_45_pipe1 + andes_45_fpu_fmac") + +;; andes 45 series unsupported insns are mapped to dummies reservations +(define_reservation "andes_45_dummies" + "andes_45_pipe0 | andes_45_pipe1, andes_45_alu0 | andes_45_alu1") + +;; andes 45 series vector unsupported insns are mapped to dummies reservations +(define_reservation "andes_45_vector_dummies" + "andes_45_pipe0 | andes_45_pipe1, andes_45_vpu_alu") + +(define_insn_reservation "andes_45_alu_insn_s" 1 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "shift,nop,logical")) + "andes_45_pipe0 + andes_45_alu0 | andes_45_pipe1 + andes_45_alu1") + +(define_insn_reservation "andes_45_alu_insn_l" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "unknown,const,arith,multi,slt,move,auipc,atomic,bitmanip")) + "andes_45_pipe0 + andes_45_alu0 | andes_45_pipe1 + andes_45_alu1") + +(define_insn_reservation "andes_45_cmov" 1 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "condmove")) + "andes_45_pipe0 + andes_45_alu0 + andes_45_pipe1 + andes_45_alu1") + +(define_insn_reservation "andes_45_load_wd" 4 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "load") + (not (eq_attr "mode" "QI,HI")))) + "andes_45_pipe0 + andes_45_lsu | andes_45_pipe1 + andes_45_lsu") + +(define_insn_reservation "andes_45_load_bh" 5 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "load") + (eq_attr "mode" "QI,HI"))) + "andes_45_pipe0 + andes_45_lsu | andes_45_pipe1 + andes_45_lsu") + +(define_insn_reservation "andes_45_store_d" 0 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "store") + (eq_attr "mode" "DI,SI"))) + "andes_45_pipe0 + andes_45_lsu | andes_45_pipe1 + andes_45_lsu") + +(define_insn_reservation "andes_45_store" 0 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "store") + (not (eq_attr "mode" "DI,SI")))) + "andes_45_pipe0 + andes_45_pipe1 + andes_45_lsu") + +(define_insn_reservation "andes_45_branch" 1 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "branch,jump,call,ret,jalr,trap")) + "andes_45_pipe0 + andes_45_bru0 | andes_45_pipe1 + andes_45_bru1") + +(define_insn_reservation "andes_45_imul" 3 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "imul")) + "andes_45_pipe0 + 
andes_45_alu0 | andes_45_pipe1 + andes_45_alu1, andes_45_mdu * 2") + +(define_insn_reservation "andes_45_idivsi" 38 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "SI"))) + "andes_45_pipe0 + andes_45_alu0 | andes_45_pipe1 + andes_45_alu1, andes_45_mdu * 2") + +(define_insn_reservation "andes_45_idivdi" 70 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "DI"))) + "andes_45_pipe0 + andes_45_alu0 | andes_45_pipe1 + andes_45_alu1, andes_45_mdu * 2") + +(define_insn_reservation "andes_45_xfer" 1 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "mfc,mtc")) + "andes_45_pipe0 + andes_45_alu0 | andes_45_pipe1 + andes_45_alu1") + +(define_insn_reservation "andes_45_fpu_alu_s" 3 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "fadd") + (eq_attr "mode" "SF"))) + "andes_45_fpu_arith") + +(define_insn_reservation "andes_45_fpu_alu_d" 4 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "fadd") + (eq_attr "mode" "DF"))) + "andes_45_fpu_arith") + +(define_insn_reservation "andes_45_fpu_mul_s" 3 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "fmul") + (eq_attr "mode" "SF"))) + "andes_45_fpu_arith") + +(define_insn_reservation "andes_45_fpu_mul_d" 4 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "fmul") + (eq_attr "mode" "DF"))) + "andes_45_fpu_arith") + +(define_insn_reservation "andes_45_fpu_mac_s" 3 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "fmadd") + (eq_attr "mode" "SF"))) + "(andes_45_pipe0 | andes_45_pipe1) + andes_45_fpu_fmac + andes_45_fpu_fmv + andes_45_fpu_fmis") + +(define_insn_reservation "andes_45_fpu_mac_d" 4 + (and (eq_attr "tune" "andes_45_series") + (and (eq_attr "type" "fmadd") + (eq_attr "mode" "DF"))) + "(andes_45_pipe0 | andes_45_pipe1) + andes_45_fpu_fmac + andes_45_fpu_fmv + andes_45_fpu_fmis") + +(define_insn_reservation "andes_45_fpu_div" 33 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "fdiv")) + "andes_45_pipe0 + andes_45_fpu_fdiv | andes_45_pipe1 + andes_45_fpu_fdiv, andes_45_fpu_fdiv * 27") + +(define_insn_reservation "andes_45_fpu_sqrt" 33 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "fsqrt")) + "andes_45_pipe0 + andes_45_fpu_fdiv | andes_45_pipe1 + andes_45_fpu_fdiv, andes_45_fpu_fdiv * 27") + +(define_insn_reservation "andes_45_fpu_move" 1 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "fmove,mtc,mfc")) + "andes_45_pipe0 + andes_45_fpu_fmv | andes_45_pipe1 + andes_45_fpu_fmv") + +(define_insn_reservation "andes_45_fpu_cmp" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "fcmp")) + "andes_45_pipe0 + andes_45_fpu_fmis | andes_45_pipe1 + andes_45_fpu_fmis") + +(define_insn_reservation "andes_45_fpu_cvt" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "fcvt,fcvt_f2i,fcvt_i2f")) + "andes_45_pipe0 + andes_45_fpu_fmis | andes_45_pipe1 + andes_45_fpu_fmis") + +(define_insn_reservation "andes_45_fpu_load" 4 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "fpload")) + "andes_45_pipe0 + andes_45_pipe1 + andes_45_lsu") + +(define_insn_reservation "andes_45_fpu_store" 0 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "fpstore")) + "andes_45_pipe0 + andes_45_pipe1 + andes_45_lsu") + +(define_insn_reservation "andes_45_vpu_load_e" 8 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vlde,vldm,vldr,vlsegde,vldff,vlsegdff")) + "(andes_45_vpu_pipe + andes_45_vpu_lsu), andes_45_vpu_lsu * 2") + 
+(define_insn_reservation "andes_45_vpu_load_s" 10 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vlds,vlsegds")) + "(andes_45_vpu_pipe + andes_45_vpu_lsu), andes_45_vpu_lsu * 3") + +(define_insn_reservation "andes_45_vpu_load_x" 12 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vldox,vldux,vlsegdox,vlsegdux")) + "(andes_45_vpu_pipe + andes_45_vpu_lsu), andes_45_vpu_lsu * 4") + +(define_insn_reservation "andes_45_vpu_store" 0 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vste,vstm,vstr,vsts,vstux,vstox,vssegtox,vssegte, + vssegtux,vssegts")) + "andes_45_vpu_pipe + andes_45_lsu + andes_45_vpu_lsu") + +(define_insn_reservation "andes_45_vpu_alu" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vialu,viwalu,vicalu,vsalu,vaalu,vector")) + "andes_45_vpu_pipe + andes_45_vpu_alu") + +(define_insn_reservation "andes_45_vpu_ext" 3 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vext")) + "andes_45_vpu_pipe + andes_45_vpu_permut") + +(define_insn_reservation "andes_45_vpu_shift" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vshift,vnshift,vsshift")) + "andes_45_vpu_pipe + andes_45_vpu_alu") + +(define_insn_reservation "andes_45_vpu_minmax" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "viminmax")) + "andes_45_vpu_pipe + andes_45_vpu_alu") + +(define_insn_reservation "andes_45_vpu_cmp" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vicmp")) + "andes_45_vpu_pipe + andes_45_vpu_alu") + +(define_insn_reservation "andes_45_vpu_mul" 3 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vimul,viwmul,vsmul")) + "andes_45_vpu_pipe + andes_45_vpu_mac") + +(define_insn_reservation "andes_45_vpu_div" 36 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vidiv")) + "andes_45_vpu_pipe + andes_45_vpu_div * 35") + +(define_insn_reservation "andes_45_vpu_madd" 4 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vimuladd,viwmuladd")) + "andes_45_vpu_pipe + andes_45_vpu_mac") + +(define_insn_reservation "andes_45_vpu_merge" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vimerge")) + "andes_45_vpu_pipe + andes_45_vpu_alu") + +(define_insn_reservation "andes_45_vpu_move" 3 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vimov,vimovvx,vimovxv,vmov,vslideup,vslidedown,vislide1up,vislide1down")) + "andes_45_vpu_pipe + andes_45_vpu_permut") + +(define_insn_reservation "andes_45_vpu_clip" 3 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vnclip")) + "andes_45_vpu_pipe + andes_45_vpu_alu") + +(define_insn_reservation "andes_45_vpu_falu" 4 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vfalu,vfwalu,vfmul,vfwmul")) + "andes_45_vpu_pipe + andes_45_vpu_mac") + +(define_insn_reservation "andes_45_vpu_fdiv" 38 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vfdiv,vfsqrt")) + "andes_45_vpu_pipe + andes_45_vpu_fdiv") + +(define_insn_reservation "andes_45_vpu_fmadd" 5 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vfmuladd,vfwmuladd")) + "andes_45_vpu_pipe + andes_45_vpu_mac") + +(define_insn_reservation "andes_45_vpu_fminmax" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vfminmax")) + "andes_45_vpu_pipe + andes_45_vpu_fmis") + +(define_insn_reservation "andes_45_vpu_fcmp" 3 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vfcmp,vfrecp")) + "andes_45_vpu_pipe + andes_45_vpu_fmis") + +(define_insn_reservation "andes_45_vpu_fsgnj" 2 + (and (eq_attr "tune" "andes_45_series") + 
(eq_attr "type" "vfsgnj")) + "andes_45_vpu_pipe + andes_45_vpu_fmis") + +(define_insn_reservation "andes_45_vpu_fclass" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vfclass")) + "andes_45_vpu_pipe + andes_45_vpu_fmis") + +(define_insn_reservation "andes_45_vpu_fmerge" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vfmerge")) + "andes_45_vpu_pipe + andes_45_vpu_fmis") + +(define_insn_reservation "andes_45_vpu_fmove" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vfmov,vfmovvf,vfmovfv,vfslide1up,vfslide1down")) + "andes_45_vpu_pipe + andes_45_vpu_permut") + +(define_insn_reservation "andes_45_vpu_fcvt" 3 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof, + vfncvtitof,vfncvtftoi,vfncvtftof,vfwcvtbf16,vfncvtbf16")) + "andes_45_vpu_pipe + andes_45_vpu_fmis") + +(define_insn_reservation "andes_45_vpu_red" 9 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vired,viwred")) + "andes_45_vpu_pipe + andes_45_vpu_alu") + +(define_insn_reservation "andes_45_vpu_fredu" 6 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vfredu,vfwredu")) + "andes_45_vpu_pipe + andes_45_vpu_mac") + +(define_insn_reservation "andes_45_vpu_fredo" 34 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vfredo,vfwredo")) + "andes_45_vpu_pipe + andes_45_vpu_mac") + +(define_insn_reservation "andes_45_vpu_malu" 3 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vmalu")) + "andes_45_vpu_pipe + andes_45_vpu_mask") + +(define_insn_reservation "andes_45_vpu_mask" 4 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vmpop,vmffs,vmsfs,vmiota,vmidx")) + "andes_45_vpu_pipe + andes_45_vpu_mask") + +(define_insn_reservation "andes_45_vpu_gather" 2 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vgather")) + "andes_45_vpu_pipe + andes_45_vpu_permut") + +(define_insn_reservation "andes_45_vpu_compress" 4 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vcompress")) + "andes_45_vpu_pipe + andes_45_vpu_permut") + +(define_insn_reservation "andes_45_vcpu_csr" 1 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "wrvxrm,wrfrm,rdvlenb,rdvl,vsetvl,vsetvl_pre")) + "andes_45_vpu_pipe") + +(define_bypass 1 + "andes_45_fpu_alu_s, andes_45_fpu_mul_s, andes_45_fpu_mac_s" + "andes_45_load_wd, andes_45_load_bh, andes_45_store, + andes_45_fpu_load, andes_45_fpu_store") + +(define_bypass 2 + "andes_45_fpu_alu_d, andes_45_fpu_mul_d, andes_45_fpu_mac_d" + "andes_45_load_wd, andes_45_load_bh, andes_45_store, + andes_45_fpu_load, andes_45_fpu_store") + +(define_bypass 1 + "andes_45_fpu_cmp, andes_45_fpu_cvt" + "andes_45_load_wd, andes_45_load_bh, andes_45_store, + andes_45_fpu_load, andes_45_fpu_store, andes_45_alu_insn_s, + andes_45_alu_insn_l, andes_45_xfer") + +(define_insn_reservation "andes_45_unknown" 1 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "ghost,cpop,clz,ctz,zicond,mvpair,sfb_alu,minu,maxu, + min,max,clmul,rotate,crypto,condmove,rdfrm")) + "andes_45_dummies") + +(define_insn_reservation "andes_45_vector_unknown" 1 + (and (eq_attr "tune" "andes_45_series") + (eq_attr "type" "vclz,vror,vsha2ch,vsm4k,vaesef,vghsh,vsm4r,vsm3c, + vaeskf1,vandn,vaesdm,vclmul,vclmulh,vrol,vcpop,vbrev8, + vsm3me,vbrev,vctz,vgmul,vsha2ms,vaesz,vrev8, + vaeskf2,vsha2cl,vwsll,vaesdf,vaesem,vfwmaccbf16, + sf_vqmacc,sf_vc,sf_vc_se,sf_vfnrclip,vlsegde")) + "andes_45_vector_dummies") diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md 
index d2705cf..40627fa 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1951,6 +1951,61 @@ } [(set_attr "type" "viwalu")]) +(define_insn_and_split "*widen_mul_plus_vx_<mode>" + [(set (match_operand:VWEXTI 0 "register_operand") + (plus:VWEXTI + (mult:VWEXTI + (zero_extend:VWEXTI + (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")) + (vec_duplicate:VWEXTI + (zero_extend:<VEL> + (match_operand:<VSUBEL> 1 "register_operand")))) + (match_operand:VWEXTI 3 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + insn_code icode = code_for_pred_widen_mul_plus_u_vx (<MODE>mode); + rtx v_undef = RVV_VUNDEF(<MODE>mode); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], v_undef}; + + riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops); + + DONE; + } + [(set_attr "type" "viwmuladd")]) + +(define_insn_and_split "*pred_cmp_swapped<mode>_scalar" + [(set (match_operand:<VM> 0 "register_operand") + (if_then_else:<VM> + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operator:<VM> 3 "comparison_swappable_operator" + [(vec_duplicate:V_VLSI + (match_operand:<VEL> 4 "register_operand")) + (match_operand:V_VLSI 5 "register_operand")]) + (unspec:<VM> + [(match_operand:DI 2 "register_operand")] UNSPEC_VUNDEF)))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::expand_vx_cmp_vec_dup_vec (operands[0], operands[4], + operands[5], + GET_CODE (operands[3]), + <MODE>mode); + + DONE; + } + [(set_attr "type" "vicmp")]) + ;; ============================================================================= ;; Combine vec_duplicate + op.vv to op.vf ;; Include @@ -2399,3 +2454,44 @@ } [(set_attr "type" "vfalu")] ) + +;; Combine vsext.vf + vsll.vi into vwsll.vi. This depends on ZVBB. +;; Since vwsll.vi zero-extends, the combination is only valid when the +;; shift amount is greater than or equal to the precision of the +;; double-truncated mode. +;; This pattern appears in the satd function of x264.
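+;; For example, with <V_DOUBLE_TRUNC> being V4HI, both +;; (ashift (sign_extend:V4SI x) 16) and (ashift (zero_extend:V4SI x) 16) +;; shift every copy of the 16-bit sign out of the result, so the +;; zero-extending vwsll.vi computes the same value.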
+(define_insn_and_split "*vwsll_sign_extend_<mode>" + [(set (match_operand:VWEXTI 0 "register_operand") + (ashift:VWEXTI + (sign_extend:VWEXTI + (match_operand:<V_DOUBLE_TRUNC> 1 "register_operand")) + (match_operand 2 "const_int_operand")))] + "TARGET_VECTOR && TARGET_ZVBB && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + int imm = INTVAL (operands[2]); + int trunc_prec = GET_MODE_PRECISION (GET_MODE_INNER (<V_DOUBLE_TRUNC>mode)); + + if (imm >= trunc_prec) + { + insn_code icode = code_for_pred_vwsll_scalar (<MODE>mode); + emit_vlmax_insn (icode, riscv_vector::BINARY_OP, operands); + } + else + { + insn_code icode = code_for_pred_vf2 (SIGN_EXTEND, <MODE>mode); + rtx extend = gen_reg_rtx (<MODE>mode); + rtx unary_ops[] = {extend, operands[1]}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::UNARY_OP, + unary_ops); + + icode = code_for_pred_scalar (ASHIFT, <MODE>mode); + rtx binary_ops[] = {operands[0], extend, operands[2]}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, + binary_ops); + } + + DONE; + } +) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 48de5ef..c694684 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -55,8 +55,8 @@ [(match_operand:RATIO64 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO64I 2 "register_operand") - (match_operand 3 "<RATIO64:gs_extension>") - (match_operand 4 "<RATIO64:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO64:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -71,8 +71,8 @@ [(match_operand:RATIO32 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO32I 2 "register_operand") - (match_operand 3 "<RATIO32:gs_extension>") - (match_operand 4 "<RATIO32:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO32:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -87,8 +87,8 @@ [(match_operand:RATIO16 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO16I 2 "register_operand") - (match_operand 3 "<RATIO16:gs_extension>") - (match_operand 4 "<RATIO16:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO16:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -103,8 +103,8 @@ [(match_operand:RATIO8 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO8I 2 "register_operand") - (match_operand 3 "<RATIO8:gs_extension>") - (match_operand 4 "<RATIO8:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO8:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -119,8 +119,8 @@ [(match_operand:RATIO4 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO4I 2 "register_operand") - (match_operand 3 "<RATIO4:gs_extension>") - (match_operand 4 "<RATIO4:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO4:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -135,8 +135,8 @@ [(match_operand:RATIO2 0 "register_operand") 
(match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO2I 2 "register_operand") - (match_operand 3 "<RATIO2:gs_extension>") - (match_operand 4 "<RATIO2:gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<RATIO2:VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -155,8 +155,8 @@ [(match_operand:RATIO1 0 "register_operand") (match_operand 1 "pmode_reg_or_0_operand") (match_operand:RATIO1 2 "register_operand") - (match_operand 3 "<gs_extension>") - (match_operand 4 "<gs_scale>") + (match_operand 3 "const_1_operand") + (match_operand 4 "const_1_operand") (match_operand:<VM> 5 "vector_mask_operand") (match_operand 6 "maskload_else_operand") (match_operand 7 "autovec_length_operand") @@ -174,8 +174,8 @@ (define_expand "mask_len_scatter_store<RATIO64:mode><RATIO64I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO64I 1 "register_operand") - (match_operand 2 "<RATIO64:gs_extension>") - (match_operand 3 "<RATIO64:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO64 4 "register_operand") (match_operand:<RATIO64:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -189,8 +189,8 @@ (define_expand "mask_len_scatter_store<RATIO32:mode><RATIO32I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO32I 1 "register_operand") - (match_operand 2 "<RATIO32:gs_extension>") - (match_operand 3 "<RATIO32:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO32 4 "register_operand") (match_operand:<RATIO32:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -204,8 +204,8 @@ (define_expand "mask_len_scatter_store<RATIO16:mode><RATIO16I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO16I 1 "register_operand") - (match_operand 2 "<RATIO16:gs_extension>") - (match_operand 3 "<RATIO16:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO16 4 "register_operand") (match_operand:<RATIO16:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -219,8 +219,8 @@ (define_expand "mask_len_scatter_store<RATIO8:mode><RATIO8I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO8I 1 "register_operand") - (match_operand 2 "<RATIO8:gs_extension>") - (match_operand 3 "<RATIO8:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO8 4 "register_operand") (match_operand:<RATIO8:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -234,8 +234,8 @@ (define_expand "mask_len_scatter_store<RATIO4:mode><RATIO4I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO4I 1 "register_operand") - (match_operand 2 "<RATIO4:gs_extension>") - (match_operand 3 "<RATIO4:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO4 4 "register_operand") (match_operand:<RATIO4:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -249,8 +249,8 @@ (define_expand "mask_len_scatter_store<RATIO2:mode><RATIO2I:mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO2I 1 "register_operand") - (match_operand 2 "<RATIO2:gs_extension>") - (match_operand 3 "<RATIO2:gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") 
(match_operand:RATIO2 4 "register_operand") (match_operand:<RATIO2:VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -268,8 +268,8 @@ (define_expand "mask_len_scatter_store<mode><mode>" [(match_operand 0 "pmode_reg_or_0_operand") (match_operand:RATIO1 1 "register_operand") - (match_operand 2 "<gs_extension>") - (match_operand 3 "<gs_scale>") + (match_operand 2 "const_1_operand") + (match_operand 3 "const_1_operand") (match_operand:RATIO1 4 "register_operand") (match_operand:<VM> 5 "vector_mask_operand") (match_operand 6 "autovec_length_operand") @@ -1335,10 +1335,11 @@ ;; == SELECT_VL ;; ========================================================================= -(define_expand "select_vl<mode>" +(define_expand "select_vl<V:mode><P:mode>" [(match_operand:P 0 "register_operand") (match_operand:P 1 "vector_length_operand") - (match_operand:P 2 "immediate_operand")] + (match_operand:P 2 "immediate_operand") + (match_operand:V 3)] "TARGET_VECTOR" { riscv_vector::expand_select_vl (operands); @@ -1350,9 +1351,9 @@ ;; ------------------------------------------------------------------------- (define_expand "vec_set<mode>" - [(match_operand:V_VLS 0 "register_operand") - (match_operand:<VEL> 1 "register_operand") - (match_operand 2 "nonmemory_operand")] + [(match_operand:V_VLS_ZVFH 0 "register_operand") + (match_operand:<VEL> 1 "register_operand") + (match_operand 2 "nonmemory_operand")] "TARGET_VECTOR" { /* If we set the first element, emit an v(f)mv.s.[xf]. */ @@ -2301,6 +2302,37 @@ }) ;; ------------------------------------------------------------------------- +;; ---- [INT] Mask reductions +;; ------------------------------------------------------------------------- + +(define_expand "reduc_sbool_and_scal_<mode>" + [(match_operand:QI 0 "register_operand") + (match_operand:VB_VLS 1 "register_operand")] + "TARGET_VECTOR" +{ + riscv_vector::expand_mask_reduction (operands, AND); + DONE; +}) + +(define_expand "reduc_sbool_ior_scal_<mode>" + [(match_operand:QI 0 "register_operand") + (match_operand:VB_VLS 1 "register_operand")] + "TARGET_VECTOR" +{ + riscv_vector::expand_mask_reduction (operands, IOR); + DONE; +}) + +(define_expand "reduc_sbool_xor_scal_<mode>" + [(match_operand:QI 0 "register_operand") + (match_operand:VB_VLS 1 "register_operand")] + "TARGET_VECTOR" +{ + riscv_vector::expand_mask_reduction (operands, XOR); + DONE; +}) + +;; ------------------------------------------------------------------------- ;; ---- [FP] Tree reductions ;; ------------------------------------------------------------------------- ;; Includes: diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 697198f..166ddd9 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -357,7 +357,7 @@ { if (TARGET_XTHEADBB && !immediate_operand (operands[2], VOIDmode)) FAIL; - if (TARGET_64BIT && register_operand (operands[2], QImode)) + if (TARGET_64BIT) { rtx t = gen_reg_rtx (DImode); emit_insn (gen_rotrsi3_sext (t, operands[1], operands[2])); diff --git a/gcc/config/riscv/crypto.md b/gcc/config/riscv/crypto.md index 37ab5c3..98bb4d6 100644 --- a/gcc/config/riscv/crypto.md +++ b/gcc/config/riscv/crypto.md @@ -173,6 +173,40 @@ (zero_extend:SI (match_dup 2)))))] "operands[1] = gen_lowpart (SImode, operands[1]);") +(define_split + [(set (match_operand:DI 0 "register_operand") + (ior:DI (zero_extend:DI (match_operand:HI 1 "register_operand")) + (ashift:DI + (sign_extend:DI (match_operand:HI 2 "register_operand")) + (const_int 16))))] + "TARGET_ZBKB && 
TARGET_64BIT" + [(set (match_dup 0) + (sign_extend:DI (ior:SI (ashift:SI (match_dup 2) (const_int 16)) + (zero_extend:SI (match_dup 1)))))] + "operands[2] = gen_lowpart (SImode, operands[2]);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (ior:DI (sign_extend:DI + (ashift:SI (match_operand:SI 1 "register_operand") + (const_int 16))) + (zero_extend:DI (match_operand:HI 2 "register_operand"))))] + "TARGET_ZBKB && TARGET_64BIT" + [(set (match_dup 0) + (sign_extend:DI (ior:SI (ashift:SI (match_dup 1) (const_int 16)) + (zero_extend:SI (match_dup 2)))))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (ior:DI (zero_extend:DI (match_operand:HI 1 "register_operand")) + (sign_extend:DI + (ashift:SI (match_operand:SI 2 "register_operand") + (const_int 16)))))] + "TARGET_ZBKB && TARGET_64BIT" + [(set (match_dup 0) + (sign_extend:DI (ior:SI (ashift:SI (match_dup 2) (const_int 16)) + (zero_extend:SI (match_dup 1)))))]) + ;; And this patches the result of the splitter above. (define_insn "*riscv_packw_2" [(set (match_operand:DI 0 "register_operand" "=r") diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index f811a4e..5b44165 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -611,6 +611,9 @@ (define_predicate "comparison_except_ge_operator" (match_code "eq,ne,le,leu,gt,gtu,lt,ltu")) +(define_predicate "comparison_swappable_operator" + (match_code "gtu,gt")) + (define_predicate "ge_operator" (match_code "ge,geu")) diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc index d497326..24537d5 100644 --- a/gcc/config/riscv/riscv-c.cc +++ b/gcc/config/riscv/riscv-c.cc @@ -165,15 +165,6 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile) if (!subset_list) return; - /* Define profile macro if a profile was used. */ - const char *profile_name = subset_list->get_profile_name (); - if (profile_name) - { - char *profile_macro = (char *)alloca (strlen (profile_name) + 10); - sprintf (profile_macro, "__riscv_%s", profile_name); - builtin_define (profile_macro); - } - size_t max_ext_len = 0; /* Figure out the max length of extension name for reserving buffer. */ @@ -222,6 +213,49 @@ riscv_pragma_intrinsic (cpp_reader *) error ("unknown %<#pragma riscv intrinsic%> option %qs", name); } +/* Implement TARGETM.TARGET_OPTION.PRAGMA_PARSE. */ + +static bool +riscv_pragma_target_parse (tree args, tree pop_target) +{ + /* If args is not NULL then process it and setup the target-specific + information that it specifies. */ + if (args) + { + if (!riscv_process_target_attr_for_pragma (args)) + return false; + + riscv_override_options_internal (&global_options); + } + /* args is NULL, restore to the state described in pop_target. */ + else + { + pop_target = pop_target ? pop_target : target_option_default_node; + cl_target_option_restore (&global_options, &global_options_set, + TREE_TARGET_OPTION (pop_target)); + } + + target_option_current_node + = build_target_option_node (&global_options, &global_options_set); + + riscv_reset_previous_fndecl (); + + /* For the definitions, ensure all newly defined macros are considered + as used for -Wunused-macros. There is no point warning about the + compiler predefined macros. 
*/ + cpp_options *cpp_opts = cpp_get_options (parse_in); + unsigned char saved_warn_unused_macros = cpp_opts->warn_unused_macros; + cpp_opts->warn_unused_macros = 0; + + cpp_force_token_locations (parse_in, BUILTINS_LOCATION); + riscv_cpu_cpp_builtins (parse_in); + cpp_stop_forcing_token_locations (parse_in); + + cpp_opts->warn_unused_macros = saved_warn_unused_macros; + + return true; +} + /* Implement TARGET_CHECK_BUILTIN_CALL. */ static bool riscv_check_builtin_call (location_t loc, vec<location_t> arg_loc, tree fndecl, @@ -281,5 +315,6 @@ riscv_register_pragmas (void) { targetm.resolve_overloaded_builtin = riscv_resolve_overloaded_builtin; targetm.check_builtin_call = riscv_check_builtin_call; + targetm.target_option.pragma_parse = riscv_pragma_target_parse; c_register_pragma ("riscv", "intrinsic", riscv_pragma_intrinsic); } diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index cc9d5c0..7266b5e 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -50,9 +50,13 @@ RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info) RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info) RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info) RISCV_TUNE("xiangshan-kunminghu", xiangshan, generic_ooo_tune_info) +RISCV_TUNE("spacemit-x60", spacemit_x60, spacemit_x60_tune_info) RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info) RISCV_TUNE("size", generic, optimize_size_tune_info) RISCV_TUNE("mips-p8700", mips_p8700, mips_p8700_tune_info) +RISCV_TUNE("andes-25-series", andes_25_series, andes_25_tune_info) +RISCV_TUNE("andes-23-series", andes_23_series, andes_23_tune_info) +RISCV_TUNE("andes-45-series", andes_45_series, andes_45_tune_info) #undef RISCV_TUNE @@ -171,4 +175,27 @@ RISCV_CORE("xiangshan-kunminghu", "rv64imafdcbvh_sdtrig_sha_shcounterenw_" RISCV_CORE("mips-p8700", "rv64imfd_zicsr_zifencei_zalrsc_zba_zbb", "mips-p8700") + +RISCV_CORE("andes-n22", "rv32imc_zicsr_zifencei_xandesperf", "andes-25-series") +RISCV_CORE("andes-n25", "rv32imc_zicsr_zifencei_xandesperf", "andes-25-series") +RISCV_CORE("andes-a25", "rv32imafdc_zicsr_zifencei_xandesperf", "andes-25-series") +RISCV_CORE("andes-nx25", "rv64imc_zicsr_zifencei_xandesperf", "andes-25-series") +RISCV_CORE("andes-ax25", "rv64imafdc_zicsr_zifencei_xandesperf", "andes-25-series") +RISCV_CORE("andes-a27", "rv32imafdc_zicsr_zifencei_xandesperf", "andes-25-series") +RISCV_CORE("andes-ax27", "rv64imafdc_zicsr_zifencei_xandesperf", "andes-25-series") +RISCV_CORE("andes-n225", "rv32im_zicsr_zifencei_zca_zcb_zcmp_zcmt_" + "zba_zbb_zbc_zbs_xandesperf", + "andes-23-series") +RISCV_CORE("andes-d23", "rv32im_zicsr_zifencei_zicbop_zicbom_zicboz_" + "zca_zcb_zcmp_zcmt_zba_zbb_zbc_zbs_xandesperf", + "andes-23-series") +RISCV_CORE("andes-n45", "rv32imc_zicsr_zifencei_xandesperf", "andes-45-series") +RISCV_CORE("andes-nx45", "rv64imc_zicsr_zifencei_xandesperf", "andes-45-series") +RISCV_CORE("andes-a45", "rv32imafdc_zicsr_zifencei_xandesperf", "andes-45-series") +RISCV_CORE("andes-ax45", "rv64imafdc_zicsr_zifencei_xandesperf", "andes-45-series") + +RISCV_CORE("spacemit-x60", "rv64imafdcv_zba_zbb_zbc_zbs_zicboz_zicond_" + "zbkc_zfh_zvfh_zvkt_zvl256b_sscofpmf_xsmtvdot", + "spacemit-x60") + #undef RISCV_CORE diff --git a/gcc/config/riscv/riscv-ext-spacemit.def b/gcc/config/riscv/riscv-ext-spacemit.def new file mode 100644 index 0000000..3482384 --- /dev/null +++ b/gcc/config/riscv/riscv-ext-spacemit.def @@ -0,0 +1,36 @@ +/* SpacemiT extension definition file for RISC-V. 
+ Copyright (C) 2025 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. + +Please run `make riscv-regen` in the build folder to make sure everything is regenerated. + +For the format of DEFINE_RISCV_EXT, please refer to riscv-ext.def. */ + +DEFINE_RISCV_EXT( + /* NAME */ xsmtvdot, + /* UPPERCASE_NAME */ XSMTVDOT, + /* FULL_NAME */ "SpacemiT vector dot product extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"zve32x"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ xsmt, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION */ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + diff --git a/gcc/config/riscv/riscv-ext.def b/gcc/config/riscv/riscv-ext.def index 80f534c..62d6380 100644 --- a/gcc/config/riscv/riscv-ext.def +++ b/gcc/config/riscv/riscv-ext.def @@ -2084,3 +2084,4 @@ DEFINE_RISCV_EXT( #include "riscv-ext-ventana.def" #include "riscv-ext-mips.def" #include "riscv-ext-andes.def" +#include "riscv-ext-spacemit.def" diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt index 2036c16..af8e556 100644 --- a/gcc/config/riscv/riscv-ext.opt +++ b/gcc/config/riscv/riscv-ext.opt @@ -56,6 +56,9 @@ TargetVariable int riscv_xsf_subext TargetVariable +int riscv_xsmt_subext + +TargetVariable int riscv_xthead_subext TargetVariable @@ -403,18 +406,6 @@ Mask(SVADE) Var(riscv_sv_subext) Mask(SVBARE) Var(riscv_sv_subext) -Mask(XANDESPERF) Var(riscv_xandes_subext) - -Mask(XANDESBFHCVT) Var(riscv_xandes_subext) - -Mask(XANDESVBFHCVT) Var(riscv_xandes_subext) - -Mask(XANDESVSINTLOAD) Var(riscv_xandes_subext) - -Mask(XANDESVPACKFPH) Var(riscv_xandes_subext) - -Mask(XANDESVDOT) Var(riscv_xandes_subext) - Mask(XCVALU) Var(riscv_xcv_subext) Mask(XCVBI) Var(riscv_xcv_subext) @@ -466,3 +457,18 @@ Mask(XVENTANACONDOPS) Var(riscv_xventana_subext) Mask(XMIPSCMOV) Var(riscv_xmips_subext) Mask(XMIPSCBOP) Var(riscv_xmips_subext) + +Mask(XANDESPERF) Var(riscv_xandes_subext) + +Mask(XANDESBFHCVT) Var(riscv_xandes_subext) + +Mask(XANDESVBFHCVT) Var(riscv_xandes_subext) + +Mask(XANDESVSINTLOAD) Var(riscv_xandes_subext) + +Mask(XANDESVPACKFPH) Var(riscv_xandes_subext) + +Mask(XANDESVDOT) Var(riscv_xandes_subext) + +Mask(XSMTVDOT) Var(riscv_xsmt_subext) + diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def index 55f7fd0..f3d987e 100644 --- a/gcc/config/riscv/riscv-modes.def +++ b/gcc/config/riscv/riscv-modes.def @@ -331,7 +331,7 @@ RVV_NF4_MODES (4) \ ADJUST_ALIGNMENT (RVVM4x##NF##QI, 1); \ ADJUST_ALIGNMENT (RVVM4x##NF##HI, 2); \ - ADJUST_ALIGNMENT (RVVM4x##NF##BF, 2); \ + ADJUST_ALIGNMENT (RVVM4x##NF##BF, 2); \ ADJUST_ALIGNMENT (RVVM4x##NF##HF, 2); \ ADJUST_ALIGNMENT (RVVM4x##NF##SI, 4); \ ADJUST_ALIGNMENT (RVVM4x##NF##SF, 4); \ @@ -395,6 +395,7 @@ ADJUST_PRECISION (V4096BI, 4096); VECTOR_MODE_WITH_PREFIX (V, INT, SI, NBYTES / 4, 1); \ VECTOR_MODE_WITH_PREFIX (V, INT, DI, NBYTES / 8, 1); \ VECTOR_MODE_WITH_PREFIX (V, FLOAT, HF,
NBYTES / 2, 1); \ + VECTOR_MODE_WITH_PREFIX (V, FLOAT, BF, NBYTES / 2, 1); \ VECTOR_MODE_WITH_PREFIX (V, FLOAT, SF, NBYTES / 4, 1); \ VECTOR_MODE_WITH_PREFIX (V, FLOAT, DF, NBYTES / 8, 1); @@ -403,6 +404,7 @@ VECTOR_MODE_WITH_PREFIX (V, INT, HI, 1, 1); /* V1HI */ VECTOR_MODE_WITH_PREFIX (V, INT, SI, 1, 1); /* V1SI */ VECTOR_MODE_WITH_PREFIX (V, INT, DI, 1, 1); /* V1DI */ VECTOR_MODE_WITH_PREFIX (V, FLOAT, HF, 1, 1); /* V1HF */ +VECTOR_MODE_WITH_PREFIX (V, FLOAT, BF, 1, 1); /* V1BF */ VECTOR_MODE_WITH_PREFIX (V, FLOAT, SF, 1, 1); /* V1SF */ VECTOR_MODE_WITH_PREFIX (V, FLOAT, DF, 1, 1); /* V1DF */ VECTOR_MODE_WITH_PREFIX (V, INT, QI, 2, 1); /* V2QI */ @@ -411,18 +413,20 @@ VECTOR_MODE_WITH_PREFIX (V, INT, QI, 8, 1); /* V8QI */ VECTOR_MODE_WITH_PREFIX (V, INT, HI, 2, 1); /* V2HI */ VECTOR_MODE_WITH_PREFIX (V, INT, HI, 4, 1); /* V4HI */ VECTOR_MODE_WITH_PREFIX (V, FLOAT, HF, 2, 1); /* V2HF */ +VECTOR_MODE_WITH_PREFIX (V, FLOAT, BF, 2, 1); /* V2BF */ VECTOR_MODE_WITH_PREFIX (V, FLOAT, HF, 4, 1); /* V4HF */ +VECTOR_MODE_WITH_PREFIX (V, FLOAT, BF, 4, 1); /* V4BF */ VECTOR_MODE_WITH_PREFIX (V, INT, SI, 2, 1); /* V2SI */ VECTOR_MODE_WITH_PREFIX (V, FLOAT, SF, 2, 1); /* V2SF */ -VLS_MODES (16); /* V16QI V8HI V4SI V2DI V8HF V4SF V2DF */ -VLS_MODES (32); /* V32QI V16HI V8SI V4DI V16HF V8SF V4DF */ -VLS_MODES (64); /* V64QI V32HI V16SI V8DI V32HF V16SF V8DF */ -VLS_MODES (128); /* V128QI V64HI V32SI V16DI V64HF V32SF V16DF */ -VLS_MODES (256); /* V256QI V128HI V64SI V32DI V128HF V64SF V32DF */ -VLS_MODES (512); /* V512QI V256HI V128SI V64DI V256HF V128SF V64DF */ -VLS_MODES (1024); /* V1024QI V512HI V256SI V128DI V512HF V256SF V128DF */ -VLS_MODES (2048); /* V2048QI V1024HI V512SI V256DI V1024HF V512SF V256DF */ -VLS_MODES (4096); /* V4096QI V2048HI V1024SI V512DI V2048HF V1024SF V512DF */ +VLS_MODES (16); /* V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF */ +VLS_MODES (32); /* V32QI V16HI V8SI V4DI V16HF V16BF V8SF V4DF */ +VLS_MODES (64); /* V64QI V32HI V16SI V8DI V32HF V32BF V16SF V8DF */ +VLS_MODES (128); /* V128QI V64HI V32SI V16DI V64HF V64BF V32SF V16DF */ +VLS_MODES (256); /* V256QI V128HI V64SI V32DI V128HF V128BF V64SF V32DF */ +VLS_MODES (512); /* V512QI V256HI V128SI V64DI V256HF V256BF V128SF V64DF */ +VLS_MODES (1024); /* V1024QI V512HI V256SI V128DI V512HF V512BF V256SF V128DF */ +VLS_MODES (2048); /* V2048QI V1024HI V512SI V256DI V1024HF V1024BF V512SF V256DF */ +VLS_MODES (4096); /* V4096QI V2048HI V1024SI V512DI V2048HF V2048BF V1024SF V512DF */ /* TODO: According to RISC-V 'V' ISA spec, the maximum vector length can be 65536 for a single vector register which means the vector mode in diff --git a/gcc/config/riscv/riscv-opt-popretz.cc b/gcc/config/riscv/riscv-opt-popretz.cc new file mode 100644 index 0000000..43b2d5e --- /dev/null +++ b/gcc/config/riscv/riscv-opt-popretz.cc @@ -0,0 +1,294 @@ +/* RISC-V cm.popretz optimization pass. + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. 
If not see + <http://www.gnu.org/licenses/>. */ + +/* + This pass combines "li a0, 0" + "cm.popret" into "cm.popretz" instruction + for the RISC-V Zcmp extension. + + Rationale: + --------- + Ideally, cm.popretz should be generated during prologue/epilogue expansion. + However, as documented in PR113715 [1], this approach causes shrink-wrapping + analysis to fail, resulting in incorrect code generation. + + To address this issue, we use a dedicated RTL pass to combine these + instructions later in the compilation pipeline, after shrink-wrapping has + completed. + + Why not use peephole2? + ---------------------- + An alternative approach would be to use a peephole2 pattern to perform this + optimization. However, between "li a0, 0" and "cm.popret", there can be + STACK_TIE and other instructions that make it difficult to write a robust + peephole pattern that handles all cases. + + For example, in RV32, when the return value is in DImode but the low part + (a0) is zero, this pattern is hard to describe effectively in peephole2. + Using a dedicated pass gives us more flexibility to handle these cases. + + [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113715 */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "emit-rtl.h" +#include "dumpfile.h" +#include "tree-pass.h" +#include "insn-config.h" +#include "insn-opinit.h" +#include "recog.h" + +namespace { + +const pass_data pass_data_combine_popretz = +{ + RTL_PASS, /* type. */ + "popretz", /* name. */ + OPTGROUP_NONE, /* optinfo_flags. */ + TV_MACH_DEP, /* tv_id. */ + 0, /* properties_required. */ + 0, /* properties_provided. */ + 0, /* properties_destroyed. */ + 0, /* todo_flags_start. */ + 0, /* todo_flags_finish. */ +}; + +class pass_combine_popretz : public rtl_opt_pass +{ +public: + pass_combine_popretz (gcc::context *ctxt) + : rtl_opt_pass (pass_data_combine_popretz, ctxt) + {} + + virtual bool gate (function *) + { + return TARGET_ZCMP && !frame_pointer_needed; + } + + virtual unsigned int execute (function *); +}; // class pass_combine_popretz + + +/* Check if the given instruction code is a cm.popret instruction. + Returns true if the code corresponds to any variant of gpr_multi_popret + (for different register bounds and modes). */ +static bool +riscv_popret_insn_p (int code) +{ +#define CASE_CODE_FOR_POPRET_(REG_BOUND, MODE) \ + case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE: +#define CASE_CODE_FOR_POPRET(REG_BOUND) \ + CASE_CODE_FOR_POPRET_(REG_BOUND, si) \ + CASE_CODE_FOR_POPRET_(REG_BOUND, di) +#define ALL_CASE_CODE_FOR_POPRET \ + CASE_CODE_FOR_POPRET(ra) \ + CASE_CODE_FOR_POPRET(s0) \ + CASE_CODE_FOR_POPRET(s1) \ + CASE_CODE_FOR_POPRET(s2) \ + CASE_CODE_FOR_POPRET(s3) \ + CASE_CODE_FOR_POPRET(s4) \ + CASE_CODE_FOR_POPRET(s5) \ + CASE_CODE_FOR_POPRET(s6) \ + CASE_CODE_FOR_POPRET(s7) \ + CASE_CODE_FOR_POPRET(s8) \ + CASE_CODE_FOR_POPRET(s9) \ + CASE_CODE_FOR_POPRET(s11) \ + + switch (code) + { + ALL_CASE_CODE_FOR_POPRET + return true; + default: + return false; + } + +#undef CASE_CODE_FOR_POPRET_ +#undef CASE_CODE_FOR_POPRET +#undef ALL_CASE_CODE_FOR_POPRET +} + +/* Convert a cm.popret instruction code to its corresponding cm.popretz code. + Given an instruction code for gpr_multi_popret, returns the equivalent + gpr_multi_popretz instruction code. Returns CODE_FOR_nothing if the + input is not a valid popret instruction. 
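+ For example, CODE_FOR_gpr_multi_popret_up_to_s0_si is mapped to + CODE_FOR_gpr_multi_popretz_up_to_s0_si.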
*/ +static int +riscv_code_for_popretz (int code) +{ +#define CASE_CODE_FOR_POPRETZ_(REG_BOUND, MODE) \ + case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE: \ + return CODE_FOR_gpr_multi_popretz_up_to_##REG_BOUND##_##MODE; + +#define CASE_CODE_FOR_POPRETZ(REG_BOUND) \ + CASE_CODE_FOR_POPRETZ_(REG_BOUND, si) \ + CASE_CODE_FOR_POPRETZ_(REG_BOUND, di) + +#define ALL_CASE_CODE_FOR_POPRETZ \ + CASE_CODE_FOR_POPRETZ(ra) \ + CASE_CODE_FOR_POPRETZ(s0) \ + CASE_CODE_FOR_POPRETZ(s1) \ + CASE_CODE_FOR_POPRETZ(s2) \ + CASE_CODE_FOR_POPRETZ(s3) \ + CASE_CODE_FOR_POPRETZ(s4) \ + CASE_CODE_FOR_POPRETZ(s5) \ + CASE_CODE_FOR_POPRETZ(s6) \ + CASE_CODE_FOR_POPRETZ(s7) \ + CASE_CODE_FOR_POPRETZ(s8) \ + CASE_CODE_FOR_POPRETZ(s9) \ + CASE_CODE_FOR_POPRETZ(s11) \ + + switch (code) + { + ALL_CASE_CODE_FOR_POPRETZ + default: + return CODE_FOR_nothing; + } + +#undef CASE_CODE_FOR_POPRETZ_ +#undef CASE_CODE_FOR_POPRETZ +#undef ALL_CASE_CODE_FOR_POPRETZ +} + +/* Combine "li a0, 0" with "cm.popret" to form "cm.popretz". + + This pass scans basic blocks that precede the exit block, looking for + the following pattern: + 1. A cm.popret instruction (function epilogue with return) + 2. A (use a0) pseudo-instruction before the cm.popret + 3. A "li a0, 0" instruction (set a0 to zero) before the use + + When this pattern is found AND a0 is not referenced by any other + instructions between the "li a0, 0" and the (use a0), we can safely + combine them into a single cm.popretz instruction, which performs + the same operations more efficiently. + + This is a late RTL pass that runs before branch shortening. */ +unsigned int +pass_combine_popretz::execute (function *fn) +{ + timevar_push (TV_MACH_DEP); + edge e; + edge_iterator ei; + + /* Only visit exit block's pred since popret will only appear there. */ + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (fn)->preds) + { + basic_block bb = e->src; + rtx_insn *popret_insn = BB_END (bb); + if (!JUMP_P (popret_insn)) + continue; + int code = recog_memoized (popret_insn); + if (!riscv_popret_insn_p (code)) + continue; + + rtx_insn *def_a0_insn = NULL; + rtx_insn *use_a0_insn = NULL; + rtx a0_reg = NULL; + /* Scan backwards from popret to find the pattern: + 1. First, find the (use a0) pseudo-instruction + 2. Continue scanning to find "li a0, 0" (set a0 to const0_rtx) + 3. Ensure a0 is not referenced by any instructions between them + 4. Stop at the first definition of a0 (to ensure we have the + last/most recent def before the use). */ + for (rtx_insn *def_insn = PREV_INSN (popret_insn); + def_insn && def_insn != PREV_INSN (BB_HEAD (bb)); + def_insn = PREV_INSN (def_insn)) + { + if (!INSN_P (def_insn)) + continue; + rtx def_pat = PATTERN (def_insn); + if (GET_CODE (def_pat) == USE + && REG_P (XEXP (def_pat, 0)) + && REGNO (XEXP (def_pat, 0)) == A0_REGNUM) + { + a0_reg = XEXP (def_pat, 0); + use_a0_insn = def_insn; + continue; + } + + if (use_a0_insn && reg_referenced_p (a0_reg, def_pat)) + { + /* a0 is used by other instruction before its use in popret. */ + use_a0_insn = NULL; + break; + } + + if (use_a0_insn + && GET_CODE (def_pat) == SET + && REG_P (SET_DEST (def_pat)) + && REGNO (SET_DEST (def_pat)) == A0_REGNUM) + { + if (SET_SRC (def_pat) == CONST0_RTX (GET_MODE (SET_SRC (def_pat)))) + def_a0_insn = def_insn; + /* Stop the search regardless of the value assigned to a0, + because we only want to match the last (most recent) + definition of a0 before the (use a0). 
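+ For example, in "li a0,0; mv a0,s0; (use a0); cm.popret" the + "mv a0,s0" is the definition that reaches the use, so the earlier + zero assignment must not be matched.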
*/ + break; + } + } + + /* If we found a def of a0 before its use, and the value is zero, + we can replace the popret with popretz. */ + if (!def_a0_insn || !use_a0_insn) + continue; + + int code_for_popretz = riscv_code_for_popretz (code); + gcc_assert (code_for_popretz != CODE_FOR_nothing); + + /* Extract the stack adjustment value from the popret instruction. + The popret pattern is a PARALLEL, and the first element is the + stack pointer adjustment: (set sp (plus sp const_int)). */ + rtx stack_adj_rtx = XVECEXP (PATTERN (popret_insn), 0, 0); + gcc_assert (GET_CODE (stack_adj_rtx) == SET + && REG_P (SET_DEST (stack_adj_rtx)) + && REGNO (SET_DEST (stack_adj_rtx)) == SP_REGNUM + && GET_CODE (SET_SRC (stack_adj_rtx)) == PLUS + && CONST_INT_P (XEXP (SET_SRC (stack_adj_rtx), 1))); + + rtx stack_adj_val = XEXP (SET_SRC (stack_adj_rtx), 1); + + /* Generate and insert the popretz instruction at the position of + the original popret. emit_insn_after places the new instruction + after PREV_INSN(popret_insn). */ + rtx popretz = GEN_FCN (code_for_popretz) (stack_adj_val); + emit_insn_after (popretz, PREV_INSN (popret_insn)); + + /* Clean up those instructions. */ + remove_insn (popret_insn); + remove_insn (use_a0_insn); + remove_insn (def_a0_insn); + } + + timevar_pop (TV_MACH_DEP); + return 0; +} + +} // anon namespace + +rtl_opt_pass * +make_pass_combine_popretz (gcc::context *ctxt) +{ + return new pass_combine_popretz (ctxt); +} diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 4e4e9d8..9b92a96 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -61,6 +61,10 @@ enum riscv_microarchitecture_type { generic_ooo, mips_p8700, tt_ascalon_d8, + andes_25_series, + andes_23_series, + andes_45_series, + spacemit_x60, }; extern enum riscv_microarchitecture_type riscv_microarchitecture; diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def index 5aa4122..d41cc58 100644 --- a/gcc/config/riscv/riscv-passes.def +++ b/gcc/config/riscv/riscv-passes.def @@ -22,5 +22,6 @@ INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs); INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop); INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl); INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_landing_pad); +INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_combine_popretz); INSERT_PASS_AFTER (pass_cse2, 1, pass_vector_permconst); diff --git a/gcc/config/riscv/riscv-profiles.def b/gcc/config/riscv/riscv-profiles.def index 741c471..ea1c235 100644 --- a/gcc/config/riscv/riscv-profiles.def +++ b/gcc/config/riscv/riscv-profiles.def @@ -61,7 +61,7 @@ RISCV_PROFILE("rva23s64", "rv64imafdcbv_zicsr_zicntr_zihpm_ziccif_ziccrse" "_zicbom_zicbop_zicboz_zfhmin_zkt_zvfhmin_zvbb_zvkt_zihintntl" "_zicond_zimop_zcmop_zcb_zfa_zawrs_svbare_svade_ssccptr" "_sstvecd_sstvala_sscounterenw_svpbmt_svinval_svnapot_sstc" - "_sscofpmf_ssnpm_ssu64xl_sha_supm") + "_sscofpmf_ssnpm_ssu64xl_sha_supm_zifencei") /* RVB23 contains all mandatory base ISA for RVA22U64 and the new extension 'zihintntl,zicond,zimop,zcmop,zfa,zawrs' as mandatory extensions. 
*/ @@ -77,6 +77,6 @@ RISCV_PROFILE("rvb23s64", "rv64imafdcb_zicsr_zicntr_zihpm_ziccif_ziccrse" "_zicbom_zicbop_zicboz_zfhmin_zkt_zvfhmin_zvbb_zvkt" "_zihintntl_zicond_zimop_zcmop_zcb_zfa_zawrs_svbare_svade" "_ssccptr_sstvecd_sstvala_sscounterenw_svpbmt_svinval_svnapot" - "_sstc_sscofpmf_ssu64xl_supm") + "_sstc_sscofpmf_ssu64xl_supm_zifencei") #undef RISCV_PROFILE diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index cdb706a..abf9df7 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -208,7 +208,13 @@ rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt); rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt); rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt); rtl_opt_pass * make_pass_bclr_lowest_set_bit (gcc::context *ctxt); +rtl_opt_pass * make_pass_combine_popretz (gcc::context *ctxt); +/* Routines implemented in riscv-vsetvl.cc. */ +extern bool has_vtype_op (rtx_insn *); +extern bool mask_agnostic_p (rtx_insn *); +extern rtx get_avl (rtx_insn *); +extern bool vsetvl_insn_p (rtx_insn *); /* Routines implemented in riscv-string.c. */ extern bool riscv_expand_block_compare (rtx, rtx, rtx, rtx); @@ -658,6 +664,7 @@ bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool); void expand_cond_len_unop (unsigned, rtx *); void expand_cond_len_binop (unsigned, rtx *); void expand_reduction (unsigned, unsigned, unsigned, rtx *, rtx); +void expand_mask_reduction (rtx *, rtx_code); void expand_vec_ceil (rtx, rtx, machine_mode, machine_mode); void expand_vec_floor (rtx, rtx, machine_mode, machine_mode); void expand_vec_nearbyint (rtx, rtx, machine_mode, machine_mode); @@ -685,6 +692,7 @@ void expand_vx_binary_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode); void expand_vx_binary_vec_vec_dup (rtx, rtx, rtx, rtx_code, machine_mode); void expand_vx_binary_vxrm_vec_vec_dup (rtx, rtx, rtx, int, int, machine_mode); void expand_vx_binary_vxrm_vec_dup_vec (rtx, rtx, rtx, int, int, machine_mode); +void expand_vx_cmp_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode); #endif bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, bool, void (*)(rtx *, rtx), enum avl_type); @@ -841,12 +849,15 @@ riscv_option_valid_attribute_p (tree, tree, tree, int); extern bool riscv_option_valid_version_attribute_p (tree, tree, tree, int); extern bool +riscv_process_target_attr_for_pragma (tree); +extern bool riscv_process_target_version_attr (tree, location_t *); extern bool riscv_process_target_version_str (string_slice, location_t *); extern void riscv_override_options_internal (struct gcc_options *); extern void riscv_option_override (void); +extern void riscv_reset_previous_fndecl (void); extern rtx riscv_prefetch_cookie (rtx, rtx); extern bool riscv_prefetch_offset_address_p (rtx, machine_mode); diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc index 61c4a09..c5710e4 100644 --- a/gcc/config/riscv/riscv-string.cc +++ b/gcc/config/riscv/riscv-string.cc @@ -923,6 +923,10 @@ riscv_expand_block_move_scalar (rtx dest, rtx src, rtx length) unsigned HOST_WIDE_INT hwi_length = UINTVAL (length); unsigned HOST_WIDE_INT factor, align; + if (riscv_memcpy_size_threshold >= 0 + && hwi_length > (unsigned HOST_WIDE_INT) riscv_memcpy_size_threshold) + return false; + if (riscv_slow_unaligned_access_p) { align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD); @@ -1233,6 +1237,21 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in, bool movmem_p) if (!use_vector_stringop_p (info, potential_ew, 
length_in)) return false; + if (CONST_INT_P (length_in)) + { + HOST_WIDE_INT length = INTVAL (length_in); + if (movmem_p + && riscv_memmove_size_threshold >= 0 + && length > riscv_memmove_size_threshold) + return false; + else if (!movmem_p + && riscv_memcpy_size_threshold >= 0 + && length > riscv_memcpy_size_threshold) + return false; + } + else + return false; + /* Inlining general memmove is a pessimisation: we can't avoid having to decide which direction to go at runtime, which is costly in instruction count however for situations where the entire move fits in one vector @@ -1615,6 +1634,16 @@ expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in) if (!use_vector_stringop_p (info, 1, length_in) || info.need_loop) return false; + if (CONST_INT_P (length_in)) + { + HOST_WIDE_INT length = INTVAL (length_in); + if (riscv_memset_size_threshold >= 0 + && length > riscv_memset_size_threshold) + return false; + } + else + return false; + rtx dst_addr = copy_addr_to_reg (XEXP (dst_in, 0)); rtx dst = change_address (dst_in, info.vmode, dst_addr); diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h index 1887ed7..4cd860f 100644 --- a/gcc/config/riscv/riscv-subset.h +++ b/gcc/config/riscv/riscv-subset.h @@ -105,8 +105,6 @@ public: unsigned xlen () const {return m_xlen;}; - const char *get_profile_name () const; - riscv_subset_list *clone () const; static riscv_subset_list *parse (const char *, location_t *); diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc index 54edeeb..eb3e688 100644 --- a/gcc/config/riscv/riscv-target-attr.cc +++ b/gcc/config/riscv/riscv-target-attr.cc @@ -44,6 +44,7 @@ public: , m_cpu_info (nullptr) , m_tune (nullptr) , m_priority (0) + , m_max_vect (false) { } @@ -51,6 +52,7 @@ public: bool handle_cpu (const char *); bool handle_tune (const char *); bool handle_priority (const char *); + bool handle_max_vect (const char *); void update_settings (struct gcc_options *opts) const; private: @@ -66,31 +68,35 @@ private: const riscv_cpu_info *m_cpu_info; const char *m_tune; int m_priority; + bool m_max_vect; }; } /* All the information needed to handle a target attribute. NAME is the name of the attribute. - HANDLER is the function that takes the attribute string as an argument. */ + HANDLER is the function that takes the attribute string as an argument. + REQUIRES_ARG indicates whether this attribute requires an argument value. */ struct riscv_attribute_info { const char *name; bool (riscv_target_attr_parser::*handler) (const char *); + bool requires_arg; }; /* The target attributes that we support.
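   "arch", "cpu" and "tune" require an argument; "max-vectorization" takes none.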
*/ static const struct riscv_attribute_info riscv_target_attrs[] - = {{"arch", &riscv_target_attr_parser::handle_arch}, - {"cpu", &riscv_target_attr_parser::handle_cpu}, - {"tune", &riscv_target_attr_parser::handle_tune}, - {NULL, NULL}}; + = {{"arch", &riscv_target_attr_parser::handle_arch, true}, + {"cpu", &riscv_target_attr_parser::handle_cpu, true}, + {"tune", &riscv_target_attr_parser::handle_tune, true}, + {"max-vectorization", &riscv_target_attr_parser::handle_max_vect, false}, + {NULL, NULL, false}}; static const struct riscv_attribute_info riscv_target_version_attrs[] - = {{"arch", &riscv_target_attr_parser::handle_arch}, - {"priority", &riscv_target_attr_parser::handle_priority}, - {NULL, NULL}}; + = {{"arch", &riscv_target_attr_parser::handle_arch, true}, + {"priority", &riscv_target_attr_parser::handle_priority, true}, + {NULL, NULL, false}}; bool riscv_target_attr_parser::parse_arch (const char *str) @@ -254,6 +260,17 @@ riscv_target_attr_parser::handle_priority (const char *str) return true; } +/* Handle max-vectorization. There are no further options, just + enable it. */ + +bool +riscv_target_attr_parser::handle_max_vect (const char *str ATTRIBUTE_UNUSED) +{ + m_max_vect = true; + + return true; +} + void riscv_target_attr_parser::update_settings (struct gcc_options *opts) const { @@ -279,6 +296,9 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const if (m_priority) opts->x_riscv_fmv_priority = m_priority; + + if (m_max_vect) + opts->x_riscv_max_vectorization = true; } /* Parse ARG_STR which contains the definition of one target attribute. @@ -303,33 +323,50 @@ riscv_process_one_target_attr (char *arg_str, char *str_to_check = buf.get(); strcpy (str_to_check, arg_str); + /* Split attribute name from argument (if present). */ char *arg = strchr (str_to_check, '='); - - if (!arg) + if (arg) { - if (loc) - error_at (*loc, "attribute %<target(\"%s\")%> does not " - "accept an argument", str_to_check); - return false; + *arg = '\0'; + ++arg; + /* Check for empty argument after '='. */ + if (*arg == '\0') + { + if (loc) + error_at (*loc, "attribute %<target(\"%s\")%> has empty argument", + str_to_check); + return false; + } } - arg[0] = '\0'; - ++arg; - for (const auto *attr = attrs; - attr->name; - ++attr) + /* Find matching attribute. */ + for (const auto *attr = attrs; attr->name; ++attr) { - /* If the names don't match up, or the user has given an argument - to an attribute that doesn't accept one, or didn't give an argument - to an attribute that expects one, fail to match. */ - if (strncmp (str_to_check, attr->name, strlen (attr->name)) != 0) + if (strcmp (str_to_check, attr->name) != 0) continue; + /* Validate argument presence matches expectations. 
*/ + if (attr->requires_arg && !arg) + { + if (loc) + error_at (*loc, "attribute %<target(\"%s\")%> expects " + "an argument", str_to_check); + return false; + } + + if (!attr->requires_arg && arg) + { + if (loc) + error_at (*loc, "attribute %<target(\"%s\")%> does not " + "accept an argument", str_to_check); + return false; + } + return (&attr_parser->*attr->handler) (arg); } if (loc) - error_at (*loc, "Got unknown attribute %<target(\"%s\")%>", str_to_check); + error_at (*loc, "unknown attribute %<target(\"%s\")%>", str_to_check); return false; } @@ -371,6 +408,7 @@ riscv_process_target_str (string_slice args, std::unique_ptr<char[]> buf (new char[len+1]); char *str_to_check = buf.get (); + str_to_check[len] = '\0'; strncpy (str_to_check, args.begin (), args.size ()); /* Used to catch empty spaces between semi-colons i.e. @@ -488,6 +526,17 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int) return ret; } +/* Public wrapper for pragma processing. + Parse ARGS (a TREE_LIST of target attributes) and update global_options. + This is used by #pragma GCC target. */ + +bool +riscv_process_target_attr_for_pragma (tree args) +{ + location_t loc = UNKNOWN_LOCATION; + return riscv_process_target_attr (args, &loc, riscv_target_attrs); +} + /* Parse the tree in ARGS that contains the target_version attribute information and update the global target options space. If LOC is nonnull, report diagnostics against *LOC, otherwise remain silent. */ diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 707924d..f3c4431 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1811,7 +1811,8 @@ expand_const_vector_onestep (rtx target, rvv_builder &builder) rtx dest = gen_reg_rtx (mode); insn_code icode = code_for_pred_mov (mode); rtx ops3[] = {dest, tmp3, tmp1}; - emit_nonvlmax_insn (icode, __MASK_OP_TUMA | UNARY_OP_P, ops3, GEN_INT (n)); + emit_nonvlmax_insn (icode, (unsigned) __MASK_OP_TUMA | UNARY_OP_P, + ops3, GEN_INT (n)); emit_move_insn (target, dest); } @@ -4747,8 +4748,6 @@ void expand_gather_scatter (rtx *ops, bool is_load) { rtx ptr, vec_offset, vec_reg; - bool zero_extend_p; - int shift; rtx mask = ops[5]; rtx len = ops[6]; if (is_load) @@ -4758,78 +4757,18 @@ expand_gather_scatter (rtx *ops, bool is_load) vec_reg = ops[0]; ptr = ops[1]; vec_offset = ops[2]; - zero_extend_p = INTVAL (ops[3]); - shift = exact_log2 (INTVAL (ops[4])); } else { vec_reg = ops[4]; ptr = ops[0]; vec_offset = ops[1]; - zero_extend_p = INTVAL (ops[2]); - shift = exact_log2 (INTVAL (ops[3])); } machine_mode vec_mode = GET_MODE (vec_reg); machine_mode idx_mode = GET_MODE (vec_offset); - scalar_mode inner_idx_mode = GET_MODE_INNER (idx_mode); - unsigned inner_offsize = GET_MODE_BITSIZE (inner_idx_mode); - poly_int64 nunits = GET_MODE_NUNITS (vec_mode); bool is_vlmax = is_vlmax_len_p (vec_mode, len); - bool use_widening_shift = false; - - /* Extend the offset element to address width. */ - if (inner_offsize < BITS_PER_WORD) - { - use_widening_shift = TARGET_ZVBB && zero_extend_p && shift == 1; - /* 7.2. Vector Load/Store Addressing Modes. - If the vector offset elements are narrower than XLEN, they are - zero-extended to XLEN before adding to the ptr effective address. If - the vector offset elements are wider than XLEN, the least-significant - XLEN bits are used in the address calculation. An implementation must - raise an illegal instruction exception if the EEW is not supported for - offset elements. - - RVV spec only refers to the shift == 0 case. 
*/ - if (!zero_extend_p || shift) - { - if (zero_extend_p) - inner_idx_mode - = int_mode_for_size (inner_offsize * 2, 0).require (); - else - inner_idx_mode = int_mode_for_size (BITS_PER_WORD, 0).require (); - machine_mode new_idx_mode - = get_vector_mode (inner_idx_mode, nunits).require (); - if (!use_widening_shift) - { - rtx tmp = gen_reg_rtx (new_idx_mode); - emit_insn (gen_extend_insn (tmp, vec_offset, new_idx_mode, idx_mode, - zero_extend_p ? true : false)); - vec_offset = tmp; - } - idx_mode = new_idx_mode; - } - } - - if (shift) - { - rtx tmp; - if (!use_widening_shift) - tmp = expand_binop (idx_mode, ashl_optab, vec_offset, - gen_int_mode (shift, Pmode), NULL_RTX, 0, - OPTAB_DIRECT); - else - { - tmp = gen_reg_rtx (idx_mode); - insn_code icode = code_for_pred_vwsll_scalar (idx_mode); - rtx ops[] = {tmp, vec_offset, const1_rtx}; - emit_vlmax_insn (icode, BINARY_OP, ops); - } - - vec_offset = tmp; - } - insn_code icode = prepare_gather_scatter (vec_mode, idx_mode, is_load); if (is_vlmax) { @@ -4946,6 +4885,54 @@ expand_reduction (unsigned unspec, unsigned unspec_for_vl0_safe, emit_insn (gen_pred_extract_first (m1_mode, scalar_dest, m1_tmp2)); } +/* Expand mask reductions. OPS are {dest, src} where DEST's mode + is QImode and SRC's mode is a mask mode. + CODE is one of AND, IOR, XOR. */ + +void +expand_mask_reduction (rtx *ops, rtx_code code) +{ + machine_mode mode = GET_MODE (ops[1]); + rtx dest = ops[0]; + gcc_assert (GET_MODE (dest) == QImode); + + rtx tmp = gen_reg_rtx (Xmode); + rtx cpop_ops[] = {tmp, ops[1]}; + emit_vlmax_insn (code_for_pred_popcount (mode, Xmode), CPOP_OP, cpop_ops); + + bool eq_zero = false; + + /* AND reduction is popcount (mask) == len, + IOR reduction is popcount (mask) != 0, + XOR reduction is popcount (mask) & 1 != 0. */ + if (code == AND) + { + rtx len = gen_int_mode (GET_MODE_NUNITS (mode), HImode); + tmp = expand_binop (Xmode, sub_optab, tmp, len, NULL, true, + OPTAB_DIRECT); + eq_zero = true; + } + else if (code == IOR) + ; + else if (code == XOR) + tmp = expand_binop (Xmode, and_optab, tmp, GEN_INT (1), NULL, true, + OPTAB_DIRECT); + else + gcc_unreachable (); + + rtx els = gen_label_rtx (); + rtx end = gen_label_rtx (); + + riscv_expand_conditional_branch (els, eq_zero ? EQ : NE, tmp, const0_rtx); + emit_move_insn (dest, const0_rtx); + emit_jump_insn (gen_jump (end)); + emit_barrier (); + + emit_label (els); + emit_move_insn (dest, const1_rtx); + emit_label (end); +} + /* Prepare ops for ternary operations. It can be called before or after RA. */ void @@ -5327,7 +5314,7 @@ emit_vec_cvt_x_f (rtx op_dest, rtx op_src, rtx mask, { insn_code icode = code_for_pred_fcvt_x_f (UNSPEC_VFCVT, vec_mode); - if (type & USE_VUNDEF_MERGE_P) + if (type & (insn_type) USE_VUNDEF_MERGE_P) { rtx cvt_x_ops[] = {op_dest, mask, op_src}; emit_vlmax_insn (icode, type, cvt_x_ops); @@ -5395,7 +5382,7 @@ emit_vec_cvt_x_f_rtz (rtx op_dest, rtx op_src, rtx mask, { insn_code icode = code_for_pred (FIX, vec_mode); - if (type & USE_VUNDEF_MERGE_P) + if (type & (insn_type) USE_VUNDEF_MERGE_P) { rtx cvt_x_ops[] = {op_dest, mask, op_src}; emit_vlmax_insn (icode, type, cvt_x_ops); @@ -5923,6 +5910,40 @@ expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2, emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops); } +static rtx_code +get_swapped_cmp_rtx_code (rtx_code code) +{ + switch (code) + { + case GTU: + return LTU; + case GT: + return LT; + default: + gcc_unreachable (); + } +} + +/* Expand the binary vx combine with the format like v2 = vec_dup(x) > v1. 
+ That is, the first operand comes from the vec_duplicate and the second + operand is the vector register. Unfortunately, the RVV vms* instructions + only accept the form v2 = v1 < vec_dup(x), so we need to swap op_1 and + op_2 and emit the swapped insn (e.g. gtu becomes ltu) instead. */ + +void +expand_vx_cmp_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2, rtx_code code, + machine_mode mode) +{ + machine_mode mask_mode = get_mask_mode (mode); + rtx_code swapped_code = get_swapped_cmp_rtx_code (code); + + insn_code icode = code_for_pred_cmp_scalar (mode); + rtx cmp = gen_rtx_fmt_ee (swapped_code, mask_mode, op_2, op_1); + rtx ops[] = {op_0, cmp, op_2, op_1}; + + emit_vlmax_insn (icode, COMPARE_OP, ops); +} + /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as well. */ void @@ -6133,6 +6154,10 @@ can_be_broadcast_p (rtx op) if (mode == HFmode && !TARGET_ZVFH) return false; + /* We don't have a vfmv.bf16.v.f. */ + if (mode == BFmode) + return false; + /* Same for float, just that we can always handle 64-bit doubles even on !TARGET_64BIT. We have ruled out 16-bit HF already above. */ @@ -6172,6 +6197,10 @@ strided_broadcast_p (rtx op) if (!TARGET_ZVFH && mode == HFmode) return true; + /* We don't have a vfmv.bf16.v.f. */ + if (mode == BFmode) + return true; + return false; } diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index 22b77cc..d00403a 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -2130,7 +2130,8 @@ public: rtx expand (function_expander &e) const override { - return e.use_exact_insn (code_for_pred_fault_load (e.vector_mode ())); + return e.use_contiguous_load_insn + (code_for_pred_fault_load (e.vector_mode ())); } }; diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index a3e596a..f92e94b 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -4443,7 +4443,7 @@ function_builder::get_attributes (const function_instance &instance) registered_function & function_builder::add_function (const function_instance &instance, const char *name, tree fntype, tree attrs, - bool placeholder_p, const char *overload_name, + const char *overload_name, const vec<tree> &argument_types, enum required_ext required, bool overloaded_p = false) @@ -4464,7 +4464,7 @@ function_builder::add_function (const function_instance &instance, nodes and remove the target hook. For now, however, we need to appease the validation and return a non-NULL, non-error_mark_node node, so we arbitrarily choose integer_zero_node. */ - tree decl = placeholder_p || in_lto_p + tree decl = in_lto_p ? integer_zero_node : simulate_builtin_function_decl (input_location, name, fntype, code, NULL, attrs); @@ -4508,7 +4508,7 @@ function_builder::add_unique_function (const function_instance &instance, argument_types.address ()); tree attrs = get_attributes (instance); registered_function &rfn - = add_function (instance, name, fntype, attrs, false, overload_name, + = add_function (instance, name, fntype, attrs, overload_name, argument_types.copy (), required); /* Enter the function into the hash table. */ @@ -4523,7 +4523,7 @@ function_builder::add_unique_function (const function_instance &instance, /* Attribute lists shouldn't be shared.
*/ tree attrs = get_attributes (instance); if (m_direct_overloads) - add_function (instance, overload_name, fntype, attrs, false, NULL, + add_function (instance, overload_name, fntype, attrs, NULL, vNULL, required); else { @@ -4562,7 +4562,7 @@ function_builder::add_overloaded_function (const function_instance &instance, /* To avoid API conflicting, take void return type and void argument for the overloaded function. */ tree fntype = build_function_type (void_type_node, void_list_node); - add_function (instance, name, fntype, NULL_TREE, false, name, + add_function (instance, name, fntype, NULL_TREE, name, vNULL, required, true); obstack_free (&m_string_obstack, name); } @@ -4709,7 +4709,8 @@ function_expander::use_exact_insn (insn_code icode) /* The RVV floating-point only support dynamic rounding mode in the FRM register. */ - if (opno != insn_data[icode].n_generator_args) + if (base->may_require_frm_p () + && opno < insn_data[icode].n_generator_args) add_input_operand (Pmode, gen_int_mode (riscv_vector::FRM_DYN, Pmode)); return generate_insn (icode); @@ -4894,7 +4895,8 @@ function_expander::use_ternop_insn (bool vd_accum_p, insn_code icode) /* The RVV floating-point only support dynamic rounding mode in the FRM register. */ - if (opno != insn_data[icode].n_generator_args) + if (base->may_require_frm_p () + && opno < insn_data[icode].n_generator_args) add_input_operand (Pmode, gen_int_mode (riscv_vector::FRM_DYN, Pmode)); return generate_insn (icode); @@ -4938,7 +4940,8 @@ function_expander::use_widen_ternop_insn (insn_code icode) /* The RVV floating-point only support dynamic rounding mode in the FRM register. */ - if (opno != insn_data[icode].n_generator_args) + if (base->may_require_frm_p () + && opno < insn_data[icode].n_generator_args) add_input_operand (Pmode, gen_int_mode (riscv_vector::FRM_DYN, Pmode)); return generate_insn (icode); diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h index 8cb243b..9e1a474 100644 --- a/gcc/config/riscv/riscv-vector-builtins.h +++ b/gcc/config/riscv/riscv-vector-builtins.h @@ -435,7 +435,7 @@ private: tree get_attributes (const function_instance &); registered_function &add_function (const function_instance &, const char *, - tree, tree, bool, const char *, + tree, tree, const char *, const vec<tree> &, enum required_ext, bool); diff --git a/gcc/config/riscv/riscv-vector-switch.def b/gcc/config/riscv/riscv-vector-switch.def index 1b0d619..6b71ab6 100644 --- a/gcc/config/riscv/riscv-vector-switch.def +++ b/gcc/config/riscv/riscv-vector-switch.def @@ -401,6 +401,18 @@ VLS_ENTRY (V256HF, riscv_vector::vls_mode_valid_p (V256HFmode) && TARGET_VECTOR_ VLS_ENTRY (V512HF, riscv_vector::vls_mode_valid_p (V512HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 1024) VLS_ENTRY (V1024HF, riscv_vector::vls_mode_valid_p (V1024HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 2048) VLS_ENTRY (V2048HF, riscv_vector::vls_mode_valid_p (V2048HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 4096) +VLS_ENTRY (V1BF, riscv_vector::vls_mode_valid_p (V1BFmode) && TARGET_VECTOR_ELEN_BF_16) +VLS_ENTRY (V2BF, riscv_vector::vls_mode_valid_p (V2BFmode) && TARGET_VECTOR_ELEN_BF_16) +VLS_ENTRY (V4BF, riscv_vector::vls_mode_valid_p (V4BFmode) && TARGET_VECTOR_ELEN_BF_16) +VLS_ENTRY (V8BF, riscv_vector::vls_mode_valid_p (V8BFmode) && TARGET_VECTOR_ELEN_BF_16) +VLS_ENTRY (V16BF, riscv_vector::vls_mode_valid_p (V16BFmode) && TARGET_VECTOR_ELEN_BF_16) +VLS_ENTRY (V32BF, riscv_vector::vls_mode_valid_p (V32BFmode) && 
TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 64) +VLS_ENTRY (V64BF, riscv_vector::vls_mode_valid_p (V64BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 128) +VLS_ENTRY (V128BF, riscv_vector::vls_mode_valid_p (V128BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 256) +VLS_ENTRY (V256BF, riscv_vector::vls_mode_valid_p (V256BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 512) +VLS_ENTRY (V512BF, riscv_vector::vls_mode_valid_p (V512BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 1024) +VLS_ENTRY (V1024BF, riscv_vector::vls_mode_valid_p (V1024BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 2048) +VLS_ENTRY (V2048BF, riscv_vector::vls_mode_valid_p (V2048BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 4096) VLS_ENTRY (V1SF, riscv_vector::vls_mode_valid_p (V1SFmode) && TARGET_VECTOR_ELEN_FP_32) VLS_ENTRY (V2SF, riscv_vector::vls_mode_valid_p (V2SFmode) && TARGET_VECTOR_ELEN_FP_32) VLS_ENTRY (V4SF, riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32) diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 3586d0c..127187b 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -258,7 +258,7 @@ policy_to_str (bool agnostic_p) /* Return true if it is an RVV instruction depends on VTYPE global status register. */ -static bool +bool has_vtype_op (rtx_insn *rinsn) { return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn); @@ -306,7 +306,7 @@ vector_config_insn_p (rtx_insn *rinsn) } /* Return true if it is vsetvldi or vsetvlsi. */ -static bool +bool vsetvl_insn_p (rtx_insn *rinsn) { if (!rinsn || !vector_config_insn_p (rinsn)) @@ -386,7 +386,7 @@ get_vl (rtx_insn *rinsn) } /* Helper function to get AVL operand. */ -static rtx +rtx get_avl (rtx_insn *rinsn) { if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn)) @@ -411,7 +411,7 @@ get_default_ma () } /* Helper function to get MA operand. */ -static bool +bool mask_agnostic_p (rtx_insn *rinsn) { /* If it doesn't have MA, we return agnostic by default. */ @@ -1176,7 +1176,7 @@ public: if (fault_first_load_p (insn->rtl ())) { for (insn_info *i = insn->next_nondebug_insn (); - i->bb () == insn->bb (); i = i->next_nondebug_insn ()) + i && i->bb () == insn->bb (); i = i->next_nondebug_insn ()) { if (find_access (i->defs (), VL_REGNUM)) break; diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 63404d3..96519c9 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -735,6 +735,105 @@ static const struct riscv_tune_param mips_p8700_tune_info = { true, /* prefer-agnostic. */ }; +/* Costs to use when optimizing for Andes 25 series. */ +static const struct riscv_tune_param andes_25_tune_info = { + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */ + {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */ + 1, /* issue_rate */ + 3, /* branch_cost */ + 3, /* memory_cost */ + 8, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + true, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + false, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align. */ + true, /* prefer-agnostic. 
*/ +}; + +static const struct riscv_tune_param spacemit_x60_tune_info= { + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (15), COSTS_N_INSNS (22)}, /* fp_div */ + {COSTS_N_INSNS (3), COSTS_N_INSNS (6)}, /* int_mul */ + {COSTS_N_INSNS (12), COSTS_N_INSNS (20)}, /* int_div */ + 2, /* issue_rate */ + 3, /* branch_cost */ + 5, /* memory_cost */ + 6, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + false, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + false, /* use_zero_stride_load */ + true, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ + true, /* prefer-agnostic. */ +}; + +/* Costs to use when optimizing for Andes 23 series. */ +static const struct riscv_tune_param andes_23_tune_info = { + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */ + {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */ + 2, /* issue_rate */ + 3, /* branch_cost */ + 3, /* memory_cost */ + 8, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + true, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + false, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ + true, /* prefer-agnostic. */ +}; + +/* Costs to use when optimizing for Andes 45 series. */ +static const struct riscv_tune_param andes_45_tune_info = { + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ + {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ + {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ + {COSTS_N_INSNS (2), COSTS_N_INSNS (2)}, /* int_mul */ + {COSTS_N_INSNS (24), COSTS_N_INSNS (24)}, /* int_div */ + 2, /* issue_rate */ + 3, /* branch_cost */ + 3, /* memory_cost */ + 8, /* fmv_cost */ + false, /* slow_unaligned_access */ + false, /* vector_unaligned_access */ + true, /* use_divmod_expansion */ + false, /* overlap_op_by_pieces */ + false, /* use_zero_stride_load */ + false, /* speculative_sched_vsetvl */ + RISCV_FUSE_NOTHING, /* fusible_ops */ + NULL, /* vector cost */ + NULL, /* function_align */ + NULL, /* jump_align */ + NULL, /* loop_align */ + true, /* prefer-agnostic. */ +}; + static bool riscv_avoid_shrink_wrapping_separate (); static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *); static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *); @@ -1741,8 +1840,19 @@ riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type) /* Nonzero offsets are only valid for references that don't use the GOT. */ switch (*symbol_type) { - case SYMBOL_ABSOLUTE: case SYMBOL_PCREL: + /* In 64-bit mode, PC-relative offsets with ranges beyond +/-1GiB are + more likely than not to end up out of range for an auipc instruction + randomly-placed within the 2GB range usable by medany, and such + offsets are quite unlikely to come up by chance, so be conservative + and separate the offset for them when in 64-bit mode, where they don't + wrap around. */ + if (TARGET_64BIT) + return sext_hwi (INTVAL (offset), 30) == INTVAL (offset); + + /* Fall through. 
*/ + + case SYMBOL_ABSOLUTE: case SYMBOL_TLS_LE: /* GAS rejects offsets outside the range [-2^31, 2^31-1]. */ return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); @@ -2765,7 +2875,7 @@ riscv_unspec_address_offset (rtx base, rtx offset, enum riscv_symbol_type symbol_type) { base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base), - UNSPEC_ADDRESS_FIRST + symbol_type); + UNSPEC_ADDRESS_FIRST + (int) symbol_type); if (offset != const0_rtx) base = gen_rtx_PLUS (Pmode, base, offset); return gen_rtx_CONST (Pmode, base); @@ -3731,8 +3841,7 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) /* The low-part must be zero-extended when ELEN == 32 and mode == 64. */ if (num == 2 && i == 0) - emit_insn (gen_extend_insn (int_reg, result, mode, smode, - true)); + int_reg = convert_modes (mode, smode, result, true); if (i == 1) { @@ -3779,6 +3888,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) temp_reg = gen_reg_rtx (word_mode); zero_extend_p = (LOAD_EXTEND_OP (mode) == ZERO_EXTEND); + /* SRC is a MEM, so we can always extend it directly, so + no need to indirect through convert_modes. */ emit_insn (gen_extend_insn (temp_reg, src, word_mode, mode, zero_extend_p)); riscv_emit_move (dest, gen_lowpart (mode, temp_reg)); @@ -3833,9 +3944,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) { rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode)); rtx temp = gen_reg_rtx (word_mode); - emit_insn (gen_extend_insn (temp, - gen_lowpart (HImode, src), - word_mode, HImode, 1)); + temp = convert_modes (word_mode, HImode, + gen_lowpart (HImode, src), true); if (word_mode == SImode) emit_insn (gen_iorsi3 (temp, mask, temp)); else @@ -4722,6 +4832,13 @@ riscv_noce_conversion_profitable_p (rtx_insn *seq, if (last_dest) last_dest = dest; } + else if (REG_P (dest) && src == CONST0_RTX (GET_MODE (dest))) + { + /* A GPR set to zero can always be replaced with x0, so any + insn that sets a GPR to zero will eventually be eliminated. */ + riscv_if_info.original_cost += COSTS_N_INSNS (1); + riscv_if_info.max_seq_cost += COSTS_N_INSNS (1); + } else last_dest = NULL_RTX; @@ -4908,7 +5025,7 @@ riscv_output_move (rtx dest, rtx src) if (TARGET_ZFHMIN || TARGET_ZFBFMIN) return "fmv.x.h\t%0,%1"; /* Using fmv.x.s + sign-extend to emulate fmv.x.h. */ - return "fmv.x.s\t%0,%1;slli\t%0,%0,16;srai\t%0,%0,16"; + return "fmv.x.s\t%0,%1\n\tslli\t%0,%0,16\n\tsrai\t%0,%0,16"; case 4: return "fmv.x.s\t%0,%1"; case 8: @@ -5886,11 +6003,47 @@ static int riscv_flatten_aggregate_field (const_tree type, riscv_aggregate_field *fields, int n, HOST_WIDE_INT offset, bool ignore_zero_width_bit_field_p, + bool ignore_empty_union_and_zero_len_array_p, bool vls_p = false, unsigned abi_vlen = 0) { int max_aggregate_field = vls_p ? 8 : 2; switch (TREE_CODE (type)) { + case UNION_TYPE: + { + if (!ignore_empty_union_and_zero_len_array_p) + return -1; + /* Empty union should ignore. */ + if (TYPE_SIZE (type) == NULL || integer_zerop (TYPE_SIZE (type))) + return n; + /* Or all union member are empty union or empty struct. */ + for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f)) + { + if (TREE_CODE (f) != FIELD_DECL) + continue; + int m; + HOST_WIDE_INT pos = offset + int_byte_position (f); + switch (TREE_CODE (TREE_TYPE (f))) + { + case ARRAY_TYPE: + case UNION_TYPE: + case RECORD_TYPE: + m = riscv_flatten_aggregate_field ( + TREE_TYPE (f), fields, n, pos, + ignore_zero_width_bit_field_p, + true); + /* Any non-empty struct/union/array will stop the flatten. 
*/
+            if (m != n)
+              return -1;
+            break;
+          default:
+            /* A member that is not a struct, union or array stops the
+               flattening.  */
+            return -1;
+          }
+      }
+    return n;
+    }
   case RECORD_TYPE:
     /* Can't handle incomplete types nor sizes that are not fixed.  */
     if (!COMPLETE_TYPE_P (type)
@@ -5916,7 +6069,9 @@
        {
          HOST_WIDE_INT pos = offset + int_byte_position (f);
          n = riscv_flatten_aggregate_field (
-           TREE_TYPE (f), fields, n, pos, ignore_zero_width_bit_field_p,
+           TREE_TYPE (f), fields, n, pos,
+           ignore_zero_width_bit_field_p,
+           ignore_empty_union_and_zero_len_array_p,
            vls_p, abi_vlen);
        }
       if (n < 0)
@@ -5930,14 +6085,20 @@
       riscv_aggregate_field subfields[8];
       tree index = TYPE_DOMAIN (type);
       tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
+
+      /* An array whose element size is zero should be ignored.  */
+      if (ignore_empty_union_and_zero_len_array_p && integer_zerop (elt_size))
+       return n;
+
       int n_subfields
-       = riscv_flatten_aggregate_field (TREE_TYPE (type), subfields, 0,
-                                        offset,
-                                        ignore_zero_width_bit_field_p, vls_p,
-                                        abi_vlen);
+       = riscv_flatten_aggregate_field (
+           TREE_TYPE (type), subfields, 0,
+           offset,
+           ignore_zero_width_bit_field_p,
+           ignore_empty_union_and_zero_len_array_p,
+           vls_p, abi_vlen);
       /* Can't handle incomplete types nor sizes that are not fixed.  */
-      if (n_subfields <= 0
-         || !COMPLETE_TYPE_P (type)
+      if (!COMPLETE_TYPE_P (type)
          || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
          || !index
          || !TYPE_MAX_VALUE (index)
@@ -5947,6 +6108,15 @@
          || !tree_fits_uhwi_p (elt_size))
        return -1;
 
+      /* A zero-length array of empty unions/structs should be ignored.  */
+      if (ignore_empty_union_and_zero_len_array_p && n_subfields == 0
+         && integer_zerop (TYPE_MIN_VALUE (index))
+         && integer_all_onesp (TYPE_MAX_VALUE (index)))
+       return n;
+
+      if (n_subfields <= 0)
+       return -1;
+
       n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
               - tree_to_uhwi (TYPE_MIN_VALUE (index));
       gcc_assert (n_elts >= 0);
@@ -6026,14 +6196,25 @@ static int
 riscv_flatten_aggregate_argument (const_tree type,
                                   riscv_aggregate_field *fields,
                                   bool ignore_zero_width_bit_field_p,
+                                  bool ignore_empty_union_and_zero_len_array_p,
                                   bool vls_p = false, unsigned abi_vlen = 0)
 {
   if (!type || TREE_CODE (type) != RECORD_TYPE)
     return -1;
 
   return riscv_flatten_aggregate_field (type, fields, 0, 0,
-                                        ignore_zero_width_bit_field_p, vls_p,
-                                        abi_vlen);
+                                        ignore_zero_width_bit_field_p,
+                                        ignore_empty_union_and_zero_len_array_p,
+                                        vls_p, abi_vlen);
+}
+
+static bool
+riscv_any_non_float_type_field (riscv_aggregate_field *fields, int n)
+{
+  for (int i = 0; i < n; i++)
+    if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
+      return true;
+  return false;
 }
 
 /* See whether TYPE is a record whose fields should be returned in one or
@@ -6044,24 +6225,18 @@ riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
                                     riscv_aggregate_field fields[2])
 {
   static int warned = 0;
+  if (!type)
+    return 0;
 
   /* This is the old ABI, which differs for C++ and C.  */
-  int n_old = riscv_flatten_aggregate_argument (type, fields, false);
-  for (int i = 0; i < n_old; i++)
-    if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
-      {
-       n_old = -1;
-       break;
-      }
+  int n_old = riscv_flatten_aggregate_argument (type, fields, false, false);
+  if (riscv_any_non_float_type_field (fields, n_old))
+    n_old = -1;
 
   /* This is the new ABI, which is the same for C++ and C. 
*/
-  int n_new = riscv_flatten_aggregate_argument (type, fields, true);
-  for (int i = 0; i < n_new; i++)
-    if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
-      {
-       n_new = -1;
-       break;
-      }
+  int n_new = riscv_flatten_aggregate_argument (type, fields, true, false);
+  if (riscv_any_non_float_type_field (fields, n_new))
+    n_new = -1;
 
   if ((n_old != n_new) && (warned == 0))
     {
@@ -6070,7 +6245,58 @@
       warned = 1;
     }
 
-  return n_new > 0 ? n_new : 0;
+  /* The ABI with the fix for flattening empty unions.  */
+  int n_new2 = riscv_flatten_aggregate_argument (type, fields, true, true);
+  if (riscv_any_non_float_type_field (fields, n_new2))
+    n_new2 = -1;
+
+  bool num_fpr = riscv_pass_mode_in_fpr_p (TYPE_MODE (type));
+
+  /* There is a special case: a struct that contains both a zero-length
+     array of empty structs and a floating-point member, e.g.:
+
+     struct S0ae_1f {
+       struct {
+       } e1[0];
+       float f;
+     };
+
+     Here we get 1, while the legacy ABI gets -1 at this point but still
+     ends up with 1 in the later logic, so this case should be treated as
+     compatible.  */
+  bool compatible_p = n_new2 == 1 && n_new == -1 && num_fpr == 1;
+
+  if ((n_new2 != n_new)
+      && !compatible_p && (warned == 0))
+    {
+      warning (OPT_Wpsabi, "ABI for flattened empty union and zero "
+              "length array changed in GCC 16");
+      warned = 1;
+    }
+
+  return n_new2 > 0 ? n_new2 : 0;
+}
+
+struct riscv_aggregate_field_info_t {
+  unsigned num_fpr;
+  unsigned num_gpr;
+
+  riscv_aggregate_field_info_t ()
+    : num_fpr (0), num_gpr (0)
+  {}
+};
+
+static riscv_aggregate_field_info_t
+riscv_parse_aggregate_field_info (riscv_aggregate_field *fields, int n)
+{
+  riscv_aggregate_field_info_t info;
+  for (int i = 0; i < n; i++)
+    {
+      info.num_fpr += SCALAR_FLOAT_TYPE_P (fields[i].type);
+      info.num_gpr += INTEGRAL_TYPE_P (fields[i].type);
+    }
+
+  return info;
 }
 
 /* See whether TYPE is a record whose fields should be returned in one or
@@ -6084,35 +6310,48 @@ riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
   static int warned = 0;
 
   /* This is the old ABI, which differs for C++ and C.  */
-  unsigned num_int_old = 0, num_float_old = 0;
-  int n_old = riscv_flatten_aggregate_argument (type, fields, false);
-  for (int i = 0; i < n_old; i++)
-    {
-      num_float_old += SCALAR_FLOAT_TYPE_P (fields[i].type);
-      num_int_old += INTEGRAL_TYPE_P (fields[i].type);
-    }
+  int n_old = riscv_flatten_aggregate_argument (type, fields, false, false);
+  riscv_aggregate_field_info_t old_info;
+  old_info = riscv_parse_aggregate_field_info (fields, n_old);
 
   /* This is the new ABI, which is the same for C++ and C. 
*/ - unsigned num_int_new = 0, num_float_new = 0; - int n_new = riscv_flatten_aggregate_argument (type, fields, true); - for (int i = 0; i < n_new; i++) - { - num_float_new += SCALAR_FLOAT_TYPE_P (fields[i].type); - num_int_new += INTEGRAL_TYPE_P (fields[i].type); - } + int n_new = riscv_flatten_aggregate_argument (type, fields, true, false); + riscv_aggregate_field_info_t new_info; + new_info = riscv_parse_aggregate_field_info (fields, n_new); + + bool values_changed = old_info.num_fpr != new_info.num_fpr + || old_info.num_gpr != new_info.num_gpr; + bool old_is_one_one = old_info.num_fpr == 1 && old_info.num_gpr == 1; + bool new_is_one_one = new_info.num_fpr == 1 && new_info.num_gpr == 1; - if (((num_int_old == 1 && num_float_old == 1 - && (num_int_old != num_int_new || num_float_old != num_float_new)) - || (num_int_new == 1 && num_float_new == 1 - && (num_int_old != num_int_new || num_float_old != num_float_new))) - && (warned == 0)) + if (values_changed + && (old_is_one_one || new_is_one_one) + && warned == 0) { warning (OPT_Wpsabi, "ABI for flattened struct with zero-length " "bit-fields changed in GCC 10"); warned = 1; } - return num_int_new == 1 && num_float_new == 1; + /* ABI with fixing flatten empty union. */ + int n_new2 = riscv_flatten_aggregate_argument (type, fields, true, true); + riscv_aggregate_field_info_t new2_info; + new2_info = riscv_parse_aggregate_field_info (fields, n_new2); + + values_changed = new_info.num_fpr != new2_info.num_fpr + || new_info.num_gpr != new2_info.num_gpr; + bool new2_is_one_one = new2_info.num_fpr == 1 && new2_info.num_gpr == 1; + + if (values_changed + && (new_is_one_one || new2_is_one_one) + && warned == 0) + { + warning (OPT_Wpsabi, "ABI for flattened empty union and zero " + "length array changed in GCC 16"); + warned = 1; + } + + return new2_is_one_one; } /* Return the representation of an argument passed or returned in an FPR @@ -6466,7 +6705,7 @@ riscv_pass_aggregate_in_vr (struct riscv_arg_info *info, riscv_aggregate_field fields[8]; unsigned int abi_vlen = riscv_get_cc_abi_vlen (cum->variant_cc); int i; - int n = riscv_flatten_aggregate_argument (type, fields, true, + int n = riscv_flatten_aggregate_argument (type, fields, true, true, /* vls_p */ true, abi_vlen); if (n == -1) @@ -10576,6 +10815,71 @@ riscv_issue_rate (void) return tune_param->issue_rate; } +/* Structure for very basic vector configuration tracking in the scheduler. */ +struct last_vconfig +{ + bool valid; + bool ta; + bool ma; + uint8_t sew; + uint8_t vlmul; + rtx avl; +} last_vconfig; + +/* Clear LAST_VCONFIG so we have no known state. */ +static void +clear_vconfig (void) +{ + memset (&last_vconfig, 0, sizeof (last_vconfig)); +} + +/* Return TRUE if INSN is a vector insn needing a particular + vector configuration that is trivially equal to the last + vector insn issued. Return FALSE otherwise. */ +static bool +compatible_with_last_vconfig (rtx_insn *insn) +{ + /* We might be able to extract the data from a preexisting vsetvl. */ + if (vsetvl_insn_p (insn)) + return false; + + /* Nothing to do for these cases. 
*/
+  if (!NONDEBUG_INSN_P (insn) || !has_vtype_op (insn))
+    return false;
+
+  extract_insn_cached (insn);
+
+  rtx avl = get_avl (insn);
+  if (avl != last_vconfig.avl)
+    return false;
+
+  if (get_sew (insn) != last_vconfig.sew)
+    return false;
+
+  if (get_vlmul (insn) != last_vconfig.vlmul)
+    return false;
+
+  if (tail_agnostic_p (insn) != last_vconfig.ta)
+    return false;
+
+  if (mask_agnostic_p (insn) != last_vconfig.ma)
+    return false;
+
+  /* No differences found, they're trivially compatible.  */
+  return true;
+}
+
+/* Implement TARGET_SCHED_INIT.  We use this to track the vector configuration
+   of the last issued vector instruction.  We can then use that information
+   to potentially adjust the ready queue to issue instructions of a compatible
+   vector configuration instead of a conflicting configuration.  That will
+   reduce the number of vsetvl instructions we ultimately emit.  */
+static void
+riscv_sched_init (FILE *, int, int)
+{
+  clear_vconfig ();
+}
+
 /* Implement TARGET_SCHED_VARIABLE_ISSUE.  */
 static int
 riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
@@ -10600,9 +10904,88 @@
      an assert so we can find and fix this problem.  */
   gcc_assert (insn_has_dfa_reservation_p (insn));
 
+  /* If this is a vector insn with vl/vtype info, then record the last
+     vector configuration.  */
+  if (vsetvl_insn_p (insn))
+    clear_vconfig ();
+  else if (NONDEBUG_INSN_P (insn) && has_vtype_op (insn))
+    {
+      extract_insn_cached (insn);
+
+      rtx avl = get_avl (insn);
+      if (avl == RVV_VLMAX)
+       avl = const0_rtx;
+
+      if (!avl || !CONST_INT_P (avl))
+       clear_vconfig ();
+      else
+       {
+         last_vconfig.valid = true;
+         last_vconfig.avl = avl;
+         last_vconfig.sew = get_sew (insn);
+         last_vconfig.vlmul = get_vlmul (insn);
+         last_vconfig.ta = tail_agnostic_p (insn);
+         last_vconfig.ma = mask_agnostic_p (insn);
+       }
+    }
+
   return more - 1;
 }
 
+/* Implement TARGET_SCHED_REORDER.  The goal here is to look at the ready
+   queue and reorder it ever so slightly to encourage issuing an insn with
+   the same vector configuration as the most recently issued vector
+   instruction.  That will reduce vsetvl instructions.  */
+static int
+riscv_sched_reorder (FILE *, int, rtx_insn **ready, int *nreadyp, int)
+{
+  /* If we don't have a valid prior vector configuration, then there is
+     no point in reordering the ready queue, similarly if there is
+     just one entry in the queue.  */
+  if (!last_vconfig.valid || *nreadyp == 1)
+    return riscv_issue_rate ();
+
+  int nready = *nreadyp;
+  int priority = INSN_PRIORITY (ready[nready - 1]);
+  for (int i = nready - 1; i >= 0; i--)
+    {
+      rtx_insn *insn = ready[i];
+
+      /* On a high performance core, vsetvl instructions should be
+        inexpensive.  Removing them is very much a secondary concern, so
+        be extremely conservative with reordering, essentially only
+        allowing reordering within the highest priority value.
+
+        Lower end cores may benefit from more flexibility here.  That
+        tuning is left to those who understand their core's behavior
+        and can thoroughly benchmark the result.  Assuming such
+        designs appear, we can probably put an entry in the tuning
+        structure to indicate how much difference in priority to allow.  */
+      if (INSN_PRIORITY (insn) < priority)
+       break;
+
+      if (compatible_with_last_vconfig (insn))
+       {
+         /* This entry is compatible with the last vconfig and has
+            the same priority as the most important insn. 
So swap + it so that we keep the vector configuration as-is and + ultimately eliminate a vsetvl. + + Note no need to swap if this is the first entry in the + queue. */ + if (i == nready - 1) + break; + + std::swap (ready[i], ready[nready - 1]); + break; + } + } + + return riscv_issue_rate (); +} + + /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports instruction fusion of some sort. */ @@ -11842,6 +12225,12 @@ riscv_override_options_internal (struct gcc_options *opts) /* Convert -march and -mrvv-vector-bits to a chunks count. */ riscv_vector_chunks = riscv_convert_vector_chunks (opts); + /* Set scalar costing to a high value such that we always pick + vectorization. Increase scalar costing by 100x. */ + if (opts->x_riscv_max_vectorization) + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_vect_scalar_cost_multiplier, 10000); + if (opts->x_flag_cf_protection != CF_NONE) { if ((opts->x_flag_cf_protection & CF_RETURN) == CF_RETURN @@ -12079,6 +12468,39 @@ riscv_option_restore (struct gcc_options *opts, static GTY (()) tree riscv_previous_fndecl; +/* Reset the previous function declaration. */ + +void +riscv_reset_previous_fndecl (void) +{ + riscv_previous_fndecl = NULL; +} + +/* Implement TARGET_OPTION_SAVE. */ + +static void +riscv_option_save (struct cl_target_option *ptr, + struct gcc_options *opts, + struct gcc_options * /* opts_set */) +{ + ptr->x_riscv_arch_string = opts->x_riscv_arch_string; + ptr->x_riscv_tune_string = opts->x_riscv_tune_string; + ptr->x_riscv_cpu_string = opts->x_riscv_cpu_string; +} + +/* Implement TARGET_OPTION_PRINT. */ + +static void +riscv_option_print (FILE *file, int indent, struct cl_target_option *ptr) +{ + fprintf (file, "%*sarch = %s\n", indent, "", + ptr->x_riscv_arch_string ? ptr->x_riscv_arch_string : "default"); + fprintf (file, "%*stune = %s\n", indent, "", + ptr->x_riscv_tune_string ? ptr->x_riscv_tune_string : "default"); + if (ptr->x_riscv_cpu_string) + fprintf (file, "%*scpu = %s\n", indent, "", ptr->x_riscv_cpu_string); +} + /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ static void @@ -12415,7 +12837,7 @@ riscv_get_interrupt_type (tree decl) /* Implement `TARGET_SET_CURRENT_FUNCTION'. Unpack the codegen decisions like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET of the function, if such exists. This function may be called multiple - times on a single function so use aarch64_previous_fndecl to avoid + times on a single function so use riscv_previous_fndecl to avoid setting up identical state. */ /* Sanity checking for above function attributes. */ @@ -13871,84 +14293,14 @@ riscv_get_raw_result_mode (int regno) return default_get_reg_raw_mode (regno); } -/* Generate a REG rtx of Xmode from the given rtx and mode. - The rtx x can be REG (QI/HI/SI/DI) or const_int. - The machine_mode mode is the original mode from define pattern. - The rtx_code can be ZERO_EXTEND or SIGN_EXTEND. - - If rtx is REG: - - 1. If rtx Xmode, the RTX x will be returned directly. - 2. If rtx non-Xmode, the value extended into a new REG of Xmode will be - returned. - - The scalar ALU like add don't support non-Xmode like QI/HI. Then the - gen_lowpart will have problem here. For example, when we would like - to add -1 (0xff if QImode) and 2 (0x2 if QImode). The 0xff and 0x2 will - be loaded to register for adding. Aka: - - 0xff + 0x2 = 0x101 instead of -1 + 2 = 1. - - Thus we need to sign extend 0xff to 0xffffffffffffffff if Xmode is DImode - for correctness. Similar the unsigned also need zero extend. 
- - If rtx is const_int: - - 1. A new REG rtx will be created to hold the value of const_int. - - According to the gccint doc, the constants generated for modes with fewer - bits than in HOST_WIDE_INT must be sign extended to full width. Thus there - will be two cases here, take QImode as example. - - For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple - mov from const_int to the new REG rtx is good enough here. - - For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand. - Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode - of RV64. So we need to cleanup the highest 56 bits of the new REG rtx moved - from the (const_int -2). - - Then the underlying expanding can perform the code generation based on - the REG rtx of Xmode, instead of taking care of these in expand func. */ - +/* Force X into an Xmode register. */ static rtx riscv_extend_to_xmode_reg (rtx x, machine_mode mode, enum rtx_code rcode) { gcc_assert (rcode == ZERO_EXTEND || rcode == SIGN_EXTEND); - rtx xmode_reg = gen_reg_rtx (Xmode); - - if (CONST_INT_P (x)) - { - if (mode == Xmode) - emit_move_insn (xmode_reg, x); - else if (rcode == ZERO_EXTEND) - { - /* Combine deliberately does not simplify extensions of constants - (long story). So try to generate the zero extended constant - efficiently. - - First extract the constant and mask off all the bits not in - MODE. */ - HOST_WIDE_INT val = INTVAL (x); - val &= GET_MODE_MASK (mode); - - /* X may need synthesis, so do not blindly copy it. */ - xmode_reg = force_reg (Xmode, gen_int_mode (val, Xmode)); - } - else /* SIGN_EXTEND. */ - { - rtx x_reg = gen_reg_rtx (mode); - emit_move_insn (x_reg, x); - riscv_emit_unary (rcode, xmode_reg, x_reg); - } - } - else if (mode == Xmode) - return x; - else - riscv_emit_unary (rcode, xmode_reg, x); - - return xmode_reg; + rtx t = convert_modes (Xmode, mode, x, rcode == ZERO_EXTEND); + return force_reg (Xmode, t); } /* Implements the unsigned saturation add standard name usadd for int mode. 
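The pitfall the removed comment describes, and the usadd semantics that the following hunks expand, are easy to demonstrate in scalar terms. A minimal standalone C++ sketch (illustrative names, not GCC internals):

#include <cstdint>
#include <cassert>

/* Adding the QImode values 0xff and 0x02 directly in a wide register
   yields 0x101, so the operands must first be extended to the word
   mode -- here by zero-extension, as riscv_extend_to_xmode_reg with
   ZERO_EXTEND would arrange.  */
static uint64_t
usadd_qi (uint8_t x, uint8_t y)
{
  uint64_t xw = x;                      /* zero-extend to word mode */
  uint64_t yw = y;
  uint64_t sum = xw + yw;               /* cannot overflow 64 bits */
  return sum > 0xff ? 0xff : sum;       /* saturate at the QImode max */
}

int
main ()
{
  assert (usadd_qi (0xff, 0x02) == 0xff);  /* 255 + 2 saturates */
  assert (usadd_qi (0x7f, 0x02) == 0x81);  /* 127 + 2 = 129 */
  return 0;
}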
@@ -14295,7 +14647,7 @@ riscv_expand_ustrunc (rtx dest, rtx src) gcc_assert (precision < 64); uint64_t max = ((uint64_t)1u << precision) - 1u; - rtx xmode_src = gen_lowpart (Xmode, src); + rtx xmode_src = riscv_extend_to_xmode_reg (src, GET_MODE (src), ZERO_EXTEND); rtx xmode_dest = gen_reg_rtx (Xmode); rtx xmode_lt = gen_reg_rtx (Xmode); @@ -15598,7 +15950,8 @@ synthesize_and (rtx operands[3]) if (tmode != VOIDmode) { rtx tmp = gen_lowpart (tmode, operands[1]); - emit_insn (gen_extend_insn (operands[0], tmp, word_mode, tmode, 1)); + emit_move_insn (operands[0], convert_modes (word_mode, tmode, + tmp, true)); return true; } } @@ -15995,9 +16348,15 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode) #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE riscv_option_override +#undef TARGET_OPTION_SAVE +#define TARGET_OPTION_SAVE riscv_option_save + #undef TARGET_OPTION_RESTORE #define TARGET_OPTION_RESTORE riscv_option_restore +#undef TARGET_OPTION_PRINT +#define TARGET_OPTION_PRINT riscv_option_print + #undef TARGET_OPTION_VALID_ATTRIBUTE_P #define TARGET_OPTION_VALID_ATTRIBUTE_P riscv_option_valid_attribute_p @@ -16011,9 +16370,15 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode) #undef TARGET_SCHED_MACRO_FUSION_PAIR_P #define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT riscv_sched_init + #undef TARGET_SCHED_VARIABLE_ISSUE #define TARGET_SCHED_VARIABLE_ISSUE riscv_sched_variable_issue +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER riscv_sched_reorder + #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index a0ad75c..6a3e537 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -60,17 +60,18 @@ extern const char *riscv_arch_help (int argc, const char **argv); { "riscv_arch_help", riscv_arch_help }, /* Support for a compile-time default CPU, et cetera. The rules are: - --with-arch is ignored if -march or -mcpu is specified. + --with-arch and --with-cpu are ignored if -march or -mcpu is specified. --with-abi is ignored if -mabi is specified. --with-tune is ignored if -mtune or -mcpu is specified. --with-isa-spec is ignored if -misa-spec is specified. --with-tls is ignored if -mtls-dialect is specified. - But using default -march/-mtune value if -mcpu don't have valid option. */ + Uses default values if -mcpu doesn't have a valid option. 
*/ #define OPTION_DEFAULT_SPECS \ {"tune", "%{!mtune=*:" \ " %{!mcpu=*:-mtune=%(VALUE)}" \ " %{mcpu=*:-mtune=%:riscv_default_mtune(%* %(VALUE))}}" }, \ + {"cpu", "%{!march=*:%{!mcpu=*:%:riscv_expand_arch_from_cpu(%(VALUE))}}" }, \ {"arch", "%{!march=*|march=unset:" \ " %{!mcpu=*:-march=%(VALUE)}" \ " %{mcpu=*:%:riscv_expand_arch_from_cpu(%* %(VALUE))}}" }, \ diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 640ca5f..6f8cd26 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -273,6 +273,7 @@ V1SI,V2SI,V4SI,V8SI,V16SI,V32SI,V64SI,V128SI,V256SI,V512SI,V1024SI, V1DI,V2DI,V4DI,V8DI,V16DI,V32DI,V64DI,V128DI,V256DI,V512DI, V1HF,V2HF,V4HF,V8HF,V16HF,V32HF,V64HF,V128HF,V256HF,V512HF,V1024HF,V2048HF, + V1BF,V2BF,V4BF,V8BF,V16BF,V32BF,V64BF,V128BF,V256BF,V512BF,V1024BF,V2048BF, V1SF,V2SF,V4SF,V8SF,V16SF,V32SF,V64SF,V128SF,V256SF,V512SF,V1024SF, V1DF,V2DF,V4DF,V8DF,V16DF,V32DF,V64DF,V128DF,V256DF,V512DF, V1BI,V2BI,V4BI,V8BI,V16BI,V32BI,V64BI,V128BI,V256BI,V512BI,V1024BI,V2048BI,V4096BI" @@ -672,7 +673,8 @@ ;; Microarchitectures we know how to tune for. ;; Keep this in sync with enum riscv_microarchitecture. (define_attr "tune" - "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8" + "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700, + tt_ascalon_d8,andes_25_series,andes_23_series,andes_45_series,spacemit_x60" (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)"))) ;; Describe a user's asm statement. @@ -790,14 +792,8 @@ rtx t6 = gen_reg_rtx (DImode); emit_insn (gen_addsi3_extended (t6, operands[1], operands[2])); - if (GET_CODE (operands[1]) != CONST_INT) - emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); - else - t4 = operands[1]; - if (GET_CODE (operands[2]) != CONST_INT) - emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); - else - t5 = operands[2]; + t4 = convert_modes (DImode, SImode, operands[1], false); + t5 = convert_modes (DImode, SImode, operands[2], false); emit_insn (gen_adddi3 (t3, t4, t5)); rtx t7 = gen_lowpart (SImode, t6); SUBREG_PROMOTED_VAR_P (t7) = 1; @@ -834,10 +830,7 @@ rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); - if (GET_CODE (operands[1]) != CONST_INT) - emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); - else - t3 = operands[1]; + t3 = convert_modes (DImode, SImode, operands[1], 0); emit_insn (gen_addsi3_extended (t4, operands[1], operands[2])); rtx t5 = gen_lowpart (SImode, t4); SUBREG_PROMOTED_VAR_P (t5) = 1; @@ -981,14 +974,8 @@ rtx t6 = gen_reg_rtx (DImode); emit_insn (gen_subsi3_extended (t6, operands[1], operands[2])); - if (GET_CODE (operands[1]) != CONST_INT) - emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); - else - t4 = operands[1]; - if (GET_CODE (operands[2]) != CONST_INT) - emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); - else - t5 = operands[2]; + t4 = convert_modes (DImode, SImode, operands[1], false); + t5 = convert_modes (DImode, SImode, operands[2], false); emit_insn (gen_subdi3 (t3, t4, t5)); rtx t7 = gen_lowpart (SImode, t6); SUBREG_PROMOTED_VAR_P (t7) = 1; @@ -1028,10 +1015,7 @@ rtx t3 = gen_reg_rtx (DImode); rtx t4 = gen_reg_rtx (DImode); - if (GET_CODE (operands[1]) != CONST_INT) - emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); - else - t3 = operands[1]; + t3 = convert_modes (DImode, SImode, operands[1], false); emit_insn (gen_subsi3_extended (t4, operands[1], operands[2])); rtx t5 = gen_lowpart (SImode, t4); 
SUBREG_PROMOTED_VAR_P (t5) = 1; @@ -1191,18 +1175,12 @@ rtx t5 = gen_reg_rtx (DImode); rtx t6 = gen_reg_rtx (DImode); - if (GET_CODE (operands[1]) != CONST_INT) - emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0)); - else - t4 = operands[1]; - if (GET_CODE (operands[2]) != CONST_INT) - emit_insn (gen_extend_insn (t5, operands[2], DImode, SImode, 0)); - else - t5 = operands[2]; + t4 = convert_modes (DImode, SImode, operands[1], false); + t5 = convert_modes (DImode, SImode, operands[2], false); emit_insn (gen_muldi3 (t3, t4, t5)); emit_move_insn (operands[0], gen_lowpart (SImode, t3)); - emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0)); + t6 = convert_modes (DImode, SImode, operands[0], false); riscv_expand_conditional_branch (operands[3], NE, t6, t3); } @@ -1238,14 +1216,8 @@ rtx t7 = gen_reg_rtx (DImode); rtx t8 = gen_reg_rtx (DImode); - if (GET_CODE (operands[1]) != CONST_INT) - emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0)); - else - t3 = operands[1]; - if (GET_CODE (operands[2]) != CONST_INT) - emit_insn (gen_extend_insn (t4, operands[2], DImode, SImode, 0)); - else - t4 = operands[2]; + t3 = convert_modes (DImode, SImode, operands[1], false); + t4 = convert_modes (DImode, SImode, operands[2], false); emit_insn (gen_ashldi3 (t5, t3, GEN_INT (32))); emit_insn (gen_ashldi3 (t6, t4, GEN_INT (32))); @@ -3752,6 +3724,57 @@ [(set_attr "type" "slt") (set_attr "mode" "<X:MODE>")]) +;; We can sometimes do better for unsigned comparisons against +;; values where there's a run of 1s in the LSBs. +;; +(define_split + [(set (match_operand:X 0 "register_operand") + (gtu:X (match_operand:X 1 "register_operand") + (match_operand 2 "const_int_operand"))) + (clobber (match_operand:X 3 "register_operand"))] + "exact_log2 (INTVAL (operands[2]) + 1) >= 0" + [(set (match_dup 3) (lshiftrt:X (match_dup 1) (match_dup 2))) + (set (match_dup 0) (ne:X (match_dup 3) (const_int 0)))] +{ operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]) + 1)); }) + +(define_split + [(set (match_operand:X 0 "register_operand") + (leu:X (match_operand:X 1 "register_operand") + (match_operand 2 "const_int_operand"))) + (clobber (match_operand:X 3 "register_operand"))] + "exact_log2 (INTVAL (operands[2]) + 1) >= 0" + [(set (match_dup 3) (lshiftrt:X (match_dup 1) (match_dup 2))) + (set (match_dup 0) (eq:X (match_dup 3) (const_int 0)))] +{ operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]) + 1)); }) + +;; Alternate forms that are ultimately just sltiu +(define_insn "" + [(set (match_operand:X 0 "register_operand" "=r") + (eq:X (zero_extract:X (match_operand:X 1 "register_operand" "r") + (match_operand 2 "const_int_operand") + (match_operand 3 "const_int_operand")) + (const_int 0)))] + "(INTVAL (operands[3]) < 11 + && INTVAL (operands[2]) + INTVAL (operands[3]) == BITS_PER_WORD)" +{ + operands[2] = GEN_INT (HOST_WIDE_INT_1U << INTVAL (operands[3])); + return "sltiu\t%0,%1,%2"; +} + [(set_attr "type" "slt") + (set_attr "mode" "<X:MODE>")]) + +(define_insn "" + [(set (match_operand:X 0 "register_operand" "=r") + (eq:X (lshiftrt:X (match_operand:X 1 "register_operand" "r") + (match_operand 2 "const_int_operand")) + (const_int 0)))] + "INTVAL (operands[2]) < 11" +{ + operands[2] = GEN_INT (HOST_WIDE_INT_1U << INTVAL (operands[2])); + return "sltiu\t%0,%1,%2"; +} + [(set_attr "type" "slt") + (set_attr "mode" "<X:MODE>")]) ;; ;; .................... 
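The splitters and sltiu forms above rest on a simple identity, sketched here as standalone C++ with illustrative helper names (not part of the patch):

#include <cstdint>
#include <cassert>

/* For a constant C with C + 1 == 2^k (a run of k ones in the LSBs):
     x >u C   <==>   (x >> k) != 0
     x <=u C  <==>   (x >> k) == 0
   and (x >> k) == 0 is in turn x <u 2^k, a single sltiu whenever 2^k
   fits the 12-bit immediate -- hence the INTVAL (...) < 11 guards.  */
static bool gtu_via_shift (uint64_t x, unsigned k) { return (x >> k) != 0; }
static bool leu_via_shift (uint64_t x, unsigned k) { return (x >> k) == 0; }

int
main ()
{
  const unsigned k = 8;
  const uint64_t c = (1u << k) - 1;     /* 0xff */
  for (uint64_t x : {0ull, 1ull, 255ull, 256ull, 257ull, ~0ull})
    {
      assert (gtu_via_shift (x, k) == (x > c));
      assert (leu_via_shift (x, k) == (x <= c));
    }
  return 0;
}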
;; @@ -4966,3 +4989,7 @@ (include "generic-vector-ooo.md") (include "generic-ooo.md") (include "tt-ascalon-d8.md") +(include "andes-23-series.md") +(include "andes-25-series.md") +(include "andes-45-series.md") +(include "spacemit-x60.md") diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 6543fd1..452062c 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -341,6 +341,10 @@ Target Undocumented RejectNegative Joined Enum(vsetvl_strategy) Var(vsetvl_strat Target Undocumented Uinteger Var(riscv_two_source_permutes) Init(0) -param=riscv-two-source-permutes Enable permutes with two source vectors. +mmax-vectorization +Target Var(riscv_max_vectorization) Save +Override the scalar cost model such that vectorization is always profitable. + Enum Name(stringop_strategy) Type(enum stringop_strategy_enum) Valid arguments to -mstringop-strategy=: @@ -361,6 +365,18 @@ mstringop-strategy= Target RejectNegative Joined Enum(stringop_strategy) Var(stringop_strategy) Init(STRATEGY_AUTO) Specify stringop expansion strategy. +-param=memcpy-size-threshold= +Target Joined UInteger Var(riscv_memcpy_size_threshold) Init(-1) Param +Constant memcpy size in bytes above which to start using libcalls over inlining. + +-param=memmove-size-threshold= +Target Joined UInteger Var(riscv_memmove_size_threshold) Init(-1) Param +Constant memmove size in bytes above which to start using libcalls over inlining. + +-param=memset-size-threshold= +Target Joined UInteger Var(riscv_memset_size_threshold) Init(-1) Param +Constant memset size in bytes above which to start using libcalls over inlining. + Enum Name(rvv_vector_bits) Type(enum rvv_vector_bits_enum) The possible RVV vector register lengths: diff --git a/gcc/config/riscv/riscv.opt.urls b/gcc/config/riscv/riscv.opt.urls index fe88ec8..bfb1a2d 100644 --- a/gcc/config/riscv/riscv.opt.urls +++ b/gcc/config/riscv/riscv.opt.urls @@ -96,6 +96,8 @@ UrlSuffix(gcc/RISC-V-Options.html#index-minline-strncmp) minline-strlen UrlSuffix(gcc/RISC-V-Options.html#index-minline-strlen) +; skipping UrlSuffix for 'mmax-vectorization' due to finding no URLs + ; skipping UrlSuffix for 'mtls-dialect=' due to finding no URLs mfence-tso diff --git a/gcc/config/riscv/spacemit-x60.md b/gcc/config/riscv/spacemit-x60.md new file mode 100644 index 0000000..c991f89 --- /dev/null +++ b/gcc/config/riscv/spacemit-x60.md @@ -0,0 +1,190 @@ +;; spacemit_x60 DFA-based pipeline description for RISC-V targets. +;; Copyright (C) 2011-2025 Free Software Foundation, Inc. +;; Contributed by Andrew Waterman (andrew@sifive.com). +;; Based on MIPS target for GNU compiler. + +;; This file is part of GCC. + +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. + +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. 
+ +;; ---------------------------------------------------- +;; Spacemit-x60 Units +;; 2*alu + 2*lsu + 1*fpalu + 1*fdivsqrt + 1*vxu +;; +;; There's actually two VXU units and ops get split across them +;; to give the illusion of a single wider unit with higher +;; performance. There are a few ops that can only be fed into +;; one of the two units. Probably best to initially model as +;; a single unit +;; +;; The VXU is not currently modeled. +;; Some ops like shadd.uw and add.uw, cpop take an extra cycle +;; Given everything is in-order, anti-dependencies probably matter +;; FP sign injection isn't handled correctly +;; ---------------------------------------------------- + +(define_automaton "spacemit_x60") +(define_cpu_unit "spacemit_x60_alu0,spacemit_x60_alu1" "spacemit_x60") +(define_cpu_unit "spacemit_x60_lsu0,spacemit_x60_lsu1" "spacemit_x60") +;;(define_cpu_unit "spacemit_x60_vxu0" "spacemit_x60") +(define_cpu_unit "spacemit_x60_fpalu" "spacemit_x60") +(define_cpu_unit "spacemit_x60_fdivsqrt" "spacemit_x60") + +(define_reservation "spacemit_x60_lsu" "spacemit_x60_lsu0, spacemit_x60_lsu1") +(define_reservation "spacemit_x60_alu" "spacemit_x60_alu0, spacemit_x60_alu1") + +;; ---------------------------------------------------- +;; Memory (load/store) +;; ---------------------------------------------------- + +(define_insn_reservation "spacemit_x60_load" 5 + (and (eq_attr "tune" "spacemit_x60") + (eq_attr "type" "load,fpload,atomic")) + "spacemit_x60_lsu") + +(define_insn_reservation "spacemit_x60_store" 3 + (and (eq_attr "tune" "spacemit_x60") + (eq_attr "type" "store,fpstore")) + "spacemit_x60_lsu") + +;; ---------------------------------------------------- +;; Int +;; ---------------------------------------------------- + +;; alu0 handles div/rem and jumps +(define_insn_reservation "spacemit_x60_jump" 1 + (and (eq_attr "tune" "spacemit_x60") + (eq_attr "type" "branch,jump,call,jalr,ret,trap,sfb_alu")) + "spacemit_x60_alu0") + +(define_insn_reservation "spacemit_x60_idivsi" 12 + (and (eq_attr "tune" "spacemit_x60") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "SI"))) + "spacemit_x60_alu0*12") + +(define_insn_reservation "spacemit_x60_idivdi" 20 + (and (eq_attr "tune" "spacemit_x60") + (and (eq_attr "type" "idiv") + (eq_attr "mode" "DI"))) + "spacemit_x60_alu0*20") + +(define_insn_reservation "spacemit_x60_imulsi" 3 + (and (eq_attr "tune" "spacemit_x60") + (and (eq_attr "type" "imul") + (eq_attr "mode" "SI"))) + "spacemit_x60_alu") + +(define_insn_reservation "spacemit_x60_imuldi" 5 + (and (eq_attr "tune" "spacemit_x60") + (and (eq_attr "type" "imul") + (eq_attr "mode" "DI"))) + "spacemit_x60_alu") + +(define_insn_reservation "spacemit_x60_clmul" 5 + (and (eq_attr "tune" "spacemit_x60") + (eq_attr "type" "clmul")) + "spacemit_x60_alu") + +(define_insn_reservation "spacemit_x60_mtc_mfc" 3 + (and (eq_attr "tune" "spacemit_x60") + (eq_attr "type" "mtc,mfc")) + "spacemit_x60_alu") + +(define_insn_reservation "spacemit_x60_fcvt_i2f" 4 + (and (eq_attr "tune" "spacemit_x60") + (eq_attr "type" "fcvt_i2f")) + "spacemit_x60_alu") + +(define_insn_reservation "spacemit_x60_fcvt_f2i" 6 + (and (eq_attr "tune" "spacemit_x60") + (eq_attr "type" "fcvt_f2i")) + "spacemit_x60_alu") + +(define_insn_reservation "spacemit_x60_alu" 1 + (and (eq_attr "tune" "spacemit_x60") + (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,nop,logical,\ + move,bitmanip,min,max,minu,maxu,clz,ctz,rotate,\ + condmove,crypto,mvpair,zicond,cpop")) + "spacemit_x60_alu") + +(define_insn_reservation "spacemit_x60_alu2c" 
2
+  (and (eq_attr "tune" "spacemit_x60")
+       (eq_attr "type" "cpop"))
+  "spacemit_x60_alu")
+
+;; ----------------------------------------------------
+;; Float
+;; ----------------------------------------------------
+
+(define_insn_reservation "spacemit_x60_fcvt" 4
+  (and (eq_attr "tune" "spacemit_x60")
+       (eq_attr "type" "fcvt,fmove"))
+  "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fcmp" 6
+  (and (eq_attr "tune" "spacemit_x60")
+       (eq_attr "type" "fcmp"))
+  "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fmul_half_single" 4
+  (and (eq_attr "tune" "spacemit_x60")
+       (and (eq_attr "type" "fadd,fmul")
+            (ior (eq_attr "mode" "HF")
+                 (eq_attr "mode" "SF"))))
+  "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fmadd_half_single" 5
+  (and (eq_attr "tune" "spacemit_x60")
+       (and (eq_attr "type" "fmadd")
+            (ior (eq_attr "mode" "HF")
+                 (eq_attr "mode" "SF"))))
+  "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fmul_double" 5
+  (and (eq_attr "tune" "spacemit_x60")
+       (and (eq_attr "type" "fadd,fmul")
+            (eq_attr "mode" "DF")))
+  "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fmadd_double" 5
+  (and (eq_attr "tune" "spacemit_x60")
+       (and (eq_attr "type" "fmadd")
+            (eq_attr "mode" "DF")))
+  "spacemit_x60_fpalu")
+
+(define_insn_reservation "spacemit_x60_fdiv_half" 12
+  (and (eq_attr "tune" "spacemit_x60")
+       (and (eq_attr "type" "fdiv,fsqrt")
+            (eq_attr "mode" "HF")))
+  "spacemit_x60_fdivsqrt*12")
+
+(define_insn_reservation "spacemit_x60_fdiv_single" 15
+  (and (eq_attr "tune" "spacemit_x60")
+       (and (eq_attr "type" "fdiv,fsqrt")
+            (eq_attr "mode" "SF")))
+  "spacemit_x60_fdivsqrt*15")
+
+(define_insn_reservation "spacemit_x60_fdiv_double" 22
+  (and (eq_attr "tune" "spacemit_x60")
+       (and (eq_attr "type" "fdiv,fsqrt")
+            (eq_attr "mode" "DF")))
+  "spacemit_x60_fdivsqrt*22")
+
+(define_insn_reservation "spacemit_x60_dummy" 1
+  (and (eq_attr "tune" "spacemit_x60")
+       (eq_attr "type" "viminmax,vfmuladd,vfmovvf,vssegte,vlsegds,rdvlenb,vaesef,vfcmp,vmpop,vwsll,vsha2cl,vfwcvtbf16,vfncvtftoi,vgather,vsha2ch,vsts,vldm,vmsfs,vfmul,vcompress,vaesz,vssegtox,vstox,vclmulh,vghsh,vaalu,vslideup,vfalu,vaeskf1,vfcvtitof,vaesdm,vmffs,vandn,vstm,vgmul,vlds,viwmul,vfmerge,vlsegdff,vshift,vaesem,vaesdf,vste,ghost,viwred,vsalu,vfwredu,vmidx,sf_vfnrclip,vstux,vfslide1down,vfcvtftoi,vfncvtitof,vnshift,vsm3me,vired,vlde,vfwalu,sf_vc_se,vlsegdux,vicmp,vfncvtftof,vror,vfwmaccbf16,vfminmax,vldff,vstr,vsm3c,vfwcvtftoi,vbrev,vaeskf2,vidiv,vfwcvtftof,rdvl,vimul,vfsgnj,vimovvx,vsha2ms,vialu,vfredo,vctz,vlsegde,viwmuladd,vcpop,vsetvl,vldux,vfwmuladd,vector,wrvxrm,vsshift,vfredu,vimerge,vlsegdox,vfrecp,vnclip,vfclass,vbrev8,vslidedown,vldox,vmalu,vext,vimuladd,sf_vqmacc,vldr,vrol,vmov,vsmul,vclmul,vfmov,vislide1up,vssegtux,vclz,rdfrm,vfwcvtitof,vfncvtbf16,vfmovfv,vislide1down,vfwmul,vfsqrt,vrev8,vicalu,vimov,wrfrm,vfdiv,sf_vc,vsm4k,vmiota,vsm4r,viwalu,vsetvl_pre,vimovxv,vfwredo,vfslide1up,vssegts"))
+  "nothing")
+
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 37f15d8..01eab1a 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -603,8 +603,7 @@
 {
   /* We don't have SI mode compare on RV64, so we need to make sure
      expected value is sign-extended. 
*/ - rtx tmp0 = gen_reg_rtx (word_mode); - emit_insn (gen_extend_insn (tmp0, operands[3], word_mode, <MODE>mode, 0)); + rtx tmp0 = convert_modes (word_mode, <MODE>mode, operands[3], false); operands[3] = gen_lowpart (<MODE>mode, tmp0); } @@ -702,17 +701,8 @@ operands[6], operands[7])); - rtx val = gen_reg_rtx (SImode); - if (operands[1] != const0_rtx) - emit_move_insn (val, gen_rtx_SIGN_EXTEND (SImode, operands[1])); - else - emit_move_insn (val, const0_rtx); - - rtx exp = gen_reg_rtx (SImode); - if (operands[3] != const0_rtx) - emit_move_insn (exp, gen_rtx_SIGN_EXTEND (SImode, operands[3])); - else - emit_move_insn (exp, const0_rtx); + rtx val = convert_modes (SImode, <SHORT:MODE>mode, operands[1], false); + rtx exp = convert_modes (SImode, <SHORT:MODE>mode, operands[3], false); rtx compare = val; if (exp != const0_rtx) diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index b53a2df..2761e5e 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -89,6 +89,12 @@ riscv-sr.o: $(srcdir)/config/riscv/riscv-sr.cc $(CONFIG_H) \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-sr.cc +riscv-opt-popretz.o: $(srcdir)/config/riscv/riscv-opt-popretz.cc $(CONFIG_H) \ + $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(TARGET_H) recog.h insn-opinit.h \ + tree-pass.h emit-rtl.h insn-config.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/riscv/riscv-opt-popretz.cc + riscv-c.o: $(srcdir)/config/riscv/riscv-c.cc $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ @@ -216,7 +222,8 @@ RISCV_EXT_DEFS = \ $(srcdir)/config/riscv/riscv-ext-thead.def \ $(srcdir)/config/riscv/riscv-ext-ventana.def \ $(srcdir)/config/riscv/riscv-ext-mips.def \ - $(srcdir)/config/riscv/riscv-ext-andes.def + $(srcdir)/config/riscv/riscv-ext-andes.def \ + $(srcdir)/config/riscv/riscv-ext-spacemit.def $(srcdir)/config/riscv/riscv-ext.opt: $(RISCV_EXT_DEFS) diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md index 20e82e6..42171a5 100644 --- a/gcc/config/riscv/thead.md +++ b/gcc/config/riscv/thead.md @@ -34,7 +34,7 @@ (define_insn "*th_srri<mode>3" [(set (match_operand:GPR 0 "register_operand" "=r") (rotatert:GPR (match_operand:GPR 1 "register_operand" "r") - (match_operand 2 "const_int_operand" "n")))] + (match_operand 2 "const_int_operand" "n")))] "TARGET_XTHEADBB && (TARGET_64BIT || <MODE>mode == SImode)" { bool wform = TARGET_64BIT && (<MODE>mode == SImode); @@ -45,6 +45,22 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "<GPR:MODE>")]) +;; Version with explicit sign extension to facilitate sign extension +;; removal. 
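As a rough scalar model of the semantics this pattern assumes for th.srriw (a standalone C++ sketch with made-up names, not part of the patch):

#include <cstdint>

/* Rotate the low 32 bits right by AMT, then sign-extend the 32-bit
   result to 64 bits -- matching the sign_extend:DI of a rotatert:SI in
   the RTL below.  The & 31 mirrors the INTVAL (...) & (32 - 1) masking
   of the immediate.  */
static int64_t
th_srriw_model (uint32_t x, unsigned amt)
{
  amt &= 31;
  uint32_t rot = amt ? (x >> amt) | (x << (32 - amt)) : x;
  return (int64_t) (int32_t) rot;
}

Keeping the sign extension explicit in the RTL is what allows the later sign-extension-removal pass to drop redundant extensions of the result, as the comment above notes.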
+(define_insn "*th_srrisi3_extended" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (rotatert:SI (match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand" "n"))))] + "TARGET_XTHEADBB && TARGET_64BIT" + { + operands[2] = GEN_INT (INTVAL (operands[2]) + & (GET_MODE_BITSIZE (SImode) - 1)); + return "th.srriw\t%0,%1,%2"; + } + [(set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + (define_insn "*th_ext<mode>4" [(set (match_operand:GPR 0 "register_operand" "=r") (sign_extract:GPR (match_operand:GPR 1 "register_operand" "r") diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index 45af656..90865a3 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -173,6 +173,21 @@ (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_64") ]) +(define_mode_iterator VLSF_ZVFBF16 [ + (V1BF "riscv_vector::vls_mode_valid_p (V1BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V2BF "riscv_vector::vls_mode_valid_p (V2BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V4BF "riscv_vector::vls_mode_valid_p (V4BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V8BF "riscv_vector::vls_mode_valid_p (V8BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V16BF "riscv_vector::vls_mode_valid_p (V16BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V32BF "riscv_vector::vls_mode_valid_p (V32BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 64") + (V64BF "riscv_vector::vls_mode_valid_p (V64BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 128") + (V128BF "riscv_vector::vls_mode_valid_p (V128BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 256") + (V256BF "riscv_vector::vls_mode_valid_p (V256BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 512") + (V512BF "riscv_vector::vls_mode_valid_p (V512BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 1024") + (V1024BF "riscv_vector::vls_mode_valid_p (V1024BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 2048") + (V2048BF "riscv_vector::vls_mode_valid_p (V2048BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 4096") +]) + (define_mode_iterator VF_ZVFHMIN [ (RVVM8HF "TARGET_VECTOR_ELEN_FP_16") (RVVM4HF "TARGET_VECTOR_ELEN_FP_16") (RVVM2HF "TARGET_VECTOR_ELEN_FP_16") (RVVM1HF "TARGET_VECTOR_ELEN_FP_16") (RVVMF2HF "TARGET_VECTOR_ELEN_FP_16") @@ -1646,6 +1661,18 @@ (V512HF "riscv_vector::vls_mode_valid_p (V512HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 1024") (V1024HF "riscv_vector::vls_mode_valid_p (V1024HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 2048") (V2048HF "riscv_vector::vls_mode_valid_p (V2048HFmode) && TARGET_ZVFH && TARGET_MIN_VLEN >= 4096") + (V1BF "riscv_vector::vls_mode_valid_p (V1BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V2BF "riscv_vector::vls_mode_valid_p (V2BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V4BF "riscv_vector::vls_mode_valid_p (V4BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V8BF "riscv_vector::vls_mode_valid_p (V8BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V16BF "riscv_vector::vls_mode_valid_p (V16BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V32BF "riscv_vector::vls_mode_valid_p (V32BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 64") + (V64BF "riscv_vector::vls_mode_valid_p (V64BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 128") + (V128BF "riscv_vector::vls_mode_valid_p (V128BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 256") + (V256BF "riscv_vector::vls_mode_valid_p (V256BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 512") + (V512BF "riscv_vector::vls_mode_valid_p (V512BFmode) && 
TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 1024") + (V1024BF "riscv_vector::vls_mode_valid_p (V1024BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 2048") + (V2048BF "riscv_vector::vls_mode_valid_p (V2048BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 4096") (V1SF "riscv_vector::vls_mode_valid_p (V1SFmode) && TARGET_VECTOR_ELEN_FP_32") (V2SF "riscv_vector::vls_mode_valid_p (V2SFmode) && TARGET_VECTOR_ELEN_FP_32") (V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32") @@ -1671,23 +1698,39 @@ (define_mode_iterator VB_VLS [VB VLSB]) -(define_mode_iterator VLS [VLSI VLSF_ZVFHMIN]) +;; VLSI + VLSF but including half-float modes for Zvfhmin and Zvfbf16, +;; to be used for loads/stores. +(define_mode_iterator VLS [VLSI VLSF_ZVFHMIN VLSF_ZVFBF16]) +;; VLSI + VLSF but including half-float modes iff TARGET_ZVFH. (define_mode_iterator VLS_ZVFH [VLSI VLSF]) -(define_mode_iterator V [VI VF_ZVFBF16 VF_ZVFHMIN]) +;; VI + VF but including half-float modes for Zvfhmin and Zvfbf16, +;; to be used for loads/stores. +(define_mode_iterator V [VI VF_ZVFHMIN VF_ZVFBF16]) +;; VI + VF but including half-float modes iff TARGET_ZVFH. (define_mode_iterator V_ZVFH [VI VF]) +;; Used for permutes and loads/stores, i.e. operations that are int/float +;; agnostic. Some loads/stores still only use V for now but we can change +;; that. (define_mode_iterator V_VLS [V VLS]) +;; Same as V_VLS but with ZVFH instead of ZVFHMIN. Currently only used for +;; vec_extract and vec_set because those use v(f)mv. As those are just +;; permutes we could pun with an integer type when the actual mode is +;; not supported. (Just as we already do for broadcasting unsupported +;; modes, see V_VLSF_FALLBACK). (define_mode_iterator V_VLS_ZVFH [V_ZVFH VLS_ZVFH]) (define_mode_iterator V_VLSI [VI VLSI]) (define_mode_iterator V_VLSF [VF VLSF]) -(define_mode_iterator V_VLSF_ZVFHMIN [VF_ZVFBF16 VF_ZVFHMIN VLSF_ZVFHMIN]) +;; All modes that cannot be broadcast directly so we either use strided +;; broadcast or gather broadcast. 
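+;; These are the FP16/BF16 modes that only Zvfhmin or Zvfbf16 provide;
+;; without Zvfh there is no vfmv.v.f for such elements, so broadcasts are
+;; emulated, e.g. with a zero-stride vlse16.v (see the
+;; *pred_strided_broadcast splitter in vector.md).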
+(define_mode_iterator V_VLSF_FALLBACK [VF_ZVFBF16 VLSF_ZVFBF16 VF_ZVFHMIN VLSF_ZVFHMIN]) (define_mode_iterator VT [V1T V2T V4T V8T V16T V32T]) @@ -1842,6 +1885,18 @@ (V512HF "V512HI") (V1024HF "V1024HI") (V2048HF "V2048HI") + (V1BF "V1HI") + (V2BF "V2HI") + (V4BF "V4HI") + (V8BF "V8HI") + (V16BF "V16HI") + (V32BF "V32HI") + (V64BF "V64HI") + (V128BF "V128HI") + (V256BF "V256HI") + (V512BF "V512HI") + (V1024BF "V1024HI") + (V2048BF "V2048HI") (V1SF "V1SI") (V2SF "V2SI") (V4SF "V4SI") @@ -2064,6 +2119,9 @@ (V1HF "V1BI") (V2HF "V2BI") (V4HF "V4BI") (V8HF "V8BI") (V16HF "V16BI") (V32HF "V32BI") (V64HF "V64BI") (V128HF "V128BI") (V256HF "V256BI") (V512HF "V512BI") (V1024HF "V1024BI") (V2048HF "V2048BI") + (V1BF "V1BI") (V2BF "V2BI") (V4BF "V4BI") (V8BF "V8BI") (V16BF "V16BI") + (V32BF "V32BI") (V64BF "V64BI") (V128BF "V128BI") (V256BF "V256BI") + (V512BF "V512BI") (V1024BF "V1024BI") (V2048BF "V2048BI") (V1SF "V1BI") (V2SF "V2BI") (V4SF "V4BI") (V8SF "V8BI") (V16SF "V16BI") (V32SF "V32BI") (V64SF "V64BI") (V128SF "V128BI") (V256SF "V256BI") (V512SF "V512BI") (V1024SF "V1024BI") @@ -2175,6 +2233,9 @@ (V1HF "v1bi") (V2HF "v2bi") (V4HF "v4bi") (V8HF "v8bi") (V16HF "v16bi") (V32HF "v32bi") (V64HF "v64bi") (V128HF "v128bi") (V256HF "v256bi") (V512HF "v512bi") (V1024HF "v1024bi") (V2048HF "v2048bi") + (V1BF "v1bi") (V2BF "v2bi") (V4BF "v4bi") (V8BF "v8bi") (V16BF "v16bi") + (V32BF "v32bi") (V64BF "v64bi") (V128BF "v128bi") (V256BF "v256bi") + (V512BF "v512bi") (V1024BF "v1024bi") (V2048BF "v2048bi") (V1SF "v1bi") (V2SF "v2bi") (V4SF "v4bi") (V8SF "v8bi") (V16SF "v16bi") (V32SF "v32bi") (V64SF "v64bi") (V128SF "v128bi") (V256SF "v256bi") (V512SF "v512bi") (V1024SF "v1024bi") @@ -2209,6 +2270,8 @@ (V1DI "DI") (V2DI "DI") (V4DI "DI") (V8DI "DI") (V16DI "DI") (V32DI "DI") (V64DI "DI") (V128DI "DI") (V256DI "DI") (V512DI "DI") (V1HF "HF") (V2HF "HF") (V4HF "HF") (V8HF "HF") (V16HF "HF") (V32HF "HF") (V64HF "HF") (V128HF "HF") (V256HF "HF") (V512HF "HF") (V1024HF "HF") (V2048HF "HF") + (V1BF "BF") (V2BF "BF") (V4BF "BF") (V8BF "BF") (V16BF "BF") (V32BF "BF") (V64BF "BF") (V128BF "BF") (V256BF "BF") + (V512BF "BF") (V1024BF "BF") (V2048BF "BF") (V1SF "SF") (V2SF "SF") (V4SF "SF") (V8SF "SF") (V16SF "SF") (V32SF "SF") (V64SF "SF") (V128SF "SF") (V256SF "SF") (V512SF "SF") (V1024SF "SF") (V1DF "DF") (V2DF "DF") (V4DF "DF") (V8DF "DF") (V16DF "DF") (V32DF "DF") (V64DF "DF") (V128DF "DF") (V256DF "DF") (V512DF "DF") @@ -2308,6 +2371,8 @@ (V1DI "di") (V2DI "di") (V4DI "di") (V8DI "di") (V16DI "di") (V32DI "di") (V64DI "di") (V128DI "di") (V256DI "di") (V512DI "di") (V1HF "hf") (V2HF "hf") (V4HF "hf") (V8HF "hf") (V16HF "hf") (V32HF "hf") (V64HF "hf") (V128HF "hf") (V256HF "hf") (V512HF "hf") (V1024HF "hf") (V2048HF "hf") + (V1BF "bf") (V2BF "bf") (V4BF "bf") (V8BF "bf") (V16BF "bf") (V32BF "bf") (V64BF "bf") (V128BF "bf") (V256BF "bf") + (V512BF "bf") (V1024BF "bf") (V2048BF "bf") (V1SF "sf") (V2SF "sf") (V4SF "sf") (V8SF "sf") (V16SF "sf") (V32SF "sf") (V64SF "sf") (V128SF "sf") (V256SF "sf") (V512SF "sf") (V1024SF "sf") (V1DF "df") (V2DF "df") (V4DF "df") (V8DF "df") (V16DF "df") (V32DF "df") (V64DF "df") (V128DF "df") (V256DF "df") (V512DF "df") @@ -2689,6 +2754,8 @@ (V1DI "64") (V2DI "64") (V4DI "64") (V8DI "64") (V16DI "64") (V32DI "64") (V64DI "64") (V128DI "64") (V256DI "64") (V512DI "64") (V1HF "16") (V2HF "16") (V4HF "16") (V8HF "16") (V16HF "16") (V32HF "16") (V64HF "16") (V128HF "16") (V256HF "16") (V512HF "16") (V1024HF "16") (V2048HF "16") + (V1BF "16") (V2BF "16") (V4BF "16") (V8BF "16") (V16BF
"16") (V32BF "16") (V64BF "16") (V128BF "16") (V256BF "16") + (V512BF "16") (V1024BF "16") (V2048BF "16") (V1SF "32") (V2SF "32") (V4SF "32") (V8SF "32") (V16SF "32") (V32SF "32") (V64SF "32") (V128SF "32") (V256SF "32") (V512SF "32") (V1024SF "32") (V1DF "64") (V2DF "64") (V4DF "64") (V8DF "64") (V16DF "64") (V32DF "64") (V64DF "64") (V128DF "64") (V256DF "64") (V512DF "64") @@ -3702,6 +3769,18 @@ (V512HF "vector_eew16_stride_operand") (V1024HF "vector_eew16_stride_operand") (V2048HF "vector_eew16_stride_operand") + (V1BF "vector_eew16_stride_operand") + (V2BF "vector_eew16_stride_operand") + (V4BF "vector_eew16_stride_operand") + (V8BF "vector_eew16_stride_operand") + (V16BF "vector_eew16_stride_operand") + (V32BF "vector_eew16_stride_operand") + (V64BF "vector_eew16_stride_operand") + (V128BF "vector_eew16_stride_operand") + (V256BF "vector_eew16_stride_operand") + (V512BF "vector_eew16_stride_operand") + (V1024BF "vector_eew16_stride_operand") + (V2048BF "vector_eew16_stride_operand") (V1SF "vector_eew32_stride_operand") (V2SF "vector_eew32_stride_operand") (V4SF "vector_eew32_stride_operand") @@ -3816,6 +3895,18 @@ (V512HF "rJ,rJ,rJ,k02,k02,k02") (V1024HF "rJ,rJ,rJ,k02,k02,k02") (V2048HF "rJ,rJ,rJ,k02,k02,k02") + (V1BF "rJ,rJ,rJ,k02,k02,k02") + (V2BF "rJ,rJ,rJ,k02,k02,k02") + (V4BF "rJ,rJ,rJ,k02,k02,k02") + (V8BF "rJ,rJ,rJ,k02,k02,k02") + (V16BF "rJ,rJ,rJ,k02,k02,k02") + (V32BF "rJ,rJ,rJ,k02,k02,k02") + (V64BF "rJ,rJ,rJ,k02,k02,k02") + (V128BF "rJ,rJ,rJ,k02,k02,k02") + (V256BF "rJ,rJ,rJ,k02,k02,k02") + (V512BF "rJ,rJ,rJ,k02,k02,k02") + (V1024BF "rJ,rJ,rJ,k02,k02,k02") + (V2048BF "rJ,rJ,rJ,k02,k02,k02") (V1SF "rJ,rJ,rJ,k04,k04,k04") (V2SF "rJ,rJ,rJ,k04,k04,k04") (V4SF "rJ,rJ,rJ,k04,k04,k04") @@ -3930,6 +4021,18 @@ (V512HF "rJ,k02") (V1024HF "rJ,k02") (V2048HF "rJ,k02") + (V1BF "rJ,k02") + (V2BF "rJ,k02") + (V4BF "rJ,k02") + (V8BF "rJ,k02") + (V16BF "rJ,k02") + (V32BF "rJ,k02") + (V64BF "rJ,k02") + (V128BF "rJ,k02") + (V256BF "rJ,k02") + (V512BF "rJ,k02") + (V1024BF "rJ,k02") + (V2048BF "rJ,k02") (V1SF "rJ,k04") (V2SF "rJ,k04") (V4SF "rJ,k04") @@ -4409,6 +4512,11 @@ (V4HF "riscv_vector::vls_mode_valid_p (V4HFmode) && TARGET_VECTOR_ELEN_FP_16") (V8HF "riscv_vector::vls_mode_valid_p (V8HFmode) && TARGET_VECTOR_ELEN_FP_16") (V16HF "riscv_vector::vls_mode_valid_p (V16HFmode) && TARGET_VECTOR_ELEN_FP_16") + (V1BF "riscv_vector::vls_mode_valid_p (V1BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V2BF "riscv_vector::vls_mode_valid_p (V2BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V4BF "riscv_vector::vls_mode_valid_p (V4BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V8BF "riscv_vector::vls_mode_valid_p (V8BFmode) && TARGET_VECTOR_ELEN_BF_16") + (V16BF "riscv_vector::vls_mode_valid_p (V16BFmode) && TARGET_VECTOR_ELEN_BF_16") (V1SF "riscv_vector::vls_mode_valid_p (V1SFmode) && TARGET_VECTOR_ELEN_FP_32") (V2SF "riscv_vector::vls_mode_valid_p (V2SFmode) && TARGET_VECTOR_ELEN_FP_32") (V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32") @@ -4461,6 +4569,13 @@ (V512HF "riscv_vector::vls_mode_valid_p (V512HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 1024") (V1024HF "riscv_vector::vls_mode_valid_p (V1024HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 2048") (V2048HF "riscv_vector::vls_mode_valid_p (V2048HFmode) && TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 4096") + (V32BF "riscv_vector::vls_mode_valid_p (V32BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 64") + (V64BF "riscv_vector::vls_mode_valid_p (V64BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN 
>= 128") + (V128BF "riscv_vector::vls_mode_valid_p (V128BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 256") + (V256BF "riscv_vector::vls_mode_valid_p (V256BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 512") + (V512BF "riscv_vector::vls_mode_valid_p (V512BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 1024") + (V1024BF "riscv_vector::vls_mode_valid_p (V1024BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 2048") + (V2048BF "riscv_vector::vls_mode_valid_p (V2048BFmode) && TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN >= 4096") (V32SF "riscv_vector::vls_mode_valid_p (V32SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") (V64SF "riscv_vector::vls_mode_valid_p (V64SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 256") (V128SF "riscv_vector::vls_mode_valid_p (V128SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 512") diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 3cb87bf..ba4a43b 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -139,7 +139,8 @@ RVVM2x3HF,RVVM1x3HF,RVVMF2x3HF,RVVMF4x3HF,\ RVVM4x2HF,RVVM2x2HF,RVVM1x2HF,RVVMF2x2HF,RVVMF4x2HF,\ V1HI,V2HI,V4HI,V8HI,V16HI,V32HI,V64HI,V128HI,V256HI,V512HI,V1024HI,V2048HI,\ - V1HF,V2HF,V4HF,V8HF,V16HF,V32HF,V64HF,V128HF,V256HF,V512HF,V1024HF,V2048HF") + V1HF,V2HF,V4HF,V8HF,V16HF,V32HF,V64HF,V128HF,V256HF,V512HF,V1024HF,V2048HF,\ + V1BF,V2BF,V4BF,V8BF,V16BF,V32BF,V64BF,V128BF,V256BF,V512BF,V1024BF,V2048BF") (const_int 16) (eq_attr "mode" "RVVM8SI,RVVM4SI,RVVM2SI,RVVM1SI,RVVMF2SI,\ RVVM8SF,RVVM4SF,RVVM2SF,RVVM1SF,RVVMF2SF,\ @@ -446,6 +447,18 @@ (eq_attr "mode" "V512HF") (symbol_ref "riscv_vector::get_vlmul(E_V512HFmode)") (eq_attr "mode" "V1024HF") (symbol_ref "riscv_vector::get_vlmul(E_V1024HFmode)") (eq_attr "mode" "V2048HF") (symbol_ref "riscv_vector::get_vlmul(E_V2048HFmode)") + (eq_attr "mode" "V1BF") (symbol_ref "riscv_vector::get_vlmul(E_V1BFmode)") + (eq_attr "mode" "V2BF") (symbol_ref "riscv_vector::get_vlmul(E_V2BFmode)") + (eq_attr "mode" "V4BF") (symbol_ref "riscv_vector::get_vlmul(E_V4BFmode)") + (eq_attr "mode" "V8BF") (symbol_ref "riscv_vector::get_vlmul(E_V8BFmode)") + (eq_attr "mode" "V16BF") (symbol_ref "riscv_vector::get_vlmul(E_V16BFmode)") + (eq_attr "mode" "V32BF") (symbol_ref "riscv_vector::get_vlmul(E_V32BFmode)") + (eq_attr "mode" "V64BF") (symbol_ref "riscv_vector::get_vlmul(E_V64BFmode)") + (eq_attr "mode" "V128BF") (symbol_ref "riscv_vector::get_vlmul(E_V128BFmode)") + (eq_attr "mode" "V256BF") (symbol_ref "riscv_vector::get_vlmul(E_V256BFmode)") + (eq_attr "mode" "V512BF") (symbol_ref "riscv_vector::get_vlmul(E_V512BFmode)") + (eq_attr "mode" "V1024BF") (symbol_ref "riscv_vector::get_vlmul(E_V1024BFmode)") + (eq_attr "mode" "V2048BF") (symbol_ref "riscv_vector::get_vlmul(E_V2048BFmode)") (eq_attr "mode" "V1SF") (symbol_ref "riscv_vector::get_vlmul(E_V1SFmode)") (eq_attr "mode" "V2SF") (symbol_ref "riscv_vector::get_vlmul(E_V2SFmode)") (eq_attr "mode" "V4SF") (symbol_ref "riscv_vector::get_vlmul(E_V4SFmode)") @@ -762,6 +775,18 @@ (eq_attr "mode" "V512HF") (symbol_ref "riscv_vector::get_ratio(E_V512HFmode)") (eq_attr "mode" "V1024HF") (symbol_ref "riscv_vector::get_ratio(E_V1024HFmode)") (eq_attr "mode" "V2048HF") (symbol_ref "riscv_vector::get_ratio(E_V2048HFmode)") + (eq_attr "mode" "V1BF") (symbol_ref "riscv_vector::get_ratio(E_V1BFmode)") + (eq_attr "mode" "V2BF") (symbol_ref "riscv_vector::get_ratio(E_V2BFmode)") + (eq_attr "mode" "V4BF") (symbol_ref "riscv_vector::get_ratio(E_V4BFmode)") + (eq_attr 
"mode" "V8BF") (symbol_ref "riscv_vector::get_ratio(E_V8BFmode)") + (eq_attr "mode" "V16BF") (symbol_ref "riscv_vector::get_ratio(E_V16BFmode)") + (eq_attr "mode" "V32BF") (symbol_ref "riscv_vector::get_ratio(E_V32BFmode)") + (eq_attr "mode" "V64BF") (symbol_ref "riscv_vector::get_ratio(E_V64BFmode)") + (eq_attr "mode" "V128BF") (symbol_ref "riscv_vector::get_ratio(E_V128BFmode)") + (eq_attr "mode" "V256BF") (symbol_ref "riscv_vector::get_ratio(E_V256BFmode)") + (eq_attr "mode" "V512BF") (symbol_ref "riscv_vector::get_ratio(E_V512BFmode)") + (eq_attr "mode" "V1024BF") (symbol_ref "riscv_vector::get_ratio(E_V1024BFmode)") + (eq_attr "mode" "V2048BF") (symbol_ref "riscv_vector::get_ratio(E_V2048BFmode)") (eq_attr "mode" "V1SF") (symbol_ref "riscv_vector::get_ratio(E_V1SFmode)") (eq_attr "mode" "V2SF") (symbol_ref "riscv_vector::get_ratio(E_V2SFmode)") (eq_attr "mode" "V4SF") (symbol_ref "riscv_vector::get_ratio(E_V4SFmode)") @@ -1437,6 +1462,8 @@ [(set_attr "type" "vlde,vste,vmov") (set_attr "mode" "<MODE>") (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE)) + (set (attr "has_vl_op") (const_string "false")) + (set (attr "has_vtype_op") (const_string "false")) (set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE)) (set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))] ) @@ -2402,19 +2429,19 @@ (set_attr "mode" "<MODE>")]) (define_insn_and_split "*pred_strided_broadcast<mode>_zvfhmin" - [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr") - (if_then_else:V_VLSF_ZVFHMIN + [(set (match_operand:V_VLSF_FALLBACK 0 "register_operand" "=vr, vr, vr, vr") + (if_then_else:V_VLSF_FALLBACK (unspec:<VM> - [(match_operand:<VM> 1 "strided_broadcast_mask_operand" " vm, vm, Wc1, Wc1") - (match_operand 4 "vector_length_operand" "rvl, rvl, rvl, rvl") - (match_operand 5 "const_int_operand" " i, i, i, i") - (match_operand 6 "const_int_operand" " i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i") + [(match_operand:<VM> 1 "strided_broadcast_mask_operand" " vm, vm, Wc1, Wc1") + (match_operand 4 "vector_length_operand" "rvl, rvl, rvl, rvl") + (match_operand 5 "const_int_operand" " i, i, i, i") + (match_operand 6 "const_int_operand" " i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (vec_duplicate:V_VLSF_ZVFHMIN - (match_operand:<VEL> 3 "strided_broadcast_operand" " A, A, A, A")) - (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand" " vu, 0, vu, 0")))] + (vec_duplicate:V_VLSF_FALLBACK + (match_operand:<VEL> 3 "strided_broadcast_operand" " A, A, A, A")) + (match_operand:V_VLSF_FALLBACK 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "@ vlse<sew>.v\t%0,%3,zero,%1.t @@ -2422,7 +2449,8 @@ vlse<sew>.v\t%0,%3,zero vlse<sew>.v\t%0,%3,zero" "&& !strided_load_broadcast_p () - && <VEL>mode == HFmode + && (<VEL>mode == HFmode + || <VEL>mode == BFmode) && can_create_pseudo_p ()" [(const_int 0)] { @@ -4171,6 +4199,7 @@ "TARGET_VECTOR" "vw<plus_minus:insn><any_extend:u>.wx\t%0,%3,%z4%p1" [(set_attr "type" "vi<widen_binop_insn_type>") + (set_attr "mode_idx" "3") (set_attr "mode" "<V_DOUBLE_TRUNC>")]) (define_insn "@pred_single_widen_add<any_extend:su><mode>_extended_scalar" @@ -4437,6 +4466,7 @@ "TARGET_VECTOR" "v<insn>.vx\t%0,%3,%4%p1" [(set_attr "type" "<int_binop_insn_type>") + (set_attr "mode_idx" "3") (set_attr "mode" "<MODE>")]) (define_insn "@pred_<optab><mode>_scalar" @@ -4458,6 +4488,7 @@ "TARGET_VECTOR" "v<insn>.vx\t%0,%3,%4%p1" [(set_attr "type" "<int_binop_insn_type>") + 
(set_attr "mode_idx" "3") (set_attr "mode" "<MODE>")]) (define_expand "@pred_<optab><mode>_scalar" @@ -4512,6 +4543,7 @@ "TARGET_VECTOR" "v<insn>.vx\t%0,%3,%4%p1" [(set_attr "type" "<int_binop_insn_type>") + (set_attr "mode_idx" "3") (set_attr "mode" "<MODE>")]) (define_insn "*pred_<optab><mode>_extended_scalar" @@ -4534,6 +4566,7 @@ "TARGET_VECTOR && !TARGET_64BIT" "v<insn>.vx\t%0,%3,%4%p1" [(set_attr "type" "<int_binop_insn_type>") + (set_attr "mode_idx" "3") (set_attr "mode" "<MODE>")]) (define_expand "@pred_<optab><mode>_scalar" @@ -4588,6 +4621,7 @@ "TARGET_VECTOR" "v<insn>.vx\t%0,%3,%z4%p1" [(set_attr "type" "<int_binop_insn_type>") + (set_attr "mode_idx" "3") (set_attr "mode" "<MODE>")]) (define_insn "*pred_<optab><mode>_extended_scalar" @@ -4610,6 +4644,7 @@ "TARGET_VECTOR && !TARGET_64BIT" "v<insn>.vx\t%0,%3,%z4%p1" [(set_attr "type" "<int_binop_insn_type>") + (set_attr "mode_idx" "3") (set_attr "mode" "<MODE>")]) (define_insn "@pred_<sat_op><mode>" @@ -4655,6 +4690,7 @@ "TARGET_VECTOR" "v<sat_op>.vx\t%0,%3,%z4%p1" [(set_attr "type" "<sat_insn_type>") + (set_attr "mode_idx" "3") (set_attr "mode" "<MODE>")]) (define_insn "@pred_<sat_op><mode>_scalar" @@ -8641,7 +8677,7 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (unspec:VT - [(match_operand 3 "pmode_reg_or_0_operand" " rJ, rJ, rJ") + [(match_operand:VT 3 "memory_operand" " m, m, m") (mem:BLK (scratch))] UNSPEC_VLEFF) (match_operand:VT 2 "vector_merge_operand" " 0, vu, vu"))) (set (reg:SI VL_REGNUM) @@ -8656,7 +8692,7 @@ [(match_dup 3) (mem:BLK (scratch))] UNSPEC_VLEFF) (match_dup 2))] UNSPEC_MODIFY_VL))] "TARGET_VECTOR" - "vlseg<nf>e<sew>ff.v\t%0,(%z3)%p1" + "vlseg<nf>e<sew>ff.v\t%0,%3%p1" [(set_attr "type" "vlsegdff") (set_attr "mode" "<MODE>")]) @@ -9042,6 +9078,56 @@ riscv_vector::prepare_ternary_operands (operands); }) +(define_insn "*pred_widen_mul_plus_u_vx<mode>_undef" + [(set (match_operand:VWEXTI 0 "register_operand" "=&vr") + (if_then_else:VWEXTI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1") + (match_operand 6 "vector_length_operand" " rvl") + (match_operand 7 "const_int_operand" " i") + (match_operand 8 "const_int_operand" " i") + (match_operand 9 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:VWEXTI + (mult:VWEXTI + (zero_extend:VWEXTI + (vec_duplicate:<V_DOUBLE_TRUNC> + (match_operand:<VSUBEL> 3 "register_operand" " rJ"))) + (zero_extend:VWEXTI + (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" " vr"))) + (match_operand:VWEXTI 5 "register_operand" " 0")) + (match_operand:VWEXTI 2 "vector_undef_operand")))] + "TARGET_VECTOR" + "vwmaccu.vx\t%0,%z3,%4%p1" + [(set_attr "type" "vimuladd") + (set_attr "mode" "<MODE>")]) + +(define_expand "@pred_widen_mul_plus_u_vx<mode>" + [(set (match_operand:VWEXTI 0 "register_operand") + (if_then_else:VWEXTI + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:VWEXTI + (mult:VWEXTI + (zero_extend:VWEXTI + (vec_duplicate:<V_DOUBLE_TRUNC> + (match_operand:<VSUBEL> 2 "register_operand"))) + (zero_extend:VWEXTI + (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))) + (match_operand:VWEXTI 4 "register_operand")) + (match_operand:VWEXTI 5 "vector_merge_operand")))] + "TARGET_VECTOR" + { + riscv_vector::prepare_ternary_operands (operands); + }) + 
(include "autovec.md") (include "autovec-opt.md") (include "sifive-vector.md") diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index fa33680..3336b0c 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -3625,10 +3625,7 @@ emit_insn (gen_vsx_xvcvsphp (rtx_tmp_hi, operands[1])); emit_insn (gen_vsx_xvcvsphp (rtx_tmp_lo, operands[2])); - if (!BYTES_BIG_ENDIAN) - emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_hi, rtx_tmp_lo)); - else - emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_lo, rtx_tmp_hi)); + emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_hi, rtx_tmp_lo)); DONE; }) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 647e89a..5133dac 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -2166,3 +2166,8 @@ (and (match_code "subreg") (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) == SUBREG_BYTE (op)"))) + +; Else operand for LEN_LOAD. +(define_predicate "lxvl_else_operand" + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) diff --git a/gcc/config/rs6000/rs6000-logue.cc b/gcc/config/rs6000/rs6000-logue.cc index 5377ad6..0005039 100644 --- a/gcc/config/rs6000/rs6000-logue.cc +++ b/gcc/config/rs6000/rs6000-logue.cc @@ -5332,18 +5332,18 @@ rs6000_output_function_epilogue (FILE *file) /* Tbtab format type. Use format type 0. */ fputs ("\t.byte 0,", file); - /* Language type. Unfortunately, there does not seem to be any - official way to discover the language being compiled, so we - use language_string. - C is 0. Fortran is 1. Ada is 3. Modula-2 is 8. C++ is 9. - Java is 13. Objective-C is 14. Objective-C++ isn't assigned - a number, so for now use 9. LTO, Go, D, and JIT aren't assigned - numbers either, so for now use 0. */ + /* Language type. Unfortunately, there does not seem to be any official + way to discover the language being compiled, so we use + language_string. C is 0. Fortran is 1. Ada is 3. Modula-2 is 8. + C++ is 9. Java is 13. Objective-C is 14. Objective-C++ isn't + assigned a number, so for now use 9. LTO, Go, D, Algol 68 and JIT + aren't assigned numbers either, so for now use 0. */ if (lang_GNU_C () || ! strcmp (language_string, "GNU GIMPLE") || ! strcmp (language_string, "GNU Go") || ! strcmp (language_string, "GNU D") || ! strcmp (language_string, "GNU Rust") + || ! strcmp (language_string, "GNU Algol 68") || ! strcmp (language_string, "libgccjit")) i = 0; else if (! 
strcmp (language_string, "GNU F77") diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 1d5cd25..bf899ad 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -28490,7 +28490,7 @@ static inline built_in_function complex_multiply_builtin_code (machine_mode mode) { gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT)); - int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT; + int func = BUILT_IN_COMPLEX_MUL_MIN + (mode - MIN_MODE_COMPLEX_FLOAT); return (built_in_function) func; } @@ -28501,7 +28501,7 @@ static inline built_in_function complex_divide_builtin_code (machine_mode mode) { gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT)); - int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT; + int func = BUILT_IN_COMPLEX_DIV_MIN + (mode - MIN_MODE_COMPLEX_FLOAT); return (built_in_function) func; } diff --git a/gcc/config/rs6000/rs6000.opt.urls b/gcc/config/rs6000/rs6000.opt.urls index 0b418c0..ff2a534 100644 --- a/gcc/config/rs6000/rs6000.opt.urls +++ b/gcc/config/rs6000/rs6000.opt.urls @@ -37,10 +37,10 @@ mmultiple UrlSuffix(gcc/RS_002f6000-and-PowerPC-Options.html#index-mmultiple) msoft-float -UrlSuffix(gcc/RS_002f6000-and-PowerPC-Options.html#index-msoft-float-11) +UrlSuffix(gcc/RS_002f6000-and-PowerPC-Options.html#index-msoft-float-10) mhard-float -UrlSuffix(gcc/RS_002f6000-and-PowerPC-Options.html#index-mhard-float-6) +UrlSuffix(gcc/RS_002f6000-and-PowerPC-Options.html#index-mhard-float-5) mpopcntd UrlSuffix(gcc/RS_002f6000-and-PowerPC-Options.html#index-mpopcntd) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index dd3573b..4d47833 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5798,13 +5798,14 @@ (define_expand "len_load_v16qi" [(match_operand:V16QI 0 "vlogical_operand") (match_operand:V16QI 1 "memory_operand") - (match_operand:QI 2 "gpc_reg_operand") - (match_operand:QI 3 "zero_constant")] + (match_operand:V16QI 2 "lxvl_else_operand") + (match_operand:QI 3 "gpc_reg_operand") + (match_operand:QI 4 "zero_constant")] "TARGET_P9_VECTOR && TARGET_64BIT" { rtx mem = XEXP (operands[1], 0); mem = force_reg (DImode, mem); - rtx len = gen_lowpart (DImode, operands[2]); + rtx len = gen_lowpart (DImode, operands[3]); emit_insn (gen_lxvl (operands[0], mem, len)); DONE; }) diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md index c7b93bd..e181399 100644 --- a/gcc/config/s390/predicates.md +++ b/gcc/config/s390/predicates.md @@ -614,3 +614,8 @@ (define_predicate "vll_bias_operand" (and (match_code "const_int") (match_test "op == CONSTM1_RTX (QImode)"))) + +; Else operand for LEN_LOAD. 
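+; The len_load optab now carries an explicit else operand for the bytes
+; beyond the load length; the vll-based expansion can only realize zeros
+; there, so (mirroring rs6000's lxvl_else_operand) accept nothing but a
+; const zero vector.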
+(define_predicate "vll_else_operand" + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (GET_MODE (op))"))) diff --git a/gcc/config/s390/s390-builtins.h b/gcc/config/s390/s390-builtins.h index e19fc7e..1e596ae 100644 --- a/gcc/config/s390/s390-builtins.h +++ b/gcc/config/s390/s390-builtins.h @@ -134,10 +134,10 @@ S390_OVERLOADED_BUILTIN_VAR_MAX #define S390_OVERLOADED_BUILTIN_OFFSET S390_BUILTIN_MAX #define S390_OVERLOADED_BUILTIN_VAR_OFFSET \ - (S390_BUILTIN_MAX + S390_OVERLOADED_BUILTIN_MAX) -#define S390_ALL_BUILTIN_MAX \ - (S390_BUILTIN_MAX + S390_OVERLOADED_BUILTIN_MAX + \ - S390_OVERLOADED_BUILTIN_VAR_MAX) + ((int)S390_BUILTIN_MAX + (int)S390_OVERLOADED_BUILTIN_MAX) +#define S390_ALL_BUILTIN_MAX \ + ((int)S390_BUILTIN_MAX + (int)S390_OVERLOADED_BUILTIN_MAX + \ + (int)S390_OVERLOADED_BUILTIN_VAR_MAX) extern const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1]; extern const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1]; @@ -172,6 +172,4 @@ opflags_for_builtin (int fcode) return opflags_builtin[fcode]; } -extern GTY(()) tree s390_builtin_decls[S390_BUILTIN_MAX + - S390_OVERLOADED_BUILTIN_MAX + - S390_OVERLOADED_BUILTIN_VAR_MAX]; +extern GTY(()) tree s390_builtin_decls[S390_ALL_BUILTIN_MAX]; diff --git a/gcc/config/s390/s390-opts.h b/gcc/config/s390/s390-opts.h index 9cacb2c..29dd4a5 100644 --- a/gcc/config/s390/s390-opts.h +++ b/gcc/config/s390/s390-opts.h @@ -53,4 +53,12 @@ enum indirect_branch { indirect_branch_thunk_inline, indirect_branch_thunk_extern }; + + +/* Where to get the canary for the stack protector. */ +enum stack_protector_guard +{ + SP_TLS, /* per-thread canary in TLS block */ + SP_GLOBAL /* global canary */ +}; #endif diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index d651090..359ea1c 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -686,9 +686,7 @@ opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] = tree s390_builtin_types[BT_MAX]; tree s390_builtin_fn_types[BT_FN_MAX]; -tree s390_builtin_decls[S390_BUILTIN_MAX + - S390_OVERLOADED_BUILTIN_MAX + - S390_OVERLOADED_BUILTIN_VAR_MAX]; +tree s390_builtin_decls[S390_ALL_BUILTIN_MAX]; static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = { #undef B_DEF @@ -771,12 +769,12 @@ s390_init_builtins (void) ATTRS); #undef OB_DEF #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \ - if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \ + if (s390_builtin_decls[(int)S390_OVERLOADED_BUILTIN_##NAME + (int)S390_BUILTIN_MAX] \ == NULL) \ - s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \ + s390_builtin_decls[(int)S390_OVERLOADED_BUILTIN_##NAME + (int)S390_BUILTIN_MAX] = \ add_builtin_function ("__builtin_" #NAME, \ s390_builtin_fn_types[FNTYPE], \ - S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \ + (int)S390_OVERLOADED_BUILTIN_##NAME + (int)S390_BUILTIN_MAX, \ BUILT_IN_MD, \ NULL, \ 0); diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h index 6478be8..a5a3db0 100644 --- a/gcc/config/s390/s390.h +++ b/gcc/config/s390/s390.h @@ -251,6 +251,9 @@ enum processor_flags && (s390_tune < PROCESSOR_2964_Z13 || (VAL) != const0_rtx) \ && (!CONST_INT_P (LEN) || INTVAL ((LEN)) > TARGET_SETMEM_PREFETCH_DISTANCE)) +#define TARGET_SP_GLOBAL_GUARD (s390_stack_protector_guard == SP_GLOBAL) +#define TARGET_SP_TLS_GUARD (s390_stack_protector_guard == SP_TLS) + /* Run-time target specification. */ /* Defaults for option flags defined only on some subtargets. 
*/ diff --git a/gcc/config/s390/s390.md index 06876a5..db02bd4 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -312,6 +312,7 @@ ; Stack Protector UNSPECV_SP_GET_TP + UNSPECV_SP_GLOBAL_GUARD_ADDR ]) ;; @@ -12059,6 +12060,36 @@ ; Stack Protector Patterns ; +(define_insn "stack_protect_global_guard_addr<mode>" + [(set (match_operand:P 0 "register_operand" "=d") + (unspec_volatile:P [(const_int 0)] UNSPECV_SP_GLOBAL_GUARD_ADDR))] + "" +{ + if (flag_s390_stack_protector_guard_record) + fprintf (asm_out_file, "1:\n"); + if (flag_pic) + { + if (TARGET_Z10) + output_asm_insn ("l<g>rl\t%0,__stack_chk_guard@GOTENT", operands); + else + { + output_asm_insn ("larl\t%0,__stack_chk_guard@GOTENT", operands); + output_asm_insn ("l<g>\t%0,0(%0)", operands); + } + } + else + output_asm_insn ("larl\t%0,__stack_chk_guard", operands); + if (flag_s390_stack_protector_guard_record) + fprintf (asm_out_file, "\t.section __stack_protector_loc,\"a\",@progbits\n" + "\t.%s 1b\n" + "\t.previous\n", TARGET_64BIT ? "quad" : "long"); + return ""; +} + [(set (attr "mnemonic") + (cond [(match_test "flag_pic && TARGET_Z10") (const_string "l<g>rl") + (match_test "flag_pic && !TARGET_Z10") (const_string "*")] + (const_string "larl")))]) + ; Insns stack_protect_get_tp{si,di} are similar to *get_tp_{31,64} but still ; distinct in the sense that they force recomputation of the thread pointer ; instead of potentially reloading it from stack. @@ -12087,16 +12118,28 @@ (match_operand 1 "memory_operand" ""))] "" { -#ifdef TARGET_THREAD_SSP_OFFSET - rtx tp = gen_reg_rtx (Pmode); - if (TARGET_64BIT) - emit_insn (gen_stack_protect_get_tpdi (tp)); + if (TARGET_SP_GLOBAL_GUARD) + { + rtx addr = gen_reg_rtx (Pmode); + if (TARGET_64BIT) + emit_insn (gen_stack_protect_global_guard_addrdi (addr)); + else + emit_insn (gen_stack_protect_global_guard_addrsi (addr)); + operands[1] = gen_rtx_MEM (Pmode, addr); + } else - emit_insn (gen_stack_protect_get_tpsi (tp)); - operands[1] - = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tp, - GEN_INT (TARGET_THREAD_SSP_OFFSET))); + { +#ifdef TARGET_THREAD_SSP_OFFSET + rtx tp = gen_reg_rtx (Pmode); + if (TARGET_64BIT) + emit_insn (gen_stack_protect_get_tpdi (tp)); + else + emit_insn (gen_stack_protect_get_tpsi (tp)); + operands[1] + = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tp, + GEN_INT (TARGET_THREAD_SSP_OFFSET))); #endif + } if (TARGET_64BIT) emit_insn (gen_stack_protect_setdi (operands[0], operands[1])); else @@ -12120,16 +12163,28 @@ "" { rtx cc_reg, test; -#ifdef TARGET_THREAD_SSP_OFFSET - rtx tp = gen_reg_rtx (Pmode); - if (TARGET_64BIT) - emit_insn (gen_stack_protect_get_tpdi (tp)); + if (TARGET_SP_GLOBAL_GUARD) + { + rtx addr = gen_reg_rtx (Pmode); + if (TARGET_64BIT) + emit_insn (gen_stack_protect_global_guard_addrdi (addr)); + else + emit_insn (gen_stack_protect_global_guard_addrsi (addr)); + operands[1] = gen_rtx_MEM (Pmode, addr); + } else - emit_insn (gen_stack_protect_get_tpsi (tp)); - operands[1] - = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tp, - GEN_INT (TARGET_THREAD_SSP_OFFSET))); + { +#ifdef TARGET_THREAD_SSP_OFFSET + rtx tp = gen_reg_rtx (Pmode); + if (TARGET_64BIT) + emit_insn (gen_stack_protect_get_tpdi (tp)); + else + emit_insn (gen_stack_protect_get_tpsi (tp)); + operands[1] + = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tp, + GEN_INT (TARGET_THREAD_SSP_OFFSET))); #endif + } if (TARGET_64BIT) emit_insn (gen_stack_protect_testdi (operands[0], operands[1])); else diff --git a/gcc/config/s390/s390.opt index
6753a93..a82992e 100644 --- a/gcc/config/s390/s390.opt +++ b/gcc/config/s390/s390.opt @@ -196,6 +196,24 @@ mno-stack-guard Target RejectNegative Alias(mstack-guard=,0) Negative(mstack-guard=) Switches off the -mstack-guard= option. +mstack-protector-guard= +Target RejectNegative Joined Enum(stack_protector_guard) Var(s390_stack_protector_guard) Init(SP_TLS) +Use given stack-protector guard. + +Enum +Name(stack_protector_guard) Type(enum stack_protector_guard) +Valid arguments to -mstack-protector-guard=: + +EnumValue +Enum(stack_protector_guard) String(tls) Value(SP_TLS) + +EnumValue +Enum(stack_protector_guard) String(global) Value(SP_GLOBAL) + +mstack-protector-guard-record +Target Var(flag_s390_stack_protector_guard_record) +Generate section __stack_protector_loc containing pointers to all instructions which load the address of the global guard. + mstack-size= Target RejectNegative Joined UInteger Var(s390_stack_size) Save Emit extra code in the function prologue in order to trap if the stack size exceeds the given limit. diff --git a/gcc/config/s390/s390.opt.urls b/gcc/config/s390/s390.opt.urls index bb53fc9..021f2d7 100644 --- a/gcc/config/s390/s390.opt.urls +++ b/gcc/config/s390/s390.opt.urls @@ -22,16 +22,16 @@ mhard-dfp UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mhard-dfp-1) mhard-float -UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mhard-float-7) +UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mhard-float-6) mhotpatch= UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mhotpatch) mlong-double-128 -UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mlong-double-128) +UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mlong-double-128-1) mlong-double-64 -UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mlong-double-64) +UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mlong-double-64-1) mhtm UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mhtm-1) @@ -46,13 +46,21 @@ msmall-exec UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-msmall-exec) msoft-float -UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-msoft-float-12) +UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-msoft-float-11) mstack-guard= UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mstack-guard) +mstack-protector-guard= +UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mstack-protector-guard-4) + +mstack-protector-guard-record +UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mstack-protector-guard-record) + mstack-size= -UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mstack-size-2) +UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mstack-size-1) + +; skipping UrlSuffix for 'mno-stack-size' due to finding no URLs mtune= UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mtune-14) diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 375e3e8..367389c 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -3557,15 +3557,16 @@ (define_expand "len_load_v16qi" [(match_operand:V16QI 0 "register_operand") (match_operand:V16QI 1 "memory_operand") - (match_operand:QI 2 "register_operand") - (match_operand:QI 3 "vll_bias_operand") + (match_operand:V16QI 2 "vll_else_operand") + (match_operand:QI 3 "register_operand") + (match_operand:QI 4 "vll_bias_operand") ] "TARGET_VX && TARGET_64BIT" { rtx mem = adjust_address (operands[1], BLKmode, 0); rtx len = gen_reg_rtx (SImode); - emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[2])); + emit_move_insn (len, gen_rtx_ZERO_EXTEND 
(SImode, operands[3])); emit_insn (gen_vllv16qi (operands[0], len, mem)); DONE; }) diff --git a/gcc/config/sol2.h b/gcc/config/sol2.h index 2405c10..026d363 100644 --- a/gcc/config/sol2.h +++ b/gcc/config/sol2.h @@ -41,6 +41,12 @@ along with GCC; see the file COPYING3. If not see #undef WINT_TYPE_SIZE #define WINT_TYPE_SIZE 32 +/* Same for pid_t. See SCD 2.4.2, p. 6P-12, Figure 6-59 (64-bit). There's + no corresponding 32-bit definition, but this is what Solaris 8 + <sys/types.h> uses. */ + +#define PID_TYPE (TARGET_64BIT ? "int" : "long int") + #define SIG_ATOMIC_TYPE "int" /* ??? This definition of int8_t follows the system header but does @@ -158,11 +164,6 @@ along with GCC; see the file COPYING3. If not see "%{!symbolic:\ %{p|pg:-ldl} -lc}" -#ifndef CROSS_DIRECTORY_STRUCTURE -#undef MD_EXEC_PREFIX -#define MD_EXEC_PREFIX "/usr/ccs/bin/" -#endif - /* Enable constructor priorities if the configured linker supports it. */ #undef SUPPORTS_INIT_PRIORITY #define SUPPORTS_INIT_PRIORITY HAVE_INITFINI_ARRAY_SUPPORT @@ -199,13 +200,9 @@ along with GCC; see the file COPYING3. If not see %{ansi|std=c*|std=iso9899\\:199409:values-Xc.o%s; :values-Xa.o%s} \ %{std=c90|std=gnu90:values-xpg4.o%s; :values-xpg6.o%s}}}" -#if defined(HAVE_LD_PIE) #define STARTFILE_CRTBEGIN_SPEC "%{static:crtbegin.o%s; \ shared|" PIE_SPEC ":crtbeginS.o%s; \ :crtbegin.o%s}" -#else -#define STARTFILE_CRTBEGIN_SPEC "crtbegin.o%s" -#endif #if ENABLE_VTABLE_VERIFY #if SUPPORTS_INIT_PRIORITY @@ -233,7 +230,7 @@ along with GCC; see the file COPYING3. If not see in that case, and for executable link with --{,no-}whole-archive around it to force everything into the executable. */ -#ifndef USE_GNU_LD +#if !HAVE_GNU_LD #define LD_WHOLE_ARCHIVE_OPTION "-z allextract" #define LD_NO_WHOLE_ARCHIVE_OPTION "-z defaultextract" #else @@ -270,51 +267,29 @@ along with GCC; see the file COPYING3. If not see crti.o%s %(startfile_arch) %(startfile_crtbegin) \ %(startfile_vtv)" -#if defined(HAVE_LD_PIE) #define ENDFILE_CRTEND_SPEC "%{static:crtend.o%s; \ shared|" PIE_SPEC ":crtendS.o%s; \ :crtend.o%s}" -#else -#define ENDFILE_CRTEND_SPEC "crtend.o%s" -#endif #undef ENDFILE_SPEC #define ENDFILE_SPEC \ "%{Ofast|ffast-math|funsafe-math-optimizations:%{!shared:crtfastmath.o%s}} \ %(endfile_arch) %(endfile_vtv) %(endfile_crtend) crtn.o%s" -#undef LINK_ARCH32_SPEC_BASE -#define LINK_ARCH32_SPEC_BASE \ +#undef LINK_ARCH_SPEC_BASE +#define LINK_ARCH_SPEC_BASE \ "%{G:-G} \ %{YP,*} \ - %{R*} \ - %{!YP,*:%{p|pg:-Y P,%R/usr/lib/libp%R/lib:%R/usr/lib} \ - %{!p:%{!pg:-Y P,%R/lib:%R/usr/lib}}}" + %{R*}" -#undef LINK_ARCH32_SPEC -#define LINK_ARCH32_SPEC LINK_ARCH32_SPEC_BASE - -/* This should be the same as LINK_ARCH32_SPEC_BASE, except with - ARCH64_SUBDIR appended to the paths. */ -#undef LINK_ARCH64_SPEC_BASE -#define LINK_ARCH64_SPEC_BASE \ - "%{G:-G} \ - %{YP,*} \ - %{R*} \ - %{!YP,*:%{p|pg:-Y P,%R/usr/lib/libp/" ARCH64_SUBDIR ":%R/lib/" ARCH64_SUBDIR ":%R/usr/lib/" ARCH64_SUBDIR "} \ - %{!p:%{!pg:-Y P,%R/lib/" ARCH64_SUBDIR ":%R/usr/lib/" ARCH64_SUBDIR "}}}" - -#undef LINK_ARCH64_SPEC -#ifndef USE_GLD -/* FIXME: Used to be SPARC-only. Not SPARC-specfic but for the model name! 
*/ -#define LINK_ARCH64_SPEC \ - "%{mcmodel=medlow:-M /usr/lib/ld/" ARCH64_SUBDIR "/map.below4G} " \ - LINK_ARCH64_SPEC_BASE +#if !HAVE_GNU_LD +#define LINK_ARCH_SPEC_1 \ + "%{mcmodel=medlow:-M /usr/lib/ld/map.below4G} " LINK_ARCH_SPEC_BASE #else -#define LINK_ARCH64_SPEC LINK_ARCH64_SPEC_BASE +#define LINK_ARCH_SPEC_1 LINK_ARCH_SPEC_BASE #endif -#ifdef USE_GLD +#if HAVE_GNU_LD #if DEFAULT_ARCH32_P #define ARCH_DEFAULT_EMULATION ARCH32_EMULATION #else @@ -327,46 +302,32 @@ along with GCC; see the file COPYING3. If not see #define TARGET_LD_EMULATION "" #endif -#undef LINK_ARCH_SPEC #if DISABLE_MULTILIB #if DEFAULT_ARCH32_P -#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \ -%{m32:%(link_arch32)} \ -%{m64:%edoes not support multilib} \ -%{!m32:%{!m64:%(link_arch_default)}} \ -" +#define LINK_ARCH_ERROR_SPEC "%{m64:%edoes not support multilib}" #else -#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \ -%{m32:%edoes not support multilib} \ -%{m64:%(link_arch64)} \ -%{!m32:%{!m64:%(link_arch_default)}} \ -" +#define LINK_ARCH_ERROR_SPEC "%{m32:%edoes not support multilib}" #endif #else -#define LINK_ARCH_SPEC TARGET_LD_EMULATION " \ -%{m32:%(link_arch32)} \ -%{m64:%(link_arch64)} \ -%{!m32:%{!m64:%(link_arch_default)}}" +#define LINK_ARCH_ERROR_SPEC "" #endif -#define LINK_ARCH_DEFAULT_SPEC \ -(DEFAULT_ARCH32_P ? LINK_ARCH32_SPEC : LINK_ARCH64_SPEC) +#undef LINK_ARCH_SPEC +#define LINK_ARCH_SPEC TARGET_LD_EMULATION \ + " " LINK_ARCH_ERROR_SPEC " " LINK_ARCH_SPEC_1 #undef SUBTARGET_EXTRA_SPECS #define SUBTARGET_EXTRA_SPECS \ { "startfile_arch", STARTFILE_ARCH_SPEC }, \ { "startfile_crtbegin", STARTFILE_CRTBEGIN_SPEC }, \ { "startfile_vtv", STARTFILE_VTV_SPEC }, \ - { "link_arch32", LINK_ARCH32_SPEC }, \ - { "link_arch64", LINK_ARCH64_SPEC }, \ - { "link_arch_default", LINK_ARCH_DEFAULT_SPEC }, \ { "link_arch", LINK_ARCH_SPEC }, \ { "endfile_arch", ENDFILE_ARCH_SPEC }, \ { "endfile_crtend", ENDFILE_CRTEND_SPEC }, \ { "endfile_vtv", ENDFILE_VTV_SPEC }, \ SUBTARGET_CPU_EXTRA_SPECS -#ifndef USE_GLD +#if !HAVE_GNU_LD /* With Sun ld, -rdynamic is a no-op. */ #define RDYNAMIC_SPEC "" #else @@ -374,12 +335,12 @@ along with GCC; see the file COPYING3. If not see #define RDYNAMIC_SPEC "--export-dynamic" #endif -#ifndef USE_GLD +#if !HAVE_GNU_LD /* Prefer native form with Solaris ld. */ #define SYSROOT_SPEC "-z sysroot=%R" #endif -#if !defined(USE_GLD) && defined(ENABLE_SHARED_LIBGCC) +#if !HAVE_GNU_LD && defined(ENABLE_SHARED_LIBGCC) /* With Sun ld, use mapfile to enforce direct binding to libgcc_s unwinder. */ #define LINK_LIBGCC_MAPFILE_SPEC \ "%{shared|shared-libgcc:-M %slibgcc-unwind.map}" @@ -412,26 +373,18 @@ along with GCC; see the file COPYING3. If not see #define USE_LD_AS_NEEDED 1 #endif -#ifdef USE_GLD +#if HAVE_GNU_LD /* GNU ld needs --eh-frame-hdr to create the required .eh_frame_hdr sections. */ -#if defined(HAVE_LD_EH_FRAME_HDR) #define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} " -#endif /* HAVE_LD_EH_FRAME */ #endif -#if defined(HAVE_LD_PIE) -#ifdef USE_GLD +#if HAVE_GNU_LD /* Assert -z text by default to match Solaris ld. */ #define LD_PIE_SPEC "-pie %{!mimpure-text:-z text}" #else /* Solaris ld needs -z type=pie instead of -pie. */ #define LD_PIE_SPEC "-z type=pie %{mimpure-text:-z textoff}" #endif -#else -/* Error out if some part of PIE support is missing. */ -#define LINK_PIE_SPEC \ - "%{no-pie:} %{pie:%e-pie is not supported in this configuration} " -#endif /* collect2.cc can only parse GNU nm -n output. Solaris nm needs -png to produce the same format. 
*/ @@ -476,7 +429,7 @@ along with GCC; see the file COPYING3. If not see } \ while (0) -#ifndef USE_GAS +#if !HAVE_GNU_AS #undef TARGET_ASM_ASSEMBLE_VISIBILITY #define TARGET_ASM_ASSEMBLE_VISIBILITY solaris_assemble_visibility diff --git a/gcc/config/sparc/sol2.h b/gcc/config/sparc/sol2.h index 03a19bf..8db85c2 100644 --- a/gcc/config/sparc/sol2.h +++ b/gcc/config/sparc/sol2.h @@ -64,7 +64,7 @@ along with GCC; see the file COPYING3. If not see #define AS_SPARC32_FLAG "" #define AS_SPARC64_FLAG "" -#ifndef USE_GAS +#if !HAVE_GNU_AS #undef ASM_ARCH32_SPEC #define ASM_ARCH32_SPEC "-m32" #undef ASM_ARCH64_SPEC @@ -307,16 +307,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); %{!mcpu*:%(asm_cpu_default)} \ " -#ifdef USE_GLD -/* Since binutils 2.21, GNU ld supports new *_sol2 emulations to strictly - follow the Solaris 2 ABI. Prefer them if present. */ -#ifdef HAVE_LD_SOL2_EMULATION +#if HAVE_GNU_LD #define ARCH32_EMULATION "elf32_sparc_sol2" #define ARCH64_EMULATION "elf64_sparc_sol2" -#else -#define ARCH32_EMULATION "elf32_sparc" -#define ARCH64_EMULATION "elf64_sparc" -#endif #endif #define ARCH64_SUBDIR "sparcv9" @@ -333,7 +326,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); /* Register the Solaris-specific #pragma directives. */ #define REGISTER_TARGET_PRAGMAS() solaris_register_pragmas () -#if defined(USE_GAS) && defined(HAVE_AS_TLS) +#if HAVE_GNU_AS && defined(HAVE_AS_TLS) /* Use GNU extensions to TLS support. */ #undef TARGET_SUN_TLS #undef TARGET_GNU_TLS @@ -412,7 +405,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); } \ while (0) -#ifndef USE_GAS +#if !HAVE_GNU_AS /* This is how to output an assembler line that says to advance the location counter to a multiple of 2**LOG bytes using the NOP instruction as padding. The filler pattern doesn't work @@ -428,10 +421,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); /* Sun as requires doublequoted section names on SPARC. While GNU as supports that, too, we prefer the standard variant. */ #define SECTION_NAME_FORMAT "\"%s\"" -#endif /* !USE_GAS */ +#endif /* !HAVE_GNU_AS */ /* Undefine this so that attribute((init_priority)) works with GNU ld. */ -#ifdef USE_GLD +#if HAVE_GNU_LD #undef CTORS_SECTION_ASM_OP #undef DTORS_SECTION_ASM_OP #endif diff --git a/gcc/config/sparc/sparc.opt.urls b/gcc/config/sparc/sparc.opt.urls index 1188f88..a221b6b 100644 --- a/gcc/config/sparc/sparc.opt.urls +++ b/gcc/config/sparc/sparc.opt.urls @@ -4,10 +4,10 @@ mfpu UrlSuffix(gcc/SPARC-Options.html#index-mfpu-4) mhard-float -UrlSuffix(gcc/SPARC-Options.html#index-mhard-float-8) +UrlSuffix(gcc/SPARC-Options.html#index-mhard-float-7) msoft-float -UrlSuffix(gcc/SPARC-Options.html#index-msoft-float-13) +UrlSuffix(gcc/SPARC-Options.html#index-msoft-float-12) mflat UrlSuffix(gcc/SPARC-Options.html#index-mflat) diff --git a/gcc/config/usegas.h b/gcc/config/usegas.h deleted file mode 100644 index f099ae8..0000000 --- a/gcc/config/usegas.h +++ /dev/null @@ -1,20 +0,0 @@ -/* Copyright (C) 2001-2025 Free Software Foundation, Inc. - - This file is part of GCC. - - GCC is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - GCC is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with GCC; see the file COPYING3. If not see - <http://www.gnu.org/licenses/>. */ - -/* Just set a single flag we can test for it inside other files. */ -#define USE_GAS 1 diff --git a/gcc/config/usegld.h b/gcc/config/usegld.h deleted file mode 100644 index 0667732..0000000 --- a/gcc/config/usegld.h +++ /dev/null @@ -1 +0,0 @@ -#define USE_GLD 1 diff --git a/gcc/config/v850/v850.opt.urls b/gcc/config/v850/v850.opt.urls index a06f483..6f8102f 100644 --- a/gcc/config/v850/v850.opt.urls +++ b/gcc/config/v850/v850.opt.urls @@ -18,7 +18,7 @@ mghs UrlSuffix(gcc/V850-Options.html#index-mghs) mlong-calls -UrlSuffix(gcc/V850-Options.html#index-mlong-calls-7) +UrlSuffix(gcc/V850-Options.html#index-mlong-calls-8) mprolog-function UrlSuffix(gcc/V850-Options.html#index-mprolog-function) @@ -71,10 +71,10 @@ mlong-jumps UrlSuffix(gcc/V850-Options.html#index-mlong-jumps) msoft-float -UrlSuffix(gcc/V850-Options.html#index-msoft-float-14) +UrlSuffix(gcc/V850-Options.html#index-msoft-float-13) mhard-float -UrlSuffix(gcc/V850-Options.html#index-mhard-float-9) +UrlSuffix(gcc/V850-Options.html#index-mhard-float-8) mrh850-abi UrlSuffix(gcc/V850-Options.html#index-mrh850-abi) diff --git a/gcc/config/vax/vax.opt.urls b/gcc/config/vax/vax.opt.urls index 7813b88..758a5a7 100644 --- a/gcc/config/vax/vax.opt.urls +++ b/gcc/config/vax/vax.opt.urls @@ -19,5 +19,5 @@ munix UrlSuffix(gcc/VAX-Options.html#index-munix) mlra -UrlSuffix(gcc/VAX-Options.html#index-mlra-3) +UrlSuffix(gcc/VAX-Options.html#index-mlra-2) diff --git a/gcc/config/visium/visium.opt.urls b/gcc/config/visium/visium.opt.urls index 38ba88d..16984e7 100644 --- a/gcc/config/visium/visium.opt.urls +++ b/gcc/config/visium/visium.opt.urls @@ -10,10 +10,10 @@ mfpu UrlSuffix(gcc/Visium-Options.html#index-mfpu-5) mhard-float -UrlSuffix(gcc/Visium-Options.html#index-mhard-float-10) +UrlSuffix(gcc/Visium-Options.html#index-mhard-float-9) msoft-float -UrlSuffix(gcc/Visium-Options.html#index-msoft-float-15) +UrlSuffix(gcc/Visium-Options.html#index-msoft-float-14) mcpu= UrlSuffix(gcc/Visium-Options.html#index-mcpu-13) diff --git a/gcc/config/vxworks/base/b_NULL.h b/gcc/config/vxworks/base/b_NULL.h new file mode 100644 index 0000000..3d677f9 --- /dev/null +++ b/gcc/config/vxworks/base/b_NULL.h @@ -0,0 +1,32 @@ +/* This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* This header wrapper addresses the issue that some versions of VxWorks have + started defining NULL in a way that doesn't work with C++ < 11, so we override + it with GCC's own stddef's NULL. 
Include the VxWorks version of this header + nevertheless, as it might do other things than defining NULL, and beware that + it usually defines NULL unconditionally without undefining it first, unlike + what stddef.h does. */ + +#undef NULL +#include_next <base/b_NULL.h> +#define __need_NULL +#include <stddef.h> diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md index 4ba7f54..1e88a60 100644 --- a/gcc/config/xtensa/xtensa.md +++ b/gcc/config/xtensa/xtensa.md @@ -43,6 +43,7 @@ UNSPEC_FRAME_BLOCKAGE UNSPEC_CEIL UNSPEC_FLOOR + UNSPEC_ROUND ]) (define_c_enum "unspecv" [ @@ -102,10 +103,12 @@ (define_code_attr m_float [(float "float") (unsigned_float "ufloat")]) (define_code_attr s_float [(float "") (unsigned_float "uns")]) -;; This iterator and attribute allow FP-to-integer rounding of two types +;; This iterator and attribute allow FP-to-integer rounding of three types ;; to be generated from one template. -(define_int_iterator ANY_ROUND [UNSPEC_CEIL UNSPEC_FLOOR]) -(define_int_attr m_round [(UNSPEC_CEIL "ceil") (UNSPEC_FLOOR "floor")]) +(define_int_iterator ANY_ROUND [UNSPEC_CEIL UNSPEC_FLOOR + (UNSPEC_ROUND "flag_unsafe_math_optimizations")]) +(define_int_attr m_round [(UNSPEC_CEIL "ceil") (UNSPEC_FLOOR "floor") + (UNSPEC_ROUND "round")]) ;; Attributes. @@ -691,13 +694,15 @@ }) (define_insn "negsf2" - [(set (match_operand:SF 0 "register_operand" "=f") - (neg:SF (match_operand:SF 1 "register_operand" "f")))] + [(set (match_operand:SF 0 "register_operand") + (neg:SF (match_operand:SF 1 "register_operand"))) + (clobber (match_scratch:SI 2))] "TARGET_HARD_FLOAT" - "neg.s\t%0, %1" - [(set_attr "type" "farith") - (set_attr "mode" "SF") - (set_attr "length" "3")]) + {@ [cons: =0, 1, =2; attrs: type, length] + [D, D, &a; arith , 7] movi.n\t%2, 1\;slli\t%2, %2, 31\;add.n\t%0, %1, %2 + [f, f, X; farith, 3] neg.s\t%0, %1 + } + [(set_attr "mode" "SF")]) ;; Logical instructions. @@ -1139,7 +1144,7 @@ (define_insn "*fix<s_fix>_truncsfsi2_scaled" [(set (match_operand:SI 0 "register_operand" "=a") (any_fix:SI (mult:SF (match_operand:SF 1 "register_operand" "f") - (match_operand:SF 2 "fix_scaling_operand" "F"))))] + (match_operand:SF 2 "fix_scaling_operand" ""))))] "TARGET_HARD_FLOAT" "<m_fix>.s\t%0, %1, %U2" [(set_attr "type" "fconv") @@ -1158,7 +1163,7 @@ (define_insn "*float<s_float>sisf2_scaled" [(set (match_operand:SF 0 "register_operand" "=f") (mult:SF (any_float:SF (match_operand:SI 1 "register_operand" "a")) - (match_operand:SF 2 "float_scaling_operand" "F")))] + (match_operand:SF 2 "float_scaling_operand" "")))] "TARGET_HARD_FLOAT" "<m_float>.s\t%0, %1, %V2" [(set_attr "type" "fconv") @@ -1187,7 +1192,7 @@ (define_insn "*l<m_round>sfsi2_scaled" [(set (match_operand:SI 0 "register_operand" "=a") (unspec:SI [(mult:SF (match_operand:SF 1 "register_operand" "f") - (match_operand:SF 2 "fix_scaling_operand" "F"))] ANY_ROUND))] + (match_operand:SF 2 "fix_scaling_operand" ""))] ANY_ROUND))] "TARGET_HARD_FLOAT" "<m_round>.s\t%0, %1, %U2" [(set_attr "type" "fconv") |
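;; Two illustrations of the xtensa.md changes above (assumptions about
;; intent, not text from the patch): with UNSPEC_ROUND wired into ANY_ROUND,
;;   long f (float x) { return lroundf (x); }
;; can now expand to a single round.s under -funsafe-math-optimizations,
;; presumably gated there because the hardware's handling of halfway cases
;; need not match lroundf exactly. And the new GPR alternative of negsf2
;; flips the sign by adding 0x80000000 (movi.n/slli/add.n): in 32-bit
;; arithmetic adding the sign bit is the same as XORing it, so only bit 31
;; changes and the value never visits the FP register file.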
