diff options
Diffstat (limited to 'gcc/config/riscv')
26 files changed, 1703 insertions, 423 deletions
diff --git a/gcc/config/riscv/arch-canonicalize b/gcc/config/riscv/arch-canonicalize index fd55255..15a3985 100755 --- a/gcc/config/riscv/arch-canonicalize +++ b/gcc/config/riscv/arch-canonicalize @@ -20,77 +20,326 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -# TODO: Extract riscv_subset_t from riscv-common.cc and make it can be compiled -# standalone to replace this script, that also prevents us implementing -# that twice and keep sync again and again. - from __future__ import print_function import sys import argparse import collections import itertools +import re +import os from functools import reduce SUPPORTED_ISA_SPEC = ["2.2", "20190608", "20191213"] -CANONICAL_ORDER = "imafdgqlcbkjtpvn" +CANONICAL_ORDER = "imafdqlcbkjtpvnh" LONG_EXT_PREFIXES = ['z', 's', 'h', 'x'] +def parse_define_riscv_ext(content): + """Parse DEFINE_RISCV_EXT macros using position-based parsing.""" + extensions = [] + + # Find all DEFINE_RISCV_EXT blocks + pattern = r'DEFINE_RISCV_EXT\s*\(' + matches = [] + + pos = 0 + while True: + match = re.search(pattern, content[pos:]) + if not match: + break + + start_pos = pos + match.start() + paren_count = 0 + current_pos = pos + match.end() - 1 # Start at the opening parenthesis + + # Find the matching closing parenthesis + while current_pos < len(content): + if content[current_pos] == '(': + paren_count += 1 + elif content[current_pos] == ')': + paren_count -= 1 + if paren_count == 0: + break + current_pos += 1 + + if paren_count == 0: + # Extract the content inside parentheses + macro_content = content[pos + match.end():current_pos] + ext_data = parse_macro_arguments(macro_content) + if ext_data: + extensions.append(ext_data) + + pos = current_pos + 1 + + return extensions + +def parse_macro_arguments(macro_content): + """Parse the arguments of a DEFINE_RISCV_EXT macro.""" + # Remove comments /* ... 
*/ + cleaned_content = re.sub(r'/\*[^*]*\*/', '', macro_content) + + # Split arguments by comma, but respect nested structures + args = [] + current_arg = "" + paren_count = 0 + brace_count = 0 + in_string = False + escape_next = False + + for char in cleaned_content: + if escape_next: + current_arg += char + escape_next = False + continue + + if char == '\\': + escape_next = True + current_arg += char + continue + + if char == '"' and not escape_next: + in_string = not in_string + current_arg += char + continue + + if in_string: + current_arg += char + continue + + if char == '(': + paren_count += 1 + elif char == ')': + paren_count -= 1 + elif char == '{': + brace_count += 1 + elif char == '}': + brace_count -= 1 + elif char == ',' and paren_count == 0 and brace_count == 0: + args.append(current_arg.strip()) + current_arg = "" + continue + + current_arg += char + + # Add the last argument + if current_arg.strip(): + args.append(current_arg.strip()) + + # We need at least 6 arguments to get DEP_EXTS (position 5) + if len(args) < 6: + return None + + ext_name = args[0].strip() + dep_exts_arg = args[5].strip() # DEP_EXTS is at position 5 + + # Parse dependency extensions from the DEP_EXTS argument + deps = parse_dep_exts(dep_exts_arg) + + return { + 'name': ext_name, + 'dep_exts': deps + } + +def parse_dep_exts(dep_exts_str): + """Parse the DEP_EXTS argument to extract dependency list with conditions.""" + # Remove outer parentheses if present + dep_exts_str = dep_exts_str.strip() + if dep_exts_str.startswith('(') and dep_exts_str.endswith(')'): + dep_exts_str = dep_exts_str[1:-1].strip() + + # Remove outer braces if present + if dep_exts_str.startswith('{') and dep_exts_str.endswith('}'): + dep_exts_str = dep_exts_str[1:-1].strip() + + if not dep_exts_str: + return [] + + deps = [] + + # First, find and process conditional dependencies + conditional_pattern = r'\{\s*"([^"]+)"\s*,\s*(\[.*?\]\s*\([^)]*\)\s*->\s*bool.*?)\}' + conditional_matches = [] + + for match in 
re.finditer(conditional_pattern, dep_exts_str, re.DOTALL): + ext_name = match.group(1) + condition_code = match.group(2) + deps.append({'ext': ext_name, 'type': 'conditional', 'condition': condition_code}) + # The conditional_pattern RE matches only the first code block enclosed + # in braces. + # + # Extend the match to the condition block's closing brace, encompassing + # all code blocks, by simply trying to match the numbers of opening + # and closing braces. While crude, this avoids writing a complicated + # parse here. + closing_braces_left = condition_code.count('{') - condition_code.count('}') + condition_end = match.end() + while closing_braces_left > 0: + condition_end = dep_exts_str.find('}', condition_end) + closing_braces_left -= 1 + conditional_matches.append((match.start(), condition_end)) + + # Remove conditional dependency blocks from the string + remaining_str = dep_exts_str + for start, end in reversed(conditional_matches): # Reverse order to maintain indices + remaining_str = remaining_str[:start] + remaining_str[end:] + + # Now handle simple quoted strings in the remaining text + for match in re.finditer(r'"([^"]+)"', remaining_str): + deps.append({'ext': match.group(1), 'type': 'simple'}) + + # Remove duplicates while preserving order + seen = set() + unique_deps = [] + for dep in deps: + key = (dep['ext'], dep['type']) + if key not in seen: + seen.add(key) + unique_deps.append(dep) + + return unique_deps + +def evaluate_conditional_dependency(ext, dep, xlen, current_exts): + """Evaluate whether a conditional dependency should be included.""" + ext_name = dep['ext'] + condition = dep['condition'] + # Parse the condition based on known patterns + if ext_name == 'zcf' and ext in ['zca', 'c', 'zce']: + # zcf depends on RV32 and F extension + return xlen == 32 and 'f' in current_exts + elif ext_name == 'zcd' and ext in ['zca', 'c']: + # zcd depends on D extension + return 'd' in current_exts + elif ext_name == 'c' and ext in ['zca']: + # Special 
case for zca -> c conditional dependency + if xlen == 32: + if 'd' in current_exts: + return 'zcf' in current_exts and 'zcd' in current_exts + elif 'f' in current_exts: + return 'zcf' in current_exts + else: + return True + elif xlen == 64: + if 'd' in current_exts: + return 'zcd' in current_exts + else: + return True + return False + else: + # Report error for unhandled conditional dependencies + import sys + print(f"ERROR: Unhandled conditional dependency: '{ext_name}' with condition:", file=sys.stderr) + print(f" Condition code: {condition[:100]}...", file=sys.stderr) + print(f" Current context: xlen={xlen}, exts={sorted(current_exts)}", file=sys.stderr) + # For now, return False to be safe + return False + +def resolve_dependencies(arch_parts, xlen): + """Resolve all dependencies including conditional ones.""" + current_exts = set(arch_parts) + implied_deps = set() + + # Keep resolving until no new dependencies are found + changed = True + while changed: + changed = False + new_deps = set() + + for ext in current_exts | implied_deps: + if ext in IMPLIED_EXT: + for dep in IMPLIED_EXT[ext]: + if dep['type'] == 'simple': + if dep['ext'] not in current_exts and dep['ext'] not in implied_deps: + new_deps.add(dep['ext']) + changed = True + elif dep['type'] == 'conditional': + should_include = evaluate_conditional_dependency(ext, dep, xlen, current_exts | implied_deps) + if should_include: + if dep['ext'] not in current_exts and dep['ext'] not in implied_deps: + new_deps.add(dep['ext']) + changed = True + + implied_deps.update(new_deps) + + return implied_deps + +def parse_def_file(file_path, script_dir, processed_files=None, collect_all=False): + """Parse a single .def file and recursively process #include directives.""" + if processed_files is None: + processed_files = set() + + # Avoid infinite recursion + if file_path in processed_files: + return ({}, set()) if collect_all else {} + processed_files.add(file_path) + + implied_ext = {} + all_extensions = set() if 
collect_all else None + + if not os.path.exists(file_path): + return (implied_ext, all_extensions) if collect_all else implied_ext + + with open(file_path, 'r') as f: + content = f.read() + + # Process #include directives first + include_pattern = r'#include\s+"([^"]+)"' + includes = re.findall(include_pattern, content) + + for include_file in includes: + include_path = os.path.join(script_dir, include_file) + if collect_all: + included_ext, included_all = parse_def_file(include_path, script_dir, processed_files, collect_all) + implied_ext.update(included_ext) + all_extensions.update(included_all) + else: + included_ext = parse_def_file(include_path, script_dir, processed_files, collect_all) + implied_ext.update(included_ext) + + # Parse DEFINE_RISCV_EXT blocks using position-based parsing + parsed_exts = parse_define_riscv_ext(content) + + for ext_data in parsed_exts: + ext_name = ext_data['name'] + deps = ext_data['dep_exts'] + + if collect_all: + all_extensions.add(ext_name) + + if deps: + implied_ext[ext_name] = deps + + return (implied_ext, all_extensions) if collect_all else implied_ext + +def parse_def_files(): + """Parse RISC-V extension definition files starting from riscv-ext.def.""" + # Get directory containing this script + try: + script_dir = os.path.dirname(os.path.abspath(__file__)) + except NameError: + # When __file__ is not defined (e.g., interactive mode) + script_dir = os.getcwd() + + # Start with the main definition file + main_def_file = os.path.join(script_dir, 'riscv-ext.def') + return parse_def_file(main_def_file, script_dir) + +def get_all_extensions(): + """Get all supported extensions and their implied extensions.""" + # Get directory containing this script + try: + script_dir = os.path.dirname(os.path.abspath(__file__)) + except NameError: + # When __file__ is not defined (e.g., interactive mode) + script_dir = os.getcwd() + + # Start with the main definition file + main_def_file = os.path.join(script_dir, 'riscv-ext.def') + return 
parse_def_file(main_def_file, script_dir, collect_all=True) + # # IMPLIED_EXT(ext) -> implied extension list. +# This is loaded dynamically from .def files # -IMPLIED_EXT = { - "d" : ["f", "zicsr"], - - "a" : ["zaamo", "zalrsc"], - "zabha" : ["zaamo"], - "zacas" : ["zaamo"], - - "f" : ["zicsr"], - "b" : ["zba", "zbb", "zbs"], - "zdinx" : ["zfinx", "zicsr"], - "zfinx" : ["zicsr"], - "zhinx" : ["zhinxmin", "zfinx", "zicsr"], - "zhinxmin" : ["zfinx", "zicsr"], - - "zk" : ["zkn", "zkr", "zkt"], - "zkn" : ["zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"], - "zks" : ["zbkb", "zbkc", "zbkx", "zksed", "zksh"], - - "v" : ["zvl128b", "zve64d"], - "zve32x" : ["zvl32b"], - "zve64x" : ["zve32x", "zvl64b"], - "zve32f" : ["f", "zve32x"], - "zve64f" : ["f", "zve32f", "zve64x"], - "zve64d" : ["d", "zve64f"], - - "zvl64b" : ["zvl32b"], - "zvl128b" : ["zvl64b"], - "zvl256b" : ["zvl128b"], - "zvl512b" : ["zvl256b"], - "zvl1024b" : ["zvl512b"], - "zvl2048b" : ["zvl1024b"], - "zvl4096b" : ["zvl2048b"], - "zvl8192b" : ["zvl4096b"], - "zvl16384b" : ["zvl8192b"], - "zvl32768b" : ["zvl16384b"], - "zvl65536b" : ["zvl32768b"], - - "zvkn" : ["zvkned", "zvknhb", "zvkb", "zvkt"], - "zvknc" : ["zvkn", "zvbc"], - "zvkng" : ["zvkn", "zvkg"], - "zvks" : ["zvksed", "zvksh", "zvkb", "zvkt"], - "zvksc" : ["zvks", "zvbc"], - "zvksg" : ["zvks", "zvkg"], - "zvbb" : ["zvkb"], - "zvbc" : ["zve64x"], - "zvkb" : ["zve32x"], - "zvkg" : ["zve32x"], - "zvkned" : ["zve32x"], - "zvknha" : ["zve32x"], - "zvknhb" : ["zve64x"], - "zvksed" : ["zve32x"], - "zvksh" : ["zve32x"], -} +IMPLIED_EXT = parse_def_files() def arch_canonicalize(arch, isa_spec): # TODO: Support extension version. @@ -123,21 +372,31 @@ def arch_canonicalize(arch, isa_spec): long_exts += extra_long_ext # - # Handle implied extensions. + # Handle implied extensions using new conditional logic. 
# - any_change = True - while any_change: - any_change = False - for ext in std_exts + long_exts: - if ext in IMPLIED_EXT: - implied_exts = IMPLIED_EXT[ext] - for implied_ext in implied_exts: - if implied_ext == 'zicsr' and is_isa_spec_2p2: - continue + # Extract xlen from architecture string + # TODO: We should support profile here. + if arch.startswith('rv32'): + xlen = 32 + elif arch.startswith('rv64'): + xlen = 64 + else: + raise Exception("Unsupported prefix `%s`" % arch) - if implied_ext not in std_exts + long_exts: - long_exts.append(implied_ext) - any_change = True + # Get all current extensions + current_exts = std_exts + long_exts + + # Resolve dependencies + implied_deps = resolve_dependencies(current_exts, xlen) + + # Filter out zicsr for ISA spec 2.2 + if is_isa_spec_2p2: + implied_deps.discard('zicsr') + + # Add implied dependencies to long_exts + for dep in implied_deps: + if dep not in current_exts: + long_exts.append(dep) # Single letter extension might appear in the long_exts list, # because we just append extensions list to the arch string. 
@@ -179,17 +438,177 @@ def arch_canonicalize(arch, isa_spec): return new_arch -if len(sys.argv) < 2: - print ("Usage: %s <arch_str> [<arch_str>*]" % sys.argv) - sys.exit(1) +def dump_all_extensions(): + """Dump all extensions and their implied extensions.""" + implied_ext, all_extensions = get_all_extensions() + + print("All supported RISC-V extensions:") + print("=" * 60) + + if not all_extensions: + print("No extensions found.") + return -parser = argparse.ArgumentParser() -parser.add_argument('-misa-spec', type=str, - default='20191213', - choices=SUPPORTED_ISA_SPEC) -parser.add_argument('arch_strs', nargs=argparse.REMAINDER) + # Sort all extensions for consistent output + sorted_all = sorted(all_extensions) -args = parser.parse_args() + # Print all extensions with their dependencies (if any) + for ext_name in sorted_all: + if ext_name in implied_ext: + deps = implied_ext[ext_name] + dep_strs = [] + for dep in deps: + if dep['type'] == 'simple': + dep_strs.append(dep['ext']) + else: + dep_strs.append(f"{dep['ext']}*") # Mark conditional deps with * + print(f"{ext_name:15} -> {', '.join(dep_strs)}") + else: + print(f"{ext_name:15} -> (no dependencies)") + + print(f"\nTotal extensions: {len(all_extensions)}") + print(f"Extensions with dependencies: {len(implied_ext)}") + print(f"Extensions without dependencies: {len(all_extensions) - len(implied_ext)}") + +def run_unit_tests(): + """Run unit tests using pytest dynamically imported.""" + try: + import pytest + except ImportError: + print("Error: pytest is required for running unit tests.") + print("Please install pytest: pip install pytest") + return 1 + + # Define test functions + def test_basic_arch_parsing(): + """Test basic architecture string parsing.""" + result = arch_canonicalize("rv64i", "20191213") + assert result == "rv64i" + + def test_simple_extensions(): + """Test simple extension handling.""" + result = arch_canonicalize("rv64im", "20191213") + assert "zmmul" in result + + def 
test_implied_extensions(): + """Test implied extension resolution.""" + result = arch_canonicalize("rv64imaf", "20191213") + assert "zicsr" in result + + def test_conditional_dependencies(): + """Test conditional dependency evaluation.""" + # Test RV32 with F extension should include zcf when c is present + result = arch_canonicalize("rv32ifc", "20191213") + parts = result.split("_") + if "c" in parts: + assert "zca" in parts + if "f" in parts: + assert "zcf" in parts + + def test_parse_dep_exts(): + """Test dependency parsing function.""" + # Test simple dependency + deps = parse_dep_exts('{"ext1", "ext2"}') + assert len(deps) == 2 + assert deps[0]['ext'] == 'ext1' + assert deps[0]['type'] == 'simple' + + def test_evaluate_conditional_dependency(): + """Test conditional dependency evaluation.""" + # Test zcf condition for RV32 with F + dep = {'ext': 'zcf', 'type': 'conditional', 'condition': 'test'} + result = evaluate_conditional_dependency('zce', dep, 32, {'f'}) + assert result == True + + # Test zcf condition for RV64 with F (should be False) + result = evaluate_conditional_dependency('zce', dep, 64, {'f'}) + assert result == False + + def test_parse_define_riscv_ext(): + """Test DEFINE_RISCV_EXT parsing.""" + content = ''' + DEFINE_RISCV_EXT( + /* NAME */ test, + /* UPPERCASE_NAME */ TEST, + /* FULL_NAME */ "Test extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"dep1", "dep2"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ test, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 0, + /* EXTRA_EXTENSION_FLAGS */ 0) + ''' + + extensions = parse_define_riscv_ext(content) + assert len(extensions) == 1 + assert extensions[0]['name'] == 'test' + assert len(extensions[0]['dep_exts']) == 2 -for arch in args.arch_strs: - print (arch_canonicalize(arch, args.misa_spec)) + def test_parse_long_condition_block(): + """Test condition block containing several code blocks.""" + result = arch_canonicalize("rv32ec", "20191213") + assert 
"rv32ec_zca" in result + + # Collect test functions + test_functions = [ + test_basic_arch_parsing, + test_simple_extensions, + test_implied_extensions, + test_conditional_dependencies, + test_parse_dep_exts, + test_evaluate_conditional_dependency, + test_parse_define_riscv_ext, + test_parse_long_condition_block + ] + + # Run tests manually first, then optionally with pytest + print("Running unit tests...") + + passed = 0 + failed = 0 + + for i, test_func in enumerate(test_functions): + try: + print(f" Running {test_func.__name__}...", end=" ") + test_func() + print("PASSED") + passed += 1 + except Exception as e: + print(f"FAILED: {e}") + failed += 1 + + print(f"\nTest Summary: {passed} passed, {failed} failed") + + if failed == 0: + print("\nAll tests passed!") + return 0 + else: + print(f"\n{failed} test(s) failed!") + return 1 + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('-misa-spec', type=str, + default='20191213', + choices=SUPPORTED_ISA_SPEC) + parser.add_argument('--dump-all', action='store_true', + help='Dump all extensions and their implied extensions') + parser.add_argument('--selftest', action='store_true', + help='Run unit tests using pytest') + parser.add_argument('arch_strs', nargs='*', + help='Architecture strings to canonicalize') + + args = parser.parse_args() + + if args.dump_all: + dump_all_extensions() + elif args.selftest: + sys.exit(run_unit_tests()) + elif args.arch_strs: + for arch in args.arch_strs: + print (arch_canonicalize(arch, args.misa_spec)) + else: + parser.print_help() + sys.exit(1) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 6531996..9695fdc 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1679,6 +1679,26 @@ ;; Combine vec_duplicate + op.vv to op.vx ;; Include ;; - vadd.vx +;; - vsub.vx +;; - vrsub.vx +;; - vand.vx +;; - vor.vx +;; - vmul.vx +;; - vdiv.vx +;; - vdivu.vx +;; - vrem.vx +;; - vremu.vx +;; - 
vmax.vx +;; - vmaxu.vx +;; - vmin.vx +;; - vminu.vx +;; - vsadd.vx +;; - vsaddu.vx +;; - vssub.vx +;; - vssubu.vx +;; - vaadd.vx +;; - vaaddu.vx +;; - vmerge.vxm ;; ============================================================================= (define_insn_and_split "*<optab>_vx_<mode>" [(set (match_operand:V_VLSI 0 "register_operand") @@ -1694,6 +1714,8 @@ riscv_vector::expand_vx_binary_vec_dup_vec (operands[0], operands[2], operands[1], <CODE>, <MODE>mode); + + DONE; } [(set_attr "type" "vialu")]) @@ -1711,6 +1733,8 @@ riscv_vector::expand_vx_binary_vec_vec_dup (operands[0], operands[1], operands[2], <CODE>, <MODE>mode); + + DONE; } [(set_attr "type" "vialu")]) @@ -1782,6 +1806,69 @@ } [(set_attr "type" "vaalu")]) +(define_insn_and_split "*merge_vx_<mode>" + [(set (match_operand:V_VLSI 0 "register_operand") + (if_then_else:V_VLSI + (match_operand:<VM> 3 "vector_mask_operand") + (vec_duplicate:V_VLSI + (match_operand:<VEL> 2 "reg_or_int_operand")) + (match_operand:V_VLSI 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + insn_code icode = code_for_pred_merge_scalar (<MODE>mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::MERGE_OP, operands); + DONE; + } + [(set_attr "type" "vimerge")]) + +(define_insn_and_split "*vmacc_vx_<mode>" + [(set (match_operand:V_VLSI 0 "register_operand") + (plus:V_VLSI + (mult:V_VLSI + (vec_duplicate:V_VLSI + (match_operand:<VEL> 1 "register_operand")) + (match_operand:V_VLSI 2 "register_operand")) + (match_operand:V_VLSI 3 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + insn_code icode = code_for_pred_mul_plus_vx (<MODE>mode); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], + RVV_VUNDEF(<MODE>mode)}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops); + + DONE; + } + [(set_attr "type" "vimuladd")]) + +(define_insn_and_split "*vnmsac_vx_<mode>" + [(set (match_operand:V_VLSI 0 
"register_operand") + (minus:V_VLSI + (match_operand:V_VLSI 3 "register_operand") + (mult:V_VLSI + (vec_duplicate:V_VLSI + (match_operand:<VEL> 1 "register_operand")) + (match_operand:V_VLSI 2 "register_operand"))))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + insn_code icode = code_for_pred_vnmsac_vx (<MODE>mode); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], + RVV_VUNDEF(<MODE>mode)}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops); + + DONE; + } + [(set_attr "type" "vimuladd")]) + + ;; ============================================================================= ;; Combine vec_duplicate + op.vv to op.vf ;; Include @@ -1962,3 +2049,98 @@ } [(set_attr "type" "vfwmuladd")] ) + +;; vfmul.vf +(define_insn_and_split "*vfmul_vf_<mode>" + [(set (match_operand:V_VLSF 0 "register_operand") + (mult:V_VLSF + (vec_duplicate:V_VLSF + (match_operand:<VEL> 2 "register_operand")) + (match_operand:V_VLSF 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_scalar (MULT, <MODE>mode), + riscv_vector::BINARY_OP_FRM_DYN, operands); + DONE; + } + [(set_attr "type" "vfmul")] +) + +;; vfrdiv.vf +(define_insn_and_split "*vfrdiv_vf_<mode>" + [(set (match_operand:V_VLSF 0 "register_operand") + (div:V_VLSF + (vec_duplicate:V_VLSF + (match_operand:<VEL> 2 "register_operand")) + (match_operand:V_VLSF 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_reverse_scalar (DIV, <MODE>mode), + riscv_vector::BINARY_OP_FRM_DYN, operands); + DONE; + } + [(set_attr "type" "vfdiv")] +) + +;; vfmin.vf +(define_insn_and_split "*vfmin_vf_<mode>" + [(set (match_operand:V_VLSF 0 "register_operand") + (smin:V_VLSF + (vec_duplicate:V_VLSF + (match_operand:<VEL> 2 "register_operand")) + (match_operand:V_VLSF 1 "register_operand")))] 
+ "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_scalar (SMIN, <MODE>mode), + riscv_vector::BINARY_OP, operands); + DONE; + } + [(set_attr "type" "vfminmax")] +) + +(define_insn_and_split "*vfmin_vf_ieee_<mode>" + [(set (match_operand:V_VLSF 0 "register_operand") + (unspec:V_VLSF [ + (vec_duplicate:V_VLSF + (match_operand:<VEL> 2 "register_operand")) + (match_operand:V_VLSF 1 "register_operand") + ] UNSPEC_VFMIN))] + "TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_scalar (UNSPEC_VFMIN, <MODE>mode), + riscv_vector::BINARY_OP, operands); + DONE; + } + [(set_attr "type" "vfminmax")] +) + +(define_insn_and_split "*vfmin_vf_ieee_<mode>" + [(set (match_operand:V_VLSF 0 "register_operand") + (unspec:V_VLSF [ + (match_operand:V_VLSF 1 "register_operand") + (vec_duplicate:V_VLSF + (match_operand:<VEL> 2 "register_operand")) + ] UNSPEC_VFMIN))] + "TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_scalar (UNSPEC_VFMIN, <MODE>mode), + riscv_vector::BINARY_OP, operands); + DONE; + } + [(set_attr "type" "vfminmax")] +) diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md index 5ecaa19..979e0df 100644 --- a/gcc/config/riscv/constraints.md +++ b/gcc/config/riscv/constraints.md @@ -330,3 +330,7 @@ (define_constraint "Q" "An address operand that is valid for a prefetch instruction" (match_operand 0 "prefetch_operand")) + +(define_address_constraint "ZD" + "An address operand that is valid for a mips prefetch instruction" + (match_test "riscv_prefetch_offset_address_p (op, mode)")) diff --git a/gcc/config/riscv/gen-riscv-ext-opt.cc b/gcc/config/riscv/gen-riscv-ext-opt.cc index 17b8f5b..1ca339c 100644 --- a/gcc/config/riscv/gen-riscv-ext-opt.cc +++ 
b/gcc/config/riscv/gen-riscv-ext-opt.cc @@ -4,50 +4,6 @@ #include <stdio.h> #include "riscv-opts.h" -struct version_t -{ - int major; - int minor; - version_t (int major, int minor, - enum riscv_isa_spec_class spec = ISA_SPEC_CLASS_NONE) - : major (major), minor (minor) - {} - bool operator<(const version_t &other) const - { - if (major != other.major) - return major < other.major; - return minor < other.minor; - } - - bool operator== (const version_t &other) const - { - return major == other.major && minor == other.minor; - } -}; - -static void -print_ext_doc_entry (const std::string &ext_name, const std::string &full_name, - const std::string &desc, - const std::vector<version_t> &supported_versions) -{ - // Implementation of the function to print the documentation entry - // for the extension. - std::set<version_t> unique_versions; - for (const auto &version : supported_versions) - unique_versions.insert (version); - printf ("@item %s\n", ext_name.c_str ()); - printf ("@tab"); - for (const auto &version : unique_versions) - { - printf (" %d.%d", version.major, version.minor); - } - printf ("\n"); - printf ("@tab %s", full_name.c_str ()); - if (desc.size ()) - printf (", %s", desc.c_str ()); - printf ("\n\n"); -} - int main () { diff --git a/gcc/config/riscv/gen-riscv-mcpu-texi.cc b/gcc/config/riscv/gen-riscv-mcpu-texi.cc new file mode 100644 index 0000000..9681438 --- /dev/null +++ b/gcc/config/riscv/gen-riscv-mcpu-texi.cc @@ -0,0 +1,43 @@ +#include <string> +#include <vector> +#include <stdio.h> + +int +main () +{ + puts ("@c Copyright (C) 2025 Free Software Foundation, Inc."); + puts ("@c This is part of the GCC manual."); + puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi."); + puts (""); + puts ("@c This file is generated automatically using"); + puts ("@c gcc/config/riscv/gen-riscv-mcpu-texi.cc from:"); + puts ("@c gcc/config/riscv/riscv-cores.def"); + puts (""); + puts ("@c Please *DO NOT* edit manually."); + puts (""); + puts 
("@samp{Core Name}"); + puts (""); + puts ("@opindex mcpu"); + puts ("@item -mcpu=@var{processor-string}"); + puts ("Use architecture of and optimize the output for the given processor, specified"); + puts ("by particular CPU name. Permissible values for this option are:"); + puts (""); + puts (""); + + std::vector<std::string> coreNames; + +#define RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH) \ + coreNames.push_back (CORE_NAME); +#include "riscv-cores.def" +#undef RISCV_CORE + + for (size_t i = 0; i < coreNames.size(); ++i) { + if (i == coreNames.size() - 1) { + printf("@samp{%s}.\n", coreNames[i].c_str()); + } else { + printf("@samp{%s},\n\n", coreNames[i].c_str()); + } + } + + return 0; +} diff --git a/gcc/config/riscv/gen-riscv-mtune-texi.cc b/gcc/config/riscv/gen-riscv-mtune-texi.cc new file mode 100644 index 0000000..1bdfe2a --- /dev/null +++ b/gcc/config/riscv/gen-riscv-mtune-texi.cc @@ -0,0 +1,41 @@ +#include <string> +#include <vector> +#include <stdio.h> + +int +main () +{ + puts ("@c Copyright (C) 2025 Free Software Foundation, Inc."); + puts ("@c This is part of the GCC manual."); + puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi."); + puts (""); + puts ("@c This file is generated automatically using"); + puts ("@c gcc/config/riscv/gen-riscv-mtune-texi.cc from:"); + puts ("@c gcc/config/riscv/riscv-cores.def"); + puts (""); + puts ("@c Please *DO NOT* edit manually."); + puts (""); + puts ("@samp{Tune Name}"); + puts (""); + puts ("@opindex mtune"); + puts ("@item -mtune=@var{processor-string}"); + puts ("Optimize the output for the given processor, specified by microarchitecture or"); + puts ("particular CPU name. 
Permissible values for this option are:"); + puts (""); + puts (""); + + std::vector<std::string> tuneNames; + +#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \ + tuneNames.push_back (TUNE_NAME); +#include "riscv-cores.def" +#undef RISCV_TUNE + + for (size_t i = 0; i < tuneNames.size(); ++i) { + printf("@samp{%s},\n\n", tuneNames[i].c_str()); + } + + puts ("and all valid options for @option{-mcpu=}."); + + return 0; +} diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index 381f96c..bdb3d22 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -27,10 +27,14 @@ (ior (match_operand 0 "const_arith_operand") (match_operand 0 "register_operand"))) +(define_predicate "prefetch_const_operand" + (and (match_code "const_int") + (match_test "(IN_RANGE (INTVAL (op), 0, 511))"))) + ;; REG or REG+D where D fits in a simm12 and has the low 5 bits ;; off. The REG+D form can be reloaded into a temporary if needed ;; after FP elimination if that exposes an invalid offset. 
-(define_predicate "prefetch_operand" +(define_predicate "zicbop_prefetch_operand" (ior (match_operand 0 "register_operand") (and (match_test "const_arith_operand (op, VOIDmode)") (match_test "(INTVAL (op) & 0x1f) == 0")) @@ -39,6 +43,20 @@ (match_test "const_arith_operand (XEXP (op, 1), VOIDmode)") (match_test "(INTVAL (XEXP (op, 1)) & 0x1f) == 0")))) +;; REG or REG+D where D fits in a uimm9 +(define_predicate "mips_prefetch_operand" + (ior (match_operand 0 "register_operand") + (match_test "prefetch_const_operand (op, VOIDmode)") + (and (match_code "plus") + (match_test "register_operand (XEXP (op, 0), word_mode)") + (match_test "prefetch_const_operand (XEXP (op, 1), VOIDmode)")))) + +;; MIPS specific or Standard RISCV Extension +(define_predicate "prefetch_operand" + (if_then_else (match_test "TARGET_XMIPSCBOP") + (match_operand 0 "mips_prefetch_operand") + (match_operand 0 "zicbop_prefetch_operand"))) + (define_predicate "lui_operand" (and (match_code "const_int") (match_test "LUI_OPERAND (INTVAL (op))"))) diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc index 3031c29..b8547a7 100644 --- a/gcc/config/riscv/riscv-avlprop.cc +++ b/gcc/config/riscv/riscv-avlprop.cc @@ -156,6 +156,7 @@ get_insn_vtype_mode (rtx_insn *rinsn) extract_insn_cached (rinsn); int mode_idx = get_attr_mode_idx (rinsn); gcc_assert (mode_idx != INVALID_ATTRIBUTE); + gcc_assert (mode_idx < recog_data.n_operands); return GET_MODE (recog_data.operand[mode_idx]); } @@ -205,6 +206,7 @@ simplify_replace_vlmax_avl (rtx_insn *rinsn, rtx new_avl) { int index = get_attr_avl_type_idx (rinsn); gcc_assert (index != INVALID_ATTRIBUTE); + gcc_assert (index < recog_data.n_operands); validate_change_or_fail (rinsn, recog_data.operand_loc[index], get_avl_type_rtx (avl_type::NONVLMAX), false); } @@ -361,6 +363,8 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) const is not depend on. 
*/ extract_insn_cached (use_insn->rtl ()); int merge_op_idx = get_attr_merge_op_idx (use_insn->rtl ()); + gcc_assert (merge_op_idx == INVALID_ATTRIBUTE + || merge_op_idx < recog_data.n_operands); if (merge_op_idx != INVALID_ATTRIBUTE && !satisfies_constraint_vu (recog_data.operand[merge_op_idx]) && refers_to_regno_p (set->regno (), @@ -531,7 +535,14 @@ pass_avlprop::execute (function *fn) && !m_avl_propagations->get (candidate.second) && imm_avl_p (vtype_mode)) { - rtx new_avl = gen_int_mode (GET_MODE_NUNITS (vtype_mode), Pmode); + /* For segmented operations AVL refers to a single register and + not all NF registers. Therefore divide the mode size by NF + to obtain the proper AVL. */ + int nf = 1; + if (riscv_v_ext_tuple_mode_p (vtype_mode)) + nf = get_nf (vtype_mode); + rtx new_avl = gen_int_mode + (GET_MODE_NUNITS (vtype_mode).to_constant () / nf, Pmode); simplify_replace_vlmax_avl (rinsn, new_avl); } } diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index 98f3470..8f0f630 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -113,7 +113,7 @@ RISCV_CORE("xt-c908v", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicsr_" "zvfh_sstc_svinval_svnapot_svpbmt__xtheadba_" "xtheadbb_xtheadbs_xtheadcmo_xtheadcondmov_" "xtheadfmemidx_xtheadmac_xtheadmemidx_" - "xtheadmempair_xtheadsync_xtheadvdot", + "xtheadmempair_xtheadsync", "xt-c908") RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" @@ -121,7 +121,7 @@ RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" "xtheadmemidx_xtheadmempair_xtheadsync", "xt-c910") RISCV_CORE("xt-c910v2", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicond_" - "zicsr_zifencei _zihintntl_zihintpause_zihpm_" + "zicsr_zifencei_zihintntl_zihintpause_zihpm_" "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_" "zbs_sscofpmf_sstc_svinval_svnapot_svpbmt_" "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" @@ -135,13 +135,13 @@ 
RISCV_CORE("xt-c920", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" "xtheadvector", "xt-c910") RISCV_CORE("xt-c920v2", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_" - "zicsr_zifencei _zihintntl_zihintpause_zihpm_" + "zicsr_zifencei_zihintntl_zihintpause_zihpm_" "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_" "zbs_zvfbfmin_zvfbfwma_zvfh_sscofpmf_sstc_" "svinval_svnapot_svpbmt_xtheadba_xtheadbb_" "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_" "xtheadmac_xtheadmemidx_xtheadmempair_" - "xtheadsync_xtheadvdot", + "xtheadsync", "xt-c920v2") RISCV_CORE("tt-ascalon-d8", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_" diff --git a/gcc/config/riscv/riscv-ext-mips.def b/gcc/config/riscv/riscv-ext-mips.def index 5d7836d..132f6c1 100644 --- a/gcc/config/riscv/riscv-ext-mips.def +++ b/gcc/config/riscv/riscv-ext-mips.def @@ -33,3 +33,16 @@ DEFINE_RISCV_EXT ( /* BITMASK_GROUP_ID. */ BITMASK_NOT_YET_ALLOCATED, /* BITMASK_BIT_POSITION. */ BITMASK_NOT_YET_ALLOCATED, /* EXTRA_EXTENSION_FLAGS. */ 0) + +DEFINE_RISCV_EXT ( + /* NAME. */ xmipscbop, + /* UPPERCASE_NAME. */ XMIPSCBOP, + /* FULL_NAME. */ "Mips Prefetch extension", + /* DESC. */ "", + /* URL. */ , + /* DEP_EXTS. */ ({}), + /* SUPPORTED_VERSIONS. */ ({{1, 0}}), + /* FLAG_GROUP. */ xmips, + /* BITMASK_GROUP_ID. */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION. */ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS. 
*/ 0) diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt index 26d6e68..ced05d2 100644 --- a/gcc/config/riscv/riscv-ext.opt +++ b/gcc/config/riscv/riscv-ext.opt @@ -449,3 +449,5 @@ Mask(XTHEADVECTOR) Var(riscv_xthead_subext) Mask(XVENTANACONDOPS) Var(riscv_xventana_subext) Mask(XMIPSCMOV) Var(riscv_xmips_subext) + +Mask(XMIPSCBOP) Var(riscv_xmips_subext) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 539321f..46b256d 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -143,6 +143,8 @@ extern void riscv_expand_sstrunc (rtx, rtx); extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t); extern bool synthesize_ior_xor (rtx_code, rtx [3]); extern bool synthesize_and (rtx [3]); +extern bool synthesize_add (rtx [3]); +extern bool synthesize_add_extended (rtx [3]); #ifdef RTX_CODE extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0); @@ -830,16 +832,18 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx); extern bool strided_load_broadcast_p (void); extern bool riscv_use_divmod_expander (void); -void riscv_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); +void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int); extern bool riscv_option_valid_attribute_p (tree, tree, tree, int); extern bool riscv_option_valid_version_attribute_p (tree, tree, tree, int); extern bool -riscv_process_target_version_attr (tree, location_t); +riscv_process_target_version_attr (tree, location_t *); extern void riscv_override_options_internal (struct gcc_options *); extern void riscv_option_override (void); +extern rtx riscv_prefetch_cookie (rtx, rtx); +extern bool riscv_prefetch_offset_address_p (rtx, machine_mode); struct riscv_tune_param; /* Information about one micro-arch we know about. 
*/ diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h index a35537d..4cd860f 100644 --- a/gcc/config/riscv/riscv-subset.h +++ b/gcc/config/riscv/riscv-subset.h @@ -52,8 +52,9 @@ private: /* Original arch string. */ const char *m_arch; - /* Location of arch string, used for report error. */ - location_t m_loc; + /* A pointer to the location that should be used for diagnostics, + or null if diagnostics should be suppressed. */ + location_t *m_loc; /* Head of subset info list. */ riscv_subset_t *m_head; @@ -70,7 +71,7 @@ private: /* Allow adding the same extension more than once. */ bool m_allow_adding_dup; - riscv_subset_list (const char *, location_t); + riscv_subset_list (const char *, location_t *); const char *parsing_subset_version (const char *, const char *, unsigned *, unsigned *, bool, bool *); @@ -106,12 +107,12 @@ public: riscv_subset_list *clone () const; - static riscv_subset_list *parse (const char *, location_t); + static riscv_subset_list *parse (const char *, location_t *); const char *parse_single_ext (const char *, bool exact_single_p = true); int match_score (riscv_subset_list *) const; - void set_loc (location_t); + void set_loc (location_t *); void set_allow_adding_dup (bool v) { m_allow_adding_dup = v; } @@ -182,7 +183,7 @@ extern void riscv_set_arch_by_subset_list (riscv_subset_list *, struct gcc_options *); extern bool riscv_minimal_hwprobe_feature_bits (const char *, struct riscv_feature_bits *, - location_t); + location_t *); extern bool riscv_ext_is_subset (struct cl_target_option *, struct cl_target_option *); diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc index 8ad3025..5e01c92 100644 --- a/gcc/config/riscv/riscv-target-attr.cc +++ b/gcc/config/riscv/riscv-target-attr.cc @@ -34,7 +34,7 @@ namespace { class riscv_target_attr_parser { public: - riscv_target_attr_parser (location_t loc) + riscv_target_attr_parser (location_t *loc) : m_found_arch_p (false) , 
m_found_tune_p (false) , m_found_cpu_p (false) @@ -62,7 +62,7 @@ private: bool m_found_cpu_p; bool m_found_priority_p; riscv_subset_list *m_subset_list; - location_t m_loc; + location_t *m_loc; const riscv_cpu_info *m_cpu_info; const char *m_tune; int m_priority; @@ -102,15 +102,17 @@ riscv_target_attr_parser::parse_arch (const char *str) { if (TARGET_64BIT && strncmp ("32", str + 2, strlen ("32")) == 0) { - error_at (m_loc, "unexpected arch for %<target()%> attribute: " - "must start with rv64 but found %qs", str); + if (m_loc) + error_at (*m_loc, "unexpected arch for %<target()%> attribute: " + "must start with rv64 but found %qs", str); goto fail; } if (!TARGET_64BIT && strncmp ("64", str + 2, strlen ("64")) == 0) { - error_at (m_loc, "unexpected arch for %<target()%> attribute: " - "must start with rv32 but found %qs", str); + if (m_loc) + error_at (*m_loc, "unexpected arch for %<target()%> attribute: " + "must start with rv32 but found %qs", str); goto fail; } @@ -140,10 +142,9 @@ riscv_target_attr_parser::parse_arch (const char *str) { if (token[0] != '+') { - error_at ( - m_loc, - "unexpected arch for %<target()%> attribute: must start " - "with + or rv"); + if (m_loc) + error_at (*m_loc, "unexpected arch for %<target()%> " + "attribute: must start with + or rv"); goto fail; } @@ -151,10 +152,9 @@ riscv_target_attr_parser::parse_arch (const char *str) /* Check parse_single_ext has consume all string.
*/ if (*result != '\0') { - error_at ( - m_loc, - "unexpected arch for %<target()%> attribute: bad " - "string found %qs", token); + if (m_loc) + error_at (*m_loc, "unexpected arch for %<target()%> " + "attribute: bad string found %qs", token); goto fail; } @@ -179,8 +179,8 @@ fail: bool riscv_target_attr_parser::handle_arch (const char *str) { - if (m_found_arch_p) - error_at (m_loc, "%<target()%> attribute: arch appears more than once"); + if (m_found_arch_p && m_loc) + error_at (*m_loc, "%<target()%> attribute: arch appears more than once"); m_found_arch_p = true; return parse_arch (str); } @@ -190,15 +190,16 @@ riscv_target_attr_parser::handle_arch (const char *str) bool riscv_target_attr_parser::handle_cpu (const char *str) { - if (m_found_cpu_p) - error_at (m_loc, "%<target()%> attribute: cpu appears more than once"); + if (m_found_cpu_p && m_loc) + error_at (*m_loc, "%<target()%> attribute: cpu appears more than once"); m_found_cpu_p = true; const riscv_cpu_info *cpu_info = riscv_find_cpu (str); if (!cpu_info) { - error_at (m_loc, "%<target()%> attribute: unknown CPU %qs", str); + if (m_loc) + error_at (*m_loc, "%<target()%> attribute: unknown CPU %qs", str); return false; } @@ -218,14 +219,15 @@ riscv_target_attr_parser::handle_cpu (const char *str) bool riscv_target_attr_parser::handle_tune (const char *str) { - if (m_found_tune_p) - error_at (m_loc, "%<target()%> attribute: tune appears more than once"); + if (m_found_tune_p && m_loc) + error_at (*m_loc, "%<target()%> attribute: tune appears more than once"); m_found_tune_p = true; const struct riscv_tune_info *tune = riscv_parse_tune (str, true); if (tune == nullptr) { - error_at (m_loc, "%<target()%> attribute: unknown TUNE %qs", str); + if (m_loc) + error_at (*m_loc, "%<target()%> attribute: unknown TUNE %qs", str); return false; } @@ -237,13 +239,15 @@ riscv_target_attr_parser::handle_tune (const char *str) bool riscv_target_attr_parser::handle_priority (const char *str) { - if (m_found_priority_p) - 
error_at (m_loc, "%<target()%> attribute: priority appears more than once"); + if (m_found_priority_p && m_loc) + error_at (*m_loc, "%<target()%> attribute: priority appears " + "more than once"); m_found_priority_p = true; if (sscanf (str, "%d", &m_priority) != 1) { - error_at (m_loc, "%<target()%> attribute: invalid priority %qs", str); + if (m_loc) + error_at (*m_loc, "%<target()%> attribute: invalid priority %qs", str); return false; } @@ -282,7 +286,7 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const static bool riscv_process_one_target_attr (char *arg_str, - location_t loc, + location_t *loc, riscv_target_attr_parser &attr_parser, const struct riscv_attribute_info *attrs) { @@ -290,7 +294,8 @@ riscv_process_one_target_attr (char *arg_str, if (len == 0) { - error_at (loc, "malformed %<target()%> attribute"); + if (loc) + error_at (*loc, "malformed %<target()%> attribute"); return false; } @@ -302,10 +307,9 @@ riscv_process_one_target_attr (char *arg_str, if (!arg) { - error_at ( - loc, - "attribute %<target(\"%s\")%> does not accept an argument", - str_to_check); + if (loc) + error_at (*loc, "attribute %<target(\"%s\")%> does not " + "accept an argument", str_to_check); return false; } @@ -324,7 +328,8 @@ riscv_process_one_target_attr (char *arg_str, return (&attr_parser->*attr->handler) (arg); } - error_at (loc, "Got unknown attribute %<target(\"%s\")%>", str_to_check); + if (loc) + error_at (*loc, "Got unknown attribute %<target(\"%s\")%>", str_to_check); return false; } @@ -347,11 +352,12 @@ num_occurrences_in_str (char c, char *str) } /* Parse the string in ARGS that contains the target attribute information - and update the global target options space. */ + and update the global target options space. If LOC is nonnull, report + diagnostics against location *LOC, otherwise remain silent. 
*/ bool riscv_process_target_attr (const char *args, - location_t loc, + location_t *loc, const struct riscv_attribute_info *attrs) { size_t len = strlen (args); @@ -387,8 +393,8 @@ riscv_process_target_attr (const char *args, if (num_attrs != num_semicolons + 1) { - error_at (loc, "malformed %<target(\"%s\")%> attribute", - args); + if (loc) + error_at (*loc, "malformed %<target(\"%s\")%> attribute", args); return false; } @@ -399,11 +405,12 @@ riscv_process_target_attr (const char *args, } /* Parse the tree in ARGS that contains the target attribute information - and update the global target options space. */ + and update the global target options space. If LOC is nonnull, report + diagnostics against *LOC, otherwise remain silent. */ static bool riscv_process_target_attr (tree args, - location_t loc, + location_t *loc, const struct riscv_attribute_info *attrs) { if (TREE_CODE (args) == TREE_LIST) @@ -424,7 +431,8 @@ riscv_process_target_attr (tree args, if (TREE_CODE (args) != STRING_CST) { - error_at (loc, "attribute %<target%> argument not a string"); + if (loc) + error_at (*loc, "attribute %<target%> argument not a string"); return false; } @@ -466,7 +474,7 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int) TREE_TARGET_OPTION (target_option_default_node)); /* Now we can parse the attributes and set &global_options accordingly. */ - ret = riscv_process_target_attr (args, loc, riscv_target_attrs); + ret = riscv_process_target_attr (args, &loc, riscv_target_attrs); if (ret) { riscv_override_options_internal (&global_options); @@ -481,16 +489,19 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int) } /* Parse the tree in ARGS that contains the target_version attribute - information and update the global target options space. */ + information and update the global target options space. If LOC is nonnull, + report diagnostics against *LOC, otherwise remain silent. 
*/ bool -riscv_process_target_version_attr (tree args, location_t loc) +riscv_process_target_version_attr (tree args, location_t *loc) { if (TREE_CODE (args) == TREE_LIST) { if (TREE_CHAIN (args)) { - error ("attribute %<target_version%> has multiple values"); + if (loc) + error_at (*loc, "attribute %<target_version%> " + "has multiple values"); return false; } args = TREE_VALUE (args); @@ -498,7 +509,8 @@ riscv_process_target_version_attr (tree args, location_t loc) if (!args || TREE_CODE (args) != STRING_CST) { - error ("attribute %<target_version%> argument not a string"); + if (loc) + error_at (*loc, "attribute %<target_version%> argument not a string"); return false; } @@ -541,7 +553,7 @@ riscv_option_valid_version_attribute_p (tree fndecl, tree, tree args, int) cl_target_option_restore (&global_options, &global_options_set, TREE_TARGET_OPTION (target_option_current_node)); - ret = riscv_process_target_version_attr (args, loc); + ret = riscv_process_target_version_attr (args, &loc); /* Set up any additional state. */ if (ret) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index c9c8328..b27a0be 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -63,20 +63,37 @@ imm_avl_p (machine_mode mode) { poly_uint64 nunits = GET_MODE_NUNITS (mode); + /* For segmented operations AVL refers to a single register and not all NF + registers. Therefore divide the mode size by NF before checking if it is + in range. */ + int nf = 1; + if (riscv_v_ext_tuple_mode_p (mode)) + nf = get_nf (mode); + return nunits.is_constant () /* The vsetivli can only hold register 0~31. */ - ? (IN_RANGE (nunits.to_constant (), 0, 31)) + ? (IN_RANGE (nunits.to_constant () / nf, 0, 31)) /* Only allowed in VLS-VLMAX mode. */ : false; } -/* Return true if LEN is equal to NUNITS that out of the range [0, 31]. */ +/* Return true if LEN equals the number of units in MODE if MODE is either a + VLA mode or MODE is a VLS mode its size equals the vector size. 
+ In that case we can emit a VLMAX insn which can be optimized more easily + by the vsetvl pass. */ + static bool is_vlmax_len_p (machine_mode mode, rtx len) { poly_int64 value; + if (poly_int_rtx_p (len, &value) + && known_eq (value, GET_MODE_NUNITS (mode)) + && known_eq (GET_MODE_UNIT_SIZE (mode) * value, BYTES_PER_RISCV_VECTOR)) + return true; + return poly_int_rtx_p (len, &value) - && known_eq (value, GET_MODE_NUNITS (mode)); + && !GET_MODE_NUNITS (mode).is_constant () + && known_eq (value, GET_MODE_NUNITS (mode)); } /* Helper functions for insn_flags && insn_types */ @@ -954,6 +971,26 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx sel, rtx mask) emit_vlmax_insn (icode, BINARY_OP_TAMU, ops); } +/* Function to emit a vslide1up instruction of mode MODE with destination + DEST and slideup element ELT. */ + +rtx +expand_slide1up (machine_mode mode, rtx dest, rtx elt) +{ + unsigned int unspec + = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP; + insn_code icode = code_for_pred_slide (unspec, mode); + /* RVV Spec 16.3.1 + The destination vector register group for vslideup cannot overlap the + source vector register group, otherwise the instruction encoding + is reserved. Thus, we need a new register. */ + rtx tmp = gen_reg_rtx (mode); + rtx ops[] = {tmp, dest, elt}; + emit_vlmax_insn (icode, BINARY_OP, ops); + return tmp; +} + + /* According to RVV ISA spec (16.5.1. Synthesizing vdecompress): https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc @@ -1175,16 +1212,7 @@ expand_vector_init_trailing_same_elem (rtx target, { rtx dup = expand_vector_broadcast (mode, builder.elt (nelts_reqd - 1)); for (int i = nelts_reqd - trailing_ndups - 1; i >= 0; i--) - { - unsigned int unspec - = FLOAT_MODE_P (mode) ? 
UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP; - insn_code icode = code_for_pred_slide (unspec, mode); - rtx tmp = gen_reg_rtx (mode); - rtx ops[] = {tmp, dup, builder.elt (i)}; - emit_vlmax_insn (icode, BINARY_OP, ops); - /* slide1up need source and dest to be different REG. */ - dup = tmp; - } + dup = expand_slide1up (mode, dup, builder.elt (i)); emit_move_insn (target, dup); return true; @@ -1717,6 +1745,77 @@ expand_const_vector_stepped (rtx target, rtx src, rvv_builder *builder) gcc_unreachable (); } +/* We don't actually allow this case in legitimate_constant_p but + the middle-end still expects us to handle it in an expander + (see PR121334). This is assumed to happen very rarely so the + implementation is not very efficient, particularly + for short vectors. +*/ + +static void +expand_const_vector_onestep (rtx target, rvv_builder &builder) +{ + machine_mode mode = GET_MODE (target); + gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT); + gcc_assert (builder.nelts_per_pattern () == 2); + + /* We have n encoded patterns + {csta_0, cstb_0}, + {csta_1, cstb_1}, + ... + {csta_{n-1}, cstb_{n-1}} + which should become one vector: + {csta_0, csta_1, ..., csta_{n-1}, + cstb_0, cstb_1, ..., cstb_{n-1}, + ... + cstb_0, cstb_1, ..., cstb_{n-1}}. + + In order to achieve this we create a permute/gather constant + sel = {0, 1, ..., n - 1, 0, 1, ..., n - 1, ...} + and two vectors + va = {csta_0, csta_1, ..., csta_{n-1}}, + vb = {cstb_0, cstb_1, ..., cstb_{n-1}}. + + Then we use a VLMAX gather to "broadcast" vb and afterwards + overwrite the first n elements with va. */ + + int n = builder.npatterns (); + /* { 0, 1, 2, ..., n - 1 }. */ + rtx vid = gen_reg_rtx (mode); + expand_vec_series (vid, const0_rtx, const1_rtx); + + /* { 0, 1, ..., n - 1, 0, 1, ..., n - 1, ... }. */ + rtx sel = gen_reg_rtx (mode); + rtx and_ops[] = {sel, vid, GEN_INT (n)}; + emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, and_ops); + + /* va = { ELT (0), ELT (1), ... ELT (n - 1) }. 
*/ + rtx tmp1 = gen_reg_rtx (mode); + rtx ops1[] = {tmp1, builder.elt (0)}; + expand_broadcast (mode, ops1); + for (int i = 1; i < n; i++) + tmp1 = expand_slide1up (mode, tmp1, builder.elt (i)); + + /* vb = { ELT (n), ELT (n + 1), ... ELT (2 * n - 1) }. */ + rtx tmp2 = gen_reg_rtx (mode); + rtx ops2[] = {tmp2, builder.elt (n)}; + expand_broadcast (mode, ops2); + for (int i = 1; i < n; i++) + tmp2 = expand_slide1up (mode, tmp2, builder.elt (n + i)); + + /* Duplicate vb. */ + rtx tmp3 = gen_reg_rtx (mode); + emit_vlmax_gather_insn (tmp3, tmp2, sel); + + /* Overwrite the first n elements with va. */ + rtx dest = gen_reg_rtx (mode); + insn_code icode = code_for_pred_mov (mode); + rtx ops3[] = {dest, tmp3, tmp1}; + emit_nonvlmax_insn (icode, __MASK_OP_TUMA | UNARY_OP_P, ops3, GEN_INT (n)); + + emit_move_insn (target, dest); +} + static void expand_const_vector (rtx target, rtx src) { @@ -1744,6 +1843,8 @@ expand_const_vector (rtx target, rtx src) if (CONST_VECTOR_DUPLICATE_P (src)) return expand_const_vector_duplicate (target, &builder); + else if (CONST_VECTOR_NELTS_PER_PATTERN (src) == 2) + return expand_const_vector_onestep (target, builder); else if (CONST_VECTOR_STEPPED_P (src)) return expand_const_vector_stepped (target, src, &builder); @@ -2648,8 +2749,14 @@ expand_vector_init_merge_repeating_sequence (rtx target, = get_repeating_sequence_dup_machine_mode (builder, mask_bit_mode); uint64_t full_nelts = builder.full_nelts ().to_constant (); + gcc_assert (builder.nelts_per_pattern () == 1 + || builder.nelts_per_pattern () == 2); + + rtx first + = builder.nelts_per_pattern () == 1 ? builder.elt (0) : builder.elt (1); + /* Step 1: Broadcast the first pattern. */ - rtx ops[] = {target, force_reg (builder.inner_mode (), builder.elt (0))}; + rtx ops[] = {target, force_reg (builder.inner_mode (), first)}; expand_broadcast (builder.mode (), ops); /* Step 2: Merge the rest iteration of pattern.
*/ for (unsigned int i = 1; i < builder.npatterns (); i++) @@ -2677,7 +2784,10 @@ emit_move_insn (mask, gen_lowpart (mask_bit_mode, dup)); /* Step 2-2: Merge pattern according to the mask. */ - rtx ops[] = {target, target, builder.elt (i), mask}; + unsigned int which = i; + if (builder.nelts_per_pattern () == 2) + which = 2 * which + 1; + rtx ops[] = {target, target, builder.elt (which), mask}; emit_vlmax_insn (code_for_pred_merge_scalar (GET_MODE (target)), MERGE_OP, ops); } @@ -3220,15 +3330,17 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) mask_mode = get_mask_mode (data_mode); rtx mask = gen_reg_rtx (mask_mode); rtx max_sel = gen_const_vector_dup (sel_mode, nunits); + bool overlap = reg_overlap_mentioned_p (target, op1); + rtx tmp_target = overlap ? gen_reg_rtx (data_mode) : target; /* Step 1: generate a mask that should select everything >= nunits into the * mask. */ expand_vec_cmp (mask, GEU, sel_mod, max_sel); - /* Step2: gather every op0 values indexed by sel into target, + /* Step2: gather every op0 values indexed by sel into TMP_TARGET, we don't need to care about the result of the element whose index >= nunits. */ - emit_vlmax_gather_insn (target, op0, sel_mod); + emit_vlmax_gather_insn (tmp_target, op0, sel_mod); /* Step3: shift the range from (nunits, max_of_mode] to [0, max_of_mode - nunits]. */ @@ -3238,7 +3350,10 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel) /* Step4: gather those into the previously masked-out elements of target. */ - emit_vlmax_masked_gather_mu_insn (target, op1, tmp, mask); + emit_vlmax_masked_gather_mu_insn (tmp_target, op1, tmp, mask); + + if (overlap) + emit_move_insn (target, tmp_target); } /* Implement TARGET_VECTORIZE_VEC_PERM_CONST for RVV. */ @@ -4078,11 +4193,7 @@ shuffle_off_by_one_patterns (struct expand_vec_perm_d *d) emit_vec_extract (tmp, d->op0, gen_int_mode (nunits - 1, Pmode)); /* Insert the scalar into element 0.
*/ - unsigned int unspec - = FLOAT_MODE_P (d->vmode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP; - insn_code icode = code_for_pred_slide (unspec, d->vmode); - rtx ops[] = {d->target, d->op1, tmp}; - emit_vlmax_insn (icode, BINARY_OP, ops); + emit_move_insn (d->target, expand_slide1up (d->vmode, d->op1, tmp)); } return true; @@ -4376,13 +4487,11 @@ expand_strided_load (machine_mode mode, rtx *ops) int idx = 4; get_else_operand (ops[idx++]); rtx len = ops[idx]; - poly_int64 len_val; insn_code icode = code_for_pred_strided_load (mode); rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride}; - if (poly_int_rtx_p (len, &len_val) - && known_eq (len_val, GET_MODE_NUNITS (mode))) + if (is_vlmax_len_p (mode, len)) emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops); else { @@ -4400,11 +4509,9 @@ expand_strided_store (machine_mode mode, rtx *ops) rtx stride = ops[1]; rtx mask = ops[3]; rtx len = ops[4]; - poly_int64 len_val; rtx vl_type; - if (poly_int_rtx_p (len, &len_val) - && known_eq (len_val, GET_MODE_NUNITS (mode))) + if (is_vlmax_len_p (mode, len)) { len = gen_reg_rtx (Pmode); emit_vlmax_vsetvl (mode, len); diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index df924fa..5e6cb67 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -275,13 +275,13 @@ loop_invariant_op_p (class loop *loop, /* Return true if the variable should be counted into liveness.
*/ static bool variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info, - slp_tree node ATTRIBUTE_UNUSED, tree var, bool lhs_p) + slp_tree node, tree var, bool lhs_p) { if (!var) return false; gimple *stmt = STMT_VINFO_STMT (stmt_info); stmt_info = vect_stmt_to_vectorize (stmt_info); - enum stmt_vec_info_type type = STMT_VINFO_TYPE (stmt_info); + enum stmt_vec_info_type type = SLP_TREE_TYPE (node); if (is_gimple_call (stmt) && gimple_call_internal_p (stmt)) { if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE @@ -400,7 +400,7 @@ costs::compute_local_live_ranges ( pair &live_range = live_ranges->get_or_insert (lhs, &existed_p); gcc_assert (!existed_p); - if (STMT_VINFO_MEMORY_ACCESS_TYPE (program_point.stmt_info) + if (SLP_TREE_MEMORY_ACCESS_TYPE (*node) == VMAT_LOAD_STORE_LANES) point = get_first_lane_point (program_points, program_point.stmt_info); @@ -418,8 +418,7 @@ costs::compute_local_live_ranges ( bool existed_p = false; pair &live_range = live_ranges->get_or_insert (var, &existed_p); - if (STMT_VINFO_MEMORY_ACCESS_TYPE ( - program_point.stmt_info) + if (SLP_TREE_MEMORY_ACCESS_TYPE (*node) == VMAT_LOAD_STORE_LANES) point = get_last_lane_point (program_points, program_point.stmt_info); @@ -602,13 +601,13 @@ get_store_value (gimple *stmt) /* Return true if additional vector vars needed. 
*/ bool costs::need_additional_vector_vars_p (stmt_vec_info stmt_info, - slp_tree node ATTRIBUTE_UNUSED) + slp_tree node) { - enum stmt_vec_info_type type = STMT_VINFO_TYPE (stmt_info); + enum stmt_vec_info_type type = SLP_TREE_TYPE (node); if (type == load_vec_info_type || type == store_vec_info_type) { if (STMT_VINFO_GATHER_SCATTER_P (stmt_info) - && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER) + && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))) return true; machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)); @@ -694,7 +693,7 @@ costs::update_local_live_ranges ( if (!node) continue; - if (STMT_VINFO_TYPE (stmt_info) == undef_vec_info_type) + if (SLP_TREE_TYPE (*node) == undef_vec_info_type) continue; for (j = 0; j < gimple_phi_num_args (phi); j++) @@ -773,7 +772,7 @@ costs::update_local_live_ranges ( slp_tree *node = vinfo_slp_map.get (stmt_info); if (!node) continue; - enum stmt_vec_info_type type = STMT_VINFO_TYPE (stmt_info); + enum stmt_vec_info_type type = SLP_TREE_TYPE (*node); if (need_additional_vector_vars_p (stmt_info, *node)) { /* For non-adjacent load/store STMT, we will potentially @@ -1086,7 +1085,7 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const load/store. 
*/ static int segment_loadstore_group_size (enum vect_cost_for_stmt kind, - stmt_vec_info stmt_info) + stmt_vec_info stmt_info, slp_tree node) { if (stmt_info && (kind == vector_load || kind == vector_store) @@ -1094,7 +1093,7 @@ segment_loadstore_group_size (enum vect_cost_for_stmt kind, { stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); if (stmt_info - && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES) + && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_LOAD_STORE_LANES) return DR_GROUP_SIZE (stmt_info); } return 0; @@ -1108,7 +1107,7 @@ segment_loadstore_group_size (enum vect_cost_for_stmt kind, unsigned costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop, stmt_vec_info stmt_info, - slp_tree, tree vectype, int stmt_cost) + slp_tree node, tree vectype, int stmt_cost) { const cpu_vector_cost *costs = get_vector_costs (); switch (kind) @@ -1131,7 +1130,8 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop, each vector in the group. Here we additionally add permute costs for each. */ /* TODO: Indexed and ordered/unordered cost. */ - int group_size = segment_loadstore_group_size (kind, stmt_info); + int group_size = segment_loadstore_group_size (kind, stmt_info, + node); if (group_size > 1) { switch (group_size) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 0a9fcef..591122f 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3685,7 +3685,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src) /* This test can fail if (for example) we want a HF and Z[v]fh is not enabled. In that case we just want to let the standard expansion path run. 
*/ - if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode)) + if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode) + && gen_lowpart_common (vmode, SUBREG_REG (src))) { rtx v = gen_lowpart (vmode, SUBREG_REG (src)); rtx int_reg = dest; @@ -3958,41 +3959,6 @@ riscv_extend_cost (rtx op, bool unsigned_p) return COSTS_N_INSNS (2); } -/* Return the cost of the vector binary rtx like add, minus, mult. - The cost of scalar2vr_cost will be appended if there one of the - op comes from the VEC_DUPLICATE. */ - -static int -get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost) -{ - gcc_assert (riscv_v_ext_mode_p (GET_MODE (x))); - - rtx neg; - rtx op_0; - rtx op_1; - - if (GET_CODE (x) == UNSPEC) - { - op_0 = XVECEXP (x, 0, 0); - op_1 = XVECEXP (x, 0, 1); - } - else - { - op_0 = XEXP (x, 0); - op_1 = XEXP (x, 1); - } - - if (GET_CODE (op_0) == VEC_DUPLICATE - || GET_CODE (op_1) == VEC_DUPLICATE) - return (scalar2vr_cost + 1) * COSTS_N_INSNS (1); - else if (GET_CODE (neg = op_0) == NEG - && (GET_CODE (op_1) == VEC_DUPLICATE - || GET_CODE (XEXP (neg, 0)) == VEC_DUPLICATE)) - return (scalar2vr_cost + 1) * COSTS_N_INSNS (1); - else - return COSTS_N_INSNS (1); -} - /* Implement TARGET_RTX_COSTS. 
*/ #define SINGLE_SHIFT_COST 1 @@ -4014,73 +3980,20 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN { case SET: { - switch (GET_CODE (x)) + if (GET_CODE (x) == VEC_DUPLICATE) + *total = (scalar2vr_cost + 1) * COSTS_N_INSNS (1); + else { - case VEC_DUPLICATE: - *total = gr2vr_cost * COSTS_N_INSNS (1); - break; - case IF_THEN_ELSE: - { - rtx op = XEXP (x, 1); + int vec_dup_count = 0; + subrtx_var_iterator::array_type array; - switch (GET_CODE (op)) - { - case DIV: - case UDIV: - case MOD: - case UMOD: - case US_PLUS: - case US_MINUS: - case SS_PLUS: - case SS_MINUS: - *total = get_vector_binary_rtx_cost (op, scalar2vr_cost); - break; - case UNSPEC: - { - switch (XINT (op, 1)) - { - case UNSPEC_VAADDU: - case UNSPEC_VAADD: - *total - = get_vector_binary_rtx_cost (op, scalar2vr_cost); - break; - default: - *total = COSTS_N_INSNS (1); - break; - } - } - break; - default: - *total = COSTS_N_INSNS (1); - break; - } - } - break; - case PLUS: - case MINUS: - case AND: - case IOR: - case XOR: - case MULT: - case SMAX: - case UMAX: - case SMIN: - case UMIN: - { - rtx op; - rtx op_0 = XEXP (x, 0); - rtx op_1 = XEXP (x, 1); + FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) + if (GET_CODE (*iter) == VEC_DUPLICATE) + vec_dup_count++; - if (GET_CODE (op = op_0) == MULT - || GET_CODE (op = op_1) == MULT) - *total = get_vector_binary_rtx_cost (op, scalar2vr_cost); - else - *total = get_vector_binary_rtx_cost (x, scalar2vr_cost); - } - break; - default: - *total = COSTS_N_INSNS (1); - break; + int total_vec_dup_cost = vec_dup_count * scalar2vr_cost; + + *total = COSTS_N_INSNS (1) * (total_vec_dup_cost + 1); } } break; @@ -5532,9 +5445,9 @@ canonicalize_comparands (rtx_code code, rtx *op0, rtx *op1) /* We might have been handed back a SUBREG. Just to make things easy, force it into a REG. 
*/ - if (!REG_P (*op0) && !CONST_INT_P (*op0)) + if (!REG_P (*op0) && !CONST_INT_P (*op0) && INTEGRAL_MODE_P (GET_MODE (*op0))) *op0 = force_reg (word_mode, *op0); - if (!REG_P (*op1) && !CONST_INT_P (*op1)) + if (!REG_P (*op1) && !CONST_INT_P (*op1) && INTEGRAL_MODE_P (GET_MODE (*op1))) *op1 = force_reg (word_mode, *op1); } @@ -6213,7 +6126,8 @@ riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode, For a library call, FNTYPE is 0. */ void -riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int) +riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype, + rtx, tree, int) { memset (cum, 0, sizeof (*cum)); @@ -6494,30 +6408,44 @@ riscv_arg_partial_bytes (cumulative_args_t cum, return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0; } -/* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls, - VALTYPE is the return type and MODE is VOIDmode. For libcalls, - VALTYPE is null and MODE is the mode of the return value. */ +/* Implements hook TARGET_FUNCTION_VALUE. */ rtx -riscv_function_value (const_tree type, const_tree func, machine_mode mode) +riscv_function_value (const_tree ret_type, const_tree fn_decl_or_type, + bool) { struct riscv_arg_info info; CUMULATIVE_ARGS args; - if (type) + if (fn_decl_or_type) { - int unsigned_p = TYPE_UNSIGNED (type); + const_tree fntype = TREE_CODE (fn_decl_or_type) == FUNCTION_DECL ? + TREE_TYPE (fn_decl_or_type) : fn_decl_or_type; + riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0); + } + else + memset (&args, 0, sizeof args); - mode = TYPE_MODE (type); + int unsigned_p = TYPE_UNSIGNED (ret_type); - /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes, - return values, promote the mode here too. 
*/ - mode = promote_function_mode (type, mode, &unsigned_p, func, 1); - } + machine_mode mode = TYPE_MODE (ret_type); - memset (&args, 0, sizeof args); + /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes, + return values, promote the mode here too. */ + mode = promote_function_mode (ret_type, mode, &unsigned_p, fn_decl_or_type, 1); - return riscv_get_arg_info (&info, &args, mode, type, true, true); + return riscv_get_arg_info (&info, &args, mode, ret_type, true, true); +} + +/* Implements hook TARGET_LIBCALL_VALUE. */ + +rtx +riscv_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) +{ + struct riscv_arg_info info; + CUMULATIVE_ARGS args; + memset (&args, 0, sizeof args); + return riscv_get_arg_info (&info, &args, mode, NULL_TREE, true, true); } /* Implement TARGET_PASS_BY_REFERENCE. */ @@ -13867,9 +13795,9 @@ riscv_expand_xmode_usmul (rtx dest, rtx x, rtx y) riscv_emit_binary (MULT, mul, x, y); if (TARGET_64BIT) - emit_insn (gen_usmuldi3_highpart (mulhu, x, y)); + emit_insn (gen_umuldi3_highpart (mulhu, x, y)); else - emit_insn (gen_usmulsi3_highpart (mulhu, x, y)); + emit_insn (gen_umulsi3_highpart (mulhu, x, y)); riscv_emit_binary (NE, overflow_p, mulhu, CONST0_RTX (Xmode)); riscv_emit_unary (NEG, overflow_p, overflow_p); @@ -14037,10 +13965,13 @@ riscv_c_mode_for_floating_type (enum tree_index ti) return default_mode_for_floating_type (ti); } -/* This parses the attribute arguments to target_version in DECL and modifies - the feature mask and priority required to select those targets. */ +/* Parse the attribute arguments to target_version in DECL and modify + the feature mask and priority required to select those targets. + If LOC is nonnull, report diagnostics against *LOC, otherwise + remain silent. 
*/ static void parse_features_for_version (tree decl, + location_t *loc, struct riscv_feature_bits &res, int &priority) { @@ -14071,14 +14002,12 @@ parse_features_for_version (tree decl, cl_target_option_restore (&global_options, &global_options_set, default_opts); - riscv_process_target_version_attr (TREE_VALUE (version_attr), - DECL_SOURCE_LOCATION (decl)); + riscv_process_target_version_attr (TREE_VALUE (version_attr), loc); priority = global_options.x_riscv_fmv_priority; const char *arch_string = global_options.x_riscv_arch_string; bool parse_res - = riscv_minimal_hwprobe_feature_bits (arch_string, &res, - DECL_SOURCE_LOCATION (decl)); + = riscv_minimal_hwprobe_feature_bits (arch_string, &res, loc); gcc_assert (parse_res); cl_target_option_restore (&global_options, &global_options_set, @@ -14135,8 +14064,8 @@ riscv_compare_version_priority (tree decl1, tree decl2) struct riscv_feature_bits mask1, mask2; int prio1, prio2; - parse_features_for_version (decl1, mask1, prio1); - parse_features_for_version (decl2, mask2, prio2); + parse_features_for_version (decl1, nullptr, mask1, prio1); + parse_features_for_version (decl2, nullptr, mask2, prio2); return compare_fmv_features (mask1, mask2, prio1, prio2); } @@ -14439,6 +14368,7 @@ dispatch_function_versions (tree dispatch_decl, version_info.version_decl = version_decl; // Get attribute string, parse it and find the right features. parse_features_for_version (version_decl, + &DECL_SOURCE_LOCATION (version_decl), version_info.features, version_info.prio); function_versions.push_back (version_info); @@ -15441,6 +15371,217 @@ synthesize_and (rtx operands[3]) return true; } +/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2]. + + OPERANDS[0] and OPERANDS[1] will be a REG and may be the same + REG. + + OPERANDS[2] is a CONST_INT. + + Return TRUE if the operation was fully synthesized and the caller + need not generate additional code. 
Return FALSE if the operation + was not synthesized and the caller is responsible for emitting the + proper sequence. */ + +bool +synthesize_add (rtx operands[3]) +{ + /* Trivial cases that don't need synthesis. */ + if (SMALL_OPERAND (INTVAL (operands[2]))) + return false; + + int budget1 = riscv_const_insns (operands[2], true); + int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true); + + HOST_WIDE_INT ival = INTVAL (operands[2]); + + /* If we can emit two addi insns then that's better than synthesizing + the constant into a temporary, then adding the temporary to the + other input. The exception is when the constant can be loaded + in a single instruction which can issue whenever its convenient. */ + if (SUM_OF_TWO_S12 (ival) && budget1 >= 2) + { + HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1); + + if (ival >= 0) + saturated = ~saturated; + + ival -= saturated; + + rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (saturated)); + emit_insn (gen_rtx_SET (operands[0], x)); + rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT (ival)); + emit_insn (gen_rtx_SET (operands[0], output)); + return true; + } + + /* If we can shift the constant by 1, 2, or 3 bit positions + and the result is a cheaper constant, then do so. */ + ival = INTVAL (operands[2]); + if (TARGET_ZBA + && (((ival % 2) == 0 && budget1 + > riscv_const_insns (GEN_INT (ival >> 1), true)) + || ((ival % 4) == 0 && budget1 + > riscv_const_insns (GEN_INT (ival >> 2), true)) + || ((ival % 8) == 0 && budget1 + > riscv_const_insns (GEN_INT (ival >> 3), true)))) + { + // Load the shifted constant into a temporary + int shct = ctz_hwi (ival); + + /* We can handle shifting up to 3 bit positions via shNadd. */ + if (shct > 3) + shct = 3; + + /* The adjusted constant may still need synthesis, so do not copy + it directly into register. Let the expander handle it. 
*/ + rtx tmp = force_reg (word_mode, GEN_INT (ival >> shct)); + + /* Generate shift-add of temporary and operands[1] + into the final destination. */ + rtx x = gen_rtx_ASHIFT (word_mode, tmp, GEN_INT (shct)); + rtx output = gen_rtx_PLUS (word_mode, x, operands[1]); + emit_insn (gen_rtx_SET (operands[0], output)); + return true; + } + + /* If the negated constant is cheaper than the original, then negate + the constant and use sub. */ + if (budget2 < budget1) + { + // load -INTVAL (operands[2]) into a temporary + rtx tmp = force_reg (word_mode, GEN_INT (-INTVAL (operands[2]))); + + // subtract operads[2] from operands[1] + rtx output = gen_rtx_MINUS (word_mode, operands[1], tmp); + emit_insn (gen_rtx_SET (operands[0], output)); + return true; + } + + /* No add synthesis was found. Synthesize the constant into + a temporary and use that. */ + rtx x = force_reg (word_mode, operands[2]); + x = gen_rtx_PLUS (word_mode, operands[1], x); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; +} + +/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2]. + + For 32-bit object cases with a 64-bit target. + + OPERANDS[0] and OPERANDS[1] will be a REG and may be the same + REG. + + OPERANDS[2] is a CONST_INT. + + Return TRUE if the operation was fully synthesized and the caller + need not generate additional code. Return FALSE if the operation + was not synthesized and the caller is responsible for emitting the + proper sequence. */ + + +bool +synthesize_add_extended (rtx operands[3]) +{ + +/* If operands[2] is a 12-bit signed immediate, + no synthesis needs to be done. */ + + if (SMALL_OPERAND (INTVAL (operands[2]))) + return false; + + HOST_WIDE_INT ival = INTVAL (operands[2]); + int budget1 = riscv_const_insns (operands[2], true); + int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true); + +/* If operands[2] can be split into two 12-bit signed immediates, + split add into two adds. 
*/ + + if (SUM_OF_TWO_S12 (ival)) + { + HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1); + + if (ival >= 0) + saturated = ~saturated; + + ival -= saturated; + + rtx temp = gen_reg_rtx (DImode); + emit_insn (gen_addsi3_extended (temp, operands[1], GEN_INT (saturated))); + temp = gen_lowpart (SImode, temp); + SUBREG_PROMOTED_VAR_P (temp) = 1; + SUBREG_PROMOTED_SET (temp, SRP_SIGNED); + emit_insn (gen_rtx_SET (operands[0], temp)); + rtx t = gen_reg_rtx (DImode); + emit_insn (gen_addsi3_extended (t, operands[0], GEN_INT (ival))); + t = gen_lowpart (SImode, t); + SUBREG_PROMOTED_VAR_P (t) = 1; + SUBREG_PROMOTED_SET (t, SRP_SIGNED); + emit_move_insn (operands[0], t); + return true; + } + + +/* If the negated value is cheaper to synthesize, subtract that from + operands[1]. */ + + if (budget2 < budget1) + { + rtx tmp = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (tmp, GEN_INT (-INTVAL (operands[2])))); + + rtx t = gen_reg_rtx (DImode); + emit_insn (gen_subsi3_extended (t, operands[1], tmp)); + t = gen_lowpart (SImode, t); + SUBREG_PROMOTED_VAR_P (t) = 1; + SUBREG_PROMOTED_SET (t, SRP_SIGNED); + emit_move_insn (operands[0], t); + return true; + } + + rtx tsrc = force_reg (SImode, operands[2]); + rtx tdest = gen_reg_rtx (DImode); + emit_insn (gen_addsi3_extended (tdest, operands[1], tsrc)); + tdest = gen_lowpart (SImode, tdest); + SUBREG_PROMOTED_VAR_P (tdest) = 1; + SUBREG_PROMOTED_SET (tdest, SRP_SIGNED); + emit_move_insn (operands[0], tdest); + return true; + +} + + +/* + HINT : argument specify the target cache + + TODO : LOCALITY is unused. + + Return the first operand of the associated PREF or PREFX insn. */ +rtx +riscv_prefetch_cookie (rtx hint, rtx locality) +{ + return (GEN_INT (INTVAL (hint) + + CacheHint::DCACHE_HINT + INTVAL (locality) * 0)); +} + +/* Return true if X is a legitimate address with offset for prefetch. + MODE is the mode of the value being accessed. 
*/ +bool +riscv_prefetch_offset_address_p (rtx x, machine_mode mode) +{ + struct riscv_address_info addr; + + if (riscv_classify_address (&addr, x, mode, false) + && addr.type == ADDRESS_REG) + { + if (TARGET_XMIPSCBOP) + return (CONST_INT_P (addr.offset) + && MIPS_RISCV_9BIT_OFFSET_P (INTVAL (addr.offset))); + } + + return true; +} /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP @@ -15804,6 +15945,12 @@ synthesize_and (rtx operands[3]) #undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P #define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE riscv_function_value + +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE riscv_libcall_value + #undef TARGET_FUNCTION_VALUE_REGNO_P #define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 45fa521..9146571 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -71,7 +71,7 @@ extern const char *riscv_arch_help (int argc, const char **argv); {"tune", "%{!mtune=*:" \ " %{!mcpu=*:-mtune=%(VALUE)}" \ " %{mcpu=*:-mtune=%:riscv_default_mtune(%* %(VALUE))}}" }, \ - {"arch", "%{!march=*:" \ + {"arch", "%{!march=*|march=unset:" \ " %{!mcpu=*:-march=%(VALUE)}" \ " %{mcpu=*:%:riscv_expand_arch_from_cpu(%* %(VALUE))}}" }, \ {"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \ @@ -111,13 +111,19 @@ extern const char *riscv_arch_help (int argc, const char **argv); %(subtarget_asm_spec)" \ ASM_MISA_SPEC +/* Drop all -march=* options before -march=unset. 
*/ +#define ARCH_UNSET_CLEANUP_SPECS \ + "%{march=unset:%<march=*} " \ + #undef DRIVER_SELF_SPECS #define DRIVER_SELF_SPECS \ +ARCH_UNSET_CLEANUP_SPECS \ "%{march=help:%:riscv_arch_help()} " \ "%{print-supported-extensions:%:riscv_arch_help()} " \ "%{-print-supported-extensions:%:riscv_arch_help()} " \ "%{march=*:%:riscv_expand_arch(%*)} " \ -"%{!march=*:%{mcpu=*:%:riscv_expand_arch_from_cpu(%*)}} " +"%{!march=*|march=unset:%{mcpu=*:%:riscv_expand_arch_from_cpu(%*)}} " \ +"%{march=unset:%{!mcpu=*:%eAt least one valid -mcpu option must be given after -march=unset}} " #define LOCAL_LABEL_PREFIX "." #define USER_LABEL_PREFIX "" @@ -759,12 +765,6 @@ enum reg_class #define CALLEE_SAVED_FREG_NUMBER(REGNO) CALLEE_SAVED_REG_NUMBER (REGNO - 32) -#define LIBCALL_VALUE(MODE) \ - riscv_function_value (NULL_TREE, NULL_TREE, MODE) - -#define FUNCTION_VALUE(VALTYPE, FUNC) \ - riscv_function_value (VALTYPE, FUNC, VOIDmode) - /* 1 if N is a possible register number for function argument passing. We have no FP argument registers when soft-float. */ @@ -1319,4 +1319,15 @@ extern void riscv_remove_unneeded_save_restore_calls (void); #define TARGET_HAS_FMV_TARGET_ATTRIBUTE 0 +/* mips pref valid offset range. */ +#define MIPS_RISCV_9BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, 0, 511)) + +/* mips pref cache hint type. */ +typedef enum { + ICACHE_HINT = 0 << 3, + DCACHE_HINT = 1 << 3, + SCACHE_HINT = 2 << 3, + TCACHE_HINT = 3 << 3 +} CacheHint; + #endif /* ! 
GCC_RISCV_H */ diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 578dd43..d34405c 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -712,24 +712,45 @@ (set_attr "mode" "SI")]) (define_expand "addsi3" - [(set (match_operand:SI 0 "register_operand" "=r,r") - (plus:SI (match_operand:SI 1 "register_operand" " r,r") - (match_operand:SI 2 "arith_operand" " r,I")))] + [(set (match_operand:SI 0 "register_operand") + (plus:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "reg_or_const_int_operand")))] "" { + /* We may be able to find a faster sequence, if so, then we are + done. Otherwise let expansion continue normally. */ + if (CONST_INT_P (operands[2]) + && ((!TARGET_64BIT && synthesize_add (operands)) + || (TARGET_64BIT && synthesize_add_extended (operands)))) + DONE; + + /* Constants have already been handled already. */ if (TARGET_64BIT) { - rtx t = gen_reg_rtx (DImode); - emit_insn (gen_addsi3_extended (t, operands[1], operands[2])); - t = gen_lowpart (SImode, t); - SUBREG_PROMOTED_VAR_P (t) = 1; - SUBREG_PROMOTED_SET (t, SRP_SIGNED); - emit_move_insn (operands[0], t); + rtx tdest = gen_reg_rtx (DImode); + emit_insn (gen_addsi3_extended (tdest, operands[1], operands[2])); + tdest = gen_lowpart (SImode, tdest); + SUBREG_PROMOTED_VAR_P (tdest) = 1; + SUBREG_PROMOTED_SET (tdest, SRP_SIGNED); + emit_move_insn (operands[0], tdest); DONE; } + }) -(define_insn "adddi3" +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_const_int_operand")))] + "TARGET_64BIT" +{ + /* We may be able to find a faster sequence, if so, then we are + done. Otherwise let expansion continue normally. 
*/ + if (CONST_INT_P (operands[2]) && synthesize_add (operands)) + DONE; +}) + +(define_insn "*adddi3" [(set (match_operand:DI 0 "register_operand" "=r,r") (plus:DI (match_operand:DI 1 "register_operand" " r,r") (match_operand:DI 2 "arith_operand" " r,I")))] @@ -2293,12 +2314,16 @@ rtx abs_reg = gen_reg_rtx (<ANYF:MODE>mode); rtx coeff_reg = gen_reg_rtx (<ANYF:MODE>mode); rtx tmp_reg = gen_reg_rtx (<ANYF:MODE>mode); + rtx fflags = gen_reg_rtx (SImode); riscv_emit_move (tmp_reg, operands[1]); riscv_emit_move (coeff_reg, riscv_vector::get_fp_rounding_coefficient (<ANYF:MODE>mode)); emit_insn (gen_abs<ANYF:mode>2 (abs_reg, operands[1])); + /* fp compare can set invalid flag for NaN, so backup fflags. */ + if (flag_trapping_math) + emit_insn (gen_riscv_frflags (fflags)); riscv_expand_conditional_branch (label, LT, abs_reg, coeff_reg); emit_jump_insn (gen_jump (end_label)); @@ -2324,6 +2349,14 @@ emit_insn (gen_copysign<ANYF:mode>3 (tmp_reg, abs_reg, operands[1])); emit_label (end_label); + + /* Restore fflags, but after label. This is slightly different + than glibc implementation which only needs to restore under + the label, since it checks for NaN first, meaning following fp + compare can't raise fp exceptons and thus not clobber fflags. */ + if (flag_trapping_math) + emit_insn (gen_riscv_fsflags (fflags)); + riscv_emit_move (operands[0], tmp_reg); } @@ -4402,11 +4435,21 @@ ) (define_insn "prefetch" - [(prefetch (match_operand 0 "prefetch_operand" "Qr") - (match_operand 1 "imm5_operand" "i") - (match_operand 2 "const_int_operand" "n"))] - "TARGET_ZICBOP" + [(prefetch (match_operand 0 "prefetch_operand" "Qr,ZD") + (match_operand 1 "imm5_operand" "i,i") + (match_operand 2 "const_int_operand" "n,n"))] + "TARGET_ZICBOP || TARGET_XMIPSCBOP" { + if (TARGET_XMIPSCBOP) + { + /* Mips Prefetch write is nop for p8700. 
*/ + if (operands[1] != CONST0_RTX (GET_MODE (operands[1]))) + return "nop"; + + operands[1] = riscv_prefetch_cookie (operands[1], operands[2]); + return "mips.pref\t%1,%a0"; + } + switch (INTVAL (operands[1])) { case 0: diff --git a/gcc/config/riscv/sifive-p400.md b/gcc/config/riscv/sifive-p400.md index ed8b8ec..0acdbda 100644 --- a/gcc/config/riscv/sifive-p400.md +++ b/gcc/config/riscv/sifive-p400.md @@ -153,10 +153,13 @@ (eq_attr "type" "fmove,fcvt")) "p400_float_pipe,sifive_p400_fpu") +;; We need something for HF so that we don't abort during +;; scheduling if someone was to ask for p400 scheduling, but +;; enable the various HF mode extensions. (define_insn_reservation "sifive_p400_fdiv_s" 18 (and (eq_attr "tune" "sifive_p400") (eq_attr "type" "fdiv,fsqrt") - (eq_attr "mode" "SF")) + (eq_attr "mode" "HF,SF")) "sifive_p400_FM, sifive_p400_fdiv*5") (define_insn_reservation "sifive_p400_fdiv_d" 31 @@ -178,3 +181,18 @@ (define_bypass 1 "sifive_p400_f2i" "sifive_p400_branch,sifive_p400_sfb_alu,sifive_p400_mul, sifive_p400_div,sifive_p400_alu,sifive_p400_cpop") + + +;; Someone familiar with the p400 uarch needs to put +;; these into the right reservations. This is just a placeholder +;; for everything I found that had no mapping to a reservation. +;; +;; Note that even if the processor does not implementat a particular +;; instruction it should still have suitable reservations, even if +;; they are just dummies like this one. 
+(define_insn_reservation "sifive_p400_unknown" 1 + (and (eq_attr "tune" "sifive_p400") + (eq_attr "type" "ghost,vfrecp,vclmul,vldm,vmffs,vclmulh,vlsegde,vfcvtitof,vsm4k,vfcvtftoi,vfdiv,vsm3c,vsm4r,viwmuladd,vfwredu,vcpop,vfwmuladd,vstux,vsshift,vfwcvtftof,vfncvtftof,vfwmaccbf16,vext,vssegte,rdvl,vaeskf1,vfslide1up,vmov,vimovvx,vaesef,vfsqrt,viminmax,vfwcvtftoi,vssegtox,vfclass,viwmul,vector,vgmul,vsm3me,vfcmp,vstm,vfredo,vfwmul,vaeskf2,vstox,vfncvtbf16,vislide1up,vgather,vldox,viwred,vctz,vghsh,vsts,vslidedown,vfmerge,vicmp,vsmul,vlsegdff,vfalu,vfmov,vislide1down,vfminmax,vcompress,vldr,vldff,vlsegdux,vimuladd,vsalu,vidiv,sf_vqmacc,vfslide1down,vaesem,vimerge,vfncvtftoi,vfwcvtitof,vicalu,vaesz,sf_vc_se,vsha2cl,vmsfs,vldux,vmidx,vslideup,vired,vlde,vfwredo,vfmovfv,vbrev,vfncvtitof,rdfrm,vsetvl,vssegts,vimul,vialu,vbrev8,vfwalu,rdvlenb,sf_vfnrclip,vclz,vnclip,sf_vc,vimov,vste,vfmuladd,vfmovvf,vwsll,vsetvl_pre,vlds,vlsegds,vmiota,vmalu,wrvxrm,wrfrm,viwalu,vaesdm,vssegtux,vaesdf,vimovxv,vror,vnshift,vstr,vaalu,vsha2ms,crypto,vfwcvtbf16,vlsegdox,vrol,vandn,vfsgnj,vmpop,vfredu,vsha2ch,vshift,vrev8,vfmul")) + "p400_int_pipe+sifive_p400_ialu") + + diff --git a/gcc/config/riscv/sifive-p600.md b/gcc/config/riscv/sifive-p600.md index 2401349..ccd006d 100644 --- a/gcc/config/riscv/sifive-p600.md +++ b/gcc/config/riscv/sifive-p600.md @@ -157,10 +157,13 @@ (eq_attr "type" "fmove,fcvt")) "float_pipe,sifive_p600_fpu") +;; We need something for HF so that we don't abort during +;; scheduling if someone was to ask for p600 scheduling, but +;; enable the various HF mode extensions. 
(define_insn_reservation "sifive_p600_fdiv_s" 11 (and (eq_attr "tune" "sifive_p600") (eq_attr "type" "fdiv,fsqrt") - (eq_attr "mode" "SF")) + (eq_attr "mode" "HF,SF")) "sifive_p600_FM, sifive_p600_fdiv*5") (define_insn_reservation "sifive_p600_fdiv_d" 19 @@ -182,3 +185,15 @@ (define_bypass 1 "sifive_p600_f2i" "sifive_p600_branch,sifive_p600_sfb_alu,sifive_p600_mul, sifive_p600_div,sifive_p600_alu,sifive_p600_cpop") + +;; Someone familiar with the p600 uarch needs to put +;; these into the right reservations. This is just a placeholder +;; for everything I found that had no mapping to a reservation. +;; +;; Note that even if the processor does not implementat a particular +;; instruction it should still have suitable reservations, even if +;; they are just dummies like this one. +(define_insn_reservation "sifive_p600_unknown" 1 + (and (eq_attr "tune" "sifive_p600") + (eq_attr "type" "vicmp,vssegte,vbrev8,vfwalu,vimov,vmpop,vaesdf,vislide1up,vror,vsha2cl,vrol,vslideup,vimuladd,vclmul,vaesef,vext,vlsegdff,vfmuladd,vfclass,vmsfs,vfcmp,vsmul,vsm3me,vmalu,vshift,viwmuladd,vfslide1up,vlsegde,vsm4k,wrvxrm,vislide1down,vsm3c,vfwmuladd,vaesdm,vclmulh,vfwcvtftof,vfwredu,vfredo,sf_vfnrclip,vaesz,vwsll,vmiota,vctz,vsetvl_pre,vstm,vidiv,vssegtux,vfwmul,vcompress,vste,vired,vlsegds,vaesem,vfminmax,ghost,vandn,crypto,vfmul,vialu,vfmovvf,rdfrm,vldff,vfmerge,vsshift,vnclip,sf_vqmacc,vnshift,vfdiv,vfslide1down,vfncvtitof,vfsqrt,vimovxv,vstr,vfwcvtbf16,vfwcvtitof,vbrev,vssegtox,vssegts,vcpop,vmffs,viwmul,vldr,vmidx,rdvlenb,vfalu,vslidedown,vlde,vfsgnj,vfmov,viwalu,vsha2ch,vfncvtbf16,vfcvtitof,rdvl,vsetvl,vsha2ms,vector,vstux,vimerge,vclz,sf_vc,vfcvtftoi,viminmax,vsm4r,sf_vc_se,wrfrm,vstox,vfmovfv,vfncvtftoi,vimul,vsalu,vmov,vgmul,vgather,vldux,vlsegdox,vfncvtftof,vimovvx,vghsh,vldm,vldox,vfwcvtftoi,vlds,vfrecp,vaeskf2,vsts,vfredu,vicalu,vaalu,vfwmaccbf16,vrev8,vfwredo,vlsegdux,viwred,vaeskf1")) + "int_pipe+sifive_p600_ialu") diff --git a/gcc/config/riscv/sync.md 
b/gcc/config/riscv/sync.md index 50ec8b3..e47bb41 100644 --- a/gcc/config/riscv/sync.md +++ b/gcc/config/riscv/sync.md @@ -386,13 +386,13 @@ }) (define_insn "amo_atomic_exchange<mode>" - [(set (match_operand:GPR 0 "register_operand" "=&r") + [(set (match_operand:GPR 0 "register_operand" "=r") (unspec_volatile:GPR [(match_operand:GPR 1 "memory_operand" "+A") (match_operand:SI 3 "const_int_operand")] ;; model UNSPEC_SYNC_EXCHANGE)) (set (match_dup 1) - (match_operand:GPR 2 "register_operand" "0"))] + (match_operand:GPR 2 "reg_or_0_operand" "rJ"))] "TARGET_ZAAMO" "amoswap.<amo>%A3\t%0,%z2,%1" [(set_attr "type" "atomic") @@ -434,13 +434,13 @@ }) (define_insn "zabha_atomic_exchange<mode>" - [(set (match_operand:SHORT 0 "register_operand" "=&r") + [(set (match_operand:SHORT 0 "register_operand" "=r") (unspec_volatile:SHORT [(match_operand:SHORT 1 "memory_operand" "+A") (match_operand:SI 3 "const_int_operand")] ;; model UNSPEC_SYNC_EXCHANGE_ZABHA)) (set (match_dup 1) - (match_operand:SHORT 2 "register_operand" "0"))] + (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))] "TARGET_ZABHA" "amoswap.<amobh>%A3\t%0,%z2,%1" [(set_attr "type" "atomic") diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 7aac56a..a7eaa8b 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -229,8 +229,41 @@ s-riscv-ext.texi: build/gen-riscv-ext-texi$(build_exeext) $(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.texi $(srcdir)/doc/riscv-ext.texi $(STAMP) s-riscv-ext.texi -# Run `riscv-regen' after you changed or added anything from riscv-ext*.def +RISCV_CORES_DEFS = \ + $(srcdir)/config/riscv/riscv-cores.def + +build/gen-riscv-mtune-texi.o: $(srcdir)/config/riscv/gen-riscv-mtune-texi.cc \ + $(RISCV_CORES_DEFS) + $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@ + +build/gen-riscv-mcpu-texi.o: $(srcdir)/config/riscv/gen-riscv-mcpu-texi.cc \ + $(RISCV_CORES_DEFS) + $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@ + +build/gen-riscv-mtune-texi$(build_exeext): 
build/gen-riscv-mtune-texi.o + $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $< + +build/gen-riscv-mcpu-texi$(build_exeext): build/gen-riscv-mcpu-texi.o + $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $< + +$(srcdir)/doc/riscv-mtune.texi: $(RISCV_CORES_DEFS) +$(srcdir)/doc/riscv-mtune.texi: s-riscv-mtune.texi ; @true + +$(srcdir)/doc/riscv-mcpu.texi: $(RISCV_CORES_DEFS) +$(srcdir)/doc/riscv-mcpu.texi: s-riscv-mcpu.texi ; @true + +s-riscv-mtune.texi: build/gen-riscv-mtune-texi$(build_exeext) + $(RUN_GEN) build/gen-riscv-mtune-texi$(build_exeext) > tmp-riscv-mtune.texi + $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mtune.texi $(srcdir)/doc/riscv-mtune.texi + $(STAMP) s-riscv-mtune.texi + +s-riscv-mcpu.texi: build/gen-riscv-mcpu-texi$(build_exeext) + $(RUN_GEN) build/gen-riscv-mcpu-texi$(build_exeext) > tmp-riscv-mcpu.texi + $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mcpu.texi $(srcdir)/doc/riscv-mcpu.texi + $(STAMP) s-riscv-mcpu.texi + +# Run `riscv-regen' after you changed or added anything from riscv-ext*.def and riscv-cores*.def .PHONY: riscv-regen -riscv-regen: s-riscv-ext.texi s-riscv-ext.opt +riscv-regen: s-riscv-ext.texi s-riscv-ext.opt s-riscv-mtune.texi s-riscv-mcpu.texi diff --git a/gcc/config/riscv/t-rtems b/gcc/config/riscv/t-rtems index f596e76..a4d2d03 100644 --- a/gcc/config/riscv/t-rtems +++ b/gcc/config/riscv/t-rtems @@ -1,8 +1,8 @@ MULTILIB_OPTIONS = MULTILIB_DIRNAMES = -MULTILIB_OPTIONS += march=rv32i/march=rv32iac/march=rv32im/march=rv32imf/march=rv32ima/march=rv32imac/march=rv32imaf/march=rv32imafc/march=rv32imafd/march=rv32imafdc/march=rv64ima/march=rv64imac/march=rv64imafd/march=rv64imafdc -MULTILIB_DIRNAMES += rv32i rv32iac rv32im rv32imf rv32ima rv32imac rv32imaf rv32imafc rv32imafd rv32imafdc rv64ima rv64imac rv64imafd rv64imafdc +MULTILIB_OPTIONS += 
march=rv32i/march=rv32iac/march=rv32im/march=rv32imf/march=rv32ima/march=rv32imac/march=rv32imaf/march=rv32imafc/march=rv32imafd/march=rv32imafdc/march=rv64ima/march=rv64imac/march=rv64imafd/march=rv64imafdc/march=rv64imc +MULTILIB_DIRNAMES += rv32i rv32iac rv32im rv32imf rv32ima rv32imac rv32imaf rv32imafc rv32imafd rv32imafdc rv64ima rv64imac rv64imafd rv64imafdc rv64imc MULTILIB_OPTIONS += mabi=ilp32/mabi=ilp32f/mabi=ilp32d/mabi=lp64/mabi=lp64d MULTILIB_DIRNAMES += ilp32 ilp32f ilp32d lp64 lp64d @@ -10,6 +10,9 @@ MULTILIB_DIRNAMES += ilp32 ilp32f ilp32d lp64 lp64d MULTILIB_OPTIONS += mcmodel=medany MULTILIB_DIRNAMES += medany +MULTILIB_OPTIONS += mstrict-align +MULTILIB_DIRNAMES += strict-align + MULTILIB_REQUIRED = MULTILIB_REQUIRED += march=rv32i/mabi=ilp32 MULTILIB_REQUIRED += march=rv32iac/mabi=ilp32 @@ -25,3 +28,5 @@ MULTILIB_REQUIRED += march=rv64ima/mabi=lp64/mcmodel=medany MULTILIB_REQUIRED += march=rv64imac/mabi=lp64/mcmodel=medany MULTILIB_REQUIRED += march=rv64imafd/mabi=lp64d/mcmodel=medany MULTILIB_REQUIRED += march=rv64imafdc/mabi=lp64d/mcmodel=medany +MULTILIB_REQUIRED += march=rv64imafdc/mabi=lp64d/mcmodel=medany/mstrict-align +MULTILIB_REQUIRED += march=rv64imc/mabi=lp64/mcmodel=medany/mstrict-align diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 66b7670..2b35d66 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -1398,6 +1398,7 @@ } [(set_attr "type" "vmov,vlde,vste") (set_attr "mode" "<VT:MODE>") + (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE)) (set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE)) (set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))]) @@ -1435,6 +1436,7 @@ } [(set_attr "type" "vlde,vste,vmov") (set_attr "mode" "<MODE>") + (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE)) (set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE)) (set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))] ) @@ -1485,6 +1487,7 @@ } [(set_attr "type" "vlde,vste,vmov") (set_attr 
"mode" "<VLS_AVL_REG:MODE>") + (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE)) (set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE)) (set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))] ) @@ -5490,6 +5493,98 @@ "TARGET_VECTOR" {}) +(define_expand "@pred_mul_plus_vx_<mode>" + [(set (match_operand:V_VLSI_QHS 0 "register_operand") + (if_then_else:V_VLSI_QHS + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_QHS + (mult:V_VLSI_QHS + (vec_duplicate:V_VLSI_QHS + (match_operand:<VEL> 2 "register_operand")) + (match_operand:V_VLSI_QHS 3 "register_operand")) + (match_operand:V_VLSI_QHS 4 "register_operand")) + (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))] + "TARGET_VECTOR" +{ + riscv_vector::prepare_ternary_operands (operands); +}) + +(define_expand "@pred_mul_plus_vx_<mode>" + [(set (match_operand:V_VLSI_D 0 "register_operand") + (if_then_else:V_VLSI_D + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_D + (mult:V_VLSI_D + (vec_duplicate:V_VLSI_D + (match_operand:<VEL> 2 "register_operand")) + (match_operand:V_VLSI_D 3 "register_operand")) + (match_operand:V_VLSI_D 4 "register_operand")) + (match_operand:V_VLSI_D 5 "vector_merge_operand")))] + "TARGET_VECTOR && TARGET_64BIT" +{ + riscv_vector::prepare_ternary_operands (operands); +}) + +(define_expand "@pred_vnmsac_vx_<mode>" + [(set (match_operand:V_VLSI_QHS 0 "register_operand") + (if_then_else:V_VLSI_QHS + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 6 
"vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (minus:V_VLSI_QHS + (match_operand:V_VLSI_QHS 4 "register_operand") + (mult:V_VLSI_QHS + (vec_duplicate:V_VLSI_QHS + (match_operand:<VEL> 2 "register_operand")) + (match_operand:V_VLSI_QHS 3 "register_operand"))) + (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))] + "TARGET_VECTOR" +{ + riscv_vector::prepare_ternary_operands (operands); +}) + +(define_expand "@pred_vnmsac_vx_<mode>" + [(set (match_operand:V_VLSI_D 0 "register_operand") + (if_then_else:V_VLSI_D + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (minus:V_VLSI_D + (match_operand:V_VLSI_D 4 "register_operand") + (mult:V_VLSI_D + (vec_duplicate:V_VLSI_D + (match_operand:<VEL> 2 "register_operand")) + (match_operand:V_VLSI_D 3 "register_operand"))) + (match_operand:V_VLSI_D 5 "vector_merge_operand")))] + "TARGET_VECTOR && TARGET_64BIT" +{ + riscv_vector::prepare_ternary_operands (operands); +}) + (define_insn "*pred_madd<mode>_scalar" [(set (match_operand:V_VLSI 0 "register_operand" "=vd, vr") (if_then_else:V_VLSI @@ -6324,8 +6419,8 @@ (set_attr "mode" "<MODE>")]) (define_insn "@pred_<optab><mode>_scalar" - [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr") - (if_then_else:VF + [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr") + (if_then_else:V_VLSF (unspec:<VM> [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl") @@ -6336,11 +6431,11 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) - (commutative_float_binop:VF - 
(vec_duplicate:VF + (commutative_float_binop:V_VLSF + (vec_duplicate:V_VLSF (match_operand:<VEL> 4 "register_operand" " f, f, f, f")) - (match_operand:VF 3 "register_operand" " vr, vr, vr, vr")) - (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))] + (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr")) + (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "vf<insn>.vf\t%0,%3,%4%p1" [(set_attr "type" "<float_insn_type>") @@ -6349,43 +6444,43 @@ (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))]) (define_insn "@pred_<optab><mode>_scalar" - [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr") - (if_then_else:VF + [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr") + (if_then_else:V_VLSF (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") - (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl") - (match_operand 6 "const_int_operand" " i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i") - (match_operand 8 "const_int_operand" " i, i, i, i") + [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") + (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl") + (match_operand 6 "const_int_operand" " i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (commutative_float_binop_nofrm:VF - (vec_duplicate:VF - (match_operand:<VEL> 4 "register_operand" " f, f, f, f")) - (match_operand:VF 3 "register_operand" " vr, vr, vr, vr")) - (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))] + (commutative_float_binop_nofrm:V_VLSF + (vec_duplicate:V_VLSF + (match_operand:<VEL> 4 "register_operand" " f, f, f, f")) + (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr")) + (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "vf<insn>.vf\t%0,%3,%4%p1" [(set_attr "type" 
"<float_insn_type>") (set_attr "mode" "<MODE>")]) (define_insn "@pred_<ieee_fmaxmin_op><mode>_scalar" - [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr") - (if_then_else:VF + [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr") + (if_then_else:V_VLSF (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") - (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl") - (match_operand 6 "const_int_operand" " i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i") - (match_operand 8 "const_int_operand" " i, i, i, i") + [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") + (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl") + (match_operand 6 "const_int_operand" " i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i") + (match_operand 8 "const_int_operand" " i, i, i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (unspec:VF - [(match_operand:VF 3 "register_operand" " vr, vr, vr, vr") - (vec_duplicate:VF + (unspec:V_VLSF + [(match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr") + (vec_duplicate:V_VLSF (match_operand:<VEL> 4 "register_operand" " f, f, f, f"))] UNSPEC_VFMAXMIN) - (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))] + (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "v<ieee_fmaxmin_op>.vf\t%0,%3,%4%p1" [(set_attr "type" "vfminmax") @@ -6417,8 +6512,8 @@ (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))]) (define_insn "@pred_<optab><mode>_reverse_scalar" - [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr") - (if_then_else:VF + [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr") + (if_then_else:V_VLSF (unspec:<VM> [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl") @@ -6429,11 +6524,11 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) - 
(non_commutative_float_binop:VF - (vec_duplicate:VF + (non_commutative_float_binop:V_VLSF + (vec_duplicate:V_VLSF (match_operand:<VEL> 4 "register_operand" " f, f, f, f")) - (match_operand:VF 3 "register_operand" " vr, vr, vr, vr")) - (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))] + (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr")) + (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "vfr<insn>.vf\t%0,%3,%4%p1" [(set_attr "type" "<float_insn_type>") @@ -8839,6 +8934,106 @@ [(set_attr "type" "vssegt<order>x") (set_attr "mode" "<V32T:MODE>")]) +(define_insn "*pred_macc_<mode>_scalar_undef" + [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr") + (if_then_else:V_VLSI_QHS + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1") + (match_operand 6 "vector_length_operand" "rvl, rvl") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_QHS + (mult:V_VLSI_QHS + (vec_duplicate:V_VLSI_QHS + (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ")) + (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr")) + (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0")) + (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))] + "TARGET_VECTOR" + "@ + vmacc.vx\t%0,%z3,%4%p1 + vmacc.vx\t%0,%z3,%4%p1" + [(set_attr "type" "vimuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*pred_macc_<mode>_scalar_undef" + [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr") + (if_then_else:V_VLSI_D + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1") + (match_operand 6 "vector_length_operand" "rvl, rvl") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) 
+ (plus:V_VLSI_D + (mult:V_VLSI_D + (vec_duplicate:V_VLSI_D + (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ")) + (match_operand:V_VLSI_D 4 "register_operand" " vr, vr")) + (match_operand:V_VLSI_D 5 "register_operand" " 0, 0")) + (match_operand:V_VLSI_D 2 "vector_undef_operand")))] + "TARGET_VECTOR && TARGET_64BIT" + "@ + vmacc.vx\t%0,%z3,%4%p1 + vmacc.vx\t%0,%z3,%4%p1" + [(set_attr "type" "vimuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*pred_nmsac_<mode>_scalar_undef" + [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr") + (if_then_else:V_VLSI_QHS + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1") + (match_operand 6 "vector_length_operand" "rvl, rvl") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (minus:V_VLSI_QHS + (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0") + (mult:V_VLSI_QHS + (vec_duplicate:V_VLSI_QHS + (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ")) + (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr"))) + (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))] + "TARGET_VECTOR" + "@ + vnmsac.vx\t%0,%z3,%4%p1 + vnmsac.vx\t%0,%z3,%4%p1" + [(set_attr "type" "vimuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*pred_nmsac_<mode>_scalar_undef" + [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr") + (if_then_else:V_VLSI_D + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1") + (match_operand 6 "vector_length_operand" "rvl, rvl") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (minus:V_VLSI_D + (match_operand:V_VLSI_D 5 "register_operand" " 0, 0") + (mult:V_VLSI_D + (vec_duplicate:V_VLSI_D + (match_operand:<VEL> 3 "reg_or_0_operand" " 
rJ, rJ")) + (match_operand:V_VLSI_D 4 "register_operand" " vr, vr"))) + (match_operand:V_VLSI_D 2 "vector_undef_operand")))] + "TARGET_VECTOR && TARGET_64BIT" + "@ + vnmsac.vx\t%0,%z3,%4%p1 + vnmsac.vx\t%0,%z3,%4%p1" + [(set_attr "type" "vimuladd") + (set_attr "mode" "<MODE>")]) + (include "autovec.md") (include "autovec-opt.md") (include "sifive-vector.md") diff --git a/gcc/config/riscv/xiangshan.md b/gcc/config/riscv/xiangshan.md index 34b4a8f..6179140 100644 --- a/gcc/config/riscv/xiangshan.md +++ b/gcc/config/riscv/xiangshan.md @@ -144,13 +144,13 @@ (define_insn_reservation "xiangshan_sfdiv" 11 (and (eq_attr "tune" "xiangshan") (eq_attr "type" "fdiv") - (eq_attr "mode" "SF")) + (eq_attr "mode" "HF,SF")) "xs_fmisc_rs") (define_insn_reservation "xiangshan_sfsqrt" 17 (and (eq_attr "tune" "xiangshan") (eq_attr "type" "fsqrt") - (eq_attr "mode" "SF")) + (eq_attr "mode" "HF,SF")) "xs_fmisc_rs") (define_insn_reservation "xiangshan_dfdiv" 21 |