import copy import logging import os import pprint import re from fnmatch import fnmatch from io import StringIO from itertools import chain from typing import Dict, NoReturn, Optional, TypedDict from .constants import ( arg_lut, fixed_ranges, imported_regex, overlapping_extensions, overlapping_instructions, pseudo_regex, single_fixed, ) from .resources import open_text_resource, resource_root LOG_FORMAT = "%(levelname)s:: %(message)s" LOG_LEVEL = logging.INFO pretty_printer = pprint.PrettyPrinter(indent=2) logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) # Log an error message def log_and_exit(message: str) -> NoReturn: """Log an error message and exit the program.""" logging.error(message) raise SystemExit(1) # Initialize encoding to 32-bit '-' values def initialize_encoding(bits: int = 32) -> "list[str]": """Initialize encoding with '-' to represent don't care bits.""" return ["-"] * bits # Validate bit range and value def validate_bit_range(msb: int, lsb: int, entry_value: int, line: str): """Validate the bit range and entry value.""" if msb < lsb: log_and_exit( f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in its encoding' ) if entry_value >= (1 << (msb - lsb + 1)): log_and_exit( f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb}' ) # Split the instruction line into name and remaining part def parse_instruction_line(line: str) -> "tuple[str, str]": """Parse the instruction name and the remaining encoding details.""" name, remaining = line.replace("\t", " ").split(" ", 1) name = name.replace(".", "_") # Replace dots for compatibility remaining = remaining.lstrip() # Remove leading whitespace return name, remaining # Verify Overlapping Bits def check_overlapping_bits(encoding: "list[str]", ind: int, line: str): """Check for overlapping bits in the encoding.""" if encoding[31 - ind] != "-": log_and_exit( f'{line.split(" ")[0]:<10} has {ind} bit overlapping in its opcodes' ) # Update encoding for fixed ranges def update_encoding_for_fixed_range( encoding: "list[str]", msb: int, lsb: int, entry_value: int, line: str ): """ Update encoding bits for a given bit range. Checks for overlapping bits and assigns the value accordingly. """ for ind in range(lsb, msb + 1): check_overlapping_bits(encoding, ind, line) bit = str((entry_value >> (ind - lsb)) & 1) encoding[31 - ind] = bit # Process fixed bit patterns def process_fixed_ranges(remaining: str, encoding: "list[str]", line: str): """Process fixed bit ranges in the encoding.""" for s2, s1, entry in fixed_ranges.findall(remaining): msb, lsb, entry_value = int(s2), int(s1), int(entry, 0) # Validate bit range and entry value validate_bit_range(msb, lsb, entry_value, line) update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line) return fixed_ranges.sub(" ", remaining) # Process single bit assignments def process_single_fixed(remaining: str, encoding: "list[str]", line: str): """Process single fixed assignments in the encoding.""" for lsb, value, _drop in single_fixed.findall(remaining): lsb = int(lsb, 0) value = int(value, 0) check_overlapping_bits(encoding, lsb, line) encoding[31 - lsb] = str(value) # Main function to check argument look-up table def check_arg_lut(args: "list[str]", encoding_args: "list[str]", name: str): """Check if arguments are present in arg_lut.""" for arg in args: if arg not in arg_lut: arg = handle_arg_lut_mapping(arg, name) msb, lsb = arg_lut[arg] update_encoding_args(encoding_args, arg, msb, lsb) # Handle missing argument mappings def handle_arg_lut_mapping(arg: str, name: str): """Handle cases where an argument needs to be mapped to an existing one.""" parts = arg.split("=") if len(parts) == 2: existing_arg, _new_arg = parts if existing_arg in arg_lut: arg_lut[arg] = arg_lut[existing_arg] else: log_and_exit( f" Found field {existing_arg} in variable {arg} in instruction {name} " f"whose mapping in arg_lut does not exist" ) else: log_and_exit( f" Found variable {arg} in instruction {name} " f"whose mapping in arg_lut does not exist" ) return arg # Update encoding args with variables def update_encoding_args(encoding_args: "list[str]", arg: str, msb: int, lsb: int): """Update encoding arguments and ensure no overlapping.""" for ind in range(lsb, msb + 1): check_overlapping_bits(encoding_args, ind, arg) encoding_args[31 - ind] = arg # Compute match and mask def convert_encoding_to_match_mask(encoding: "list[str]") -> "tuple[str, str]": """Convert the encoding list to match and mask strings.""" match = "".join(encoding).replace("-", "0") mask = "".join(encoding).replace("0", "1").replace("-", "0") return hex(int(match, 2)), hex(int(mask, 2)) class SingleInstr(TypedDict): encoding: str variable_fields: "list[str]" extension: "list[str]" match: str mask: str InstrDict = Dict[str, SingleInstr] # Processing main function for a line in the encoding file def process_enc_line(line: str, ext: str) -> "tuple[str, SingleInstr]": """ This function processes each line of the encoding files (rv*). As part of the processing, the function ensures that the encoding is legal through the following checks:: - there is no over specification (same bits assigned different values) - there is no under specification (some bits not assigned values) - bit ranges are in the format hi..lo=val where hi > lo - value assigned is representable in the bit range - also checks that the mapping of arguments of an instruction exists in arg_lut. If the above checks pass, then the function returns a tuple of the name and a dictionary containing basic information of the instruction which includes: - variables: list of arguments used by the instruction whose mapping exists in the arg_lut dictionary - encoding: this contains the 32-bit encoding of the instruction where '-' is used to represent position of arguments and 1/0 is used to reprsent the static encoding of the bits - extension: this field contains the rv* filename from which this instruction was included - match: hex value representing the bits that need to match to detect this instruction - mask: hex value representin the bits that need to be masked to extract the value required for matching. """ encoding = initialize_encoding() # Parse the instruction line name, remaining = parse_instruction_line(line) # Process fixed ranges remaining = process_fixed_ranges(remaining, encoding, line) # Process single fixed assignments process_single_fixed(remaining, encoding, line) # Convert the list of encodings into a match and mask match, mask = convert_encoding_to_match_mask(encoding) # Check arguments in arg_lut args = single_fixed.sub(" ", remaining).split() encoding_args = encoding.copy() check_arg_lut(args, encoding_args, name) # Return single_dict return name, { "encoding": "".join(encoding), "variable_fields": args, "extension": [os.path.basename(ext)], "match": match, "mask": mask, } # Extract ISA Type def extract_isa_type(ext_name: str) -> str: """Extracts the ISA type from the extension name.""" return ext_name.split("_")[0] # Verify the types for RV* def is_rv_variant(type1: str, type2: str) -> bool: """Checks if the types are RV variants (rv32/rv64).""" return (type2 == "rv" and type1 in {"rv32", "rv64"}) or ( type1 == "rv" and type2 in {"rv32", "rv64"} ) # Check for same base ISA def has_same_base_isa(type1: str, type2: str) -> bool: """Determines if the two ISA types share the same base.""" return type1 == type2 or is_rv_variant(type1, type2) # Compare the base ISA type of a given extension name against a list of extension names def same_base_isa(ext_name: str, ext_name_list: "list[str]") -> bool: """Checks if the base ISA type of ext_name matches any in ext_name_list.""" type1 = extract_isa_type(ext_name) return any(has_same_base_isa(type1, extract_isa_type(ext)) for ext in ext_name_list) # Pad two strings to equal length def pad_to_equal_length(str1: str, str2: str, pad_char: str = "-") -> "tuple[str, str]": """Pads two strings to equal length using the given padding character.""" max_len = max(len(str1), len(str2)) return str1.rjust(max_len, pad_char), str2.rjust(max_len, pad_char) # Check compatibility for two characters def has_no_conflict(char1: str, char2: str) -> bool: """Checks if two characters are compatible (either matching or don't-care).""" return char1 == "-" or char2 == "-" or char1 == char2 # Conflict check between two encoded strings def overlaps(x: str, y: str) -> bool: """Checks if two encoded strings overlap without conflict.""" x, y = pad_to_equal_length(x, y) return all(has_no_conflict(x[i], y[i]) for i in range(len(x))) # Check presence of keys in dictionary. def is_in_nested_dict(a: "dict[str, set[str]]", key1: str, key2: str) -> bool: """Checks if key2 exists in the dictionary under key1.""" return key1 in a and key2 in a[key1] # Overlap allowance def overlap_allowed(a: "dict[str, set[str]]", x: str, y: str) -> bool: """Determines if overlap is allowed between x and y based on nested dictionary checks""" return is_in_nested_dict(a, x, y) or is_in_nested_dict(a, y, x) # Check overlap allowance between extensions def extension_overlap_allowed(x: str, y: str) -> bool: """Checks if overlap is allowed between two extensions using the overlapping_extensions dictionary.""" return overlap_allowed(overlapping_extensions, x, y) # Check overlap allowance between instructions def instruction_overlap_allowed(x: str, y: str) -> bool: """Checks if overlap is allowed between two instructions using the overlapping_instructions dictionary.""" return overlap_allowed(overlapping_instructions, x, y) # Check 'nf' field def is_segmented_instruction(instruction: SingleInstr) -> bool: """Checks if an instruction contains the 'nf' field.""" return "nf" in instruction["variable_fields"] # Expand 'nf' fields def update_with_expanded_instructions( updated_dict: InstrDict, key: str, value: SingleInstr ): """Expands 'nf' fields in the instruction dictionary and updates it with new instructions.""" for new_key, new_value in expand_nf_field(key, value): updated_dict[new_key] = new_value # Process instructions, expanding segmented ones and updating the dictionary def add_segmented_vls_insn(instr_dict: InstrDict) -> InstrDict: """Processes instructions, expanding segmented ones and updating the dictionary.""" # Use dictionary comprehension for efficiency return dict( chain.from_iterable( ( expand_nf_field(key, value) if is_segmented_instruction(value) else [(key, value)] ) for key, value in instr_dict.items() ) ) # Expand the 'nf' field in the instruction dictionary def expand_nf_field( name: str, single_dict: SingleInstr ) -> "list[tuple[str, SingleInstr]]": """Validate and prepare the instruction dictionary.""" validate_nf_field(single_dict, name) remove_nf_field(single_dict) update_mask(single_dict) name_expand_index = name.find("e") # Pre compute the base match value and encoding prefix base_match = int(single_dict["match"], 16) encoding_prefix = single_dict["encoding"][3:] expanded_instructions = [ create_expanded_instruction( name, single_dict, nf, name_expand_index, base_match, encoding_prefix ) for nf in range(8) # Range of 0 to 7 ] return expanded_instructions # Validate the presence of 'nf' def validate_nf_field(single_dict: SingleInstr, name: str): """Validates the presence of 'nf' in variable fields before expansion.""" if "nf" not in single_dict["variable_fields"]: log_and_exit(f"Cannot expand nf field for instruction {name}") # Remove 'nf' from variable fields def remove_nf_field(single_dict: SingleInstr): """Removes 'nf' from variable fields in the instruction dictionary.""" single_dict["variable_fields"].remove("nf") # Update the mask to include the 'nf' field def update_mask(single_dict: SingleInstr): """Updates the mask to include the 'nf' field in the instruction dictionary.""" single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29) # Create an expanded instruction def create_expanded_instruction( name: str, single_dict: SingleInstr, nf: int, name_expand_index: int, base_match: int, encoding_prefix: str, ) -> "tuple[str, SingleInstr]": """Creates an expanded instruction based on 'nf' value.""" new_single_dict = copy.deepcopy(single_dict) # Update match value in one step new_single_dict["match"] = hex(base_match | (nf << 29)) new_single_dict["encoding"] = format(nf, "03b") + encoding_prefix # Construct new instruction name new_name = ( name if nf == 0 else f"{name[:name_expand_index]}seg{nf + 1}{name[name_expand_index:]}" ) return (new_name, new_single_dict) def read_lines(file: str) -> "list[str]": """ Reads lines from a file and returns non-blank, non-comment lines. The file must be a resource relative to the root of this repo. """ with open_text_resource(file) as fp: lines = (line.rstrip() for line in fp) return [line for line in lines if line and not line.startswith("#")] # Update the instruction dictionary def process_standard_instructions( lines: "list[str]", instr_dict: InstrDict, file_name: str ): """Processes standard instructions from the given lines and updates the instruction dictionary.""" for line in lines: if "$import" in line or "$pseudo" in line: continue logging.debug(f"Processing line: {line}") name, single_dict = process_enc_line(line, file_name) ext_name = os.path.basename(file_name) if name in instr_dict: var = instr_dict[name]["extension"] if same_base_isa(ext_name, var): log_and_exit( f"Instruction {name} from {ext_name} is already added from {var} in same base ISA" ) elif instr_dict[name]["encoding"] != single_dict["encoding"]: log_and_exit( f"Instruction {name} from {ext_name} has different encodings in different base ISAs" ) instr_dict[name]["extension"].extend(single_dict["extension"]) else: for key, item in instr_dict.items(): if ( overlaps(item["encoding"], single_dict["encoding"]) and not extension_overlap_allowed(ext_name, item["extension"][0]) and not instruction_overlap_allowed(name, key) and same_base_isa(ext_name, item["extension"]) ): log_and_exit( f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}' ) instr_dict[name] = single_dict # Incorporate pseudo instructions into the instruction dictionary based on given conditions def process_pseudo_instructions( lines: "list[str]", instr_dict: InstrDict, file_name: str, include_pseudo: bool, include_pseudo_ops: "list[str]", ): """Processes pseudo instructions from the given lines and updates the instruction dictionary.""" for line in lines: if "$pseudo" not in line: continue logging.debug(f"Processing pseudo line: {line}") ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0] ext_file = read_extension_file(ext) validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst) name, single_dict = process_enc_line(f"{pseudo_inst} {line_content}", file_name) if ( orig_inst.replace(".", "_") not in instr_dict or include_pseudo or name in include_pseudo_ops ): if name not in instr_dict: instr_dict[name] = single_dict logging.debug(f"Including pseudo_op: {name}") else: if single_dict["match"] != instr_dict[name]["match"]: instr_dict[f"{name}_pseudo"] = single_dict # TODO: This expression is always false since both sides are list[str]. elif single_dict["extension"] not in instr_dict[name]["extension"]: # type: ignore instr_dict[name]["extension"].extend(single_dict["extension"]) # Integrate imported instructions into the instruction dictionary def process_imported_instructions( lines: "list[str]", instr_dict: InstrDict, file_name: str ): """Processes imported instructions from the given lines and updates the instruction dictionary.""" for line in lines: if "$import" not in line: continue logging.debug(f"Processing imported line: {line}") import_ext, reg_instr = imported_regex.findall(line)[0] ext_file = read_extension_file(import_ext) validate_instruction_in_extension(reg_instr, ext_file, file_name, line) for oline in StringIO(ext_file): if re.findall(f"^\\s*{reg_instr}\\s+", oline): name, single_dict = process_enc_line(oline, file_name) if name in instr_dict: if instr_dict[name]["encoding"] != single_dict["encoding"]: log_and_exit( f"Imported instruction {name} from {os.path.basename(file_name)} has different encodings" ) instr_dict[name]["extension"].extend(single_dict["extension"]) else: instr_dict[name] = single_dict break def read_extension_file(ext: str) -> str: """ Read the extension file path, considering the unratified directory if necessary. """ file = resource_root() / "extensions" / ext if file.is_file(): return file.read_text(encoding="utf-8") file = resource_root() / "extensions" / "unratified" / ext if file.is_file(): return file.read_text(encoding="utf-8") log_and_exit(f"Extension {ext} not found.") # Confirm the presence of an original instruction in the corresponding extension file. def validate_instruction_in_extension( inst: str, ext_file: str, file_name: str, pseudo_inst: str ): """Validates if the original instruction exists in the dependent extension.""" found = False for oline in StringIO(ext_file): if re.findall(f"^\\s*{inst}\\s+", oline): found = True break if not found: log_and_exit( f"Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}" ) # Construct a dictionary of instructions filtered by specified criteria def create_inst_dict( file_filter: "list[str]", include_pseudo: bool = False, include_pseudo_ops: "Optional[list[str]]" = None, ) -> InstrDict: """ Creates a dictionary of instructions based on the provided file filters. This function return a dictionary containing all instructions associated with an extension defined by the file_filter input. Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc. Each node of the dictionary will correspond to an instruction which again is a dictionary. The dictionary contents of each instruction includes: - variables: list of arguments used by the instruction whose mapping exists in the arg_lut dictionary - encoding: this contains the 32-bit encoding of the instruction where '-' is used to represent position of arguments and 1/0 is used to reprsent the static encoding of the bits - extension: this field contains the rv* filename from which this instruction was included - match: hex value representing the bits that need to match to detect this instruction - mask: hex value representin the bits that need to be masked to extract the value required for matching. In order to build this dictionary, the function does 2 passes over the same rv file: - First pass: extracts all standard instructions, skipping pseudo ops and imported instructions. For each selected line, the `process_enc_line` function is called to create the dictionary contents of the instruction. Checks are performed to ensure that the same instruction is not added twice to the overall dictionary. - Second pass: parses only pseudo_ops. For each pseudo_op, the function: - Checks if the dependent extension and instruction exist. - Adds the pseudo_op to the dictionary if the dependent instruction is not already present; otherwise, it is skipped. """ if include_pseudo_ops is None: include_pseudo_ops = [] instr_dict: InstrDict = {} ratified_file_filters = [ fil for fil in file_filter if not fil.startswith("unratified/") ] unratified_file_filters = [ fil.removeprefix("unratified/") for fil in file_filter if fil.startswith("unratified/") ] # Extension file name, "extensions[/unratified]/rv_foo". file_names: list[str] = [] for file in (resource_root() / "extensions").iterdir(): if file.is_file() and any( fnmatch(file.name, fil) for fil in ratified_file_filters ): file_names.append("extensions/" + file.name) for file in (resource_root() / "extensions" / "unratified").iterdir(): if file.is_file() and any( fnmatch(file.name, fil) for fil in unratified_file_filters ): file_names.append("extensions/unratified/" + file.name) logging.debug("Collecting standard instructions") for file_name in file_names: logging.debug(f"Parsing File: {file_name} for standard instructions") lines = read_lines(file_name) process_standard_instructions(lines, instr_dict, file_name) logging.debug("Collecting pseudo instructions") for file_name in file_names: logging.debug(f"Parsing File: {file_name} for pseudo instructions") lines = read_lines(file_name) process_pseudo_instructions( lines, instr_dict, file_name, include_pseudo, include_pseudo_ops, ) logging.debug("Collecting imported instructions") for file_name in file_names: logging.debug(f"Parsing File: {file_name} for imported instructions") lines = read_lines(file_name) process_imported_instructions(lines, instr_dict, file_name) return instr_dict # Extracts the extensions used in an instruction dictionary def instr_dict_2_extensions(instr_dict: InstrDict) -> "list[str]": return list({item["extension"][0] for item in instr_dict.values()}) # Returns signed interpretation of a value within a given width def signed(value: int, width: int) -> int: return value if 0 <= value < (1 << (width - 1)) else value - (1 << width)