diff options
author | IIITM-Jay <jaydev.neuroscitech@gmail.com> | 2024-09-17 00:53:00 +0530 |
---|---|---|
committer | IIITM-Jay <jaydev.neuroscitech@gmail.com> | 2024-09-17 00:53:00 +0530 |
commit | 1b0ef5d896a1652dc52a5353521d9990d481c76b (patch) | |
tree | dc18afacd59344a65a690093b1b4e8e4f278c6f6 /shared_utils.py | |
parent | 6a1be96c8238d603a50d956ff1f91defa264785b (diff) | |
download | riscv-opcodes-1b0ef5d896a1652dc52a5353521d9990d481c76b.zip riscv-opcodes-1b0ef5d896a1652dc52a5353521d9990d481c76b.tar.gz riscv-opcodes-1b0ef5d896a1652dc52a5353521d9990d481c76b.tar.bz2 |
Refactored and Optimized Logic:: Parser Logic, Latex Based Output & Shared Modules
Diffstat (limited to 'shared_utils.py')
-rw-r--r-- | shared_utils.py | 547 |
1 files changed, 547 insertions, 0 deletions
#!/usr/bin/env python3
"""Shared helpers for parsing RISC-V opcode encoding files (rv*).

The module turns the textual encoding lines into per-instruction dictionaries
and assembles them into one dictionary keyed by instruction name.  The lookup
tables and regular expressions it relies on (``arg_lut``, ``fixed_ranges``,
``single_fixed``, ``pseudo_regex``, ``imported_regex``,
``overlapping_extensions``, ``overlapping_instructions``) come from
``constants``.
"""
import copy
import re
import glob
import os
import pprint
import logging

from constants import *

LOG_FORMAT = '%(levelname)s:: %(message)s'
LOG_LEVEL = logging.INFO

pretty_printer = pprint.PrettyPrinter(indent=2)
logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)


def process_enc_line(line, ext):
    '''
    Process one line of an encoding file (rv*) into an instruction record.

    While processing, the following checks are performed:

        - there is no over specification (same bit assigned more than once)
        - bit ranges are in the format hi..lo=val where hi >= lo
        - the value assigned is representable in the bit range
        - every argument of the instruction has a mapping in arg_lut and no
          argument overlaps an already assigned bit

    Note: under specification (bits that are neither fixed nor covered by an
    argument) is not checked by this function.

    Args:
        line: the encoding line, ``<name> <fields...>``.
        ext: path of the rv* file the line was read from.

    Returns:
        A tuple ``(name, instruction_dict)`` where ``name`` has '.' replaced
        by '_' and ``instruction_dict`` contains:

        - variable_fields: list of arguments used by the instruction whose
          mapping exists in the arg_lut dictionary
        - encoding: the 32-character encoding of the instruction where '-'
          marks bits that are not fixed and 1/0 represent the static encoding
        - extension: one-element list with the rv* filename the instruction
          was taken from
        - match: hex value of the bits that need to match to detect this
          instruction
        - mask: hex value of the bits that need to be masked to extract the
          value required for matching

    Raises:
        SystemExit: if any of the checks above fails.
    '''
    encoding = initialize_encoding()
    name, remaining = parse_instruction_name(line)

    # Fixed ranges of the form hi..lo=val
    process_fixed_ranges(remaining, encoding, line)

    # Single fixed values of the form <lsb>=<val>
    remaining = process_single_fixed(remaining, encoding, line)

    # Create match and mask strings
    match, mask = create_match_and_mask(encoding)

    # process_arguments returns (args, encoding_args).  Only the argument
    # names belong in 'variable_fields'; encoding_args exists solely for the
    # overlap check done inside process_arguments.
    args, _ = process_arguments(remaining, encoding, name)

    # Create and return the final instruction dictionary
    instruction_dict = create_instruction_dict(encoding, args, ext, match, mask)

    return name, instruction_dict


def initialize_encoding():
    """Return a fresh 32-entry encoding list with '-' meaning 'not assigned'."""
    return ['-'] * 32


def parse_instruction_name(line):
    """Split an encoding line into the instruction name and the rest.

    Leading/trailing whitespace is ignored and any run of whitespace may
    separate the name from the fields.  '.' in the name is replaced by '_'.

    Raises:
        SystemExit: if the line does not contain a name followed by fields.
    """
    parts = line.strip().split(None, 1)
    if len(parts) < 2:
        log_and_exit(f"Malformed encoding line (expected '<name> <fields>'): {line!r}")
    name, remaining = parts
    return name.replace('.', '_'), remaining


def process_fixed_ranges(remaining, encoding, line):
    """Apply every hi..lo=val range found in `remaining` to `encoding` (in place)."""
    for s2, s1, entry in fixed_ranges.findall(remaining):
        msb, lsb = int(s2), int(s1)
        validate_bit_range(msb, lsb, line)
        validate_entry_value(msb, lsb, entry, line)
        update_encoding(msb, lsb, entry, encoding, line)


def validate_bit_range(msb, lsb, line):
    """Exit with an error if msb < lsb (msb == lsb is accepted)."""
    if msb < lsb:
        log_and_exit(f"{get_instruction_name(line)} has msb < lsb in its encoding")


def validate_entry_value(msb, lsb, entry, line):
    """Exit with an error if `entry` does not fit in the msb..lsb bit range."""
    width = msb - lsb + 1
    entry_value = int(entry, 0)
    if entry_value >= (1 << width):
        log_and_exit(f"{get_instruction_name(line)} has an illegal value for the bit width {width}")


def update_encoding(msb, lsb, entry, encoding, line):
    """Write the bits of `entry` into encoding[msb..lsb], rejecting reassignment."""
    entry_value = int(entry, 0)
    for ind in range(lsb, msb + 1):
        # encoding is stored MSB first, so bit `ind` lives at index 31 - ind
        if encoding[31 - ind] != '-':
            log_and_exit(f"{get_instruction_name(line)} has overlapping bits in its opcodes")
        encoding[31 - ind] = str((entry_value >> (ind - lsb)) & 1)


def process_single_fixed(remaining, encoding, line):
    """Apply every single-bit assignment <lsb>=<val> to `encoding` (in place).

    Returns:
        `remaining` with all hi..lo=val ranges blanked out, ready for the
        argument extraction step.

    Raises:
        SystemExit: on a bit that is already assigned or a value other than
            0 or 1.
    """
    for lsb, value, _ in single_fixed.findall(remaining):
        bit = int(lsb, 0)
        bit_value = int(value, 0)
        if bit_value not in (0, 1):
            # A single bit can only hold 0 or 1; anything else would corrupt
            # the binary match/mask strings built from the encoding.
            log_and_exit(f"{get_instruction_name(line)} has an illegal value {value} for bit {bit}")
        if encoding[31 - bit] != '-':
            log_and_exit(f"{get_instruction_name(line)} has overlapping bits in its opcodes")
        encoding[31 - bit] = str(bit_value)
    return fixed_ranges.sub(' ', remaining)


def create_match_and_mask(encoding):
    """Return the binary (match, mask) strings derived from `encoding`.

    match has the fixed bit values with '-' replaced by 0; mask has 1 for
    every fixed bit and 0 for every '-'.
    """
    joined = ''.join(encoding)
    match = joined.replace('-', '0')
    mask = joined.replace('0', '1').replace('-', '0')
    return match, mask


def process_arguments(remaining, encoding, name):
    """Extract the argument names and check that they fit into the encoding.

    Returns:
        A tuple ``(args, encoding_args)``: the list of argument names and a
        copy of `encoding` in which every argument bit holds the argument
        name.  `encoding` itself is not modified.
    """
    args = single_fixed.sub(' ', remaining).split()
    encoding_args = encoding.copy()
    for arg in args:
        if arg not in arg_lut:
            handle_missing_arg(arg, name)
        msb, lsb = arg_lut[arg]
        update_arg_encoding(msb, lsb, arg, encoding_args, name)
    return args, encoding_args


def handle_missing_arg(arg, name):
    """Register an aliased argument ``existing=alias`` in arg_lut or exit.

    If `arg` has the form ``<existing>=<something>`` and ``<existing>`` is in
    arg_lut, `arg` is added to arg_lut with the same bit range; otherwise the
    program exits with an error.
    """
    if '=' in arg and (existing_arg := arg.split('=')[0]) in arg_lut:
        arg_lut[arg] = arg_lut[existing_arg]
    else:
        log_and_exit(f"Variable {arg} in instruction {name} not mapped in arg_lut")


def update_arg_encoding(msb, lsb, arg, encoding_args, name):
    """Mark bits msb..lsb of `encoding_args` as belonging to `arg`, rejecting overlaps."""
    for ind in range(lsb, msb + 1):
        if encoding_args[31 - ind] != '-':
            log_and_exit(f"Variable {arg} overlaps in bit {ind} in instruction {name}")
        encoding_args[31 - ind] = arg


def create_instruction_dict(encoding, args, ext, match, mask):
    """Build the per-instruction dictionary from the parsed pieces."""
    return {
        'encoding': ''.join(encoding),
        'variable_fields': args,
        'extension': [os.path.basename(ext)],
        'match': hex(int(match, 2)),
        'mask': hex(int(mask, 2)),
    }


def log_and_exit(message):
    """Log `message` as an error and terminate via SystemExit(1)."""
    logging.error(message)
    raise SystemExit(1)


def get_instruction_name(line):
    """Return the first whitespace-delimited token of `line` ('' if none)."""
    tokens = line.split(None, 1)
    return tokens[0] if tokens else ''


def overlaps(x, y):
    """
    Check whether two encoding strings can match the same bit pattern.

    Args:
        x (str): First encoding string ('0', '1' or '-' per position).
        y (str): Second encoding string.

    Returns:
        bool: False if some position (within the shorter length) has fixed,
        differing bits in both strings; True otherwise.
    """
    # zip stops at the shorter string, so only the common prefix is compared
    return all(
        char_x == '-' or char_y == '-' or char_x == char_y
        for char_x, char_y in zip(x, y)
    )


def overlap_allowed(a, x, y):
    """
    Check whether the pair (x, y) is listed in the overlap table `a`.

    Args:
        a (dict): Maps a key to a container of keys it may overlap with.
        x (str): The first key to check.
        y (str): The second key to check.

    Returns:
        bool: True if `y` is listed under `x` or `x` is listed under `y`.
    """
    return (x in a and y in a[x]) or (y in a and x in a[y])


# Checks if overlap between two extensions is allowed
def extension_overlap_allowed(x, y):
    return overlap_allowed(overlapping_extensions, x, y)


# Checks if overlap between two instructions is allowed
def instruction_overlap_allowed(x, y):
    return overlap_allowed(overlapping_instructions, x, y)


# Checks if ext_name shares the same base ISA with any in ext_name_list
def same_base_isa(ext_name, ext_name_list):
    type1 = ext_name.split("_")[0]
    for ext_name1 in ext_name_list:
        type2 = ext_name1.split("_")[0]
        # "rv" files apply to both rv32 and rv64
        if type1 == type2 or \
                (type2 == "rv" and type1 in ["rv32", "rv64"]) or \
                (type1 == "rv" and type2 in ["rv32", "rv64"]):
            return True
    return False


# Expands instructions with "nf" field in variable_fields, otherwise returns unchanged
def add_segmented_vls_insn(instr_dict):
    updated_dict = {}
    for k, v in instr_dict.items():
        if "nf" in v['variable_fields']:
            # expand_nf_field returns (name, dict) pairs, which dict.update accepts
            updated_dict.update(expand_nf_field(k, v))
        else:
            updated_dict[k] = v
    return updated_dict


# Expands nf field in instruction name and updates instruction details
def expand_nf_field(name, single_dict):
    """Return eight (name, dict) pairs, one per value of the 3-bit nf field.

    `single_dict` is modified in place: "nf" is removed from its
    variable_fields and bits 31..29 are added to its mask.
    """
    if "nf" not in single_dict['variable_fields']:
        log_and_exit(f"Cannot expand nf field for instruction {name}")

    single_dict['variable_fields'].remove("nf")  # Remove "nf" from variable fields
    single_dict['mask'] = hex(int(single_dict['mask'], 16) | (0b111 << 29))  # Update mask

    # "seg<n>" is inserted in front of the first 'e' of the name
    name_expand_index = name.find('e')
    expanded_instructions = []
    for nf in range(8):  # Expand nf for values 0 to 7
        new_single_dict = copy.deepcopy(single_dict)
        new_single_dict['match'] = hex(int(single_dict['match'], 16) | (nf << 29))
        new_single_dict['encoding'] = format(nf, '03b') + single_dict['encoding'][3:]
        new_name = name if nf == 0 else f"{name[:name_expand_index]}seg{nf+1}{name[name_expand_index:]}"
        expanded_instructions.append((new_name, new_single_dict))
    return expanded_instructions


# Extracts the extensions used in an instruction dictionary
def instr_dict_2_extensions(instr_dict):
    return list({item['extension'][0] for item in instr_dict.values()})


# Returns signed interpretation of a value within a given width
def signed(value, width):
    return value if 0 <= value < (1 << (width - 1)) else value - (1 << width)


def _read_encoding_lines(path):
    """Return the right-stripped, non-blank, non-comment lines of `path`."""
    with open(path) as fp:
        stripped = (raw.rstrip() for raw in fp)
        return [ln for ln in stripped if ln and not ln.startswith("#")]


def _resolve_extension_file(opcodes_dir, ext):
    """Return the path of `ext` in opcodes_dir or its unratified/ subdir, else None."""
    for candidate in (f'{opcodes_dir}/{ext}', f'{opcodes_dir}/unratified/{ext}'):
        if os.path.exists(candidate):
            return candidate
    return None


def _find_instruction_line(ext_file, instr_name):
    """Return the first line of `ext_file` that defines `instr_name`, else None."""
    # Names such as "c.addi" contain regex metacharacters, hence re.escape
    pattern = re.compile(rf'^\s*{re.escape(instr_name)}\s+')
    with open(ext_file) as fp:
        for candidate in fp:
            if pattern.search(candidate):
                return candidate
    return None


def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=None):
    '''
    Build the dictionary of all instructions selected by `file_filter`.

    Args:
        file_filter: iterable of glob patterns (a single pattern string is
            also accepted) that are resolved relative to the directory
            containing this file.
        include_pseudo: when True, every pseudo op is included even if the
            instruction it depends on is already in the dictionary.
        include_pseudo_ops: optional collection of pseudo op names that are
            always included.

    Returns:
        A dictionary mapping instruction names to the dictionaries produced
        by process_enc_line (keys: variable_fields, encoding, extension,
        match, mask).

    Raises:
        SystemExit: on duplicate or conflicting instructions, disallowed
            encoding overlaps, and unresolved pseudo/import dependencies.

    The matching files are walked three times:

    1. Standard instructions: lines containing $import or $pseudo are
       skipped.  An instruction name may repeat only across different base
       ISAs with identical encodings; different names may overlap in
       encoding on the same base ISA only when explicitly allowed.
    2. Pseudo ops: the dependent extension file and instruction must exist.
       A pseudo op is added when its original instruction is not in the
       dictionary, or when include_pseudo / include_pseudo_ops selects it.
    3. Imported instructions: the referenced line is located in the
       dependent extension file and processed like a standard instruction.
    '''
    if include_pseudo_ops is None:
        include_pseudo_ops = []
    if isinstance(file_filter, str):
        # iterating a bare string would glob each character separately
        file_filter = [file_filter]

    opcodes_dir = os.path.dirname(os.path.realpath(__file__))
    instr_dict = {}

    # file_names contains all files to be parsed in the riscv-opcodes directory
    file_names = []
    for fil in file_filter:
        file_names += glob.glob(f'{opcodes_dir}/{fil}')
    file_names.sort(reverse=True)

    # first pass is for standard/regular instructions
    logging.debug('Collecting standard instructions first')
    for f in file_names:
        logging.debug(f'Parsing File: {f} for standard instructions')
        ext_name = os.path.basename(f)
        for line in _read_encoding_lines(f):
            # imports and pseudo ops are handled in the later passes
            if '$import' in line or '$pseudo' in line:
                continue
            logging.debug(f' Processing line: {line}')

            name, single_dict = process_enc_line(line, f)

            if name in instr_dict:
                var = instr_dict[name]["extension"]
                if same_base_isa(ext_name, var):
                    # disable same names on the same base ISA
                    log_and_exit(
                        f'instruction : {name} from '
                        f'{ext_name} is already '
                        f'added from {var} in same base ISA')
                elif instr_dict[name]['encoding'] != single_dict['encoding']:
                    # disable same names with different encodings on different base ISAs
                    log_and_exit(
                        f'instruction : {name} from '
                        f'{ext_name} is already '
                        f'added from {var} but each have different encodings in different base ISAs')
                instr_dict[name]['extension'].extend(single_dict['extension'])
            else:
                for key, item in instr_dict.items():
                    if overlaps(item['encoding'], single_dict['encoding']) and \
                            not extension_overlap_allowed(ext_name, item['extension'][0]) and \
                            not instruction_overlap_allowed(name, key) and \
                            same_base_isa(ext_name, item['extension']):
                        # disable different names with overlapping encodings on the same base ISA
                        log_and_exit(
                            f'instruction : {name} in extension '
                            f'{ext_name} overlaps instruction {key} '
                            f'in extension {item["extension"]}')

                # update the final dict with the instruction
                instr_dict[name] = single_dict

    # second pass is for pseudo instructions
    logging.debug('Collecting pseudo instructions now')
    for f in file_names:
        logging.debug(f'Parsing File: {f} for pseudo_ops')
        for line in _read_encoding_lines(f):
            if '$pseudo' not in line:
                continue
            logging.debug(f' Processing line: {line}')

            # pseudo_regex yields the dependent extension, the dependent
            # instruction, the pseudo op itself and its encoding
            matches = pseudo_regex.findall(line)
            if not matches:
                log_and_exit(f'Malformed pseudo_op line in {f}: {line}')
            ext, orig_inst, pseudo_inst, enc_fields = matches[0]

            # the dependent extension file must exist (ratified or unratified)
            ext_file = _resolve_extension_file(opcodes_dir, ext)
            if ext_file is None:
                log_and_exit(f'Pseudo op {pseudo_inst} in {f} depends on {ext} which is not available')

            # the dependent instruction must exist in that file
            if _find_instruction_line(ext_file, orig_inst) is None:
                log_and_exit(f'Orig instruction {orig_inst} not found in {ext}. Required by pseudo_op {pseudo_inst} present in {f}')

            name, single_dict = process_enc_line(pseudo_inst + ' ' + enc_fields, f)

            # add the pseudo_op only if the original instruction is not
            # already in the dictionary, unless explicitly requested
            if orig_inst.replace('.', '_') not in instr_dict \
                    or include_pseudo \
                    or name in include_pseudo_ops:
                if name not in instr_dict:
                    instr_dict[name] = single_dict
                    logging.debug(f' including pseudo_ops:{name}')
                elif single_dict['match'] != instr_dict[name]['match']:
                    instr_dict[name + '_pseudo'] = single_dict
                elif name + '_pseudo' not in instr_dict:
                    # same pseudo op seen again: record only extensions that
                    # are not listed yet
                    known = instr_dict[name]['extension']
                    missing = [e for e in single_dict['extension'] if e not in known]
                    known.extend(missing)
            else:
                logging.debug(f' Skipping pseudo_op {pseudo_inst} since original instruction {orig_inst} already selected in list')

    # third pass is for imported instructions
    logging.debug('Collecting imported instructions')
    for f in file_names:
        logging.debug(f'Parsing File: {f} for imported ops')
        for line in _read_encoding_lines(f):
            if '$import' not in line:
                continue
            logging.debug(f' Processing line: {line}')

            matches = imported_regex.findall(line)
            if not matches:
                log_and_exit(f'Malformed import line in {f}: {line}')
            import_ext, reg_instr = matches[0]

            # the dependent extension file must exist (ratified or unratified)
            ext_file = _resolve_extension_file(opcodes_dir, import_ext)
            if ext_file is None:
                log_and_exit(f'Instruction {reg_instr} in {f} cannot be imported from {import_ext}')

            # pick the line of the dependent file that defines the instruction
            oline = _find_instruction_line(ext_file, reg_instr)
            if oline is None:
                logging.error(f'imported instruction {reg_instr} not found in {ext_file}. Required by {line} present in {f}')
                log_and_exit('Note: you cannot import pseudo/imported ops.')

            name, single_dict = process_enc_line(oline, f)

            if name in instr_dict:
                var = instr_dict[name]["extension"]
                if instr_dict[name]['encoding'] != single_dict['encoding']:
                    log_and_exit(
                        f'imported instruction : {name} in '
                        f'{os.path.basename(f)} is already '
                        f'added from {var} but each have different encodings for the same instruction')
                instr_dict[name]['extension'].extend(single_dict['extension'])
            else:
                # update the final dict with the instruction
                instr_dict[name] = single_dict
    return instr_dict