From bd26319a75b2ae4f52dc397b22c97e19b08d527f Mon Sep 17 00:00:00 2001 From: Neel Gala Date: Fri, 8 Apr 2022 20:51:52 +0530 Subject: updated python script, Makefile and README to process new files - the python file is well commented - the README provides a brief overview of how the python script works and the various artifacts it can generate --- parse.py | 872 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 872 insertions(+) create mode 100755 parse.py (limited to 'parse.py') diff --git a/parse.py b/parse.py new file mode 100755 index 0000000..970c29d --- /dev/null +++ b/parse.py @@ -0,0 +1,872 @@ +#!/usr/bin/env python3 + +from constants import * +import re +import glob +import os +import pprint +import logging +import collections +import yaml +import sys + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') + +def process_enc_line(line, ext): + ''' + This function processes each line of the encoding files (rv*). As part of + the processing, the function ensures that the encoding is legal through the + following checks:: + + - there is no over specification (same bits assigned different values) + - there is no under specification (some bits not assigned values) + - bit ranges are in the format hi..lo=val where hi > lo + - value assigned is representable in the bit range + - also checks that the mapping of arguments of an instruction exists in + arg_lut. + + If the above checks pass, then the function returns a tuple of the name and + a dictionary containing basic information of the instruction which includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + reprsent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representin the bits that need to be masked to extract + the value required for matching. + ''' + single_dict = {} + + # fill all bits with don't care. we use '-' to represent don't care + # TODO: hardcoded for 32-bits. + encoding = ['-'] * 32 + + # get the name of instruction by splitting based on the first space + [name, remaining] = line.split(' ', 1) + + # replace dots with underscores as dot doesn't work with C/Sverilog, etc + name = name.replace('.', '_') + + # remove leading whitespaces + remaining = remaining.lstrip() + + # check each field for it's length and overlapping bits + # ex: 1..0=5 will result in an error --> x overlapping bits + temp_instr = ['-'] * 32 + entries = [ + x[0] for x in re.findall( + r'((\d)+\.\.(\d)+\=((0b\d+)|(0x\d+)|(\d)+))*', + remaining) if x[0] != '' + ] + for temp_entry in entries: + entry = temp_entry.split('=')[0] + f1, f2 = entry.split('..') + for ind in range(int(f1), int(f2)): + + # overlapping bits + if temp_instr[ind] == 'X': + logging.error( + f'{line.split(" ")[0]:<10} has {ind} bit overlapping in it\'s opcodes' + ) + raise SystemExit(1) + temp_instr[ind] = 'X' + + # check x < y + if int(f1) < int(f2): + logging.error( + f'{line.split(" ")[0]:<10} has position {f1} less than position {f2} in it\'s encoding' + ) + raise SystemExit(1) + + # illegal value assigned as per bit width + entry_value = temp_entry.split('=')[1] + temp_base = 16 if 'x' in entry_value else 2 if 'b' in entry_value else 10 + if len(str(int(entry_value, + temp_base))[2:]) > (int(f1) - int(f2)): + logging.error( + f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {f1 - f2}' + ) + raise SystemExit(1) + + # extract bit pattern assignments of the form hi..lo=val. fixed_ranges is a + # regex expression present in constants.py. The extracted patterns are + # captured as a list in args where each entry is a tuple (msb, lsb, value) + args = fixed_ranges.sub(' ', remaining) + + # parse through the args and assign constants 1/0 to bits which need to be + # hardcoded for this instruction + for (msb, lsb, value) in fixed_ranges.findall(remaining): + value = int(value, 0) + msb = int(msb, 0) + lsb = int(lsb, 0) + value = f"{value:032b}" + for i in range(0, msb - lsb + 1): + encoding[31 - (i + lsb)] = value[31 - i] + + # do the same as above but for = pattern. single_fixed is a regex + # expression present in constants.py + for (lsb, value, drop) in single_fixed.findall(remaining): + lsb = int(lsb, 0) + value = int(value, 0) + encoding[31 - lsb] = str(value) + + # convert the list of encodings into a single string for match and mask + match = "".join(encoding).replace('-','0') + mask = "".join(encoding).replace('0','1').replace('-','0') + + # check if all args of the instruction are present in arg_lut present in + # constants.py + args = single_fixed.sub(' ', args).split() + for a in args: + if a not in arg_lut: + logging.error(f' Found variable {a} in instruction {name} whose mapping in arg_lut does not exist') + raise SystemExit(1) + + # update the fields of the instruction as a dict and return back along with + # the name of the instruction + single_dict['encoding'] = "".join(encoding) + single_dict['variable_fields'] = args + single_dict['extension'] = [ext.split('/')[-1]] + single_dict['match']=hex(int(match,2)) + single_dict['mask']=hex(int(mask,2)) + + return (name, single_dict) + + +def create_inst_dict(file_filter, include_pseudo=False): + ''' + This function return a dictionary containing all instructions associated + with an extension defined by the file_filter input. The file_filter input + needs to be rv* file name with out the 'rv' prefix i.e. '_i', '32_i', etc. + + Each node of the dictionary will correspond to an instruction which again is + a dictionary. The dictionary contents of each instruction includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + reprsent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representin the bits that need to be masked to extract + the value required for matching. + + In order to build this dictionary, the function does 2 passes over the same + rv file. The first pass is to extract all standard + instructions. In this pass, all pseudo ops and imported instructions are + skipped. For each selected line of the file, we call process_enc_line + function to create the above mentioned dictionary contents of the + instruction. Checks are performed in this function to ensure that the same + instruction is not added twice to the overall dictionary. + + In the second pass, this function parses only pseudo_ops. For each pseudo_op + this function checks if the dependent extension and instruction, both, exit + before parsing it. The pseudo op is only added to the overall dictionary is + the dependent instruction is not present in the dictionary, else its + skipped. + + + ''' + opcodes_dir = f'./' + instr_dict = {} + + # file_names contains all files to be parsed in the riscv-opcodes directory + file_names = [] + for fil in file_filter: + file_names += glob.glob(f'{opcodes_dir}{fil}') + + # first pass if for standard/regular instructions + logging.debug('Collecting standard instructions first') + for f in file_names: + logging.debug(f'Parsing File: {f}') + with open(f) as fp: + lines = (line.rstrip() + for line in fp) # All lines including the blank ones + lines = list(line for line in lines if line) # Non-blank lines + lines = list( + line for line in lines + if not line.startswith("#")) # remove comment lines + + # go through each line of the file + for line in lines: + # if the an instruction needs to be imported then go to the + # respective file and pick the line that has the instruction. + # The variable 'line' will now point to the new line from the + # imported file + + # ignore all lines starting with $import and $pseudo + if '$import' in line or '$pseudo' in line: + continue + logging.debug(f' Processing line: {line}') + + # call process_enc_line to get the data about the current + # instruction + (name, single_dict) = process_enc_line(line, f) + + # if an instruction has already been added to the filtered + # instruction dictionary throw an error saying the given + # instruction is already imported and raise SystemExit + if name in instr_dict: + var = instr_dict[name]["extension"] + if instr_dict[name]['encoding'] != single_dict['encoding']: + err_msg = f'instruction : {name} from ' + err_msg += f'{f.split("/")[-1]} is already ' + err_msg += f'added from {var} but each have different encodings for the same instruction' + logging.error(err_msg) + raise SystemExit(1) + instr_dict[name]['extension'].append(single_dict['extension']) + + # update the final dict with the instruction + instr_dict[name] = single_dict + + # second pass if for pseudo instructions + logging.debug('Collecting pseudo instructions now') + for f in file_names: + logging.debug(f'Parsing File: {f}') + with open(f) as fp: + lines = (line.rstrip() + for line in fp) # All lines including the blank ones + lines = list(line for line in lines if line) # Non-blank lines + lines = list( + line for line in lines + if not line.startswith("#")) # remove comment lines + + # go through each line of the file + for line in lines: + + # ignore all lines not starting with $pseudo + if '$pseudo' not in line: + continue + logging.debug(f' Processing line: {line}') + + # use the regex pseudo_regex from constants.py to find the dependent + # extension, dependent instruction, the pseudo_op in question and + # its encoding + (ext, orig_inst, pseudo_inst, line) = pseudo_regex.findall(line)[0] + + # check if the file of the dependent extension exist. Throw error if + # it doesn't + if not os.path.exists(ext): + ext1 = f'unratified/{ext}' + if not os.path.exists(ext1): + logging.error(f'Pseudo op {pseudo_inst} in {f} depends on {ext} which is not available') + raise SystemExit(1) + else: + ext = ext1 + + # check if the dependent instruction exist in the dependent + # extension. Else throw error. + found = False + for oline in open(ext): + if not re.findall(f'^\s*{orig_inst}',oline): + continue + else: + found = True + break + if not found: + logging.error(f'Orig instruction {orig_inst} not found in {ext}. Required by pseudo_op {pseudo_inst} present in {f}') + raise SystemExit(1) + + + # add the pseudo_op to the dictionary only if the original + # instruction is not already in the dictionary. + if orig_inst.replace('.','_') not in instr_dict or include_pseudo: + (name, single_dict) = process_enc_line(pseudo_inst + ' ' + line, f) + + # update the final dict with the instruction + if name not in instr_dict: + instr_dict[name] = single_dict + else: + logging.debug(f'Skipping pseudo_op {pseudo_inst} since original instruction {orig_inst} already selected in list') + + # third pass if for imported instructions + logging.debug('Collecting imported instructions') + for f in file_names: + logging.debug(f'Parsing File: {f}') + with open(f) as fp: + lines = (line.rstrip() + for line in fp) # All lines including the blank ones + lines = list(line for line in lines if line) # Non-blank lines + lines = list( + line for line in lines + if not line.startswith("#")) # remove comment lines + + # go through each line of the file + for line in lines: + # if the an instruction needs to be imported then go to the + # respective file and pick the line that has the instruction. + # The variable 'line' will now point to the new line from the + # imported file + + # ignore all lines starting with $import and $pseudo + if '$import' not in line : + continue + logging.debug(f' Processing line: {line}') + + (import_ext, reg_instr) = imported_regex.findall(line)[0] + + # check if the file of the dependent extension exist. Throw error if + # it doesn't + if not os.path.exists(import_ext): + ext1 = f'unratified/{import_ext}' + if not os.path.exists(ext1): + logging.error(f'Instruction {reg_instr} in {f} cannot be imported from {import_ext}') + raise SystemExit(1) + else: + ext = ext1 + else: + ext = import_ext + + # check if the dependent instruction exist in the dependent + # extension. Else throw error. + found = False + for oline in open(ext): + if not re.findall(f'^\s*{reg_instr}',oline): + continue + else: + found = True + break + if not found: + logging.error(f'imported instruction {reg_instr} not found in {ext}. Required by {line} present in {f}') + logging.error(f'Note: you cannot import pseudo ops.') + raise SystemExit(1) + + # call process_enc_line to get the data about the current + # instruction + (name, single_dict) = process_enc_line(oline, f) + + # if an instruction has already been added to the filtered + # instruction dictionary throw an error saying the given + # instruction is already imported and raise SystemExit + if name in instr_dict: + var = instr_dict[name]["extension"] + if instr_dict[name]['encoding'] != single_dict['encoding']: + err_msg = f'imported instruction : {name} in ' + err_msg += f'{f.split("/")[-1]} is already ' + err_msg += f'added from {var} but each have different encodings for the same instruction' + logging.error(err_msg) + raise SystemExit(1) + instr_dict[name]['extension'].append(single_dict['extension']) + + # update the final dict with the instruction + instr_dict[name] = single_dict + return instr_dict + +def make_priv_latex_table(): + latex_file = open('priv-instr-table.tex','w') + type_list = ['R-type','I-type'] + system_instr = ['_h','_s','_system','_svinval', '64_h'] + dataset_list = [ (system_instr, 'Trap-Return Instructions',['sret','mret'], False) ] + dataset_list.append((system_instr, 'Interrupt-Management Instructions',['wfi'], False)) + dataset_list.append((system_instr, 'Supervisor Memory-Management Instructions',['sfence_vma'], False)) + dataset_list.append((system_instr, 'Hypervisor Memory-Management Instructions',['hfence_vvma', 'hfence_gvma'], False)) + dataset_list.append((system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions', + ['hlv_b','hlv_bu', 'hlv_h','hlv_hu', 'hlv_w', 'hlvx_hu', 'hlvx_wu', 'hsv_b', 'hsv_h','hsv_w'], False)) + dataset_list.append((system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions, RV64 only', ['hlv_wu','hlv_d','hsv_d'], False)) + dataset_list.append((system_instr, 'Svinval Memory-Management Instructions', ['sinval_vma', 'sfence_w_inval','sfence_inval_ir', 'hinval_vvma','hinval_gvma'], False)) + caption = '\\caption{RISC-V Privileged Instructions}' + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + latex_file.close() + +def make_latex_table(): + ''' + This function is mean to create the instr-table.tex that is meant to be used + by the riscv-isa-manual. This function basically creates a single latext + file of multiple tables with each table limited to a single page. Only the + last table is assigned a latex-caption. + + For each table we assign a type-list which capture the different instruction + types (R, I, B, etc) that will be required for the table. Then we select the + list of extensions ('_i, '32_i', etc) whose instructions are required to + populate the table. For each extension or collection of extension we can + assign Title, such that in the end they appear as subheadings within + the table (note these are inlined headings and not captions of the table). + + All of the above information is collected/created and sent to + make_ext_latex_table function to dump out the latex contents into a file. + + The last table only has to be given a caption - as per the policy of the + riscv-isa-manual. + ''' + # open the file and use it as a pointer for all further dumps + latex_file = open('instr-table.tex','w') + + # create the rv32i table first. Here we set the caption to empty. We use the + # files rv_i and rv32_i to capture instructions relevant for rv32i + # configuration. The dataset is a list of 4-element tuples : + # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions + # is empty then it indicates that all instructions of the all the extensions + # in list_of_extensions need to be dumped. If not empty, then only the + # instructions listed in list_of_instructions will be dumped into latex. + caption = '' + type_list = ['R-type','I-type','S-type','B-type','U-type','J-type'] + dataset_list = [(['_i','32_i'], 'RV32I Base Instruction Set', [], True)] + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ['R-type','I-type','S-type'] + dataset_list = [(['64_i'], 'RV64I Base Instruction Set (in addition to RV32I)', [], False)] + dataset_list.append((['_zifencei'], 'RV32/RV64 Zifencei Standard Extension', [], False)) + dataset_list.append((['_zicsr'], 'RV32/RV64 Zicsr Standard Extension', [], False)) + dataset_list.append((['_m','32_m'], 'RV32M Standard Extension', [], False)) + dataset_list.append((['64_m'],'RV64M Standard Extension (in addition to RV32M)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ['R-type'] + dataset_list = [(['_a'],'RV32A Standard Extension', [], False)] + dataset_list.append((['64_a'],'RV64A Standard Extension (in addition to RV32A)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ['R-type','R4-type','I-type','S-type'] + dataset_list = [(['_f'],'RV32F Standard Extension', [], False)] + dataset_list.append((['64_f'],'RV64F Standard Extension (in addition to RV32F)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ['R-type','R4-type','I-type','S-type'] + dataset_list = [(['_d'],'RV32D Standard Extension', [], False)] + dataset_list.append((['64_d'],'RV64D Standard Extension (in addition to RV32D)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ['R-type','R4-type','I-type','S-type'] + dataset_list = [(['_q'],'RV32Q Standard Extension', [], False)] + dataset_list.append((['64_q'],'RV64Q Standard Extension (in addition to RV32Q)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + caption = '\\caption{Instruction listing for RISC-V}' + type_list = ['R-type','R4-type','I-type','S-type'] + dataset_list = [(['_zfh', '_d_zfh','_q_zfh'],'RV32Zfh Standard Extension', [], False)] + dataset_list.append((['64_zfh'],'RV64Zfh Standard Extension (in addition to RV32Zfh)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + ## The following is demo to show that Compressed instructions can also be + # dumped in the same manner as above + + #type_list = [''] + #dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])] + #dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', [])) + #make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption) + + latex_file.close() + +def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption): + ''' + For a given collection of extensions this function dumps out a complete + latex table which includes the encodings of the instructions. + + The ilen input indicates the length of the instruction for which the table + is created. + + The caption input is used to create the latex-table caption. + + The type_list input is a list of instruction types (R, I, B, etc) that are + treated as header for each table. Each table will have its own requirements + and type_list must include all the instruction-types that the table needs. + Note, all elements of this list must be present in the latex_inst_type + dictionary defined in constants.py + + The latex_file is a file pointer to which the latex-table will dumped into + + The dataset is a list of 3-element tuples containing: + (list_of_extensions, title, list_of_instructions) + The list_of_extensions must contain all the set of extensions whose + instructions must be populated under a given title. If list_of_instructions + is not empty, then only those instructions mentioned in list_of_instructions + present in the extension will be dumped into the latex-table, other + instructions will be ignored. + + Once the above inputs are received then function first creates table entries + for the instruction types. To simplify things, we maintain a dictionary + called latex_inst_type in constants.py which is created in the same way the + instruction dictionary is created. This allows us to re-use the same logic + to create the instruction types table as well + + Once the header is created, we then parse through every entry in the + dataset. For each list dataset entry we use the create_inst_dict function to + create an exhaustive list of instructions associated with the respective + collection of the extension of that dataset. Then we apply the instruction + filter, if any, indicated by the list_of_instructions of that dataset. + Thereon, for each instruction we create a latex table entry. + + Latex table specification for ilen sized instructions: + Each table is created with ilen+1 columns - ilen columns for each bit of the + instruction and one column to hold the name of the instruction. + + For each argument of an instruction we use the arg_lut from constants.py + to identify its position in the encoding, and thus create a multicolumn + entry with the name of the argument as the data. For hardcoded bits, we + do the same where we capture a string of continuous 1s and 0s, identify + the position and assign the same string as the data of the + multicolumn entry in the table. + + ''' + column_size = "".join(['p{0.002in}']*(ilen+1)) + + type_entries = ''' + \\multicolumn{3}{l}{31} & + \\multicolumn{2}{r}{27} & + \\multicolumn{1}{c}{26} & + \\multicolumn{1}{r}{25} & + \\multicolumn{3}{l}{24} & + \\multicolumn{2}{r}{20} & + \\multicolumn{3}{l}{19} & + \\multicolumn{2}{r}{15} & + \\multicolumn{2}{l}{14} & + \\multicolumn{1}{r}{12} & + \\multicolumn{4}{l}{11} & + \\multicolumn{1}{r}{7} & + \\multicolumn{6}{l}{6} & + \\multicolumn{1}{r}{0} \\\\ + \\cline{2-33}\n& \n\n +''' if ilen == 32 else ''' + \\multicolumn{1}{c}{15} & + \\multicolumn{1}{c}{14} & + \\multicolumn{1}{c}{13} & + \\multicolumn{1}{c}{12} & + \\multicolumn{1}{c}{11} & + \\multicolumn{1}{c}{10} & + \\multicolumn{1}{c}{9} & + \\multicolumn{1}{c}{8} & + \\multicolumn{1}{c}{7} & + \\multicolumn{1}{c}{6} & + \\multicolumn{1}{c}{5} & + \\multicolumn{1}{c}{4} & + \\multicolumn{1}{c}{3} & + \\multicolumn{1}{c}{2} & + \\multicolumn{1}{c}{1} & + \\multicolumn{1}{c}{0} \\\\ + \\cline{2-17}\n& \n\n +''' + + # depending on the type_list input we create a subset dictionary of + # latex_inst_type dictionary present in constants.py + type_dict = {key: value for key, value in latex_inst_type.items() if key in type_list} + + # iterate ovr each instruction type and create a table entry + for t in type_dict: + fields = [] + + # first capture all "arguments" of the type (funct3, funct7, rd, etc) + # and capture their positions using arg_lut. + for f in type_dict[t]['variable_fields']: + (msb, lsb) = arg_lut[f] + name = f if f not in latex_mapping else latex_mapping[f] + fields.append((msb, lsb, name)) + + # iterate through the 32 bits, starting from the msb, and assign + # argument names to the relevant portions of the instructions. This + # information is stored as a 3-element tuple containing the msb, lsb + # position of the arugment and the name of the argument. + msb = ilen - 1 + y = '' + for r in range(0,ilen): + if y != '': + fields.append((msb,ilen-1-r+1,y)) + y = '' + msb = ilen-1-r-1 + if r == 31: + if y != '': + fields.append((msb, 0, y)) + y = '' + + # sort the arguments in decreasing order of msb position + fields.sort(key=lambda y: y[0], reverse=True) + + # for each argument/string of 1s or 0s, create a multicolumn latex table + # entry + entry = '' + for r in range(len(fields)): + (msb, lsb, name) = fields[r] + if r == len(fields)-1: + entry += f'\\multicolumn{{ {msb -lsb +1} }}{{|c|}}{{ {name} }} & {t} \\\\ \n' + elif r == 0: + entry += f'\\multicolumn{{ {msb- lsb + 1} }}{{|c|}}{{ {name} }} &\n' + else: + entry += f'\\multicolumn{{ {msb -lsb + 1} }}{{c|}}{{ {name} }} &\n' + entry += f'\\cline{{2-{ilen+1}}}\n&\n\n' + type_entries += entry + + # for each entry in the dataset create a table + content = '' + for (ext_list, title, filter_list, include_pseudo) in dataset: + instr_dict = {} + + # for all extensions list in ext_list, create a dictionary of + # instructions associated with those extensions. + for e in ext_list: + instr_dict.update(create_inst_dict(['rv'+e], include_pseudo)) + + # if filter_list is not empty then use that as the official set of + # instructions that need to be dumped into the latex table + inst_list = list(instr_dict.keys()) if not filter_list else filter_list + + # for each instruction create an latex table entry just like how we did + # above with the instruction-type table. + instr_entries = '' + for inst in inst_list: + if inst not in instr_dict: + logging.error(f'in make_ext_latex_table: Instruction: {inst} not found in instr_dict') + raise SystemExit(1) + fields = [] + + # only if the argument is available in arg_lut we consume it, else + # throw error. + for f in instr_dict[inst]['variable_fields']: + if f not in arg_lut: + logging.error(f'Found variable {f} in instruction {inst} whose mapping is not available') + raise SystemExit(1) + (msb,lsb) = arg_lut[f] + name = f.replace('_','.') if f not in latex_mapping else latex_mapping[f] + fields.append((msb, lsb, name)) + + msb = ilen -1 + y = '' + if ilen == 16: + encoding = instr_dict[inst]['encoding'][16:] + else: + encoding = instr_dict[inst]['encoding'] + for r in range(0,ilen): + x = encoding [r] + if ((msb, ilen-1-r+1)) in latex_fixed_fields: + fields.append((msb,ilen-1-r+1,y)) + msb = ilen-1-r + y = '' + if x == '-': + if y != '': + fields.append((msb,ilen-1-r+1,y)) + y = '' + msb = ilen-1-r-1 + else: + y += str(x) + if r == ilen-1: + if y != '': + fields.append((msb, 0, y)) + y = '' + + fields.sort(key=lambda y: y[0], reverse=True) + entry = '' + for r in range(len(fields)): + (msb, lsb, name) = fields[r] + if r == len(fields)-1: + entry += f'\\multicolumn{{ {msb -lsb +1} }}{{|c|}}{{ {name} }} & {inst.upper().replace("_",".")} \\\\ \n' + elif r == 0: + entry += f'\\multicolumn{{ {msb- lsb + 1} }}{{|c|}}{{ {name} }} &\n' + else: + entry += f'\\multicolumn{{ {msb -lsb + 1} }}{{c|}}{{ {name} }} &\n' + entry += f'\\cline{{2-{ilen+1}}}\n&\n\n' + instr_entries += entry + + # once an entry of the dataset is completed we create the whole table + # with the title of that dataset as sub-heading (sort-of) + if title != '': + content += f''' + +\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\ +\\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\ +\\cline{{2-{ilen+1}}} + + & +{instr_entries} +''' + else: + content += f''' +{instr_entries} +''' + + + header = f''' +\\newpage + +\\begin{{table}}[p] +\\begin{{small}} +\\begin{{center}} + \\begin{{tabular}} {{{column_size}l}} + {" ".join(['&']*ilen)} \\\\ + + & +{type_entries} +''' + endtable=f''' + +\\end{{tabular}} +\\end{{center}} +\\end{{small}} +{caption} +\\end{{table}} +''' + # dump the contents and return + latex_file.write(header+content+endtable) + + +def make_chisel(instr_dict): + + chisel_names='' + cause_names_str='' + csr_names_str = '' + for i in instr_dict: + chisel_names += f' def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n' + for num, name in causes: + cause_names_str += f' val {name.lower().replace(" ","_")} = {hex(num)}\n' + cause_names_str += ''' val all = { + val res = collection.mutable.ArrayBuffer[Int]() +''' + for num, name in causes: + cause_names_str += f' res += {name.lower().replace(" ","_")}\n' + cause_names_str += ''' res.toArray + }''' + + for num, name in csrs+csrs32: + csr_names_str += f' val {name} = {hex(num)}\n' + csr_names_str += ''' val all = { + val res = collection.mutable.ArrayBuffer[Int]() +''' + for num, name in csrs: + csr_names_str += f''' res += {name}\n''' + csr_names_str += ''' res.toArray + } + val all32 = { + val res = collection.mutable.ArrayBuffer(all:_*) +''' + for num, name in csrs32: + csr_names_str += f''' res += {name}\n''' + csr_names_str += ''' res.toArray + }''' + + chisel_file = open('inst.chisel','w') + chisel_file.write(f''' +/* Automatically generated by parse_opcodes */ +object Instructions {{ +{chisel_names} +}} +object Causes {{ +{cause_names_str} +}} +object CSRs {{ +{csr_names_str} +}} +''') + chisel_file.close() + +def make_rust(instr_dict): + mask_match_str= '' + for i in instr_dict: + mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n' + mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n' + for num, name in csrs+csrs32: + mask_match_str += f'const CSR_{name.upper()}: u16 = {hex(num)};\n' + for num, name in causes: + mask_match_str += f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n' + rust_file = open('inst.rs','w') + rust_file.write(f''' +/* Automatically generated by parse_opcodes */ +{mask_match_str} +''') + rust_file.close() + +def make_sverilog(instr_dict): + names_str = '' + for i in instr_dict: + names_str += f" localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n" + names_str += ' /* CSR Addresses */\n' + for num, name in csrs+csrs32: + names_str += f" localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n" + + sverilog_file = open('inst.sverilog','w') + sverilog_file.write(f''' +/* Automatically generated by parse_opcodes */ +package riscv_instr; +{names_str} +endpackage +''') + sverilog_file.close() +def make_c(instr_dict): + mask_match_str = '' + declare_insn_str = '' + for i in instr_dict: + mask_match_str += f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n' + mask_match_str += f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n' + declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n' + + csr_names_str = '' + declare_csr_str = '' + for num, name in csrs+csrs32: + csr_names_str += f'#define CSR_{name.upper()} {hex(num)}\n' + declare_csr_str += f'DECLARE_CSR({name}, CSR_{name.upper()})\n' + + causes_str= '' + declare_cause_str = '' + for num, name in causes: + causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n" + declare_cause_str += f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n" + + with open('encoding.h', 'r') as file: + enc_header = file.read() + + commit = os.popen('git log -1 --format="format:%h"').read() + enc_file = open('encoding.out.h','w') + enc_file.write(f''' +/* +* This file is auto-generated by running 'make' in +* https://github.com/riscv/riscv-opcodes ({commit}) +*/ +{enc_header} +/* Automatically generated by parse_opcodes. */ +#ifndef RISCV_ENCODING_H +#define RISCV_ENCODING_H +{mask_match_str} +{csr_names_str} +{causes_str} +#endif +#ifdef DECLARE_INSN +{declare_insn_str} +#endif +#ifdef DECLARE_CSR +{declare_csr_str} +#endif +#ifdef DECLARE_CAUSE +{declare_cause_str} +#endif +''') + enc_file.close() + +if __name__ == "__main__": + print(f'Running with args : {sys.argv}') + + extensions = sys.argv[1:] + for i in ['-c','-latex','-chisel','-sverilog','-rust']: + if i in extensions: + extensions.remove(i) + print(f'Extensions selected : {extensions}') + instr_dict = create_inst_dict(extensions) + with open('instr_dict.yaml', 'w') as outfile: + yaml.dump(instr_dict, outfile, default_flow_style=False) + instr_dict = collections.OrderedDict(sorted(instr_dict.items())) + + if '-c' in sys.argv[1:]: + make_c(instr_dict) + logging.info('encoding.out.h generated successfully') + + if '-chisel' in sys.argv[1:]: + make_chisel(instr_dict) + logging.info('inst.chisel generated successfully') + + if '-sverilog' in sys.argv[1:]: + make_sverilog(instr_dict) + logging.info('inst.sverilog generated successfully') + + if '-rust' in sys.argv[1:]: + make_rust(instr_dict) + logging.info('inst.rs generated successfully') + + if '-latex' in sys.argv[1:]: + make_latex_table() + logging.info('instr-table.tex generated successfully') + make_priv_latex_table() + logging.info('priv-instr-table.tex generated successfully') -- cgit v1.1