Diffstat (limited to 'src')
-rw-r--r--  src/riscv_opcodes/__init__.py        |    2
-rw-r--r--  src/riscv_opcodes/__main__.py        |   10
-rw-r--r--  src/riscv_opcodes/c_utils.py         |   79
-rw-r--r--  src/riscv_opcodes/chisel_utils.py    |   82
-rw-r--r--  src/riscv_opcodes/constants.py       |  271
-rw-r--r--  src/riscv_opcodes/go_utils.py        |   64
-rw-r--r--  src/riscv_opcodes/latex_utils.py     |  450
-rw-r--r--  src/riscv_opcodes/parse.py           |  121
-rw-r--r--  src/riscv_opcodes/resources.py       |   39
-rw-r--r--  src/riscv_opcodes/rust_utils.py      |   28
-rw-r--r--  src/riscv_opcodes/rv_colors.py       |   12
-rw-r--r--  src/riscv_opcodes/shared_utils.py    |  641
-rw-r--r--  src/riscv_opcodes/sverilog_utils.py  |   30
-rw-r--r--  src/riscv_opcodes/svg_utils.py       |  284
14 files changed, 2113 insertions(+), 0 deletions(-)
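
Two quick notes before the per-file diffs. First, on invocation: parse.py (below) exposes generate_extensions() behind an argparse CLI, and __main__.py makes the package runnable as a module. A minimal sketch of driving it from Python instead of the command line - equivalent to `python3 -m riscv_opcodes -c -chisel 'rv*'`, and assuming the package and its rv* extension resources are installed as resources.py expects:

    from riscv_opcodes.parse import generate_extensions

    # Build the instruction dictionary for every extension file matching 'rv*',
    # then emit encoding.out.h (c=True) and inst.chisel (chisel=True).
    # instr_dict.json is always written as a side effect.
    generate_extensions(
        ["rv*"],               # extension globs, same as the positional CLI args
        include_pseudo=False,  # the -pseudo flag
        c=True, chisel=True, spinalhdl=False, sverilog=False,
        rust=False, go=False, latex=False, svg=False,
    )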
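Second, on the core derivation in shared_utils.py: each rv* line is expanded into a 32-character encoding string where '-' marks argument (don't-care) bits and '0'/'1' are fixed bits, and convert_encoding_to_match_mask() turns that string into the MATCH/MASK constants consumed by the C, Rust and Go backends. A self-contained illustration of that step, using a hypothetical encoding rather than one parsed from a real rv* file:

    # mask has a 1 wherever a bit is fixed; match keeps the fixed bits' values.
    encoding = "-" * 17 + "000" + "-" * 5 + "0110011"  # funct3=000, opcode=OP
    match = hex(int(encoding.replace("-", "0"), 2))                   # '0x33'
    mask = hex(int(encoding.replace("0", "1").replace("-", "0"), 2))  # '0x707f'
    # An instruction word w decodes as this instruction iff
    # (w & int(mask, 16)) == int(match, 16).
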
diff --git a/src/riscv_opcodes/__init__.py b/src/riscv_opcodes/__init__.py new file mode 100644 index 0000000..e8fd9d4 --- /dev/null +++ b/src/riscv_opcodes/__init__.py @@ -0,0 +1,2 @@ +# Mark this directory as a package. This is not actually needed by +# Python but Pylint gets confused about relative imports without it. diff --git a/src/riscv_opcodes/__main__.py b/src/riscv_opcodes/__main__.py new file mode 100644 index 0000000..456cddd --- /dev/null +++ b/src/riscv_opcodes/__main__.py @@ -0,0 +1,10 @@ +""" +This allows running as a module, i.e. `python3 -m riscv_opcodes` which +we wouldn't normally need, but the `coverage` tool doesn't work on +installed scripts - you can't do `coverage run riscv_opcodes` because it +looks for a Python file called `riscv_opcodes` in the current directory. +""" + +from .parse import main + +main() diff --git a/src/riscv_opcodes/c_utils.py b/src/riscv_opcodes/c_utils.py new file mode 100644 index 0000000..198a37f --- /dev/null +++ b/src/riscv_opcodes/c_utils.py @@ -0,0 +1,79 @@ +import logging +import os +import pprint + +from .constants import causes, csrs, csrs32 +from .resources import read_text_resource +from .shared_utils import InstrDict, arg_lut + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_c(instr_dict: InstrDict): + mask_match_str = "" + declare_insn_str = "" + for i in instr_dict: + mask_match_str += ( + f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n' + ) + mask_match_str += ( + f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n' + ) + declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n' + + csr_names_str = "" + declare_csr_str = "" + for num, name in csrs + csrs32: + csr_names_str += f"#define CSR_{name.upper()} {hex(num)}\n" + declare_csr_str += f"DECLARE_CSR({name}, CSR_{name.upper()})\n" + + causes_str = "" + declare_cause_str = "" + for num, name in causes: + causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n" + declare_cause_str += ( + f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n" + ) + + arg_str = "" + for name, rng in arg_lut.items(): + sanitized_name = name.replace(" ", "_").replace("=", "_eq_") + begin = rng[1] + end = rng[0] + mask = ((1 << (end - begin + 1)) - 1) << begin + arg_str += f"#define INSN_FIELD_{sanitized_name.upper()} {hex(mask)}\n" + + enc_header = read_text_resource("encoding.h") + + commit = os.popen('git log -1 --format="format:%h"').read() + + # Generate the output as a string + output_str = f"""/* SPDX-License-Identifier: BSD-3-Clause */ + +/* Copyright (c) 2023 RISC-V International */ + +/* + * This file is auto-generated by running 'make' in + * https://github.com/riscv/riscv-opcodes ({commit}) + */ + +{enc_header} +/* Automatically generated by parse_opcodes. 
*/ +#ifndef RISCV_ENCODING_H +#define RISCV_ENCODING_H +{mask_match_str} +{csr_names_str} +{causes_str} +{arg_str}#endif +#ifdef DECLARE_INSN +{declare_insn_str}#endif +#ifdef DECLARE_CSR +{declare_csr_str}#endif +#ifdef DECLARE_CAUSE +{declare_cause_str}#endif +""" + + # Write the modified output to the file + with open("encoding.out.h", "w", encoding="utf-8") as enc_file: + enc_file.write(output_str) diff --git a/src/riscv_opcodes/chisel_utils.py b/src/riscv_opcodes/chisel_utils.py new file mode 100644 index 0000000..46cb0b6 --- /dev/null +++ b/src/riscv_opcodes/chisel_utils.py @@ -0,0 +1,82 @@ +import logging +import pprint + +from .constants import causes, csrs, csrs32 +from .shared_utils import InstrDict, instr_dict_2_extensions + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_chisel(instr_dict: InstrDict, spinal_hdl: bool = False): + + chisel_names = "" + cause_names_str = "" + csr_names_str = "" + for i in instr_dict: + if spinal_hdl: + chisel_names += f' def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n' + # else: + # chisel_names += f' def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n' + if not spinal_hdl: + extensions = instr_dict_2_extensions(instr_dict) + for e in extensions: + if "rv64_" in e: + e_format = e.replace("rv64_", "").upper() + "64" + elif "rv32_" in e: + e_format = e.replace("rv32_", "").upper() + "32" + elif "rv_" in e: + e_format = e.replace("rv_", "").upper() + else: + e_format = e.upper() + chisel_names += f' val {e_format+"Type"} = Map(\n' + for instr_name, instr in instr_dict.items(): + if instr["extension"][0] == e: + tmp_instr_name = '"' + instr_name.upper().replace(".", "_") + '"' + chisel_names += f' {tmp_instr_name:<18s} -> BitPat("b{instr["encoding"].replace("-","?")}"),\n' + chisel_names += " )\n" + + for num, name in causes: + cause_names_str += f' val {name.lower().replace(" ","_")} = {hex(num)}\n' + cause_names_str += """ val all = { + val res = collection.mutable.ArrayBuffer[Int]() +""" + for num, name in causes: + cause_names_str += f' res += {name.lower().replace(" ","_")}\n' + cause_names_str += """ res.toArray + }""" + + for num, name in csrs + csrs32: + csr_names_str += f" val {name} = {hex(num)}\n" + csr_names_str += """ val all = { + val res = collection.mutable.ArrayBuffer[Int]() +""" + for num, name in csrs: + csr_names_str += f""" res += {name}\n""" + csr_names_str += """ res.toArray + } + val all32 = { + val res = collection.mutable.ArrayBuffer(all:_*) +""" + for num, name in csrs32: + csr_names_str += f""" res += {name}\n""" + csr_names_str += """ res.toArray + }""" + + with open( + "inst.spinalhdl" if spinal_hdl else "inst.chisel", "w", encoding="utf-8" + ) as chisel_file: + chisel_file.write( + f""" +/* Automatically generated by parse_opcodes */ +object Instructions {{ +{chisel_names} +}} +object Causes {{ +{cause_names_str} +}} +object CSRs {{ +{csr_names_str} +}} +""" + ) diff --git a/src/riscv_opcodes/constants.py b/src/riscv_opcodes/constants.py new file mode 100644 index 0000000..fb67d70 --- /dev/null +++ b/src/riscv_opcodes/constants.py @@ -0,0 +1,271 @@ +import csv +import re + +from .resources import open_text_resource + +# TODO: The constants in this file should be in all caps. 
+overlapping_extensions = { + "rv_zcmt": {"rv_c_d"}, + "rv_zcmp": {"rv_c_d"}, + "rv_c": {"rv_zcmop"}, +} + +overlapping_instructions = { + "c_addi": {"c_nop"}, + "c_lui": {"c_addi16sp"}, + "c_mv": {"c_jr"}, + "c_jalr": {"c_ebreak"}, + "c_add": {"c_ebreak", "c_jalr"}, +} + +isa_regex = re.compile( + "^RV(32|64|128)[IE]+[ABCDEFGHJKLMNPQSTUVX]*(Zicsr|Zifencei|Zihintpause|Zam|Ztso|Zkne|Zknd|Zknh|Zkse|Zksh|Zkg|Zkb|Zkr|Zks|Zkn|Zba|Zbc|Zbb|Zbp|Zbr|Zbm|Zbs|Zbe|Zbf|Zbt|Zmmul|Zbpbo|Zca|Zcf|Zcd|Zcb|Zcmp|Zcmt){,1}(_Zicsr){,1}(_Zifencei){,1}(_Zihintpause){,1}(_Zmmul){,1}(_Zam){,1}(_Zba){,1}(_Zbb){,1}(_Zbc){,1}(_Zbe){,1}(_Zbf){,1}(_Zbm){,1}(_Zbp){,1}(_Zbpbo){,1}(_Zbr){,1}(_Zbs){,1}(_Zbt){,1}(_Zkb){,1}(_Zkg){,1}(_Zkr){,1}(_Zks){,1}(_Zkn){,1}(_Zknd){,1}(_Zkne){,1}(_Zknh){,1}(_Zkse){,1}(_Zksh){,1}(_Ztso){,1}(_Zca){,1}(_Zcf){,1}(_Zcd){,1}(_Zcb){,1}(_Zcmp){,1}(_Zcmt){,1}$" +) + +# regex to find <msb>..<lsb>=<val> patterns in instruction +fixed_ranges = re.compile( + r"\s*(?P<msb>\d+.?)\.\.(?P<lsb>\d+.?)\s*=\s*(?P<val>\d[\w]*)[\s$]*", re.M +) + +# regex to find <lsb>=<val> patterns in instructions +# single_fixed = re.compile('\s+(?P<lsb>\d+)=(?P<value>[\w\d]*)[\s$]*', re.M) +single_fixed = re.compile(r"(?:^|[\s])(?P<lsb>\d+)=(?P<value>[\w]*)((?=\s|$))", re.M) + +# regex to find the overloading condition variable +var_regex = re.compile(r"(?P<var>[a-zA-Z][\w\d]*)\s*=\s*.*?[\s$]*", re.M) + +# regex for pseudo op instructions returns the dependent filename, dependent +# instruction, the pseudo op name and the encoding string +pseudo_regex = re.compile( + r"^\$pseudo_op\s+(?P<filename>rv[\d]*_[\w].*)::\s*(?P<orig_inst>.*?)\s+(?P<pseudo_inst>.*?)\s+(?P<overload>.*)$", + re.M, +) + +imported_regex = re.compile( + r"^\s*\$import\s*(?P<extension>.*)\s*::\s*(?P<instruction>.*)", re.M +) + + +def read_int_map_csv(filename: str) -> "list[tuple[int, str]]": + """ + Reads a CSV file and returns a list of tuples. + Each tuple contains an integer value (from the first column) and a string (from the second column). + + Args: + filename (str): The name of the CSV file to read. + + Returns: + list of tuple: A list of (int, str) tuples extracted from the CSV file. + """ + with open_text_resource(filename) as f: + csv_reader = csv.reader(f, skipinitialspace=True) + return [(int(row[0], 0), row[1]) for row in csv_reader] + + +causes = read_int_map_csv("causes.csv") +csrs = read_int_map_csv("csrs.csv") +csrs32 = read_int_map_csv("csrs32.csv") + + +def read_arg_lut_csv(filename: str) -> "dict[str, tuple[int, int]]": + """ + Load the argument lookup table (arg_lut) from a CSV file, mapping argument names to their bit positions. 
+ """ + with open_text_resource(filename) as f: + csv_reader = csv.reader(f, skipinitialspace=True) + return {row[0]: (int(row[1]), int(row[2])) for row in csv_reader} + + +arg_lut = read_arg_lut_csv("arg_lut.csv") + +# for mop +arg_lut["mop_r_t_30"] = (30, 30) +arg_lut["mop_r_t_27_26"] = (27, 26) +arg_lut["mop_r_t_21_20"] = (21, 20) +arg_lut["mop_rr_t_30"] = (30, 30) +arg_lut["mop_rr_t_27_26"] = (27, 26) +arg_lut["c_mop_t"] = (10, 8) + +# dictionary containing the mapping of the argument to the what the fields in +# the latex table should be +latex_mapping = { + "imm12": "imm[11:0]", + "rs1": "rs1", + "rs2": "rs2", + "rd": "rd", + "imm20": "imm[31:12]", + "bimm12hi": "imm[12$\\vert$10:5]", + "bimm12lo": "imm[4:1$\\vert$11]", + "imm12hi": "imm[11:5]", + "imm12lo": "imm[4:0]", + "jimm20": "imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]", + "zimm": "uimm", + "shamtw": "shamt", + "shamtd": "shamt", + "shamtq": "shamt", + "rd_p": "rd\\,$'$", + "rs1_p": "rs1\\,$'$", + "rs2_p": "rs2\\,$'$", + "rd_rs1_n0": "rd/rs$\\neq$0", + "rd_rs1_p": "rs1\\,$'$/rs2\\,$'$", + "c_rs2": "rs2", + "c_rs2_n0": "rs2$\\neq$0", + "rd_n0": "rd$\\neq$0", + "rs1_n0": "rs1$\\neq$0", + "c_rs1_n0": "rs1$\\neq$0", + "rd_rs1": "rd/rs1", + "zimm6hi": "uimm[5]", + "zimm6lo": "uimm[4:0]", + "c_nzuimm10": "nzuimm[5:4$\\vert$9:6$\\vert$2$\\vert$3]", + "c_uimm7lo": "uimm[2$\\vert$6]", + "c_uimm7hi": "uimm[5:3]", + "c_uimm8lo": "uimm[7:6]", + "c_uimm8hi": "uimm[5:3]", + "c_uimm9lo": "uimm[7:6]", + "c_uimm9hi": "uimm[5:4$\\vert$8]", + "c_nzimm6lo": "nzimm[4:0]", + "c_nzimm6hi": "nzimm[5]", + "c_imm6lo": "imm[4:0]", + "c_imm6hi": "imm[5]", + "c_nzimm10hi": "nzimm[9]", + "c_nzimm10lo": "nzimm[4$\\vert$6$\\vert$8:7$\\vert$5]", + "c_nzimm18hi": "nzimm[17]", + "c_nzimm18lo": "nzimm[16:12]", + "c_imm12": "imm[11$\\vert$4$\\vert$9:8$\\vert$10$\\vert$6$\\vert$7$\\vert$3:1$\\vert$5]", + "c_bimm9lo": "imm[7:6$\\vert$2:1$\\vert$5]", + "c_bimm9hi": "imm[8$\\vert$4:3]", + "c_nzuimm5": "nzuimm[4:0]", + "c_nzuimm6lo": "nzuimm[4:0]", + "c_nzuimm6hi": "nzuimm[5]", + "c_uimm8splo": "uimm[4:2$\\vert$7:6]", + "c_uimm8sphi": "uimm[5]", + "c_uimm8sp_s": "uimm[5:2$\\vert$7:6]", + "c_uimm10splo": "uimm[4$\\vert$9:6]", + "c_uimm10sphi": "uimm[5]", + "c_uimm9splo": "uimm[4:3$\\vert$8:6]", + "c_uimm9sphi": "uimm[5]", + "c_uimm10sp_s": "uimm[5:4$\\vert$9:6]", + "c_uimm9sp_s": "uimm[5:3$\\vert$8:6]", + "rd_p_e": "rd\\,$'$, even values only", + "rs2_p_e": "rs2\\,$'$, even values only", + "rd_n0_e": "rd$\\neq$0, even values only", + "c_rs2_e": "rs2, even values only", + "rd_e": "rd, even values only", + "rs2_e": "rs2, even values only", +} + + +# created a dummy instruction-dictionary like dictionary for all the instruction +# types so that the same logic can be used to create their tables +latex_inst_type = { + "R-type": { + "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct7"], + }, + "R4-type": { + "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct2", "rs3"], + }, + "I-type": { + "variable_fields": ["opcode", "rd", "funct3", "rs1", "imm12"], + }, + "S-type": { + "variable_fields": ["opcode", "imm12lo", "funct3", "rs1", "rs2", "imm12hi"], + }, + "B-type": { + "variable_fields": ["opcode", "bimm12lo", "funct3", "rs1", "rs2", "bimm12hi"], + }, + "U-type": { + "variable_fields": ["opcode", "rd", "imm20"], + }, + "J-type": { + "variable_fields": ["opcode", "rd", "jimm20"], + }, +} +latex_fixed_fields = [ + (31, 25), + (24, 20), + (19, 15), + (14, 12), + (11, 7), + (6, 0), +] + +# Pseudo-ops present in the generated encodings. 
+# By default pseudo-ops are not listed as they are considered aliases +# of their base instruction. +emitted_pseudo_ops = [ + "pause", + "prefetch_i", + "prefetch_r", + "prefetch_w", + "rstsa16", + "rstsa32", + "srli32_u", + "slli_rv32", + "srai_rv32", + "srli_rv32", + "umax32", + "c_mop_1", + "c_sspush_x1", + "c_mop_3", + "c_mop_5", + "c_sspopchk_x5", + "c_mop_7", + "c_mop_9", + "c_mop_11", + "c_mop_13", + "c_mop_15", + "mop_r_0", + "mop_r_1", + "mop_r_2", + "mop_r_3", + "mop_r_4", + "mop_r_5", + "mop_r_6", + "mop_r_7", + "mop_r_8", + "mop_r_9", + "mop_r_10", + "mop_r_11", + "mop_r_12", + "mop_r_13", + "mop_r_14", + "mop_r_15", + "mop_r_16", + "mop_r_17", + "mop_r_18", + "mop_r_19", + "mop_r_20", + "mop_r_21", + "mop_r_22", + "mop_r_23", + "mop_r_24", + "mop_r_25", + "mop_r_26", + "mop_r_27", + "mop_r_28", + "sspopchk_x1", + "sspopchk_x5", + "ssrdp", + "mop_r_29", + "mop_r_30", + "mop_r_31", + "mop_r_32", + "mop_rr_0", + "mop_rr_1", + "mop_rr_2", + "mop_rr_3", + "mop_rr_4", + "mop_rr_5", + "mop_rr_6", + "mop_rr_7", + "sspush_x1", + "sspush_x5", + "lpad", + "bclri.rv32", + "bexti.rv32", + "binvi.rv32", + "bseti.rv32", + "zext.h.rv32", + "rev8.h.rv32", + "rori.rv32", +] diff --git a/src/riscv_opcodes/go_utils.py b/src/riscv_opcodes/go_utils.py new file mode 100644 index 0000000..1a6fc33 --- /dev/null +++ b/src/riscv_opcodes/go_utils.py @@ -0,0 +1,64 @@ +import logging +import pprint +import sys + +from .constants import csrs +from .shared_utils import InstrDict, signed + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_go(instr_dict: InstrDict): + + args = " ".join(sys.argv) + prelude = f"""// Code generated by {args}; DO NOT EDIT.""" + + prelude += """ +package riscv + +import "cmd/internal/obj" + +type inst struct { + opcode uint32 + funct3 uint32 + rs1 uint32 + rs2 uint32 + csr int64 + funct7 uint32 +} + +func encode(a obj.As) *inst { + switch a { +""" + + csrs_map_str = """ } + return nil +} + +var csrs = map[uint16]string { +""" + + endoffile = """} +""" + + instr_str = "" + for i in instr_dict: + enc_match = int(instr_dict[i]["match"], 0) + opcode = (enc_match >> 0) & ((1 << 7) - 1) + funct3 = (enc_match >> 12) & ((1 << 3) - 1) + rs1 = (enc_match >> 15) & ((1 << 5) - 1) + rs2 = (enc_match >> 20) & ((1 << 5) - 1) + csr = (enc_match >> 20) & ((1 << 12) - 1) + funct7 = (enc_match >> 25) & ((1 << 7) - 1) + instr_str += f""" case A{i.upper().replace("_","")}: + return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }} +""" + for num, name in sorted(csrs, key=lambda row: row[0]): + csrs_map_str += f'{hex(num)} : "{name.upper()}",\n' + + with open("inst.go", "w", encoding="utf-8") as file: + file.write(prelude) + file.write(instr_str) + file.write(csrs_map_str) + file.write(endoffile) diff --git a/src/riscv_opcodes/latex_utils.py b/src/riscv_opcodes/latex_utils.py new file mode 100644 index 0000000..38f92f8 --- /dev/null +++ b/src/riscv_opcodes/latex_utils.py @@ -0,0 +1,450 @@ +import logging +import pprint +from typing import TextIO + +from .constants import latex_fixed_fields, latex_inst_type, latex_mapping +from .shared_utils import InstrDict, arg_lut, create_inst_dict + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_priv_latex_table(): + type_list = ["R-type", "I-type"] + system_instr = ["_h", "_s", "_system", "_svinval", "64_h", "_svinval_h"] + dataset_list = [(system_instr, 
"Trap-Return Instructions", ["sret", "mret"], False)] + dataset_list.append( + (system_instr, "Interrupt-Management Instructions", ["wfi"], False) + ) + dataset_list.append( + ( + system_instr, + "Supervisor Memory-Management Instructions", + ["sfence_vma"], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Hypervisor Memory-Management Instructions", + ["hfence_vvma", "hfence_gvma"], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Hypervisor Virtual-Machine Load and Store Instructions", + [ + "hlv_b", + "hlv_bu", + "hlv_h", + "hlv_hu", + "hlv_w", + "hlvx_hu", + "hlvx_wu", + "hsv_b", + "hsv_h", + "hsv_w", + ], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Hypervisor Virtual-Machine Load and Store Instructions, RV64 only", + ["hlv_wu", "hlv_d", "hsv_d"], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Svinval Memory-Management Instructions", + [ + "sinval_vma", + "sfence_w_inval", + "sfence_inval_ir", + "hinval_vvma", + "hinval_gvma", + ], + False, + ) + ) + caption = "\\caption{RISC-V Privileged Instructions}" + with open("priv-instr-table.tex", "w", encoding="utf-8") as latex_file: + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + +def make_latex_table(): + """ + This function is mean to create the instr-table.tex that is meant to be used + by the riscv-isa-manual. This function basically creates a single latext + file of multiple tables with each table limited to a single page. Only the + last table is assigned a latex-caption. + + For each table we assign a type-list which capture the different instruction + types (R, I, B, etc) that will be required for the table. Then we select the + list of extensions ('_i, '32_i', etc) whose instructions are required to + populate the table. For each extension or collection of extension we can + assign Title, such that in the end they appear as subheadings within + the table (note these are inlined headings and not captions of the table). + + All of the above information is collected/created and sent to + make_ext_latex_table function to dump out the latex contents into a file. + + The last table only has to be given a caption - as per the policy of the + riscv-isa-manual. + """ + # open the file and use it as a pointer for all further dumps + with open("instr-table.tex", "w", encoding="utf-8") as latex_file: + + # create the rv32i table first. Here we set the caption to empty. We use the + # files rv_i and rv32_i to capture instructions relevant for rv32i + # configuration. The dataset is a list of 4-element tuples : + # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions + # is empty then it indicates that all instructions of the all the extensions + # in list_of_extensions need to be dumped. If not empty, then only the + # instructions listed in list_of_instructions will be dumped into latex. 
+ caption = "" + type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"] + dataset_list: list[tuple[list[str], str, list[str], bool]] = [ + (["_i", "32_i"], "RV32I Base Instruction Set", [], False) + ] + dataset_list.append((["_i"], "", ["fence_tso", "pause"], True)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "I-type", "S-type"] + dataset_list = [ + (["64_i"], "RV64I Base Instruction Set (in addition to RV32I)", [], False) + ] + dataset_list.append( + (["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False) + ) + dataset_list.append( + (["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False) + ) + dataset_list.append((["_m", "32_m"], "RV32M Standard Extension", [], False)) + dataset_list.append( + (["64_m"], "RV64M Standard Extension (in addition to RV32M)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type"] + dataset_list = [(["_a"], "RV32A Standard Extension", [], False)] + dataset_list.append( + (["64_a"], "RV64A Standard Extension (in addition to RV32A)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_f"], "RV32F Standard Extension", [], False)] + dataset_list.append( + (["64_f"], "RV64F Standard Extension (in addition to RV32F)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_d"], "RV32D Standard Extension", [], False)] + dataset_list.append( + (["64_d"], "RV64D Standard Extension (in addition to RV32D)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_q"], "RV32Q Standard Extension", [], False)] + dataset_list.append( + (["64_q"], "RV64Q Standard Extension (in addition to RV32Q)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + caption = "\\caption{Instruction listing for RISC-V}" + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [ + (["_zfh", "_d_zfh", "_q_zfh"], "RV32Zfh Standard Extension", [], False) + ] + dataset_list.append( + ( + ["64_zfh"], + "RV64Zfh Standard Extension (in addition to RV32Zfh)", + [], + False, + ) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + ## The following is demo to show that Compressed instructions can also be + # dumped in the same manner as above + + # type_list = [''] + # dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])] + # dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', [])) + # make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption) + + +def make_ext_latex_table( + type_list: "list[str]", + dataset: "list[tuple[list[str], str, list[str], bool]]", + latex_file: TextIO, + ilen: int, + caption: str, +): + """ + For a given collection of extensions this function dumps out a complete + latex table which includes the encodings of the instructions. + + The ilen input indicates the length of the instruction for which the table + is created. + + The caption input is used to create the latex-table caption. + + The type_list input is a list of instruction types (R, I, B, etc) that are + treated as header for each table. 
Each table will have its own requirements
+    and type_list must include all the instruction-types that the table needs.
+    Note, all elements of this list must be present in the latex_inst_type
+    dictionary defined in constants.py.
+
+    The latex_file is a file pointer to which the latex table will be dumped.
+
+    The dataset is a list of 4-element tuples containing:
+        (list_of_extensions, title, list_of_instructions, include_pseudo)
+    The list_of_extensions must contain the set of extensions whose
+    instructions must be populated under the given title. If
+    list_of_instructions is not empty, then only those of its instructions
+    present in the extension will be dumped into the latex table; other
+    instructions will be ignored.
+
+    Once the above inputs are received, the function first creates table
+    entries for the instruction types. To simplify things, we maintain a
+    dictionary called latex_inst_type in constants.py which is created in the
+    same way the instruction dictionary is created. This allows us to re-use
+    the same logic to create the instruction-types table as well.
+
+    Once the header is created, we then parse through every entry in the
+    dataset. For each dataset entry we use the create_inst_dict function to
+    create an exhaustive list of instructions associated with the respective
+    collection of extensions of that dataset. Then we apply the instruction
+    filter, if any, indicated by the list_of_instructions of that dataset.
+    Thereon, for each instruction we create a latex table entry.
+
+    Latex table specification for ilen-sized instructions:
+        Each table is created with ilen+1 columns - ilen columns for each bit
+        of the instruction and one column to hold the name of the instruction.
+
+    For each argument of an instruction we use the arg_lut from constants.py
+    to identify its position in the encoding, and thus create a multicolumn
+    entry with the name of the argument as the data. For hardcoded bits, we
+    do the same where we capture a string of continuous 1s and 0s, identify
+    the position and assign the same string as the data of the
+    multicolumn entry in the table.
+ + """ + column_size = "".join(["p{0.002in}"] * (ilen + 1)) + + type_entries = ( + """ + \\multicolumn{3}{l}{31} & + \\multicolumn{2}{r}{27} & + \\multicolumn{1}{c}{26} & + \\multicolumn{1}{r}{25} & + \\multicolumn{3}{l}{24} & + \\multicolumn{2}{r}{20} & + \\multicolumn{3}{l}{19} & + \\multicolumn{2}{r}{15} & + \\multicolumn{2}{l}{14} & + \\multicolumn{1}{r}{12} & + \\multicolumn{4}{l}{11} & + \\multicolumn{1}{r}{7} & + \\multicolumn{6}{l}{6} & + \\multicolumn{1}{r}{0} \\\\ + \\cline{2-33}\n&\n\n +""" + if ilen == 32 + else """ + \\multicolumn{1}{c}{15} & + \\multicolumn{1}{c}{14} & + \\multicolumn{1}{c}{13} & + \\multicolumn{1}{c}{12} & + \\multicolumn{1}{c}{11} & + \\multicolumn{1}{c}{10} & + \\multicolumn{1}{c}{9} & + \\multicolumn{1}{c}{8} & + \\multicolumn{1}{c}{7} & + \\multicolumn{1}{c}{6} & + \\multicolumn{1}{c}{5} & + \\multicolumn{1}{c}{4} & + \\multicolumn{1}{c}{3} & + \\multicolumn{1}{c}{2} & + \\multicolumn{1}{c}{1} & + \\multicolumn{1}{c}{0} \\\\ + \\cline{2-17}\n&\n\n +""" + ) + + # depending on the type_list input we create a subset dictionary of + # latex_inst_type dictionary present in constants.py + type_dict = { + key: value for key, value in latex_inst_type.items() if key in type_list + } + + # iterate ovr each instruction type and create a table entry + for t in type_dict: + fields: list[tuple[int, int, str]] = [] + + # first capture all "arguments" of the type (funct3, funct7, rd, etc) + # and capture their positions using arg_lut. + for f in type_dict[t]["variable_fields"]: + (msb, lsb) = arg_lut[f] + name = f if f not in latex_mapping else latex_mapping[f] + fields.append((msb, lsb, name)) + + # iterate through the 32 bits, starting from the msb, and assign + # argument names to the relevant portions of the instructions. This + # information is stored as a 3-element tuple containing the msb, lsb + # position of the arugment and the name of the argument. + msb = ilen - 1 + y = "" + for r in range(0, ilen): + if y != "": + fields.append((msb, ilen - 1 - r + 1, y)) + y = "" + msb = ilen - 1 - r - 1 + if r == 31: + if y != "": + fields.append((msb, 0, y)) + y = "" + + # sort the arguments in decreasing order of msb position + fields.sort(key=lambda y: y[0], reverse=True) + + # for each argument/string of 1s or 0s, create a multicolumn latex table + # entry + entry = "" + for r, (msb, lsb, name) in enumerate(fields): + if r == len(fields) - 1: + entry += ( + f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n" + ) + elif r == 0: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n" + else: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n" + entry += f"\\cline{{2-{ilen+1}}}\n&\n\n" + type_entries += entry + + # for each entry in the dataset create a table + content = "" + for ext_list, title, filter_list, include_pseudo in dataset: + instr_dict: InstrDict = {} + + # for all extensions list in ext_list, create a dictionary of + # instructions associated with those extensions. + for e in ext_list: + instr_dict.update(create_inst_dict(["rv" + e], include_pseudo)) + + # if filter_list is not empty then use that as the official set of + # instructions that need to be dumped into the latex table + inst_list = list(instr_dict.keys()) if not filter_list else filter_list + + # for each instruction create an latex table entry just like how we did + # above with the instruction-type table. 
+ instr_entries = "" + for inst in inst_list: + if inst not in instr_dict: + logging.error( + f"in make_ext_latex_table: Instruction: {inst} not found in instr_dict" + ) + raise SystemExit(1) + fields = [] + + # only if the argument is available in arg_lut we consume it, else + # throw error. + for f in instr_dict[inst]["variable_fields"]: + if f not in arg_lut: + logging.error( + f"Found variable {f} in instruction {inst} whose mapping is not available" + ) + raise SystemExit(1) + (msb, lsb) = arg_lut[f] + name = ( + f.replace("_", ".") if f not in latex_mapping else latex_mapping[f] + ) + fields.append((msb, lsb, name)) + + msb = ilen - 1 + y = "" + if ilen == 16: + encoding = instr_dict[inst]["encoding"][16:] + else: + encoding = instr_dict[inst]["encoding"] + for r in range(0, ilen): + x = encoding[r] + if (msb, ilen - 1 - r + 1) in latex_fixed_fields: + fields.append((msb, ilen - 1 - r + 1, y)) + msb = ilen - 1 - r + y = "" + if x == "-": + if y != "": + fields.append((msb, ilen - 1 - r + 1, y)) + y = "" + msb = ilen - 1 - r - 1 + else: + y += str(x) + if r == ilen - 1: + if y != "": + fields.append((msb, 0, y)) + y = "" + + fields.sort(key=lambda y: y[0], reverse=True) + entry = "" + for r, (msb, lsb, name) in enumerate(fields): + if r == len(fields) - 1: + entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n' + elif r == 0: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n" + else: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n" + entry += f"\\cline{{2-{ilen+1}}}\n&\n\n" + instr_entries += entry + + # once an entry of the dataset is completed we create the whole table + # with the title of that dataset as sub-heading (sort-of) + if title != "": + content += f""" + +\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\ +\\multicolumn{{{ilen}}}{{c}}{{\\bfseries {title} }} & \\\\ +\\cline{{2-{ilen+1}}} + + & +{instr_entries} +""" + else: + content += f""" +{instr_entries} +""" + + header = f""" +\\newpage + +\\begin{{table}}[p] +\\begin{{small}} +\\begin{{center}} + \\begin{{tabular}} {{{column_size}l}} + {" ".join(['&']*ilen)} \\\\ + + & +{type_entries} +""" + endtable = f""" + +\\end{{tabular}} +\\end{{center}} +\\end{{small}} +{caption} +\\end{{table}} +""" + # dump the contents and return + latex_file.write(header + content + endtable) diff --git a/src/riscv_opcodes/parse.py b/src/riscv_opcodes/parse.py new file mode 100644 index 0000000..d78f232 --- /dev/null +++ b/src/riscv_opcodes/parse.py @@ -0,0 +1,121 @@ +import argparse +import json +import logging +import pprint + +from .c_utils import make_c +from .chisel_utils import make_chisel +from .constants import emitted_pseudo_ops +from .go_utils import make_go +from .latex_utils import make_latex_table, make_priv_latex_table +from .rust_utils import make_rust +from .shared_utils import add_segmented_vls_insn, create_inst_dict +from .sverilog_utils import make_sverilog +from .svg_utils import make_svg + +LOG_FORMAT = "%(levelname)s:: %(message)s" +LOG_LEVEL = logging.INFO + +pretty_printer = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) + + +def generate_extensions( + extensions: list[str], + include_pseudo: bool, + c: bool, + chisel: bool, + spinalhdl: bool, + sverilog: bool, + rust: bool, + go: bool, + latex: bool, + svg: bool, +): + instr_dict = create_inst_dict(extensions, include_pseudo) + instr_dict = dict(sorted(instr_dict.items())) + instr_dict_with_segment = add_segmented_vls_insn(instr_dict) + + with 
open("instr_dict.json", "w", encoding="utf-8") as outfile: + json.dump(instr_dict_with_segment, outfile, indent=2) + + if c: + instr_dict_c = create_inst_dict( + extensions, False, include_pseudo_ops=emitted_pseudo_ops + ) + instr_dict_c = dict(sorted(instr_dict_c.items())) + make_c(instr_dict_c) + logging.info("encoding.out.h generated successfully") + + if chisel: + make_chisel(instr_dict) + logging.info("inst.chisel generated successfully") + + if spinalhdl: + make_chisel(instr_dict, True) + logging.info("inst.spinalhdl generated successfully") + + if sverilog: + make_sverilog(instr_dict) + logging.info("inst.sverilog generated successfully") + + if rust: + make_rust(instr_dict) + logging.info("inst.rs generated successfully") + + if go: + make_go(instr_dict_with_segment) + logging.info("inst.go generated successfully") + + if latex: + make_latex_table() + logging.info("instr-table.tex generated successfully") + make_priv_latex_table() + logging.info("priv-instr-table.tex generated successfully") + + if svg: + make_svg(instr_dict) + logging.info("inst.svg generated successfully") + + +def main(): + parser = argparse.ArgumentParser(description="Generate RISC-V constants headers") + parser.add_argument( + "-pseudo", action="store_true", help="Include pseudo-instructions" + ) + parser.add_argument("-c", action="store_true", help="Generate output for C") + parser.add_argument( + "-chisel", action="store_true", help="Generate output for Chisel" + ) + parser.add_argument( + "-spinalhdl", action="store_true", help="Generate output for SpinalHDL" + ) + parser.add_argument( + "-sverilog", action="store_true", help="Generate output for SystemVerilog" + ) + parser.add_argument("-rust", action="store_true", help="Generate output for Rust") + parser.add_argument("-go", action="store_true", help="Generate output for Go") + parser.add_argument("-latex", action="store_true", help="Generate output for Latex") + parser.add_argument("-svg", action="store_true", help="Generate .svg output") + parser.add_argument( + "extensions", + nargs="*", + help="Extensions to use. This is a glob of the rv_.. files, e.g. 'rv*' will give all extensions.", + ) + + args = parser.parse_args() + + print(f"Extensions selected : {args.extensions}") + + generate_extensions( + args.extensions, + args.pseudo, + args.c, + args.chisel, + args.spinalhdl, + args.sverilog, + args.rust, + args.go, + args.latex, + args.svg, + ) diff --git a/src/riscv_opcodes/resources.py b/src/riscv_opcodes/resources.py new file mode 100644 index 0000000..e9398ec --- /dev/null +++ b/src/riscv_opcodes/resources.py @@ -0,0 +1,39 @@ +import sys +from importlib.resources import files +from typing import IO + +if sys.version_info < (3, 12): + # This was deprecated in Python 3.12. + from importlib.abc import Traversable +else: + from importlib.resources.abc import Traversable + + +def resource_root() -> Traversable: + """ + Return the root directory as a traversable that can + be used to load the `extensions`, `*.csv` and `encoding.h` + files. For historical reasons these are not stored inside + the `src/riscv_opcodes` directory in the source distribution + but they are moved there when generating the binary wheel. + This means we need to check in both places. + """ + assert __package__ is not None + package_root = files(__package__) + if (package_root / "extensions").is_dir(): + return package_root + return package_root / ".." / ".." + + +def read_text_resource(path_relative_to_root: str) -> str: + """ + Read a text file relative to the root of this repo. 
+ """ + return resource_root().joinpath(path_relative_to_root).read_text(encoding="utf-8") + + +def open_text_resource(path_relative_to_root: str) -> IO[str]: + """ + Open a text file relative to the root of this repo. + """ + return resource_root().joinpath(path_relative_to_root).open("r", encoding="utf-8") diff --git a/src/riscv_opcodes/rust_utils.py b/src/riscv_opcodes/rust_utils.py new file mode 100644 index 0000000..74e17eb --- /dev/null +++ b/src/riscv_opcodes/rust_utils.py @@ -0,0 +1,28 @@ +import logging +import pprint + +from .constants import causes, csrs, csrs32 +from .shared_utils import InstrDict + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_rust(instr_dict: InstrDict): + mask_match_str = "" + for i in instr_dict: + mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n' + mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n' + for num, name in csrs + csrs32: + mask_match_str += f"const CSR_{name.upper()}: u16 = {hex(num)};\n" + for num, name in causes: + mask_match_str += ( + f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n' + ) + with open("inst.rs", "w", encoding="utf-8") as rust_file: + rust_file.write( + f""" +/* Automatically generated by parse_opcodes */ +{mask_match_str} +""" + ) diff --git a/src/riscv_opcodes/rv_colors.py b/src/riscv_opcodes/rv_colors.py new file mode 100644 index 0000000..76e53a5 --- /dev/null +++ b/src/riscv_opcodes/rv_colors.py @@ -0,0 +1,12 @@ +palette = { + "Berkeley Blue": "#003262", + "California Gold": "#FDB515", + "Dark Blue": "#011e41", + "Teal": "#0a6b7c", + "Magenta": "#cb007b", + "Purple": "#60269e", + "Light Gold": "#fdda64", + "Light Teal": "#62cbc9", + "Pink": "#fe9bb1", + "Lavender": "#c2a6e1", +} diff --git a/src/riscv_opcodes/shared_utils.py b/src/riscv_opcodes/shared_utils.py new file mode 100644 index 0000000..3a1a3bc --- /dev/null +++ b/src/riscv_opcodes/shared_utils.py @@ -0,0 +1,641 @@ +import copy +import logging +import os +import pprint +import re +from fnmatch import fnmatch +from io import StringIO +from itertools import chain +from typing import Dict, NoReturn, Optional, TypedDict + +from .constants import ( + arg_lut, + fixed_ranges, + imported_regex, + overlapping_extensions, + overlapping_instructions, + pseudo_regex, + single_fixed, +) +from .resources import open_text_resource, resource_root + +LOG_FORMAT = "%(levelname)s:: %(message)s" +LOG_LEVEL = logging.INFO + +pretty_printer = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) + + +# Log an error message +def log_and_exit(message: str) -> NoReturn: + """Log an error message and exit the program.""" + logging.error(message) + raise SystemExit(1) + + +# Initialize encoding to 32-bit '-' values +def initialize_encoding(bits: int = 32) -> "list[str]": + """Initialize encoding with '-' to represent don't care bits.""" + return ["-"] * bits + + +# Validate bit range and value +def validate_bit_range(msb: int, lsb: int, entry_value: int, line: str): + """Validate the bit range and entry value.""" + if msb < lsb: + log_and_exit( + f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in its encoding' + ) + + if entry_value >= (1 << (msb - lsb + 1)): + log_and_exit( + f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb}' + ) + + +# Split the instruction line into name and remaining 
part +def parse_instruction_line(line: str) -> "tuple[str, str]": + """Parse the instruction name and the remaining encoding details.""" + name, remaining = line.replace("\t", " ").split(" ", 1) + name = name.replace(".", "_") # Replace dots for compatibility + remaining = remaining.lstrip() # Remove leading whitespace + return name, remaining + + +# Verify Overlapping Bits +def check_overlapping_bits(encoding: "list[str]", ind: int, line: str): + """Check for overlapping bits in the encoding.""" + if encoding[31 - ind] != "-": + log_and_exit( + f'{line.split(" ")[0]:<10} has {ind} bit overlapping in its opcodes' + ) + + +# Update encoding for fixed ranges +def update_encoding_for_fixed_range( + encoding: "list[str]", msb: int, lsb: int, entry_value: int, line: str +): + """ + Update encoding bits for a given bit range. + Checks for overlapping bits and assigns the value accordingly. + """ + for ind in range(lsb, msb + 1): + check_overlapping_bits(encoding, ind, line) + bit = str((entry_value >> (ind - lsb)) & 1) + encoding[31 - ind] = bit + + +# Process fixed bit patterns +def process_fixed_ranges(remaining: str, encoding: "list[str]", line: str): + """Process fixed bit ranges in the encoding.""" + for s2, s1, entry in fixed_ranges.findall(remaining): + msb, lsb, entry_value = int(s2), int(s1), int(entry, 0) + + # Validate bit range and entry value + validate_bit_range(msb, lsb, entry_value, line) + update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line) + + return fixed_ranges.sub(" ", remaining) + + +# Process single bit assignments +def process_single_fixed(remaining: str, encoding: "list[str]", line: str): + """Process single fixed assignments in the encoding.""" + for lsb, value, _drop in single_fixed.findall(remaining): + lsb = int(lsb, 0) + value = int(value, 0) + + check_overlapping_bits(encoding, lsb, line) + encoding[31 - lsb] = str(value) + + +# Main function to check argument look-up table +def check_arg_lut(args: "list[str]", encoding_args: "list[str]", name: str): + """Check if arguments are present in arg_lut.""" + for arg in args: + if arg not in arg_lut: + arg = handle_arg_lut_mapping(arg, name) + msb, lsb = arg_lut[arg] + update_encoding_args(encoding_args, arg, msb, lsb) + + +# Handle missing argument mappings +def handle_arg_lut_mapping(arg: str, name: str): + """Handle cases where an argument needs to be mapped to an existing one.""" + parts = arg.split("=") + if len(parts) == 2: + existing_arg, _new_arg = parts + if existing_arg in arg_lut: + arg_lut[arg] = arg_lut[existing_arg] + else: + log_and_exit( + f" Found field {existing_arg} in variable {arg} in instruction {name} " + f"whose mapping in arg_lut does not exist" + ) + else: + log_and_exit( + f" Found variable {arg} in instruction {name} " + f"whose mapping in arg_lut does not exist" + ) + return arg + + +# Update encoding args with variables +def update_encoding_args(encoding_args: "list[str]", arg: str, msb: int, lsb: int): + """Update encoding arguments and ensure no overlapping.""" + for ind in range(lsb, msb + 1): + check_overlapping_bits(encoding_args, ind, arg) + encoding_args[31 - ind] = arg + + +# Compute match and mask +def convert_encoding_to_match_mask(encoding: "list[str]") -> "tuple[str, str]": + """Convert the encoding list to match and mask strings.""" + match = "".join(encoding).replace("-", "0") + mask = "".join(encoding).replace("0", "1").replace("-", "0") + return hex(int(match, 2)), hex(int(mask, 2)) + + +class SingleInstr(TypedDict): + encoding: str + variable_fields: 
"list[str]" + extension: "list[str]" + match: str + mask: str + + +InstrDict = Dict[str, SingleInstr] + + +# Processing main function for a line in the encoding file +def process_enc_line(line: str, ext: str) -> "tuple[str, SingleInstr]": + """ + This function processes each line of the encoding files (rv*). As part of + the processing, the function ensures that the encoding is legal through the + following checks:: + - there is no over specification (same bits assigned different values) + - there is no under specification (some bits not assigned values) + - bit ranges are in the format hi..lo=val where hi > lo + - value assigned is representable in the bit range + - also checks that the mapping of arguments of an instruction exists in + arg_lut. + If the above checks pass, then the function returns a tuple of the name and + a dictionary containing basic information of the instruction which includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + reprsent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representin the bits that need to be masked to extract + the value required for matching. + """ + encoding = initialize_encoding() + + # Parse the instruction line + name, remaining = parse_instruction_line(line) + + # Process fixed ranges + remaining = process_fixed_ranges(remaining, encoding, line) + + # Process single fixed assignments + process_single_fixed(remaining, encoding, line) + + # Convert the list of encodings into a match and mask + match, mask = convert_encoding_to_match_mask(encoding) + + # Check arguments in arg_lut + args = single_fixed.sub(" ", remaining).split() + encoding_args = encoding.copy() + + check_arg_lut(args, encoding_args, name) + + # Return single_dict + return name, { + "encoding": "".join(encoding), + "variable_fields": args, + "extension": [os.path.basename(ext)], + "match": match, + "mask": mask, + } + + +# Extract ISA Type +def extract_isa_type(ext_name: str) -> str: + """Extracts the ISA type from the extension name.""" + return ext_name.split("_")[0] + + +# Verify the types for RV* +def is_rv_variant(type1: str, type2: str) -> bool: + """Checks if the types are RV variants (rv32/rv64).""" + return (type2 == "rv" and type1 in {"rv32", "rv64"}) or ( + type1 == "rv" and type2 in {"rv32", "rv64"} + ) + + +# Check for same base ISA +def has_same_base_isa(type1: str, type2: str) -> bool: + """Determines if the two ISA types share the same base.""" + return type1 == type2 or is_rv_variant(type1, type2) + + +# Compare the base ISA type of a given extension name against a list of extension names +def same_base_isa(ext_name: str, ext_name_list: "list[str]") -> bool: + """Checks if the base ISA type of ext_name matches any in ext_name_list.""" + type1 = extract_isa_type(ext_name) + return any(has_same_base_isa(type1, extract_isa_type(ext)) for ext in ext_name_list) + + +# Pad two strings to equal length +def pad_to_equal_length(str1: str, str2: str, pad_char: str = "-") -> "tuple[str, str]": + """Pads two strings to equal length using the given padding character.""" + max_len = max(len(str1), len(str2)) + return str1.rjust(max_len, pad_char), str2.rjust(max_len, pad_char) + + +# 
Check compatibility for two characters +def has_no_conflict(char1: str, char2: str) -> bool: + """Checks if two characters are compatible (either matching or don't-care).""" + return char1 == "-" or char2 == "-" or char1 == char2 + + +# Conflict check between two encoded strings +def overlaps(x: str, y: str) -> bool: + """Checks if two encoded strings overlap without conflict.""" + x, y = pad_to_equal_length(x, y) + return all(has_no_conflict(x[i], y[i]) for i in range(len(x))) + + +# Check presence of keys in dictionary. +def is_in_nested_dict(a: "dict[str, set[str]]", key1: str, key2: str) -> bool: + """Checks if key2 exists in the dictionary under key1.""" + return key1 in a and key2 in a[key1] + + +# Overlap allowance +def overlap_allowed(a: "dict[str, set[str]]", x: str, y: str) -> bool: + """Determines if overlap is allowed between x and y based on nested dictionary checks""" + return is_in_nested_dict(a, x, y) or is_in_nested_dict(a, y, x) + + +# Check overlap allowance between extensions +def extension_overlap_allowed(x: str, y: str) -> bool: + """Checks if overlap is allowed between two extensions using the overlapping_extensions dictionary.""" + return overlap_allowed(overlapping_extensions, x, y) + + +# Check overlap allowance between instructions +def instruction_overlap_allowed(x: str, y: str) -> bool: + """Checks if overlap is allowed between two instructions using the overlapping_instructions dictionary.""" + return overlap_allowed(overlapping_instructions, x, y) + + +# Check 'nf' field +def is_segmented_instruction(instruction: SingleInstr) -> bool: + """Checks if an instruction contains the 'nf' field.""" + return "nf" in instruction["variable_fields"] + + +# Expand 'nf' fields +def update_with_expanded_instructions( + updated_dict: InstrDict, key: str, value: SingleInstr +): + """Expands 'nf' fields in the instruction dictionary and updates it with new instructions.""" + for new_key, new_value in expand_nf_field(key, value): + updated_dict[new_key] = new_value + + +# Process instructions, expanding segmented ones and updating the dictionary +def add_segmented_vls_insn(instr_dict: InstrDict) -> InstrDict: + """Processes instructions, expanding segmented ones and updating the dictionary.""" + # Use dictionary comprehension for efficiency + return dict( + chain.from_iterable( + ( + expand_nf_field(key, value) + if is_segmented_instruction(value) + else [(key, value)] + ) + for key, value in instr_dict.items() + ) + ) + + +# Expand the 'nf' field in the instruction dictionary +def expand_nf_field( + name: str, single_dict: SingleInstr +) -> "list[tuple[str, SingleInstr]]": + """Validate and prepare the instruction dictionary.""" + validate_nf_field(single_dict, name) + remove_nf_field(single_dict) + update_mask(single_dict) + + name_expand_index = name.find("e") + + # Pre compute the base match value and encoding prefix + base_match = int(single_dict["match"], 16) + encoding_prefix = single_dict["encoding"][3:] + + expanded_instructions = [ + create_expanded_instruction( + name, single_dict, nf, name_expand_index, base_match, encoding_prefix + ) + for nf in range(8) # Range of 0 to 7 + ] + + return expanded_instructions + + +# Validate the presence of 'nf' +def validate_nf_field(single_dict: SingleInstr, name: str): + """Validates the presence of 'nf' in variable fields before expansion.""" + if "nf" not in single_dict["variable_fields"]: + log_and_exit(f"Cannot expand nf field for instruction {name}") + + +# Remove 'nf' from variable fields +def remove_nf_field(single_dict: 
SingleInstr): + """Removes 'nf' from variable fields in the instruction dictionary.""" + single_dict["variable_fields"].remove("nf") + + +# Update the mask to include the 'nf' field +def update_mask(single_dict: SingleInstr): + """Updates the mask to include the 'nf' field in the instruction dictionary.""" + single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29) + + +# Create an expanded instruction +def create_expanded_instruction( + name: str, + single_dict: SingleInstr, + nf: int, + name_expand_index: int, + base_match: int, + encoding_prefix: str, +) -> "tuple[str, SingleInstr]": + """Creates an expanded instruction based on 'nf' value.""" + new_single_dict = copy.deepcopy(single_dict) + + # Update match value in one step + new_single_dict["match"] = hex(base_match | (nf << 29)) + new_single_dict["encoding"] = format(nf, "03b") + encoding_prefix + + # Construct new instruction name + new_name = ( + name + if nf == 0 + else f"{name[:name_expand_index]}seg{nf + 1}{name[name_expand_index:]}" + ) + + return (new_name, new_single_dict) + + +def read_lines(file: str) -> "list[str]": + """ + Reads lines from a file and returns non-blank, non-comment lines. + The file must be a resource relative to the root of this repo. + """ + with open_text_resource(file) as fp: + lines = (line.rstrip() for line in fp) + return [line for line in lines if line and not line.startswith("#")] + + +# Update the instruction dictionary +def process_standard_instructions( + lines: "list[str]", instr_dict: InstrDict, file_name: str +): + """Processes standard instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if "$import" in line or "$pseudo" in line: + continue + logging.debug(f"Processing line: {line}") + name, single_dict = process_enc_line(line, file_name) + ext_name = os.path.basename(file_name) + + if name in instr_dict: + var = instr_dict[name]["extension"] + if same_base_isa(ext_name, var): + log_and_exit( + f"Instruction {name} from {ext_name} is already added from {var} in same base ISA" + ) + elif instr_dict[name]["encoding"] != single_dict["encoding"]: + log_and_exit( + f"Instruction {name} from {ext_name} has different encodings in different base ISAs" + ) + + instr_dict[name]["extension"].extend(single_dict["extension"]) + else: + for key, item in instr_dict.items(): + if ( + overlaps(item["encoding"], single_dict["encoding"]) + and not extension_overlap_allowed(ext_name, item["extension"][0]) + and not instruction_overlap_allowed(name, key) + and same_base_isa(ext_name, item["extension"]) + ): + log_and_exit( + f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}' + ) + + instr_dict[name] = single_dict + + +# Incorporate pseudo instructions into the instruction dictionary based on given conditions +def process_pseudo_instructions( + lines: "list[str]", + instr_dict: InstrDict, + file_name: str, + include_pseudo: bool, + include_pseudo_ops: "list[str]", +): + """Processes pseudo instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if "$pseudo" not in line: + continue + logging.debug(f"Processing pseudo line: {line}") + ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0] + ext_file = read_extension_file(ext) + + validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst) + + name, single_dict = process_enc_line(f"{pseudo_inst} {line_content}", file_name) + if ( + orig_inst.replace(".", "_") not in instr_dict + or 
include_pseudo + or name in include_pseudo_ops + ): + if name not in instr_dict: + instr_dict[name] = single_dict + logging.debug(f"Including pseudo_op: {name}") + else: + if single_dict["match"] != instr_dict[name]["match"]: + instr_dict[f"{name}_pseudo"] = single_dict + # TODO: This expression is always false since both sides are list[str]. + elif single_dict["extension"] not in instr_dict[name]["extension"]: # type: ignore + instr_dict[name]["extension"].extend(single_dict["extension"]) + + +# Integrate imported instructions into the instruction dictionary +def process_imported_instructions( + lines: "list[str]", instr_dict: InstrDict, file_name: str +): + """Processes imported instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if "$import" not in line: + continue + logging.debug(f"Processing imported line: {line}") + import_ext, reg_instr = imported_regex.findall(line)[0] + ext_file = read_extension_file(import_ext) + + validate_instruction_in_extension(reg_instr, ext_file, file_name, line) + + for oline in StringIO(ext_file): + if re.findall(f"^\\s*{reg_instr}\\s+", oline): + name, single_dict = process_enc_line(oline, file_name) + if name in instr_dict: + if instr_dict[name]["encoding"] != single_dict["encoding"]: + log_and_exit( + f"Imported instruction {name} from {os.path.basename(file_name)} has different encodings" + ) + instr_dict[name]["extension"].extend(single_dict["extension"]) + else: + instr_dict[name] = single_dict + break + + +def read_extension_file(ext: str) -> str: + """ + Read the extension file path, considering the unratified directory if necessary. + """ + file = resource_root() / "extensions" / ext + if file.is_file(): + return file.read_text(encoding="utf-8") + file = resource_root() / "extensions" / "unratified" / ext + if file.is_file(): + return file.read_text(encoding="utf-8") + + log_and_exit(f"Extension {ext} not found.") + + +# Confirm the presence of an original instruction in the corresponding extension file. +def validate_instruction_in_extension( + inst: str, ext_file: str, file_name: str, pseudo_inst: str +): + """Validates if the original instruction exists in the dependent extension.""" + found = False + + for oline in StringIO(ext_file): + if re.findall(f"^\\s*{inst}\\s+", oline): + found = True + break + if not found: + log_and_exit( + f"Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}" + ) + + +# Construct a dictionary of instructions filtered by specified criteria +def create_inst_dict( + file_filter: "list[str]", + include_pseudo: bool = False, + include_pseudo_ops: "Optional[list[str]]" = None, +) -> InstrDict: + """ + Creates a dictionary of instructions based on the provided file filters. + + This function return a dictionary containing all instructions associated + with an extension defined by the file_filter input. + Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc. + Each node of the dictionary will correspond to an instruction which again is + a dictionary. 
The dictionary contents of each instruction includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + reprsent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representin the bits that need to be masked to extract + the value required for matching. + In order to build this dictionary, the function does 2 passes over the same + rv<file_filter> file: + - First pass: extracts all standard instructions, skipping pseudo ops + and imported instructions. For each selected line, the `process_enc_line` + function is called to create the dictionary contents of the instruction. + Checks are performed to ensure that the same instruction is not added + twice to the overall dictionary. + - Second pass: parses only pseudo_ops. For each pseudo_op, the function: + - Checks if the dependent extension and instruction exist. + - Adds the pseudo_op to the dictionary if the dependent instruction + is not already present; otherwise, it is skipped. + """ + if include_pseudo_ops is None: + include_pseudo_ops = [] + + instr_dict: InstrDict = {} + + ratified_file_filters = [ + fil for fil in file_filter if not fil.startswith("unratified/") + ] + unratified_file_filters = [ + fil.removeprefix("unratified/") + for fil in file_filter + if fil.startswith("unratified/") + ] + + # Extension file name, "extensions[/unratified]/rv_foo". + file_names: list[str] = [] + + for file in (resource_root() / "extensions").iterdir(): + if file.is_file() and any( + fnmatch(file.name, fil) for fil in ratified_file_filters + ): + file_names.append("extensions/" + file.name) + for file in (resource_root() / "extensions" / "unratified").iterdir(): + if file.is_file() and any( + fnmatch(file.name, fil) for fil in unratified_file_filters + ): + file_names.append("extensions/unratified/" + file.name) + + logging.debug("Collecting standard instructions") + for file_name in file_names: + logging.debug(f"Parsing File: {file_name} for standard instructions") + lines = read_lines(file_name) + process_standard_instructions(lines, instr_dict, file_name) + + logging.debug("Collecting pseudo instructions") + for file_name in file_names: + logging.debug(f"Parsing File: {file_name} for pseudo instructions") + lines = read_lines(file_name) + process_pseudo_instructions( + lines, + instr_dict, + file_name, + include_pseudo, + include_pseudo_ops, + ) + + logging.debug("Collecting imported instructions") + + for file_name in file_names: + logging.debug(f"Parsing File: {file_name} for imported instructions") + lines = read_lines(file_name) + process_imported_instructions(lines, instr_dict, file_name) + + return instr_dict + + +# Extracts the extensions used in an instruction dictionary +def instr_dict_2_extensions(instr_dict: InstrDict) -> "list[str]": + return list({item["extension"][0] for item in instr_dict.values()}) + + +# Returns signed interpretation of a value within a given width +def signed(value: int, width: int) -> int: + return value if 0 <= value < (1 << (width - 1)) else value - (1 << width) diff --git a/src/riscv_opcodes/sverilog_utils.py b/src/riscv_opcodes/sverilog_utils.py new file mode 100644 index 0000000..c17be9f --- /dev/null +++ 
diff --git a/src/riscv_opcodes/sverilog_utils.py b/src/riscv_opcodes/sverilog_utils.py
new file mode 100644
index 0000000..c17be9f
--- /dev/null
+++ b/src/riscv_opcodes/sverilog_utils.py
@@ -0,0 +1,30 @@
+import logging
+import pprint
+from pathlib import Path
+
+from .constants import csrs, csrs32
+from .shared_utils import InstrDict
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_sverilog(instr_dict: InstrDict):
+    names_str = ""
+    for i in instr_dict:
+        names_str += f"  localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n"
+    names_str += "  /* CSR Addresses */\n"
+    for num, name in csrs + csrs32:
+        names_str += (
+            f"  localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n"
+        )
+
+    Path("inst.sverilog").write_text(
+        f"""
+/* Automatically generated by parse_opcodes */
+package riscv_instr;
+{names_str}
+endpackage
+""",
+        encoding="utf-8",
+    )
diff --git a/src/riscv_opcodes/svg_utils.py b/src/riscv_opcodes/svg_utils.py
new file mode 100644
index 0000000..4126ad6
--- /dev/null
+++ b/src/riscv_opcodes/svg_utils.py
@@ -0,0 +1,284 @@
+import logging
+import pprint
+from typing import Dict, List, NamedTuple
+
+from .rv_colors import palette
+from .shared_utils import InstrDict, instr_dict_2_extensions
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+class RectangleDimensions(NamedTuple):
+    x: float
+    y: float
+    w: float
+    h: float
+
+
+class InstrRectangle(NamedTuple):
+    dims: RectangleDimensions
+    extension: str
+    label: str
+
+
+InstrDimsDict = Dict[str, RectangleDimensions]
+
+
+def encoding_to_rect(encoding: str) -> RectangleDimensions:
+    """Convert a binary encoding string to rectangle dimensions."""
+
+    def calculate_size(free_bits: int, tick: float) -> float:
+        """Calculate the size based on the number of free bits and the tick value."""
+        return 2**free_bits * tick
+
+    instr_length = len(encoding)
+    # starting position
+    x = 0
+    y = 0
+    x_tick = 1 / (2 ** (0.5 * instr_length))
+    y_tick = 1 / (2 ** (0.5 * instr_length))
+    x_free_bits = 0
+    y_free_bits = 0
+    even = encoding[0::2]
+    odd = encoding[1::2]
+    # Process bits from most significant to least significant (index 0 of
+    # the encoding string is the most significant bit)
+    for i, bit in enumerate(encoding):
+        if bit == "1":
+            offset = 0.5 / (2 ** int(i / 2))
+            if i % 2 == 0:
+                y += offset
+            else:
+                x += offset
+        elif bit == "0":
+            # the position is not adjusted for a 0 bit
+            pass
+
+    x_free_bits = odd.count("-")
+    y_free_bits = even.count("-")
+    x_size = calculate_size(x_free_bits, x_tick)
+    y_size = calculate_size(y_free_bits, y_tick)
+
+    # At this point the encoding can be visualized as a single rectangle
+    rectangle = RectangleDimensions(x=x, y=y, w=x_size, h=y_size)
+    return rectangle
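+
+
+# A hand-worked example of encoding_to_rect (illustrative only; "1-" is a
+# hypothetical 2-bit encoding, not a real RISC-V one). Here x_tick and
+# y_tick are both 1 / 2**1 = 0.5, the leading '1' (even index, i == 0)
+# shifts y by 0.5, and the single free odd-index bit doubles the width:
+#
+#   encoding_to_rect("1-") == RectangleDimensions(x=0, y=0.5, w=1.0, h=0.5)
+#
+# i.e. this encoding occupies the top half of the unit square.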
+
+
+FIGSIZE = 128
+
+
+def plot_image(
+    instr_dict: InstrDict,
+    instr_dims_dict: InstrDimsDict,
+    extension_sizes: Dict[str, float],
+) -> None:
+    """Plot the instruction rectangles using matplotlib."""
+
+    from matplotlib import patches
+    from matplotlib import pyplot as plt
+
+    def get_readable_font_color(bg_hex: str) -> str:
+        """Determine a readable font color based on the background color."""
+
+        def hex_to_rgb(hex_color: str) -> tuple[int, int, int]:
+            """Convert a hex color string to an RGB tuple."""
+            hex_color = hex_color.lstrip("#")
+            r = int(hex_color[0:2], 16)
+            g = int(hex_color[2:4], 16)
+            b = int(hex_color[4:6], 16)
+
+            return (r, g, b)
+
+        r, g, b = hex_to_rgb(bg_hex)
+        # Perceived luminance (BT.601 weights); 186 is a common readability threshold
+        luminance = 0.299 * r + 0.587 * g + 0.114 * b
+        return "#000000" if luminance > 186 else "#FFFFFF"
+
+    def plot_with_matplotlib(
+        rectangles: list[InstrRectangle],
+        colors: list[str],
+        hatches: list[str],
+        extensions: list[str],
+    ) -> None:
+        """Plot rectangles with matplotlib using the specified styles."""
+
+        _, ax = plt.subplots(figsize=(FIGSIZE, FIGSIZE), facecolor="none")  # type: ignore
+        ax.set_facecolor("none")  # type: ignore
+        linewidth = FIGSIZE / 100
+        for dims, ext, label in rectangles:
+            x, y, w, h = dims
+            ext_idx = extensions.index(ext)
+            color = colors[ext_idx]
+            hatch = hatches[ext_idx]
+            rect = patches.Rectangle(
+                (x, y),
+                w,
+                h,
+                linewidth=linewidth,
+                edgecolor="black",
+                facecolor=color,
+                hatch=hatch,
+                alpha=1.0,
+            )
+            ax.add_patch(rect)
+
+            if w >= h:
+                base_dim = w
+                rotation = 0
+            else:
+                base_dim = h
+                rotation = 90
+
+            # Scale the font size based on the base dimension and the label length
+            n_chars = len(label)
+            font_size = (
+                base_dim / n_chars * 90 * FIGSIZE
+            )  # Adjust the scaling factor as needed
+            if font_size > 1:
+                fontdict = {
+                    "fontsize": font_size,
+                    "color": get_readable_font_color(color),
+                    "family": "DejaVu Sans Mono",
+                }
+                ax.text(  # type: ignore
+                    x + w / 2,
+                    y + h / 2,
+                    label,
+                    ha="center",
+                    va="center",
+                    fontdict=fontdict,
+                    rotation=rotation,
+                )
+
+        plt.axis("off")  # type: ignore
+        plt.tight_layout()  # type: ignore
+        plt.savefig("inst.svg", format="svg")  # type: ignore
+        plt.show()  # type: ignore
+
+    extensions: List[str] = sorted(
+        extension_sizes.keys(), key=lambda k: extension_sizes[k], reverse=True
+    )
+
+    rectangles: List[InstrRectangle] = []
+    for instr in instr_dict:
+        dims = instr_dims_dict[instr]
+        rectangles.append(
+            InstrRectangle(
+                dims=dims,
+                extension=instr_dict[instr]["extension"][0],
+                label=instr.replace("_", "."),
+            )
+        )
+
+    # Sort rectangles by area, largest first, so that small ones end up in the
+    # foreground. Overlaps occur e.g. for pseudo ops, which should be drawn on
+    # top of the encodings they reuse.
+    rectangles = sorted(rectangles, key=lambda x: x.dims.w * x.dims.h, reverse=True)
+
+    colors, hatches = generate_styles(extensions)
+
+    plot_with_matplotlib(rectangles, colors, hatches, extensions)
+
+
+def generate_styles(extensions: list[str]) -> tuple[list[str], list[str]]:
+    """Generate color and hatch styles for extensions."""
+    n_colors = len(palette)
+    colors = [""] * len(extensions)
+    hatches = [""] * len(extensions)
+    hatch_options = ["", "/", "\\", "|", "-", "+", "x", ".", "*"]
+    color_options = list(palette.values())
+
+    for i in range(len(extensions)):
+        colors[i] = color_options[i % n_colors]
+        hatches[i] = hatch_options[int(i / n_colors) % len(hatch_options)]
+
+    return colors, hatches
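+
+
+# Illustrative behaviour of generate_styles (the 4-colour palette size is
+# assumed for this example; the real size comes from rv_colors.palette):
+# extensions 0-3 get the four colours with no hatch, extension 4 wraps
+# around to colour 0 with hatch "/", extension 5 to colour 1 with "/", and
+# so on, so colour/hatch pairs stay distinct well beyond the palette size.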
+
+
+def defragment_encodings(
+    encodings: list[str], length: int = 32, offset: int = 0
+) -> list[str]:
+    """Defragment a list of binary encodings by reordering bits."""
+    # determine the bit position which has the most fixed bits
+    fixed_encodings = ["0", "1"]
+    fixed_bits = [0] * length
+    fixed_encoding_indices: Dict[str, List[int]] = {
+        value: [] for value in fixed_encodings
+    }
+    for index, encoding in enumerate(encodings):
+        for position, value in enumerate(encoding):
+            if position > offset:
+                if value != "-":
+                    fixed_bits[position] += 1
+
+    # find the bit position with the most fixed bits, starting with the LSB to favor the opcode field
+    max_fixed_bits = max(fixed_bits)
+    if max_fixed_bits == 0:
+        # fully defragmented
+        return encodings
+    max_fixed_position = len(fixed_bits) - 1 - fixed_bits[::-1].index(max_fixed_bits)
+
+    # move the bit position with the most fixed bits to the front
+    for index, encoding in enumerate(encodings):
+        encodings[index] = (
+            encoding[0:offset]
+            + encoding[max_fixed_position]
+            + encoding[offset:max_fixed_position]
+            + encoding[max_fixed_position + 1 :]
+        )
+
+        if encoding[max_fixed_position] in fixed_encodings:
+            fixed_encoding_indices[encoding[max_fixed_position]].append(index)
+        else:
+            # No more fixed bits in this encoding
+            pass
+
+    if offset < length:
+        # continue to defragment starting from the next offset
+        offset = offset + 1
+
+        # separate encodings
+        sep_encodings: Dict[str, List[str]] = {}
+        for fixed_encoding in fixed_encodings:
+            sep_encodings[fixed_encoding] = [
+                encodings[i] for i in fixed_encoding_indices[fixed_encoding]
+            ]
+            sep_encodings[fixed_encoding] = defragment_encodings(
+                sep_encodings[fixed_encoding], length=length, offset=offset
+            )
+
+            # join encodings
+            for new_index, orig_index in enumerate(
+                fixed_encoding_indices[fixed_encoding]
+            ):
+                encodings[orig_index] = sep_encodings[fixed_encoding][new_index]
+
+    return encodings
+
+
+def defragment_encoding_dict(instr_dict: InstrDict) -> InstrDict:
+    """Apply defragmentation to the encoding dictionary."""
+    encodings = [instr["encoding"] for instr in instr_dict.values()]
+    encodings_defragmented = defragment_encodings(encodings, length=32, offset=0)
+    # dicts preserve insertion order, so indices line up with the encodings list
+    for index, instr in enumerate(instr_dict):
+        instr_dict[instr]["encoding"] = encodings_defragmented[index]
+    return instr_dict
+
+
+def make_svg(instr_dict: InstrDict) -> None:
+    """Generate an SVG image from instruction encodings."""
+    extensions = instr_dict_2_extensions(instr_dict)
+    extension_size: Dict[str, float] = {}
+
+    instr_dict = defragment_encoding_dict(instr_dict)
+    instr_dims_dict: InstrDimsDict = {}
+
+    for ext in extensions:
+        extension_size[ext] = 0
+
+    for instr in instr_dict:
+        dims = encoding_to_rect(instr_dict[instr]["encoding"])
+
+        extension_size[instr_dict[instr]["extension"][0]] += dims.h * dims.w
+
+        instr_dims_dict[instr] = dims
+
+    plot_image(instr_dict, instr_dims_dict, extension_size)
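+
+
+# A hypothetical usage sketch (assuming create_inst_dict lives in
+# shared_utils, as the imports above suggest; the "rv_i" file filter is an
+# assumed example value):
+#
+#   from riscv_opcodes.shared_utils import create_inst_dict
+#   from riscv_opcodes.svg_utils import make_svg
+#
+#   # Writes inst.svg with one rectangle per (defragmented) encoding.
+#   make_svg(create_inst_dict(["rv_i"]))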