aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/riscv_opcodes/__init__.py2
-rw-r--r--src/riscv_opcodes/__main__.py10
-rw-r--r--src/riscv_opcodes/c_utils.py79
-rw-r--r--src/riscv_opcodes/chisel_utils.py82
-rw-r--r--src/riscv_opcodes/constants.py271
-rw-r--r--src/riscv_opcodes/go_utils.py64
-rw-r--r--src/riscv_opcodes/latex_utils.py450
-rw-r--r--src/riscv_opcodes/parse.py121
-rw-r--r--src/riscv_opcodes/resources.py39
-rw-r--r--src/riscv_opcodes/rust_utils.py28
-rw-r--r--src/riscv_opcodes/rv_colors.py12
-rw-r--r--src/riscv_opcodes/shared_utils.py641
-rw-r--r--src/riscv_opcodes/sverilog_utils.py30
-rw-r--r--src/riscv_opcodes/svg_utils.py284
14 files changed, 2113 insertions, 0 deletions
diff --git a/src/riscv_opcodes/__init__.py b/src/riscv_opcodes/__init__.py
new file mode 100644
index 0000000..e8fd9d4
--- /dev/null
+++ b/src/riscv_opcodes/__init__.py
@@ -0,0 +1,2 @@
+# Mark this directory as a package. This is not actually needed by
+# Python but Pylint gets confused about relative imports without it.
diff --git a/src/riscv_opcodes/__main__.py b/src/riscv_opcodes/__main__.py
new file mode 100644
index 0000000..456cddd
--- /dev/null
+++ b/src/riscv_opcodes/__main__.py
@@ -0,0 +1,10 @@
+"""
+This allows running as a module, i.e. `python3 -m riscv_opcodes` which
+we wouldn't normally need, but the `coverage` tool doesn't work on
+installed scripts - you can't do `coverage run riscv_opcodes` because it
+looks for a Python file called `riscv_opcodes` in the current directory.
+"""
+
+from .parse import main
+
+main()
diff --git a/src/riscv_opcodes/c_utils.py b/src/riscv_opcodes/c_utils.py
new file mode 100644
index 0000000..198a37f
--- /dev/null
+++ b/src/riscv_opcodes/c_utils.py
@@ -0,0 +1,79 @@
+import logging
+import os
+import pprint
+
+from .constants import causes, csrs, csrs32
+from .resources import read_text_resource
+from .shared_utils import InstrDict, arg_lut
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
def make_c(instr_dict: InstrDict):
    """
    Generate `encoding.out.h`, the C header used by riscv tools.

    The header wraps the static `encoding.h` resource and adds, per
    instruction, `MATCH_*`/`MASK_*` macros and `DECLARE_INSN` entries;
    per CSR, `CSR_*` macros and `DECLARE_CSR` entries; per exception
    cause, `CAUSE_*` macros and `DECLARE_CAUSE` entries; and one
    `INSN_FIELD_*` bit-mask per entry of `arg_lut`.
    """
    # MATCH/MASK #defines plus the DECLARE_INSN X-macro list.
    match_mask_lines = []
    declare_insn_lines = []
    for name, data in instr_dict.items():
        c_name = name.replace(".", "_")
        macro = c_name.upper()
        match_mask_lines.append(f"#define MATCH_{macro} {data['match']}\n")
        match_mask_lines.append(f"#define MASK_{macro} {data['mask']}\n")
        declare_insn_lines.append(f"DECLARE_INSN({c_name}, MATCH_{macro}, MASK_{macro})\n")
    mask_match_str = "".join(match_mask_lines)
    declare_insn_str = "".join(declare_insn_lines)

    # CSR numbers (common + RV32-only) and the DECLARE_CSR X-macro list.
    csr_lines = []
    declare_csr_lines = []
    for num, name in csrs + csrs32:
        csr_lines.append(f"#define CSR_{name.upper()} {hex(num)}\n")
        declare_csr_lines.append(f"DECLARE_CSR({name}, CSR_{name.upper()})\n")
    csr_names_str = "".join(csr_lines)
    declare_csr_str = "".join(declare_csr_lines)

    # Exception-cause numbers and the DECLARE_CAUSE X-macro list.
    cause_lines = []
    declare_cause_lines = []
    for num, name in causes:
        cause_macro = name.upper().replace(" ", "_")
        cause_lines.append(f"#define CAUSE_{cause_macro} {hex(num)}\n")
        declare_cause_lines.append(f'DECLARE_CAUSE("{name}", CAUSE_{cause_macro})\n')
    causes_str = "".join(cause_lines)
    declare_cause_str = "".join(declare_cause_lines)

    # One bit-mask per instruction field, built from its (msb, lsb) range.
    field_lines = []
    for name, (end, begin) in arg_lut.items():
        sanitized_name = name.replace(" ", "_").replace("=", "_eq_")
        mask = ((1 << (end - begin + 1)) - 1) << begin
        field_lines.append(f"#define INSN_FIELD_{sanitized_name.upper()} {hex(mask)}\n")
    arg_str = "".join(field_lines)

    enc_header = read_text_resource("encoding.h")

    # Record the generating commit in the file banner.
    commit = os.popen('git log -1 --format="format:%h"').read()

    output_str = f"""/* SPDX-License-Identifier: BSD-3-Clause */

/* Copyright (c) 2023 RISC-V International */

/*
 * This file is auto-generated by running 'make' in
 * https://github.com/riscv/riscv-opcodes ({commit})
 */

{enc_header}
/* Automatically generated by parse_opcodes. */
#ifndef RISCV_ENCODING_H
#define RISCV_ENCODING_H
{mask_match_str}
{csr_names_str}
{causes_str}
{arg_str}#endif
#ifdef DECLARE_INSN
{declare_insn_str}#endif
#ifdef DECLARE_CSR
{declare_csr_str}#endif
#ifdef DECLARE_CAUSE
{declare_cause_str}#endif
"""

    with open("encoding.out.h", "w", encoding="utf-8") as enc_file:
        enc_file.write(output_str)
diff --git a/src/riscv_opcodes/chisel_utils.py b/src/riscv_opcodes/chisel_utils.py
new file mode 100644
index 0000000..46cb0b6
--- /dev/null
+++ b/src/riscv_opcodes/chisel_utils.py
@@ -0,0 +1,82 @@
+import logging
+import pprint
+
+from .constants import causes, csrs, csrs32
+from .shared_utils import InstrDict, instr_dict_2_extensions
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
def make_chisel(instr_dict: InstrDict, spinal_hdl: bool = False):
    """
    Emit Scala constant definitions for the given instructions.

    Writes `inst.spinalhdl` when `spinal_hdl` is True, otherwise
    `inst.chisel`.  The generated file defines three objects:

    * `Instructions` - SpinalHDL masked literals (`M"b..."`), or Chisel
      `BitPat` maps grouped by extension,
    * `Causes`       - one value per exception cause plus an `all` array,
    * `CSRs`         - one value per CSR plus `all` and `all32` arrays.
    """

    chisel_names = ""
    cause_names_str = ""
    csr_names_str = ""
    for i in instr_dict:
        if spinal_hdl:
            # SpinalHDL masked literal; don't-care bits stay as '-'.
            # NOTE(review): replace("-","-") is a no-op, apparently kept
            # for symmetry with the BitPat variant below — confirm.
            chisel_names += f'  def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n'
        # else:
        #     chisel_names += f'  def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n'
    if not spinal_hdl:
        extensions = instr_dict_2_extensions(instr_dict)
        for e in extensions:
            # Derive the Scala identifier prefix from the extension name,
            # e.g. rv64_i -> I64, rv32_i -> I32, rv_i -> I.
            if "rv64_" in e:
                e_format = e.replace("rv64_", "").upper() + "64"
            elif "rv32_" in e:
                e_format = e.replace("rv32_", "").upper() + "32"
            elif "rv_" in e:
                e_format = e.replace("rv_", "").upper()
            else:
                e_format = e.upper()
            chisel_names += f'  val {e_format+"Type"} = Map(\n'
            for instr_name, instr in instr_dict.items():
                # Group each instruction under its primary extension only.
                if instr["extension"][0] == e:
                    tmp_instr_name = '"' + instr_name.upper().replace(".", "_") + '"'
                    chisel_names += f'    {tmp_instr_name:<18s} -> BitPat("b{instr["encoding"].replace("-","?")}"),\n'
            chisel_names += "  )\n"

    # Cause values, followed by an `all` array collecting every value.
    for num, name in causes:
        cause_names_str += f'  val {name.lower().replace(" ","_")} = {hex(num)}\n'
    cause_names_str += """  val all = {
    val res = collection.mutable.ArrayBuffer[Int]()
"""
    for num, name in causes:
        cause_names_str += f'    res += {name.lower().replace(" ","_")}\n'
    cause_names_str += """    res.toArray
  }"""

    # CSR values; `all` holds the common CSRs, `all32` additionally the
    # RV32-only ones.
    for num, name in csrs + csrs32:
        csr_names_str += f"  val {name} = {hex(num)}\n"
    csr_names_str += """  val all = {
    val res = collection.mutable.ArrayBuffer[Int]()
"""
    for num, name in csrs:
        csr_names_str += f"""    res += {name}\n"""
    csr_names_str += """    res.toArray
  }
  val all32 = {
    val res = collection.mutable.ArrayBuffer(all:_*)
"""
    for num, name in csrs32:
        csr_names_str += f"""    res += {name}\n"""
    csr_names_str += """    res.toArray
  }"""

    with open(
        "inst.spinalhdl" if spinal_hdl else "inst.chisel", "w", encoding="utf-8"
    ) as chisel_file:
        chisel_file.write(
            f"""
/* Automatically generated by parse_opcodes */
object Instructions {{
{chisel_names}
}}
object Causes {{
{cause_names_str}
}}
object CSRs {{
{csr_names_str}
}}
"""
        )
diff --git a/src/riscv_opcodes/constants.py b/src/riscv_opcodes/constants.py
new file mode 100644
index 0000000..fb67d70
--- /dev/null
+++ b/src/riscv_opcodes/constants.py
@@ -0,0 +1,271 @@
+import csv
+import re
+
+from .resources import open_text_resource
+
# TODO: The constants in this file should be in all caps.

# Extension pairs whose encodings intentionally overlap — presumably
# consulted by the encoding-conflict checks in shared_utils; confirm.
overlapping_extensions = {
    "rv_zcmt": {"rv_c_d"},
    "rv_zcmp": {"rv_c_d"},
    "rv_c": {"rv_zcmop"},
}

# Instruction pairs whose encodings intentionally overlap (e.g. c.nop is
# a special case of c.addi).
overlapping_instructions = {
    "c_addi": {"c_nop"},
    "c_lui": {"c_addi16sp"},
    "c_mv": {"c_jr"},
    "c_jalr": {"c_ebreak"},
    "c_add": {"c_ebreak", "c_jalr"},
}

# regex matching a complete ISA string: base (RV32/64/128 + letters)
# followed by at most one inline Z-extension and an ordered sequence of
# optional underscore-separated Z-extensions.
isa_regex = re.compile(
    "^RV(32|64|128)[IE]+[ABCDEFGHJKLMNPQSTUVX]*(Zicsr|Zifencei|Zihintpause|Zam|Ztso|Zkne|Zknd|Zknh|Zkse|Zksh|Zkg|Zkb|Zkr|Zks|Zkn|Zba|Zbc|Zbb|Zbp|Zbr|Zbm|Zbs|Zbe|Zbf|Zbt|Zmmul|Zbpbo|Zca|Zcf|Zcd|Zcb|Zcmp|Zcmt){,1}(_Zicsr){,1}(_Zifencei){,1}(_Zihintpause){,1}(_Zmmul){,1}(_Zam){,1}(_Zba){,1}(_Zbb){,1}(_Zbc){,1}(_Zbe){,1}(_Zbf){,1}(_Zbm){,1}(_Zbp){,1}(_Zbpbo){,1}(_Zbr){,1}(_Zbs){,1}(_Zbt){,1}(_Zkb){,1}(_Zkg){,1}(_Zkr){,1}(_Zks){,1}(_Zkn){,1}(_Zknd){,1}(_Zkne){,1}(_Zknh){,1}(_Zkse){,1}(_Zksh){,1}(_Ztso){,1}(_Zca){,1}(_Zcf){,1}(_Zcd){,1}(_Zcb){,1}(_Zcmp){,1}(_Zcmt){,1}$"
)

# regex to find <msb>..<lsb>=<val> patterns in instruction
fixed_ranges = re.compile(
    r"\s*(?P<msb>\d+.?)\.\.(?P<lsb>\d+.?)\s*=\s*(?P<val>\d[\w]*)[\s$]*", re.M
)

# regex to find <lsb>=<val> patterns in instructions
# single_fixed = re.compile('\s+(?P<lsb>\d+)=(?P<value>[\w\d]*)[\s$]*', re.M)
single_fixed = re.compile(r"(?:^|[\s])(?P<lsb>\d+)=(?P<value>[\w]*)((?=\s|$))", re.M)

# regex to find the overloading condition variable
var_regex = re.compile(r"(?P<var>[a-zA-Z][\w\d]*)\s*=\s*.*?[\s$]*", re.M)

# regex for pseudo op instructions returns the dependent filename, dependent
# instruction, the pseudo op name and the encoding string
pseudo_regex = re.compile(
    r"^\$pseudo_op\s+(?P<filename>rv[\d]*_[\w].*)::\s*(?P<orig_inst>.*?)\s+(?P<pseudo_inst>.*?)\s+(?P<overload>.*)$",
    re.M,
)

# regex for "$import <extension> :: <instruction>" lines.
imported_regex = re.compile(
    r"^\s*\$import\s*(?P<extension>.*)\s*::\s*(?P<instruction>.*)", re.M
)
+
+
def read_int_map_csv(filename: str) -> "list[tuple[int, str]]":
    """
    Parse a two-column CSV resource into (number, name) pairs.

    The first column is parsed as an integer with its base inferred from
    the text (so `0x...` hex values work); the second column is kept as
    a string.

    Args:
        filename (str): The name of the CSV resource to read.

    Returns:
        list of tuple: (int, str) pairs, one per CSV row.
    """
    with open_text_resource(filename) as stream:
        return [
            (int(row[0], 0), row[1])
            for row in csv.reader(stream, skipinitialspace=True)
        ]
+
+
# Exception-cause and CSR number tables, loaded from packaged CSV
# resources.  `csrs32` presumably holds RV32-only CSRs (it is emitted
# separately from `csrs` by the generators) — confirm against csrs32.csv.
causes = read_int_map_csv("causes.csv")
csrs = read_int_map_csv("csrs.csv")
csrs32 = read_int_map_csv("csrs32.csv")
+
+
def read_arg_lut_csv(filename: str) -> "dict[str, tuple[int, int]]":
    """
    Load the argument lookup table from a CSV resource.

    Each row is `name, msb, lsb`; the result maps the argument name to
    its (msb, lsb) bit positions within the instruction word.
    """
    lut: "dict[str, tuple[int, int]]" = {}
    with open_text_resource(filename) as stream:
        for row in csv.reader(stream, skipinitialspace=True):
            lut[row[0]] = (int(row[1]), int(row[2]))
    return lut
+
+
# Argument name -> (msb, lsb) bit positions, loaded from the packaged CSV.
arg_lut = read_arg_lut_csv("arg_lut.csv")

# for mop: extra fields for the May-Be-Operation (Zimop/Zcmop) encodings,
# which are not present in arg_lut.csv.
arg_lut["mop_r_t_30"] = (30, 30)
arg_lut["mop_r_t_27_26"] = (27, 26)
arg_lut["mop_r_t_21_20"] = (21, 20)
arg_lut["mop_rr_t_30"] = (30, 30)
arg_lut["mop_rr_t_27_26"] = (27, 26)
arg_lut["c_mop_t"] = (10, 8)

# dictionary containing the mapping of each argument to what the fields in
# the latex table should be
latex_mapping = {
    "imm12": "imm[11:0]",
    "rs1": "rs1",
    "rs2": "rs2",
    "rd": "rd",
    "imm20": "imm[31:12]",
    "bimm12hi": "imm[12$\\vert$10:5]",
    "bimm12lo": "imm[4:1$\\vert$11]",
    "imm12hi": "imm[11:5]",
    "imm12lo": "imm[4:0]",
    "jimm20": "imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]",
    "zimm": "uimm",
    "shamtw": "shamt",
    "shamtd": "shamt",
    "shamtq": "shamt",
    "rd_p": "rd\\,$'$",
    "rs1_p": "rs1\\,$'$",
    "rs2_p": "rs2\\,$'$",
    "rd_rs1_n0": "rd/rs$\\neq$0",
    "rd_rs1_p": "rs1\\,$'$/rs2\\,$'$",
    "c_rs2": "rs2",
    "c_rs2_n0": "rs2$\\neq$0",
    "rd_n0": "rd$\\neq$0",
    "rs1_n0": "rs1$\\neq$0",
    "c_rs1_n0": "rs1$\\neq$0",
    "rd_rs1": "rd/rs1",
    "zimm6hi": "uimm[5]",
    "zimm6lo": "uimm[4:0]",
    "c_nzuimm10": "nzuimm[5:4$\\vert$9:6$\\vert$2$\\vert$3]",
    "c_uimm7lo": "uimm[2$\\vert$6]",
    "c_uimm7hi": "uimm[5:3]",
    "c_uimm8lo": "uimm[7:6]",
    "c_uimm8hi": "uimm[5:3]",
    "c_uimm9lo": "uimm[7:6]",
    "c_uimm9hi": "uimm[5:4$\\vert$8]",
    "c_nzimm6lo": "nzimm[4:0]",
    "c_nzimm6hi": "nzimm[5]",
    "c_imm6lo": "imm[4:0]",
    "c_imm6hi": "imm[5]",
    "c_nzimm10hi": "nzimm[9]",
    "c_nzimm10lo": "nzimm[4$\\vert$6$\\vert$8:7$\\vert$5]",
    "c_nzimm18hi": "nzimm[17]",
    "c_nzimm18lo": "nzimm[16:12]",
    "c_imm12": "imm[11$\\vert$4$\\vert$9:8$\\vert$10$\\vert$6$\\vert$7$\\vert$3:1$\\vert$5]",
    "c_bimm9lo": "imm[7:6$\\vert$2:1$\\vert$5]",
    "c_bimm9hi": "imm[8$\\vert$4:3]",
    "c_nzuimm5": "nzuimm[4:0]",
    "c_nzuimm6lo": "nzuimm[4:0]",
    "c_nzuimm6hi": "nzuimm[5]",
    "c_uimm8splo": "uimm[4:2$\\vert$7:6]",
    "c_uimm8sphi": "uimm[5]",
    "c_uimm8sp_s": "uimm[5:2$\\vert$7:6]",
    "c_uimm10splo": "uimm[4$\\vert$9:6]",
    "c_uimm10sphi": "uimm[5]",
    "c_uimm9splo": "uimm[4:3$\\vert$8:6]",
    "c_uimm9sphi": "uimm[5]",
    "c_uimm10sp_s": "uimm[5:4$\\vert$9:6]",
    "c_uimm9sp_s": "uimm[5:3$\\vert$8:6]",
    "rd_p_e": "rd\\,$'$, even values only",
    "rs2_p_e": "rs2\\,$'$, even values only",
    "rd_n0_e": "rd$\\neq$0, even values only",
    "c_rs2_e": "rs2, even values only",
    "rd_e": "rd, even values only",
    "rs2_e": "rs2, even values only",
}
+
+
# created a dummy instruction-dictionary like dictionary for all the instruction
# types so that the same logic can be used to create their tables
latex_inst_type = {
    "R-type": {
        "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct7"],
    },
    "R4-type": {
        "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct2", "rs3"],
    },
    "I-type": {
        "variable_fields": ["opcode", "rd", "funct3", "rs1", "imm12"],
    },
    "S-type": {
        "variable_fields": ["opcode", "imm12lo", "funct3", "rs1", "rs2", "imm12hi"],
    },
    "B-type": {
        "variable_fields": ["opcode", "bimm12lo", "funct3", "rs1", "rs2", "bimm12hi"],
    },
    "U-type": {
        "variable_fields": ["opcode", "rd", "imm20"],
    },
    "J-type": {
        "variable_fields": ["opcode", "rd", "jimm20"],
    },
}
# (msb, lsb) boundaries of the standard 32-bit instruction fields
# (funct7 | rs2 | rs1 | funct3 | rd | opcode), used to split latex rows.
latex_fixed_fields = [
    (31, 25),
    (24, 20),
    (19, 15),
    (14, 12),
    (11, 7),
    (6, 0),
]

# Pseudo-ops present in the generated encodings.
# By default pseudo-ops are not listed as they are considered aliases
# of their base instruction.
emitted_pseudo_ops = [
    "pause",
    "prefetch_i",
    "prefetch_r",
    "prefetch_w",
    "rstsa16",
    "rstsa32",
    "srli32_u",
    "slli_rv32",
    "srai_rv32",
    "srli_rv32",
    "umax32",
    "c_mop_1",
    "c_sspush_x1",
    "c_mop_3",
    "c_mop_5",
    "c_sspopchk_x5",
    "c_mop_7",
    "c_mop_9",
    "c_mop_11",
    "c_mop_13",
    "c_mop_15",
    "mop_r_0",
    "mop_r_1",
    "mop_r_2",
    "mop_r_3",
    "mop_r_4",
    "mop_r_5",
    "mop_r_6",
    "mop_r_7",
    "mop_r_8",
    "mop_r_9",
    "mop_r_10",
    "mop_r_11",
    "mop_r_12",
    "mop_r_13",
    "mop_r_14",
    "mop_r_15",
    "mop_r_16",
    "mop_r_17",
    "mop_r_18",
    "mop_r_19",
    "mop_r_20",
    "mop_r_21",
    "mop_r_22",
    "mop_r_23",
    "mop_r_24",
    "mop_r_25",
    "mop_r_26",
    "mop_r_27",
    "mop_r_28",
    "sspopchk_x1",
    "sspopchk_x5",
    "ssrdp",
    "mop_r_29",
    "mop_r_30",
    "mop_r_31",
    "mop_r_32",
    "mop_rr_0",
    "mop_rr_1",
    "mop_rr_2",
    "mop_rr_3",
    "mop_rr_4",
    "mop_rr_5",
    "mop_rr_6",
    "mop_rr_7",
    "sspush_x1",
    "sspush_x5",
    "lpad",
    "bclri.rv32",
    "bexti.rv32",
    "binvi.rv32",
    "bseti.rv32",
    "zext.h.rv32",
    "rev8.h.rv32",
    "rori.rv32",
]
diff --git a/src/riscv_opcodes/go_utils.py b/src/riscv_opcodes/go_utils.py
new file mode 100644
index 0000000..1a6fc33
--- /dev/null
+++ b/src/riscv_opcodes/go_utils.py
@@ -0,0 +1,64 @@
+import logging
+import pprint
+import sys
+
+from .constants import csrs
+from .shared_utils import InstrDict, signed
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
def make_go(instr_dict: InstrDict):
    """
    Generate `inst.go` for the Go assembler.

    The file contains an `encode` switch that maps each instruction's
    `obj.As` constant to its fixed encoding fields (opcode, funct3, rs1,
    rs2, csr, funct7), followed by a map from CSR number to CSR name.
    """

    invocation = " ".join(sys.argv)
    prelude = f"""// Code generated by {invocation}; DO NOT EDIT."""

    prelude += """
package riscv

import "cmd/internal/obj"

type inst struct {
	opcode uint32
	funct3 uint32
	rs1 uint32
	rs2 uint32
	csr int64
	funct7 uint32
}

func encode(a obj.As) *inst {
	switch a {
"""

    csrs_map_str = """	}
	return nil
}

var csrs = map[uint16]string {
"""

    endoffile = """}
"""

    # One `case` per instruction: slice the fixed fields out of the
    # match value at their standard bit positions.
    case_lines = []
    for name, data in instr_dict.items():
        match_value = int(data["match"], 0)
        opcode = (match_value >> 0) & ((1 << 7) - 1)
        funct3 = (match_value >> 12) & ((1 << 3) - 1)
        rs1 = (match_value >> 15) & ((1 << 5) - 1)
        rs2 = (match_value >> 20) & ((1 << 5) - 1)
        csr = (match_value >> 20) & ((1 << 12) - 1)
        funct7 = (match_value >> 25) & ((1 << 7) - 1)
        case_lines.append(
            f"""	case A{name.upper().replace("_","")}:
		return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }}
"""
        )
    instr_str = "".join(case_lines)

    # CSR name table, sorted by CSR number.
    for num, name in sorted(csrs, key=lambda entry: entry[0]):
        csrs_map_str += f'{hex(num)} : "{name.upper()}",\n'

    with open("inst.go", "w", encoding="utf-8") as out:
        out.write(prelude + instr_str + csrs_map_str + endoffile)
diff --git a/src/riscv_opcodes/latex_utils.py b/src/riscv_opcodes/latex_utils.py
new file mode 100644
index 0000000..38f92f8
--- /dev/null
+++ b/src/riscv_opcodes/latex_utils.py
@@ -0,0 +1,450 @@
+import logging
+import pprint
+from typing import TextIO
+
+from .constants import latex_fixed_fields, latex_inst_type, latex_mapping
+from .shared_utils import InstrDict, arg_lut, create_inst_dict
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
def make_priv_latex_table():
    """
    Write `priv-instr-table.tex`, the LaTeX encoding table of the
    privileged instructions (trap-return, interrupt management,
    memory management, and hypervisor load/store groups).
    """
    type_list = ["R-type", "I-type"]
    # Every dataset row draws from the same collection of privileged
    # extension files.
    system_instr = ["_h", "_s", "_system", "_svinval", "64_h", "_svinval_h"]
    dataset_list = [
        (system_instr, "Trap-Return Instructions", ["sret", "mret"], False),
        (system_instr, "Interrupt-Management Instructions", ["wfi"], False),
        (
            system_instr,
            "Supervisor Memory-Management Instructions",
            ["sfence_vma"],
            False,
        ),
        (
            system_instr,
            "Hypervisor Memory-Management Instructions",
            ["hfence_vvma", "hfence_gvma"],
            False,
        ),
        (
            system_instr,
            "Hypervisor Virtual-Machine Load and Store Instructions",
            [
                "hlv_b",
                "hlv_bu",
                "hlv_h",
                "hlv_hu",
                "hlv_w",
                "hlvx_hu",
                "hlvx_wu",
                "hsv_b",
                "hsv_h",
                "hsv_w",
            ],
            False,
        ),
        (
            system_instr,
            "Hypervisor Virtual-Machine Load and Store Instructions, RV64 only",
            ["hlv_wu", "hlv_d", "hsv_d"],
            False,
        ),
        (
            system_instr,
            "Svinval Memory-Management Instructions",
            [
                "sinval_vma",
                "sfence_w_inval",
                "sfence_inval_ir",
                "hinval_vvma",
                "hinval_gvma",
            ],
            False,
        ),
    ]
    caption = "\\caption{RISC-V Privileged Instructions}"
    with open("priv-instr-table.tex", "w", encoding="utf-8") as latex_file:
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+
def make_latex_table():
    """
    This function is meant to create the instr-table.tex that is used by
    the riscv-isa-manual. It creates a single latex file containing
    multiple tables, with each table limited to a single page. Only the
    last table is assigned a latex caption.

    For each table we assign a type-list which captures the different
    instruction types (R, I, B, etc) that will be required for the table.
    Then we select the list of extensions ('_i', '32_i', etc) whose
    instructions are required to populate the table. For each extension or
    collection of extensions we can assign a title, such that in the end
    they appear as subheadings within the table (note these are inlined
    headings and not captions of the table).

    All of the above information is collected/created and sent to the
    make_ext_latex_table function to dump out the latex contents into a
    file.

    The last table only has to be given a caption - as per the policy of
    the riscv-isa-manual.
    """
    # open the file and use it as a pointer for all further dumps
    with open("instr-table.tex", "w", encoding="utf-8") as latex_file:

        # create the rv32i table first. Here we set the caption to empty. We use the
        # files rv_i and rv32_i to capture instructions relevant for rv32i
        # configuration. The dataset is a list of 4-element tuples:
        # (list_of_extensions, title, list_of_instructions, include_pseudo_ops).
        # If list_of_instructions is empty then it indicates that all
        # instructions of all the extensions in list_of_extensions need to be
        # dumped. If not empty, then only the instructions listed in
        # list_of_instructions will be dumped into latex.
        caption = ""
        type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"]
        dataset_list: list[tuple[list[str], str, list[str], bool]] = [
            (["_i", "32_i"], "RV32I Base Instruction Set", [], False)
        ]
        dataset_list.append((["_i"], "", ["fence_tso", "pause"], True))
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ["R-type", "I-type", "S-type"]
        dataset_list = [
            (["64_i"], "RV64I Base Instruction Set (in addition to RV32I)", [], False)
        ]
        dataset_list.append(
            (["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False)
        )
        dataset_list.append(
            (["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False)
        )
        dataset_list.append((["_m", "32_m"], "RV32M Standard Extension", [], False))
        dataset_list.append(
            (["64_m"], "RV64M Standard Extension (in addition to RV32M)", [], False)
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ["R-type"]
        dataset_list = [(["_a"], "RV32A Standard Extension", [], False)]
        dataset_list.append(
            (["64_a"], "RV64A Standard Extension (in addition to RV32A)", [], False)
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ["R-type", "R4-type", "I-type", "S-type"]
        dataset_list = [(["_f"], "RV32F Standard Extension", [], False)]
        dataset_list.append(
            (["64_f"], "RV64F Standard Extension (in addition to RV32F)", [], False)
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ["R-type", "R4-type", "I-type", "S-type"]
        dataset_list = [(["_d"], "RV32D Standard Extension", [], False)]
        dataset_list.append(
            (["64_d"], "RV64D Standard Extension (in addition to RV32D)", [], False)
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ["R-type", "R4-type", "I-type", "S-type"]
        dataset_list = [(["_q"], "RV32Q Standard Extension", [], False)]
        dataset_list.append(
            (["64_q"], "RV64Q Standard Extension (in addition to RV32Q)", [], False)
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        # only the final table carries the overall caption
        caption = "\\caption{Instruction listing for RISC-V}"
        type_list = ["R-type", "R4-type", "I-type", "S-type"]
        dataset_list = [
            (["_zfh", "_d_zfh", "_q_zfh"], "RV32Zfh Standard Extension", [], False)
        ]
        dataset_list.append(
            (
                ["64_zfh"],
                "RV64Zfh Standard Extension (in addition to RV32Zfh)",
                [],
                False,
            )
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        ## The following is a demo to show that Compressed instructions can also be
        # dumped in the same manner as above

        # type_list = ['']
        # dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])]
        # dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', []))
        # make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption)
+
+
def make_ext_latex_table(
    type_list: "list[str]",
    dataset: "list[tuple[list[str], str, list[str], bool]]",
    latex_file: TextIO,
    ilen: int,
    caption: str,
):
    """
    For a given collection of extensions this function dumps out a complete
    latex table which includes the encodings of the instructions.

    The ilen input indicates the length of the instruction for which the table
    is created.

    The caption input is used to create the latex-table caption.

    The type_list input is a list of instruction types (R, I, B, etc) that are
    treated as header for each table. Each table will have its own requirements
    and type_list must include all the instruction-types that the table needs.
    Note, all elements of this list must be present in the latex_inst_type
    dictionary defined in constants.py

    The latex_file is a file pointer to which the latex table will be dumped.

    The dataset is a list of 4-element tuples containing:
    (list_of_extensions, title, list_of_instructions, include_pseudo_ops)
    The list_of_extensions must contain all the set of extensions whose
    instructions must be populated under a given title. If list_of_instructions
    is not empty, then only those instructions mentioned in list_of_instructions
    present in the extension will be dumped into the latex table, other
    instructions will be ignored.

    Once the above inputs are received the function first creates table entries
    for the instruction types. To simplify things, we maintain a dictionary
    called latex_inst_type in constants.py which is created in the same way the
    instruction dictionary is created. This allows us to re-use the same logic
    to create the instruction types table as well

    Once the header is created, we then parse through every entry in the
    dataset. For each dataset entry we use the create_inst_dict function to
    create an exhaustive list of instructions associated with the respective
    collection of the extension of that dataset. Then we apply the instruction
    filter, if any, indicated by the list_of_instructions of that dataset.
    Thereon, for each instruction we create a latex table entry.

    Latex table specification for ilen sized instructions:
    Each table is created with ilen+1 columns - ilen columns for each bit of the
    instruction and one column to hold the name of the instruction.

    For each argument of an instruction we use the arg_lut from constants.py
    to identify its position in the encoding, and thus create a multicolumn
    entry with the name of the argument as the data. For hardcoded bits, we
    do the same where we capture a string of continuous 1s and 0s, identify
    the position and assign the same string as the data of the
    multicolumn entry in the table.

    """
    # one (very narrow) column per instruction bit, plus one for the name
    column_size = "".join(["p{0.002in}"] * (ilen + 1))

    # bit-position ruler row for the table header; the 32-bit ruler groups
    # positions at the standard field boundaries, the 16-bit one labels
    # every bit.
    type_entries = (
        """
    \\multicolumn{3}{l}{31} &
    \\multicolumn{2}{r}{27} &
    \\multicolumn{1}{c}{26} &
    \\multicolumn{1}{r}{25} &
    \\multicolumn{3}{l}{24} &
    \\multicolumn{2}{r}{20} &
    \\multicolumn{3}{l}{19} &
    \\multicolumn{2}{r}{15} &
    \\multicolumn{2}{l}{14} &
    \\multicolumn{1}{r}{12} &
    \\multicolumn{4}{l}{11} &
    \\multicolumn{1}{r}{7} &
    \\multicolumn{6}{l}{6} &
    \\multicolumn{1}{r}{0} \\\\
    \\cline{2-33}\n&\n\n
"""
        if ilen == 32
        else """
    \\multicolumn{1}{c}{15} &
    \\multicolumn{1}{c}{14} &
    \\multicolumn{1}{c}{13} &
    \\multicolumn{1}{c}{12} &
    \\multicolumn{1}{c}{11} &
    \\multicolumn{1}{c}{10} &
    \\multicolumn{1}{c}{9} &
    \\multicolumn{1}{c}{8} &
    \\multicolumn{1}{c}{7} &
    \\multicolumn{1}{c}{6} &
    \\multicolumn{1}{c}{5} &
    \\multicolumn{1}{c}{4} &
    \\multicolumn{1}{c}{3} &
    \\multicolumn{1}{c}{2} &
    \\multicolumn{1}{c}{1} &
    \\multicolumn{1}{c}{0} \\\\
    \\cline{2-17}\n&\n\n
"""
    )

    # depending on the type_list input we create a subset dictionary of
    # latex_inst_type dictionary present in constants.py
    type_dict = {
        key: value for key, value in latex_inst_type.items() if key in type_list
    }

    # iterate over each instruction type and create a table entry
    for t in type_dict:
        fields: list[tuple[int, int, str]] = []

        # first capture all "arguments" of the type (funct3, funct7, rd, etc)
        # and capture their positions using arg_lut.
        for f in type_dict[t]["variable_fields"]:
            (msb, lsb) = arg_lut[f]
            name = f if f not in latex_mapping else latex_mapping[f]
            fields.append((msb, lsb, name))

        # iterate through the 32 bits, starting from the msb, and assign
        # argument names to the relevant portions of the instructions. This
        # information is stored as a 3-element tuple containing the msb, lsb
        # position of the argument and the name of the argument.
        msb = ilen - 1
        y = ""
        for r in range(0, ilen):
            if y != "":
                fields.append((msb, ilen - 1 - r + 1, y))
                y = ""
            msb = ilen - 1 - r - 1
            # NOTE(review): this end-of-word flush only fires for 32-bit
            # tables (r == 31); for ilen == 16 it is dead code. The
            # instruction loop below uses `r == ilen - 1` instead — and in
            # this loop `y` is never extended, so the flush never triggers
            # anyway. Confirm whether this branch is intentional.
            if r == 31:
                if y != "":
                    fields.append((msb, 0, y))
                    y = ""

        # sort the arguments in decreasing order of msb position
        fields.sort(key=lambda y: y[0], reverse=True)

        # for each argument/string of 1s or 0s, create a multicolumn latex table
        # entry
        entry = ""
        for r, (msb, lsb, name) in enumerate(fields):
            if r == len(fields) - 1:
                entry += (
                    f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n"
                )
            elif r == 0:
                entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n"
            else:
                entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n"
        entry += f"\\cline{{2-{ilen+1}}}\n&\n\n"
        type_entries += entry

    # for each entry in the dataset create a table
    content = ""
    for ext_list, title, filter_list, include_pseudo in dataset:
        instr_dict: InstrDict = {}

        # for all extensions list in ext_list, create a dictionary of
        # instructions associated with those extensions.
        for e in ext_list:
            instr_dict.update(create_inst_dict(["rv" + e], include_pseudo))

        # if filter_list is not empty then use that as the official set of
        # instructions that need to be dumped into the latex table
        inst_list = list(instr_dict.keys()) if not filter_list else filter_list

        # for each instruction create a latex table entry just like how we did
        # above with the instruction-type table.
        instr_entries = ""
        for inst in inst_list:
            if inst not in instr_dict:
                logging.error(
                    f"in make_ext_latex_table: Instruction: {inst} not found in instr_dict"
                )
                raise SystemExit(1)
            fields = []

            # only if the argument is available in arg_lut we consume it, else
            # throw error.
            for f in instr_dict[inst]["variable_fields"]:
                if f not in arg_lut:
                    logging.error(
                        f"Found variable {f} in instruction {inst} whose mapping is not available"
                    )
                    raise SystemExit(1)
                (msb, lsb) = arg_lut[f]
                name = (
                    f.replace("_", ".") if f not in latex_mapping else latex_mapping[f]
                )
                fields.append((msb, lsb, name))

            msb = ilen - 1
            y = ""
            # 16-bit encodings are stored right-aligned in a 32-char string;
            # keep only the low half.
            if ilen == 16:
                encoding = instr_dict[inst]["encoding"][16:]
            else:
                encoding = instr_dict[inst]["encoding"]
            for r in range(0, ilen):
                x = encoding[r]
                # split accumulated hardcoded bits at the standard field
                # boundaries so fixed bits line up with the type header
                if (msb, ilen - 1 - r + 1) in latex_fixed_fields:
                    fields.append((msb, ilen - 1 - r + 1, y))
                    msb = ilen - 1 - r
                    y = ""
                if x == "-":
                    # a don't-care bit ends the current run of fixed bits
                    if y != "":
                        fields.append((msb, ilen - 1 - r + 1, y))
                        y = ""
                    msb = ilen - 1 - r - 1
                else:
                    y += str(x)
                # flush any trailing run of fixed bits at the last position
                if r == ilen - 1:
                    if y != "":
                        fields.append((msb, 0, y))
                        y = ""

            fields.sort(key=lambda y: y[0], reverse=True)
            entry = ""
            for r, (msb, lsb, name) in enumerate(fields):
                if r == len(fields) - 1:
                    entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n'
                elif r == 0:
                    entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n"
                else:
                    entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n"
            entry += f"\\cline{{2-{ilen+1}}}\n&\n\n"
            instr_entries += entry

        # once an entry of the dataset is completed we create the whole table
        # with the title of that dataset as sub-heading (sort-of)
        if title != "":
            content += f"""

\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\
\\multicolumn{{{ilen}}}{{c}}{{\\bfseries {title} }} & \\\\
\\cline{{2-{ilen+1}}}

 &
{instr_entries}
"""
        else:
            content += f"""
{instr_entries}
"""

    header = f"""
\\newpage

\\begin{{table}}[p]
\\begin{{small}}
\\begin{{center}}
    \\begin{{tabular}} {{{column_size}l}}
    {" ".join(['&']*ilen)} \\\\

    &
{type_entries}
"""
    endtable = f"""

\\end{{tabular}}
\\end{{center}}
\\end{{small}}
{caption}
\\end{{table}}
"""
    # dump the contents and return
    latex_file.write(header + content + endtable)
diff --git a/src/riscv_opcodes/parse.py b/src/riscv_opcodes/parse.py
new file mode 100644
index 0000000..d78f232
--- /dev/null
+++ b/src/riscv_opcodes/parse.py
@@ -0,0 +1,121 @@
+import argparse
+import json
+import logging
+import pprint
+
+from .c_utils import make_c
+from .chisel_utils import make_chisel
+from .constants import emitted_pseudo_ops
+from .go_utils import make_go
+from .latex_utils import make_latex_table, make_priv_latex_table
+from .rust_utils import make_rust
+from .shared_utils import add_segmented_vls_insn, create_inst_dict
+from .sverilog_utils import make_sverilog
+from .svg_utils import make_svg
+
+LOG_FORMAT = "%(levelname)s:: %(message)s"
+LOG_LEVEL = logging.INFO
+
+pretty_printer = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)
+
+
+def generate_extensions(
+    extensions: list[str],
+    include_pseudo: bool,
+    c: bool,
+    chisel: bool,
+    spinalhdl: bool,
+    sverilog: bool,
+    rust: bool,
+    go: bool,
+    latex: bool,
+    svg: bool,
+):
+    """
+    Build the instruction dictionary for `extensions` and run every
+    generator whose boolean flag is set. Always writes
+    `instr_dict.json` (with segmented vector loads/stores expanded)
+    to the current directory as a side effect.
+    """
+    instr_dict = create_inst_dict(extensions, include_pseudo)
+    instr_dict = dict(sorted(instr_dict.items()))
+    instr_dict_with_segment = add_segmented_vls_insn(instr_dict)
+
+    with open("instr_dict.json", "w", encoding="utf-8") as outfile:
+        json.dump(instr_dict_with_segment, outfile, indent=2)
+
+    if c:
+        # The C header additionally includes a fixed list of pseudo-ops.
+        instr_dict_c = create_inst_dict(
+            extensions, False, include_pseudo_ops=emitted_pseudo_ops
+        )
+        instr_dict_c = dict(sorted(instr_dict_c.items()))
+        make_c(instr_dict_c)
+        logging.info("encoding.out.h generated successfully")
+
+    if chisel:
+        make_chisel(instr_dict)
+        logging.info("inst.chisel generated successfully")
+
+    if spinalhdl:
+        # SpinalHDL output reuses the Chisel generator in SpinalHDL mode.
+        make_chisel(instr_dict, True)
+        logging.info("inst.spinalhdl generated successfully")
+
+    if sverilog:
+        make_sverilog(instr_dict)
+        logging.info("inst.sverilog generated successfully")
+
+    if rust:
+        make_rust(instr_dict)
+        logging.info("inst.rs generated successfully")
+
+    if go:
+        # Go output uses the segment-expanded dictionary.
+        make_go(instr_dict_with_segment)
+        logging.info("inst.go generated successfully")
+
+    if latex:
+        make_latex_table()
+        logging.info("instr-table.tex generated successfully")
+        make_priv_latex_table()
+        logging.info("priv-instr-table.tex generated successfully")
+
+    if svg:
+        make_svg(instr_dict)
+        logging.info("inst.svg generated successfully")
+
+
+def main():
+    """Parse command-line flags and invoke `generate_extensions`."""
+    parser = argparse.ArgumentParser(description="Generate RISC-V constants headers")
+    parser.add_argument(
+        "-pseudo", action="store_true", help="Include pseudo-instructions"
+    )
+    parser.add_argument("-c", action="store_true", help="Generate output for C")
+    parser.add_argument(
+        "-chisel", action="store_true", help="Generate output for Chisel"
+    )
+    parser.add_argument(
+        "-spinalhdl", action="store_true", help="Generate output for SpinalHDL"
+    )
+    parser.add_argument(
+        "-sverilog", action="store_true", help="Generate output for SystemVerilog"
+    )
+    parser.add_argument("-rust", action="store_true", help="Generate output for Rust")
+    parser.add_argument("-go", action="store_true", help="Generate output for Go")
+    parser.add_argument("-latex", action="store_true", help="Generate output for Latex")
+    parser.add_argument("-svg", action="store_true", help="Generate .svg output")
+    parser.add_argument(
+        "extensions",
+        nargs="*",
+        help="Extensions to use. This is a glob of the rv_.. files, e.g. 'rv*' will give all extensions.",
+    )
+
+    args = parser.parse_args()
+
+    print(f"Extensions selected : {args.extensions}")
+
+    generate_extensions(
+        args.extensions,
+        args.pseudo,
+        args.c,
+        args.chisel,
+        args.spinalhdl,
+        args.sverilog,
+        args.rust,
+        args.go,
+        args.latex,
+        args.svg,
+    )
diff --git a/src/riscv_opcodes/resources.py b/src/riscv_opcodes/resources.py
new file mode 100644
index 0000000..e9398ec
--- /dev/null
+++ b/src/riscv_opcodes/resources.py
@@ -0,0 +1,39 @@
+import sys
+from importlib.resources import files
+from typing import IO
+
+if sys.version_info < (3, 12):
+ # This was deprecated in Python 3.12.
+ from importlib.abc import Traversable
+else:
+ from importlib.resources.abc import Traversable
+
+
+def resource_root() -> Traversable:
+ """
+ Return the root directory as a traversable that can
+ be used to load the `extensions`, `*.csv` and `encoding.h`
+ files. For historical reasons these are not stored inside
+ the `src/riscv_opcodes` directory in the source distribution
+ but they are moved there when generating the binary wheel.
+ This means we need to check in both places.
+ """
+ assert __package__ is not None
+ package_root = files(__package__)
+ if (package_root / "extensions").is_dir():
+ return package_root
+ return package_root / ".." / ".."
+
+
+def read_text_resource(path_relative_to_root: str) -> str:
+ """
+ Read a text file relative to the root of this repo.
+ """
+ return resource_root().joinpath(path_relative_to_root).read_text(encoding="utf-8")
+
+
+def open_text_resource(path_relative_to_root: str) -> IO[str]:
+ """
+ Open a text file relative to the root of this repo.
+ """
+ return resource_root().joinpath(path_relative_to_root).open("r", encoding="utf-8")
diff --git a/src/riscv_opcodes/rust_utils.py b/src/riscv_opcodes/rust_utils.py
new file mode 100644
index 0000000..74e17eb
--- /dev/null
+++ b/src/riscv_opcodes/rust_utils.py
@@ -0,0 +1,28 @@
+import logging
+import pprint
+
+from .constants import causes, csrs, csrs32
+from .shared_utils import InstrDict
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_rust(instr_dict: InstrDict):
+    """
+    Write `inst.rs`: MATCH_*/MASK_* u32 constants for every instruction,
+    CSR_* address constants, and CAUSE_* trap-cause constants.
+    """
+    mask_match_str = ""
+    for i in instr_dict:
+        mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n'
+        mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n'
+    for num, name in csrs + csrs32:
+        mask_match_str += f"const CSR_{name.upper()}: u16 = {hex(num)};\n"
+    for num, name in causes:
+        mask_match_str += (
+            f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n'
+        )
+    with open("inst.rs", "w", encoding="utf-8") as rust_file:
+        rust_file.write(
+            f"""
+/* Automatically generated by parse_opcodes */
+{mask_match_str}
+"""
+        )
diff --git a/src/riscv_opcodes/rv_colors.py b/src/riscv_opcodes/rv_colors.py
new file mode 100644
index 0000000..76e53a5
--- /dev/null
+++ b/src/riscv_opcodes/rv_colors.py
@@ -0,0 +1,12 @@
+palette = {
+ "Berkeley Blue": "#003262",
+ "California Gold": "#FDB515",
+ "Dark Blue": "#011e41",
+ "Teal": "#0a6b7c",
+ "Magenta": "#cb007b",
+ "Purple": "#60269e",
+ "Light Gold": "#fdda64",
+ "Light Teal": "#62cbc9",
+ "Pink": "#fe9bb1",
+ "Lavender": "#c2a6e1",
+}
diff --git a/src/riscv_opcodes/shared_utils.py b/src/riscv_opcodes/shared_utils.py
new file mode 100644
index 0000000..3a1a3bc
--- /dev/null
+++ b/src/riscv_opcodes/shared_utils.py
@@ -0,0 +1,641 @@
+import copy
+import logging
+import os
+import pprint
+import re
+from fnmatch import fnmatch
+from io import StringIO
+from itertools import chain
+from typing import Dict, NoReturn, Optional, TypedDict
+
+from .constants import (
+ arg_lut,
+ fixed_ranges,
+ imported_regex,
+ overlapping_extensions,
+ overlapping_instructions,
+ pseudo_regex,
+ single_fixed,
+)
+from .resources import open_text_resource, resource_root
+
+LOG_FORMAT = "%(levelname)s:: %(message)s"
+LOG_LEVEL = logging.INFO
+
+pretty_printer = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)
+
+
+# Log an error message
+def log_and_exit(message: str) -> NoReturn:
+ """Log an error message and exit the program."""
+ logging.error(message)
+ raise SystemExit(1)
+
+
+# Initialize encoding to 32-bit '-' values
+def initialize_encoding(bits: int = 32) -> "list[str]":
+ """Initialize encoding with '-' to represent don't care bits."""
+ return ["-"] * bits
+
+
+# Validate bit range and value
+def validate_bit_range(msb: int, lsb: int, entry_value: int, line: str):
+ """Validate the bit range and entry value."""
+ if msb < lsb:
+ log_and_exit(
+ f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in its encoding'
+ )
+
+ if entry_value >= (1 << (msb - lsb + 1)):
+ log_and_exit(
+ f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb}'
+ )
+
+
+# Split the instruction line into name and remaining part
+def parse_instruction_line(line: str) -> "tuple[str, str]":
+ """Parse the instruction name and the remaining encoding details."""
+ name, remaining = line.replace("\t", " ").split(" ", 1)
+ name = name.replace(".", "_") # Replace dots for compatibility
+ remaining = remaining.lstrip() # Remove leading whitespace
+ return name, remaining
+
+
+# Verify Overlapping Bits
+def check_overlapping_bits(encoding: "list[str]", ind: int, line: str):
+ """Check for overlapping bits in the encoding."""
+ if encoding[31 - ind] != "-":
+ log_and_exit(
+ f'{line.split(" ")[0]:<10} has {ind} bit overlapping in its opcodes'
+ )
+
+
+# Update encoding for fixed ranges
+def update_encoding_for_fixed_range(
+ encoding: "list[str]", msb: int, lsb: int, entry_value: int, line: str
+):
+ """
+ Update encoding bits for a given bit range.
+ Checks for overlapping bits and assigns the value accordingly.
+ """
+ for ind in range(lsb, msb + 1):
+ check_overlapping_bits(encoding, ind, line)
+ bit = str((entry_value >> (ind - lsb)) & 1)
+ encoding[31 - ind] = bit
+
+
+# Process fixed bit patterns
+def process_fixed_ranges(remaining: str, encoding: "list[str]", line: str):
+ """Process fixed bit ranges in the encoding."""
+ for s2, s1, entry in fixed_ranges.findall(remaining):
+ msb, lsb, entry_value = int(s2), int(s1), int(entry, 0)
+
+ # Validate bit range and entry value
+ validate_bit_range(msb, lsb, entry_value, line)
+ update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line)
+
+ return fixed_ranges.sub(" ", remaining)
+
+
+# Process single bit assignments
+def process_single_fixed(remaining: str, encoding: "list[str]", line: str):
+ """Process single fixed assignments in the encoding."""
+ for lsb, value, _drop in single_fixed.findall(remaining):
+ lsb = int(lsb, 0)
+ value = int(value, 0)
+
+ check_overlapping_bits(encoding, lsb, line)
+ encoding[31 - lsb] = str(value)
+
+
+# Main function to check argument look-up table
+def check_arg_lut(args: "list[str]", encoding_args: "list[str]", name: str):
+ """Check if arguments are present in arg_lut."""
+ for arg in args:
+ if arg not in arg_lut:
+ arg = handle_arg_lut_mapping(arg, name)
+ msb, lsb = arg_lut[arg]
+ update_encoding_args(encoding_args, arg, msb, lsb)
+
+
+# Handle missing argument mappings
+def handle_arg_lut_mapping(arg: str, name: str):
+ """Handle cases where an argument needs to be mapped to an existing one."""
+ parts = arg.split("=")
+ if len(parts) == 2:
+ existing_arg, _new_arg = parts
+ if existing_arg in arg_lut:
+ arg_lut[arg] = arg_lut[existing_arg]
+ else:
+ log_and_exit(
+ f" Found field {existing_arg} in variable {arg} in instruction {name} "
+ f"whose mapping in arg_lut does not exist"
+ )
+ else:
+ log_and_exit(
+ f" Found variable {arg} in instruction {name} "
+ f"whose mapping in arg_lut does not exist"
+ )
+ return arg
+
+
+# Update encoding args with variables
+def update_encoding_args(encoding_args: "list[str]", arg: str, msb: int, lsb: int):
+ """Update encoding arguments and ensure no overlapping."""
+ for ind in range(lsb, msb + 1):
+ check_overlapping_bits(encoding_args, ind, arg)
+ encoding_args[31 - ind] = arg
+
+
+# Compute match and mask
+def convert_encoding_to_match_mask(encoding: "list[str]") -> "tuple[str, str]":
+ """Convert the encoding list to match and mask strings."""
+ match = "".join(encoding).replace("-", "0")
+ mask = "".join(encoding).replace("0", "1").replace("-", "0")
+ return hex(int(match, 2)), hex(int(mask, 2))
+
+
+class SingleInstr(TypedDict):
+ encoding: str
+ variable_fields: "list[str]"
+ extension: "list[str]"
+ match: str
+ mask: str
+
+
+InstrDict = Dict[str, SingleInstr]
+
+
+# Processing main function for a line in the encoding file
+def process_enc_line(line: str, ext: str) -> "tuple[str, SingleInstr]":
+    """
+    This function processes each line of the encoding files (rv*). As part of
+    the processing, the function ensures that the encoding is legal through the
+    following checks::
+        - there is no over specification (same bits assigned different values)
+        - there is no under specification (some bits not assigned values)
+        - bit ranges are in the format hi..lo=val where hi > lo
+        - value assigned is representable in the bit range
+        - also checks that the mapping of arguments of an instruction exists in
+          arg_lut.
+    If the above checks pass, then the function returns a tuple of the name and
+    a dictionary containing basic information of the instruction which includes:
+        - variables: list of arguments used by the instruction whose mapping
+          exists in the arg_lut dictionary
+        - encoding: this contains the 32-bit encoding of the instruction where
+          '-' is used to represent position of arguments and 1/0 is used to
+          represent the static encoding of the bits
+        - extension: this field contains the rv* filename from which this
+          instruction was included
+        - match: hex value representing the bits that need to match to detect
+          this instruction
+        - mask: hex value representing the bits that need to be masked to extract
+          the value required for matching.
+    """
+    encoding = initialize_encoding()
+
+    # Parse the instruction line
+    name, remaining = parse_instruction_line(line)
+
+    # Process fixed ranges
+    remaining = process_fixed_ranges(remaining, encoding, line)
+
+    # Process single fixed assignments
+    process_single_fixed(remaining, encoding, line)
+
+    # Convert the list of encodings into a match and mask
+    match, mask = convert_encoding_to_match_mask(encoding)
+
+    # Check arguments in arg_lut
+    args = single_fixed.sub(" ", remaining).split()
+    encoding_args = encoding.copy()
+
+    check_arg_lut(args, encoding_args, name)
+
+    # Return single_dict
+    return name, {
+        "encoding": "".join(encoding),
+        "variable_fields": args,
+        "extension": [os.path.basename(ext)],
+        "match": match,
+        "mask": mask,
+    }
+
+
+# Extract ISA Type
+def extract_isa_type(ext_name: str) -> str:
+ """Extracts the ISA type from the extension name."""
+ return ext_name.split("_")[0]
+
+
+# Verify the types for RV*
+def is_rv_variant(type1: str, type2: str) -> bool:
+ """Checks if the types are RV variants (rv32/rv64)."""
+ return (type2 == "rv" and type1 in {"rv32", "rv64"}) or (
+ type1 == "rv" and type2 in {"rv32", "rv64"}
+ )
+
+
+# Check for same base ISA
+def has_same_base_isa(type1: str, type2: str) -> bool:
+ """Determines if the two ISA types share the same base."""
+ return type1 == type2 or is_rv_variant(type1, type2)
+
+
+# Compare the base ISA type of a given extension name against a list of extension names
+def same_base_isa(ext_name: str, ext_name_list: "list[str]") -> bool:
+ """Checks if the base ISA type of ext_name matches any in ext_name_list."""
+ type1 = extract_isa_type(ext_name)
+ return any(has_same_base_isa(type1, extract_isa_type(ext)) for ext in ext_name_list)
+
+
+# Pad two strings to equal length
+def pad_to_equal_length(str1: str, str2: str, pad_char: str = "-") -> "tuple[str, str]":
+ """Pads two strings to equal length using the given padding character."""
+ max_len = max(len(str1), len(str2))
+ return str1.rjust(max_len, pad_char), str2.rjust(max_len, pad_char)
+
+
+# Check compatibility for two characters
+def has_no_conflict(char1: str, char2: str) -> bool:
+ """Checks if two characters are compatible (either matching or don't-care)."""
+ return char1 == "-" or char2 == "-" or char1 == char2
+
+
+# Conflict check between two encoded strings
+def overlaps(x: str, y: str) -> bool:
+ """Checks if two encoded strings overlap without conflict."""
+ x, y = pad_to_equal_length(x, y)
+ return all(has_no_conflict(x[i], y[i]) for i in range(len(x)))
+
+
+# Check presence of keys in dictionary.
+def is_in_nested_dict(a: "dict[str, set[str]]", key1: str, key2: str) -> bool:
+ """Checks if key2 exists in the dictionary under key1."""
+ return key1 in a and key2 in a[key1]
+
+
+# Overlap allowance
+def overlap_allowed(a: "dict[str, set[str]]", x: str, y: str) -> bool:
+ """Determines if overlap is allowed between x and y based on nested dictionary checks"""
+ return is_in_nested_dict(a, x, y) or is_in_nested_dict(a, y, x)
+
+
+# Check overlap allowance between extensions
+def extension_overlap_allowed(x: str, y: str) -> bool:
+ """Checks if overlap is allowed between two extensions using the overlapping_extensions dictionary."""
+ return overlap_allowed(overlapping_extensions, x, y)
+
+
+# Check overlap allowance between instructions
+def instruction_overlap_allowed(x: str, y: str) -> bool:
+ """Checks if overlap is allowed between two instructions using the overlapping_instructions dictionary."""
+ return overlap_allowed(overlapping_instructions, x, y)
+
+
+# Check 'nf' field
+def is_segmented_instruction(instruction: SingleInstr) -> bool:
+ """Checks if an instruction contains the 'nf' field."""
+ return "nf" in instruction["variable_fields"]
+
+
+# Expand 'nf' fields
+def update_with_expanded_instructions(
+ updated_dict: InstrDict, key: str, value: SingleInstr
+):
+ """Expands 'nf' fields in the instruction dictionary and updates it with new instructions."""
+ for new_key, new_value in expand_nf_field(key, value):
+ updated_dict[new_key] = new_value
+
+
+# Process instructions, expanding segmented ones and updating the dictionary
+def add_segmented_vls_insn(instr_dict: InstrDict) -> InstrDict:
+ """Processes instructions, expanding segmented ones and updating the dictionary."""
+ # Use dictionary comprehension for efficiency
+ return dict(
+ chain.from_iterable(
+ (
+ expand_nf_field(key, value)
+ if is_segmented_instruction(value)
+ else [(key, value)]
+ )
+ for key, value in instr_dict.items()
+ )
+ )
+
+
+# Expand the 'nf' field in the instruction dictionary
+def expand_nf_field(
+    name: str, single_dict: SingleInstr
+) -> "list[tuple[str, SingleInstr]]":
+    """
+    Expand a segmented vector load/store on its 'nf' field into the eight
+    variants nf = 0..7 (nf == 0 keeps the original name; the others insert
+    "seg<nf+1>" before the first 'e' of the name).
+
+    NOTE(review): `single_dict` is mutated in place ('nf' is removed from
+    variable_fields and the mask is widened) before the per-nf deep copies
+    are taken, so a caller that keeps the original dict sees the changes
+    too — confirm this side effect is intended.
+    """
+    validate_nf_field(single_dict, name)
+    remove_nf_field(single_dict)
+    update_mask(single_dict)
+
+    # Position of the first 'e' in the name; "seg<n>" is inserted there.
+    name_expand_index = name.find("e")
+
+    # Precompute the base match value and encoding prefix
+    base_match = int(single_dict["match"], 16)
+    encoding_prefix = single_dict["encoding"][3:]
+
+    expanded_instructions = [
+        create_expanded_instruction(
+            name, single_dict, nf, name_expand_index, base_match, encoding_prefix
+        )
+        for nf in range(8)  # Range of 0 to 7
+    ]
+
+    return expanded_instructions
+
+
+# Validate the presence of 'nf'
+def validate_nf_field(single_dict: SingleInstr, name: str):
+ """Validates the presence of 'nf' in variable fields before expansion."""
+ if "nf" not in single_dict["variable_fields"]:
+ log_and_exit(f"Cannot expand nf field for instruction {name}")
+
+
+# Remove 'nf' from variable fields
+def remove_nf_field(single_dict: SingleInstr):
+ """Removes 'nf' from variable fields in the instruction dictionary."""
+ single_dict["variable_fields"].remove("nf")
+
+
+# Update the mask to include the 'nf' field
+def update_mask(single_dict: SingleInstr):
+    """Updates the mask to include the 'nf' field in the instruction dictionary."""
+    # The 3-bit 'nf' field occupies bits 31:29, hence 0b111 << 29.
+    single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29)
+
+
+# Create an expanded instruction
+def create_expanded_instruction(
+ name: str,
+ single_dict: SingleInstr,
+ nf: int,
+ name_expand_index: int,
+ base_match: int,
+ encoding_prefix: str,
+) -> "tuple[str, SingleInstr]":
+ """Creates an expanded instruction based on 'nf' value."""
+ new_single_dict = copy.deepcopy(single_dict)
+
+ # Update match value in one step
+ new_single_dict["match"] = hex(base_match | (nf << 29))
+ new_single_dict["encoding"] = format(nf, "03b") + encoding_prefix
+
+ # Construct new instruction name
+ new_name = (
+ name
+ if nf == 0
+ else f"{name[:name_expand_index]}seg{nf + 1}{name[name_expand_index:]}"
+ )
+
+ return (new_name, new_single_dict)
+
+
+def read_lines(file: str) -> "list[str]":
+ """
+ Reads lines from a file and returns non-blank, non-comment lines.
+ The file must be a resource relative to the root of this repo.
+ """
+ with open_text_resource(file) as fp:
+ lines = (line.rstrip() for line in fp)
+ return [line for line in lines if line and not line.startswith("#")]
+
+
+# Update the instruction dictionary
+def process_standard_instructions(
+ lines: "list[str]", instr_dict: InstrDict, file_name: str
+):
+ """Processes standard instructions from the given lines and updates the instruction dictionary."""
+ for line in lines:
+ if "$import" in line or "$pseudo" in line:
+ continue
+ logging.debug(f"Processing line: {line}")
+ name, single_dict = process_enc_line(line, file_name)
+ ext_name = os.path.basename(file_name)
+
+ if name in instr_dict:
+ var = instr_dict[name]["extension"]
+ if same_base_isa(ext_name, var):
+ log_and_exit(
+ f"Instruction {name} from {ext_name} is already added from {var} in same base ISA"
+ )
+ elif instr_dict[name]["encoding"] != single_dict["encoding"]:
+ log_and_exit(
+ f"Instruction {name} from {ext_name} has different encodings in different base ISAs"
+ )
+
+ instr_dict[name]["extension"].extend(single_dict["extension"])
+ else:
+ for key, item in instr_dict.items():
+ if (
+ overlaps(item["encoding"], single_dict["encoding"])
+ and not extension_overlap_allowed(ext_name, item["extension"][0])
+ and not instruction_overlap_allowed(name, key)
+ and same_base_isa(ext_name, item["extension"])
+ ):
+ log_and_exit(
+ f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}'
+ )
+
+ instr_dict[name] = single_dict
+
+
+# Incorporate pseudo instructions into the instruction dictionary based on given conditions
+def process_pseudo_instructions(
+ lines: "list[str]",
+ instr_dict: InstrDict,
+ file_name: str,
+ include_pseudo: bool,
+ include_pseudo_ops: "list[str]",
+):
+ """Processes pseudo instructions from the given lines and updates the instruction dictionary."""
+ for line in lines:
+ if "$pseudo" not in line:
+ continue
+ logging.debug(f"Processing pseudo line: {line}")
+ ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0]
+ ext_file = read_extension_file(ext)
+
+ validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst)
+
+ name, single_dict = process_enc_line(f"{pseudo_inst} {line_content}", file_name)
+ if (
+ orig_inst.replace(".", "_") not in instr_dict
+ or include_pseudo
+ or name in include_pseudo_ops
+ ):
+ if name not in instr_dict:
+ instr_dict[name] = single_dict
+ logging.debug(f"Including pseudo_op: {name}")
+ else:
+ if single_dict["match"] != instr_dict[name]["match"]:
+ instr_dict[f"{name}_pseudo"] = single_dict
+ # TODO: This expression is always false since both sides are list[str].
+ elif single_dict["extension"] not in instr_dict[name]["extension"]: # type: ignore
+ instr_dict[name]["extension"].extend(single_dict["extension"])
+
+
+# Integrate imported instructions into the instruction dictionary
+def process_imported_instructions(
+ lines: "list[str]", instr_dict: InstrDict, file_name: str
+):
+ """Processes imported instructions from the given lines and updates the instruction dictionary."""
+ for line in lines:
+ if "$import" not in line:
+ continue
+ logging.debug(f"Processing imported line: {line}")
+ import_ext, reg_instr = imported_regex.findall(line)[0]
+ ext_file = read_extension_file(import_ext)
+
+ validate_instruction_in_extension(reg_instr, ext_file, file_name, line)
+
+ for oline in StringIO(ext_file):
+ if re.findall(f"^\\s*{reg_instr}\\s+", oline):
+ name, single_dict = process_enc_line(oline, file_name)
+ if name in instr_dict:
+ if instr_dict[name]["encoding"] != single_dict["encoding"]:
+ log_and_exit(
+ f"Imported instruction {name} from {os.path.basename(file_name)} has different encodings"
+ )
+ instr_dict[name]["extension"].extend(single_dict["extension"])
+ else:
+ instr_dict[name] = single_dict
+ break
+
+
+def read_extension_file(ext: str) -> str:
+ """
+ Read the extension file path, considering the unratified directory if necessary.
+ """
+ file = resource_root() / "extensions" / ext
+ if file.is_file():
+ return file.read_text(encoding="utf-8")
+ file = resource_root() / "extensions" / "unratified" / ext
+ if file.is_file():
+ return file.read_text(encoding="utf-8")
+
+ log_and_exit(f"Extension {ext} not found.")
+
+
+# Confirm the presence of an original instruction in the corresponding extension file.
+def validate_instruction_in_extension(
+ inst: str, ext_file: str, file_name: str, pseudo_inst: str
+):
+ """Validates if the original instruction exists in the dependent extension."""
+ found = False
+
+ for oline in StringIO(ext_file):
+ if re.findall(f"^\\s*{inst}\\s+", oline):
+ found = True
+ break
+ if not found:
+ log_and_exit(
+ f"Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}"
+ )
+
+
+# Construct a dictionary of instructions filtered by specified criteria
+def create_inst_dict(
+    file_filter: "list[str]",
+    include_pseudo: bool = False,
+    include_pseudo_ops: "Optional[list[str]]" = None,
+) -> InstrDict:
+    """
+    Creates a dictionary of instructions based on the provided file filters.
+
+    This function returns a dictionary containing all instructions associated
+    with an extension defined by the file_filter input.
+    Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc.
+    Each node of the dictionary will correspond to an instruction which again is
+    a dictionary. The dictionary contents of each instruction includes:
+        - variables: list of arguments used by the instruction whose mapping
+          exists in the arg_lut dictionary
+        - encoding: this contains the 32-bit encoding of the instruction where
+          '-' is used to represent position of arguments and 1/0 is used to
+          represent the static encoding of the bits
+        - extension: this field contains the rv* filename from which this
+          instruction was included
+        - match: hex value representing the bits that need to match to detect
+          this instruction
+        - mask: hex value representing the bits that need to be masked to extract
+          the value required for matching.
+    In order to build this dictionary, the function does 2 passes over the same
+    rv<file_filter> file:
+        - First pass: extracts all standard instructions, skipping pseudo ops
+          and imported instructions. For each selected line, the `process_enc_line`
+          function is called to create the dictionary contents of the instruction.
+          Checks are performed to ensure that the same instruction is not added
+          twice to the overall dictionary.
+        - Second pass: parses only pseudo_ops. For each pseudo_op, the function:
+            - Checks if the dependent extension and instruction exist.
+            - Adds the pseudo_op to the dictionary if the dependent instruction
+              is not already present; otherwise, it is skipped.
+    """
+    if include_pseudo_ops is None:
+        include_pseudo_ops = []
+
+    instr_dict: InstrDict = {}
+
+    ratified_file_filters = [
+        fil for fil in file_filter if not fil.startswith("unratified/")
+    ]
+    unratified_file_filters = [
+        fil.removeprefix("unratified/")
+        for fil in file_filter
+        if fil.startswith("unratified/")
+    ]
+
+    # Extension file name, "extensions[/unratified]/rv_foo".
+    file_names: list[str] = []
+
+    for file in (resource_root() / "extensions").iterdir():
+        if file.is_file() and any(
+            fnmatch(file.name, fil) for fil in ratified_file_filters
+        ):
+            file_names.append("extensions/" + file.name)
+    for file in (resource_root() / "extensions" / "unratified").iterdir():
+        if file.is_file() and any(
+            fnmatch(file.name, fil) for fil in unratified_file_filters
+        ):
+            file_names.append("extensions/unratified/" + file.name)
+
+    logging.debug("Collecting standard instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for standard instructions")
+        lines = read_lines(file_name)
+        process_standard_instructions(lines, instr_dict, file_name)
+
+    logging.debug("Collecting pseudo instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for pseudo instructions")
+        lines = read_lines(file_name)
+        process_pseudo_instructions(
+            lines,
+            instr_dict,
+            file_name,
+            include_pseudo,
+            include_pseudo_ops,
+        )
+
+    logging.debug("Collecting imported instructions")
+
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for imported instructions")
+        lines = read_lines(file_name)
+        process_imported_instructions(lines, instr_dict, file_name)
+
+    return instr_dict
+
+
+# Extracts the extensions used in an instruction dictionary
+def instr_dict_2_extensions(instr_dict: InstrDict) -> "list[str]":
+    """Return the distinct first-listed extension of every instruction (unordered)."""
+    return list({item["extension"][0] for item in instr_dict.values()})
+
+
+# Returns signed interpretation of a value within a given width
+def signed(value: int, width: int) -> int:
+    """Reinterpret the unsigned `width`-bit pattern `value` as a two's-complement signed int."""
+    return value if 0 <= value < (1 << (width - 1)) else value - (1 << width)
diff --git a/src/riscv_opcodes/sverilog_utils.py b/src/riscv_opcodes/sverilog_utils.py
new file mode 100644
index 0000000..c17be9f
--- /dev/null
+++ b/src/riscv_opcodes/sverilog_utils.py
@@ -0,0 +1,30 @@
+import logging
+import pprint
+from pathlib import Path
+
+from .constants import csrs, csrs32
+from .shared_utils import InstrDict
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_sverilog(instr_dict: InstrDict):
+    """
+    Write `inst.sverilog`: a `riscv_instr` package with one 32-bit
+    localparam pattern per instruction (don't-care '-' bits become
+    SystemVerilog '?' wildcards) plus CSR address localparams.
+    """
+    names_str = ""
+    for i in instr_dict:
+        names_str += f"  localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n"
+    names_str += "  /* CSR Addresses */\n"
+    for num, name in csrs + csrs32:
+        names_str += (
+            f"  localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n"
+        )
+
+    Path("inst.sverilog").write_text(
+        f"""
+/* Automatically generated by parse_opcodes */
+package riscv_instr;
+{names_str}
+endpackage
+""",
+        encoding="utf-8",
+    )
diff --git a/src/riscv_opcodes/svg_utils.py b/src/riscv_opcodes/svg_utils.py
new file mode 100644
index 0000000..4126ad6
--- /dev/null
+++ b/src/riscv_opcodes/svg_utils.py
@@ -0,0 +1,284 @@
+import logging
+import pprint
+from typing import Dict, List, NamedTuple
+
+from .rv_colors import palette
+from .shared_utils import InstrDict, instr_dict_2_extensions
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
class RectangleDimensions(NamedTuple):
    """Axis-aligned rectangle in unit-square coordinates: origin (x, y)
    plus width ``w`` and height ``h``."""

    # origin
    x: float
    y: float
    # extents
    w: float
    h: float
+
+
class InstrRectangle(NamedTuple):
    """One drawable instruction rectangle: its dimensions, the extension it
    is colored by, and the text label rendered inside it."""

    dims: RectangleDimensions
    extension: str
    label: str
+
+
+InstrDimsDict = Dict[str, RectangleDimensions]
+
+
def encoding_to_rect(encoding: str) -> RectangleDimensions:
    """Convert a binary encoding string to rectangle dimensions.

    Bits at even string positions drive the y axis and bits at odd
    positions drive the x axis, mapping each encoding onto a distinct
    sub-rectangle of the unit square:

    * a "1" bit shifts the origin along its axis by half of the span
      still controlled by that bit,
    * a "0" bit leaves the origin unchanged,
    * a "-" (don't-care) bit doubles the rectangle's extent on its axis.

    Returns the rectangle as (x, y, w, h) in unit-square coordinates.
    """
    instr_length = len(encoding)
    # Extent contributed by a fully fixed axis (same for x and y).
    tick = 1 / (2 ** (0.5 * instr_length))

    # Origin: every "1" bit adds a progressively smaller offset.
    x = 0
    y = 0
    for i, bit in enumerate(encoding):
        if bit == "1":
            offset = 0.5 / (2 ** (i // 2))
            if i % 2 == 0:
                y += offset
            else:
                x += offset

    # Size: each free ("-") bit doubles the extent along its axis.
    x_free_bits = encoding[1::2].count("-")  # odd positions drive x
    y_free_bits = encoding[0::2].count("-")  # even positions drive y
    x_size = 2**x_free_bits * tick
    y_size = 2**y_free_bits * tick

    # Any encoding maps onto exactly one axis-aligned rectangle.
    return RectangleDimensions(x=x, y=y, w=x_size, h=y_size)
+
+
+FIGSIZE = 128
+
+
def plot_image(
    instr_dict: InstrDict,
    instr_dims_dict: InstrDimsDict,
    extension_sizes: Dict[str, float],
) -> None:
    """Plot the instruction rectangles using matplotlib.

    Draws one colored/hatched rectangle per instruction (color keyed on the
    instruction's first extension), labels rectangles whose computed font
    size exceeds 1, saves the result to "inst.svg" and opens an interactive
    window via plt.show().

    matplotlib is imported lazily here so the rest of the module does not
    require it.
    """

    from matplotlib import patches
    from matplotlib import pyplot as plt

    def get_readable_font_color(bg_hex: str) -> str:
        """Determine readable font color based on background color."""

        def hex_to_rgb(hex_color: str) -> tuple[int, int, int]:
            """Convert hex color string to RGB tuple."""
            hex_color = hex_color.lstrip("#")
            r = int(hex_color[0:2], 16)
            g = int(hex_color[2:4], 16)
            b = int(hex_color[4:6], 16)

            return (r, g, b)

        r, g, b = hex_to_rgb(bg_hex)
        # Weighted (perceptual) luminance; threshold picks black text on
        # light backgrounds and white text on dark ones.
        luminance = 0.299 * r + 0.587 * g + 0.114 * b
        return "#000000" if luminance > 186 else "#FFFFFF"

    def plot_with_matplotlib(
        rectangles: list[InstrRectangle],
        colors: list[str],
        hatches: list[str],
        extensions: list[str],
    ) -> None:
        """Plot rectangles with matplotlib using specified styles.

        colors/hatches are parallel to extensions: a rectangle's style is
        looked up via the index of its extension in `extensions`.
        """

        _, ax = plt.subplots(figsize=(FIGSIZE, FIGSIZE), facecolor="none")  # type: ignore
        ax.set_facecolor("none")  # type: ignore
        linewidth = FIGSIZE / 100
        for dims, ext, label in rectangles:
            x, y, w, h = dims
            ext_idx = extensions.index(ext)
            color = colors[ext_idx]
            hatch = hatches[ext_idx]
            rect = patches.Rectangle(
                (x, y),
                w,
                h,
                linewidth=linewidth,
                edgecolor="black",
                facecolor=color,
                hatch=hatch,
                alpha=1.0,
            )
            ax.add_patch(rect)

            # Write the label along the longer side of the rectangle.
            if w >= h:
                base_dim = w
                rotation = 0
            else:
                base_dim = h
                rotation = 90

            # Scale font size based on base dimension and label length
            n_chars = len(label)
            font_size = (
                base_dim / n_chars * 90 * FIGSIZE
            )  # Adjust scaling factor as needed
            # Skip labels that would be rendered too small to read.
            if font_size > 1:
                fontdict = {
                    "fontsize": font_size,
                    "color": get_readable_font_color(color),
                    "family": "DejaVu Sans Mono",
                }
                ax.text(  # type: ignore
                    x + w / 2,
                    y + h / 2,
                    label,
                    ha="center",
                    va="center",
                    fontdict=fontdict,
                    rotation=rotation,
                )

        plt.axis("off")  # type: ignore
        plt.tight_layout()  # type: ignore
        plt.savefig("inst.svg", format="svg")  # type: ignore
        plt.show()  # type: ignore

    # Largest extensions first so their style indices are stable/prominent.
    extensions: List[str] = sorted(
        extension_sizes.keys(), key=lambda k: extension_sizes[k], reverse=True
    )

    rectangles: List[InstrRectangle] = []
    for instr in instr_dict:
        dims = instr_dims_dict[instr]
        rectangles.append(
            InstrRectangle(
                dims=dims,
                extension=instr_dict[instr]["extension"][0],
                label=instr.replace("_", "."),
            )
        )

    # sort rectangles so that small ones are in the foreground
    # An overlap occurs e.g. for pseudo ops, and these should be on top of the encoding it reuses
    rectangles = sorted(rectangles, key=lambda x: x.dims.w * x.dims.h, reverse=True)

    colors, hatches = generate_styles(extensions)

    plot_with_matplotlib(rectangles, colors, hatches, extensions)
+
+
def generate_styles(extensions: list[str]) -> tuple[list[str], list[str]]:
    """Assign a (color, hatch) pair to each extension.

    Colors cycle through the palette; the hatch pattern advances once per
    full palette cycle so styles stay distinguishable past len(palette)
    extensions.
    """
    hatch_options = ["", "/", "\\", "|", "-", "+", "x", ".", "*"]
    color_options = list(palette.values())
    n_colors = len(palette)

    colors: list[str] = []
    hatches: list[str] = []
    for idx in range(len(extensions)):
        colors.append(color_options[idx % n_colors])
        hatches.append(hatch_options[(idx // n_colors) % len(hatch_options)])

    return colors, hatches
+
+
def defragment_encodings(
    encodings: list[str], length: int = 32, offset: int = 0
) -> list[str]:
    """Defragment a list of binary encodings by reordering bits.

    Recursively permutes the bit order so that fixed ("0"/"1") bits are
    moved toward the front of each encoding string.  Every encoding in a
    group is permuted identically, so the strings stay mutually comparable
    when they are later turned into rectangles.

    NOTE(review): `encodings` is mutated in place and also returned;
    callers must not rely on the original bit order afterwards.
    """
    # determine bit position which has the most fixed bits
    fixed_encodings = ["0", "1"]
    fixed_bits = [0] * length
    fixed_encoding_indeces: Dict[str, List[int]] = {
        value: [] for value in fixed_encodings
    }
    for index, encoding in enumerate(encodings):
        for position, value in enumerate(encoding):
            # Positions up to `offset` were placed by outer recursion
            # levels, so only count fixed bits beyond it.
            if position > offset:
                if value != "-":
                    fixed_bits[position] += 1

    # find bit position with most fixed bits, starting with the LSB to favor the opcode field
    max_fixed_bits = max(fixed_bits)
    if max_fixed_bits == 0:
        # fully defragmented: no fixed bits remain past `offset`
        return encodings
    max_fixed_position = len(fixed_bits) - 1 - fixed_bits[::-1].index(max_fixed_bits)

    # move bit position with the most fixed bits to the front
    for index, encoding in enumerate(encodings):
        encodings[index] = (
            encoding[0:offset]
            + encoding[max_fixed_position]
            + encoding[offset:max_fixed_position]
            + encoding[max_fixed_position + 1 :]
        )

        # Group encodings by the value of the bit just moved to the front
        # so each group can be defragmented independently below.
        if encoding[max_fixed_position] in fixed_encodings:
            fixed_encoding_indeces[encoding[max_fixed_position]].append(index)
        else:
            # No more fixed bits in this encoding
            pass

    if offset < length:
        # continue to defragment starting from the next offset
        offset = offset + 1

    # separate encodings
    sep_encodings: Dict[str, List[str]] = {}
    for fixed_encoding in fixed_encodings:
        sep_encodings[fixed_encoding] = [
            encodings[i] for i in fixed_encoding_indeces[fixed_encoding]
        ]
        sep_encodings[fixed_encoding] = defragment_encodings(
            sep_encodings[fixed_encoding], length=length, offset=offset
        )

        # join encodings: write each group's defragmented strings back to
        # their original slots
        for new_index, orig_index in enumerate(
            fixed_encoding_indeces[fixed_encoding]
        ):
            encodings[orig_index] = sep_encodings[fixed_encoding][new_index]

    return encodings
+
+
def defragment_encoding_dict(instr_dict: InstrDict) -> InstrDict:
    """Apply defragmentation to the encoding dictionary.

    Rewrites every entry's "encoding" with the defragmented bit order and
    returns the (mutated) dictionary.
    """
    original_encodings = [entry["encoding"] for entry in instr_dict.values()]
    reordered = defragment_encodings(original_encodings, length=32, offset=0)
    for name, new_encoding in zip(instr_dict, reordered):
        instr_dict[name]["encoding"] = new_encoding
    return instr_dict
+
+
def make_svg(instr_dict: InstrDict) -> None:
    """Generate an SVG image from instruction encodings.

    Defragments the encodings, converts each into a unit-square rectangle,
    accumulates the covered area per (first) extension, and hands everything
    to plot_image, which writes "inst.svg".
    """
    ext_names = instr_dict_2_extensions(instr_dict)
    instr_dict = defragment_encoding_dict(instr_dict)

    # Area covered by each extension, used to order the legend styles.
    extension_size: Dict[str, float] = {name: 0 for name in ext_names}
    instr_dims_dict: InstrDimsDict = {}

    for mnemonic, entry in instr_dict.items():
        rect = encoding_to_rect(entry["encoding"])
        extension_size[entry["extension"][0]] += rect.h * rect.w
        instr_dims_dict[mnemonic] = rect

    plot_image(instr_dict, instr_dims_dict, extension_size)