author | Andrew Waterman <andrew@sifive.com> | 2025-10-13 16:24:22 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-10-13 16:24:22 -0700 |
commit | 26e2c04c913a67ac51bf0a354f21f2d7d5c07c40 (patch) | |
tree | 7db4f4637f71f1777f86702a2228ccf3b9217a03 /src | |
parent | 433081c02368eb72e6dea5413e404a8ef231658e (diff) | |
download | riscv-opcodes-master.zip riscv-opcodes-master.tar.gz riscv-opcodes-master.tar.bz2 |
Add pyproject.toml (the modern alternative to requirements.txt), making this a proper Python package that can be installed via pip and potentially uploaded to PyPI.
The code now loads its data files (the `extensions` opcode files, the `*.csv` tables, and `encoding.h`) via `importlib.resources`, and those files are packaged into the wheel. This means that a wheel created with `uv build` can still locate and load all of the opcode and CSV files.
To avoid moving those resource files within the source repo, the build backend (hatchling) is instructed to place them inside the package when building a wheel, and the `resource_root()` function checks both locations so it works either way. This is a little hacky, but it works.
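Concretely, the new `resources.py` resolves the data root roughly like this (condensed from the file added below):

```
from importlib.resources import files

def resource_root():
    # In a built wheel, hatchling has placed the data files next to the
    # package; in a source checkout they still live at the repo root,
    # two levels above src/riscv_opcodes/.
    package_root = files("riscv_opcodes")  # the real code uses files(__package__)
    if (package_root / "extensions").is_dir():
        return package_root
    return package_root / ".." / ".."
```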
CI builds a source distribution and a wheel (pure Python, so not actually binary) that can be uploaded to PyPI. If we do upload them, then using this project is as simple as:
```
uvx riscv_opcodes -c 'rv*'
```
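The modules under `src/riscv_opcodes/` can also be imported after a regular `pip install`; here is a minimal sketch (whether this is intended as a stable public API is not stated in this commit):

```
from riscv_opcodes.shared_utils import create_inst_dict

# Build the instruction dictionary for the base integer opcode files,
# using the same file-name globs as the CLI's positional arguments.
instr_dict = create_inst_dict(["rv_i", "rv32_i", "rv64_i"])
for name, info in sorted(instr_dict.items()):
    print(f'{name:<12} match={info["match"]} mask={info["mask"]}')
```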
Co-authored-by: Tim Hutt <timothy.hutt@codasip.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/riscv_opcodes/__init__.py | 2
-rw-r--r-- | src/riscv_opcodes/__main__.py | 10
-rw-r--r-- | src/riscv_opcodes/c_utils.py | 79
-rw-r--r-- | src/riscv_opcodes/chisel_utils.py | 82
-rw-r--r-- | src/riscv_opcodes/constants.py | 271
-rw-r--r-- | src/riscv_opcodes/go_utils.py | 64
-rw-r--r-- | src/riscv_opcodes/latex_utils.py | 450
-rw-r--r-- | src/riscv_opcodes/parse.py | 121
-rw-r--r-- | src/riscv_opcodes/resources.py | 39
-rw-r--r-- | src/riscv_opcodes/rust_utils.py | 28
-rw-r--r-- | src/riscv_opcodes/rv_colors.py | 12
-rw-r--r-- | src/riscv_opcodes/shared_utils.py | 641
-rw-r--r-- | src/riscv_opcodes/sverilog_utils.py | 30
-rw-r--r-- | src/riscv_opcodes/svg_utils.py | 284
14 files changed, 2113 insertions, 0 deletions
diff --git a/src/riscv_opcodes/__init__.py b/src/riscv_opcodes/__init__.py new file mode 100644 index 0000000..e8fd9d4 --- /dev/null +++ b/src/riscv_opcodes/__init__.py @@ -0,0 +1,2 @@ +# Mark this directory as a package. This is not actually needed by +# Python but Pylint gets confused about relative imports without it. diff --git a/src/riscv_opcodes/__main__.py b/src/riscv_opcodes/__main__.py new file mode 100644 index 0000000..456cddd --- /dev/null +++ b/src/riscv_opcodes/__main__.py @@ -0,0 +1,10 @@ +""" +This allows running as a module, i.e. `python3 -m riscv_opcodes` which +we wouldn't normally need, but the `coverage` tool doesn't work on +installed scripts - you can't do `coverage run riscv_opcodes` because it +looks for a Python file called `riscv_opcodes` in the current directory. +""" + +from .parse import main + +main() diff --git a/src/riscv_opcodes/c_utils.py b/src/riscv_opcodes/c_utils.py new file mode 100644 index 0000000..198a37f --- /dev/null +++ b/src/riscv_opcodes/c_utils.py @@ -0,0 +1,79 @@ +import logging +import os +import pprint + +from .constants import causes, csrs, csrs32 +from .resources import read_text_resource +from .shared_utils import InstrDict, arg_lut + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_c(instr_dict: InstrDict): + mask_match_str = "" + declare_insn_str = "" + for i in instr_dict: + mask_match_str += ( + f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n' + ) + mask_match_str += ( + f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n' + ) + declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n' + + csr_names_str = "" + declare_csr_str = "" + for num, name in csrs + csrs32: + csr_names_str += f"#define CSR_{name.upper()} {hex(num)}\n" + declare_csr_str += f"DECLARE_CSR({name}, CSR_{name.upper()})\n" + + causes_str = "" + declare_cause_str = "" + for num, name in causes: + causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n" + declare_cause_str += ( + f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n" + ) + + arg_str = "" + for name, rng in arg_lut.items(): + sanitized_name = name.replace(" ", "_").replace("=", "_eq_") + begin = rng[1] + end = rng[0] + mask = ((1 << (end - begin + 1)) - 1) << begin + arg_str += f"#define INSN_FIELD_{sanitized_name.upper()} {hex(mask)}\n" + + enc_header = read_text_resource("encoding.h") + + commit = os.popen('git log -1 --format="format:%h"').read() + + # Generate the output as a string + output_str = f"""/* SPDX-License-Identifier: BSD-3-Clause */ + +/* Copyright (c) 2023 RISC-V International */ + +/* + * This file is auto-generated by running 'make' in + * https://github.com/riscv/riscv-opcodes ({commit}) + */ + +{enc_header} +/* Automatically generated by parse_opcodes. 
*/ +#ifndef RISCV_ENCODING_H +#define RISCV_ENCODING_H +{mask_match_str} +{csr_names_str} +{causes_str} +{arg_str}#endif +#ifdef DECLARE_INSN +{declare_insn_str}#endif +#ifdef DECLARE_CSR +{declare_csr_str}#endif +#ifdef DECLARE_CAUSE +{declare_cause_str}#endif +""" + + # Write the modified output to the file + with open("encoding.out.h", "w", encoding="utf-8") as enc_file: + enc_file.write(output_str) diff --git a/src/riscv_opcodes/chisel_utils.py b/src/riscv_opcodes/chisel_utils.py new file mode 100644 index 0000000..46cb0b6 --- /dev/null +++ b/src/riscv_opcodes/chisel_utils.py @@ -0,0 +1,82 @@ +import logging +import pprint + +from .constants import causes, csrs, csrs32 +from .shared_utils import InstrDict, instr_dict_2_extensions + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_chisel(instr_dict: InstrDict, spinal_hdl: bool = False): + + chisel_names = "" + cause_names_str = "" + csr_names_str = "" + for i in instr_dict: + if spinal_hdl: + chisel_names += f' def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n' + # else: + # chisel_names += f' def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n' + if not spinal_hdl: + extensions = instr_dict_2_extensions(instr_dict) + for e in extensions: + if "rv64_" in e: + e_format = e.replace("rv64_", "").upper() + "64" + elif "rv32_" in e: + e_format = e.replace("rv32_", "").upper() + "32" + elif "rv_" in e: + e_format = e.replace("rv_", "").upper() + else: + e_format = e.upper() + chisel_names += f' val {e_format+"Type"} = Map(\n' + for instr_name, instr in instr_dict.items(): + if instr["extension"][0] == e: + tmp_instr_name = '"' + instr_name.upper().replace(".", "_") + '"' + chisel_names += f' {tmp_instr_name:<18s} -> BitPat("b{instr["encoding"].replace("-","?")}"),\n' + chisel_names += " )\n" + + for num, name in causes: + cause_names_str += f' val {name.lower().replace(" ","_")} = {hex(num)}\n' + cause_names_str += """ val all = { + val res = collection.mutable.ArrayBuffer[Int]() +""" + for num, name in causes: + cause_names_str += f' res += {name.lower().replace(" ","_")}\n' + cause_names_str += """ res.toArray + }""" + + for num, name in csrs + csrs32: + csr_names_str += f" val {name} = {hex(num)}\n" + csr_names_str += """ val all = { + val res = collection.mutable.ArrayBuffer[Int]() +""" + for num, name in csrs: + csr_names_str += f""" res += {name}\n""" + csr_names_str += """ res.toArray + } + val all32 = { + val res = collection.mutable.ArrayBuffer(all:_*) +""" + for num, name in csrs32: + csr_names_str += f""" res += {name}\n""" + csr_names_str += """ res.toArray + }""" + + with open( + "inst.spinalhdl" if spinal_hdl else "inst.chisel", "w", encoding="utf-8" + ) as chisel_file: + chisel_file.write( + f""" +/* Automatically generated by parse_opcodes */ +object Instructions {{ +{chisel_names} +}} +object Causes {{ +{cause_names_str} +}} +object CSRs {{ +{csr_names_str} +}} +""" + ) diff --git a/src/riscv_opcodes/constants.py b/src/riscv_opcodes/constants.py new file mode 100644 index 0000000..fb67d70 --- /dev/null +++ b/src/riscv_opcodes/constants.py @@ -0,0 +1,271 @@ +import csv +import re + +from .resources import open_text_resource + +# TODO: The constants in this file should be in all caps. 
+overlapping_extensions = { + "rv_zcmt": {"rv_c_d"}, + "rv_zcmp": {"rv_c_d"}, + "rv_c": {"rv_zcmop"}, +} + +overlapping_instructions = { + "c_addi": {"c_nop"}, + "c_lui": {"c_addi16sp"}, + "c_mv": {"c_jr"}, + "c_jalr": {"c_ebreak"}, + "c_add": {"c_ebreak", "c_jalr"}, +} + +isa_regex = re.compile( + "^RV(32|64|128)[IE]+[ABCDEFGHJKLMNPQSTUVX]*(Zicsr|Zifencei|Zihintpause|Zam|Ztso|Zkne|Zknd|Zknh|Zkse|Zksh|Zkg|Zkb|Zkr|Zks|Zkn|Zba|Zbc|Zbb|Zbp|Zbr|Zbm|Zbs|Zbe|Zbf|Zbt|Zmmul|Zbpbo|Zca|Zcf|Zcd|Zcb|Zcmp|Zcmt){,1}(_Zicsr){,1}(_Zifencei){,1}(_Zihintpause){,1}(_Zmmul){,1}(_Zam){,1}(_Zba){,1}(_Zbb){,1}(_Zbc){,1}(_Zbe){,1}(_Zbf){,1}(_Zbm){,1}(_Zbp){,1}(_Zbpbo){,1}(_Zbr){,1}(_Zbs){,1}(_Zbt){,1}(_Zkb){,1}(_Zkg){,1}(_Zkr){,1}(_Zks){,1}(_Zkn){,1}(_Zknd){,1}(_Zkne){,1}(_Zknh){,1}(_Zkse){,1}(_Zksh){,1}(_Ztso){,1}(_Zca){,1}(_Zcf){,1}(_Zcd){,1}(_Zcb){,1}(_Zcmp){,1}(_Zcmt){,1}$" +) + +# regex to find <msb>..<lsb>=<val> patterns in instruction +fixed_ranges = re.compile( + r"\s*(?P<msb>\d+.?)\.\.(?P<lsb>\d+.?)\s*=\s*(?P<val>\d[\w]*)[\s$]*", re.M +) + +# regex to find <lsb>=<val> patterns in instructions +# single_fixed = re.compile('\s+(?P<lsb>\d+)=(?P<value>[\w\d]*)[\s$]*', re.M) +single_fixed = re.compile(r"(?:^|[\s])(?P<lsb>\d+)=(?P<value>[\w]*)((?=\s|$))", re.M) + +# regex to find the overloading condition variable +var_regex = re.compile(r"(?P<var>[a-zA-Z][\w\d]*)\s*=\s*.*?[\s$]*", re.M) + +# regex for pseudo op instructions returns the dependent filename, dependent +# instruction, the pseudo op name and the encoding string +pseudo_regex = re.compile( + r"^\$pseudo_op\s+(?P<filename>rv[\d]*_[\w].*)::\s*(?P<orig_inst>.*?)\s+(?P<pseudo_inst>.*?)\s+(?P<overload>.*)$", + re.M, +) + +imported_regex = re.compile( + r"^\s*\$import\s*(?P<extension>.*)\s*::\s*(?P<instruction>.*)", re.M +) + + +def read_int_map_csv(filename: str) -> "list[tuple[int, str]]": + """ + Reads a CSV file and returns a list of tuples. + Each tuple contains an integer value (from the first column) and a string (from the second column). + + Args: + filename (str): The name of the CSV file to read. + + Returns: + list of tuple: A list of (int, str) tuples extracted from the CSV file. + """ + with open_text_resource(filename) as f: + csv_reader = csv.reader(f, skipinitialspace=True) + return [(int(row[0], 0), row[1]) for row in csv_reader] + + +causes = read_int_map_csv("causes.csv") +csrs = read_int_map_csv("csrs.csv") +csrs32 = read_int_map_csv("csrs32.csv") + + +def read_arg_lut_csv(filename: str) -> "dict[str, tuple[int, int]]": + """ + Load the argument lookup table (arg_lut) from a CSV file, mapping argument names to their bit positions. 
+ """ + with open_text_resource(filename) as f: + csv_reader = csv.reader(f, skipinitialspace=True) + return {row[0]: (int(row[1]), int(row[2])) for row in csv_reader} + + +arg_lut = read_arg_lut_csv("arg_lut.csv") + +# for mop +arg_lut["mop_r_t_30"] = (30, 30) +arg_lut["mop_r_t_27_26"] = (27, 26) +arg_lut["mop_r_t_21_20"] = (21, 20) +arg_lut["mop_rr_t_30"] = (30, 30) +arg_lut["mop_rr_t_27_26"] = (27, 26) +arg_lut["c_mop_t"] = (10, 8) + +# dictionary containing the mapping of the argument to the what the fields in +# the latex table should be +latex_mapping = { + "imm12": "imm[11:0]", + "rs1": "rs1", + "rs2": "rs2", + "rd": "rd", + "imm20": "imm[31:12]", + "bimm12hi": "imm[12$\\vert$10:5]", + "bimm12lo": "imm[4:1$\\vert$11]", + "imm12hi": "imm[11:5]", + "imm12lo": "imm[4:0]", + "jimm20": "imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]", + "zimm": "uimm", + "shamtw": "shamt", + "shamtd": "shamt", + "shamtq": "shamt", + "rd_p": "rd\\,$'$", + "rs1_p": "rs1\\,$'$", + "rs2_p": "rs2\\,$'$", + "rd_rs1_n0": "rd/rs$\\neq$0", + "rd_rs1_p": "rs1\\,$'$/rs2\\,$'$", + "c_rs2": "rs2", + "c_rs2_n0": "rs2$\\neq$0", + "rd_n0": "rd$\\neq$0", + "rs1_n0": "rs1$\\neq$0", + "c_rs1_n0": "rs1$\\neq$0", + "rd_rs1": "rd/rs1", + "zimm6hi": "uimm[5]", + "zimm6lo": "uimm[4:0]", + "c_nzuimm10": "nzuimm[5:4$\\vert$9:6$\\vert$2$\\vert$3]", + "c_uimm7lo": "uimm[2$\\vert$6]", + "c_uimm7hi": "uimm[5:3]", + "c_uimm8lo": "uimm[7:6]", + "c_uimm8hi": "uimm[5:3]", + "c_uimm9lo": "uimm[7:6]", + "c_uimm9hi": "uimm[5:4$\\vert$8]", + "c_nzimm6lo": "nzimm[4:0]", + "c_nzimm6hi": "nzimm[5]", + "c_imm6lo": "imm[4:0]", + "c_imm6hi": "imm[5]", + "c_nzimm10hi": "nzimm[9]", + "c_nzimm10lo": "nzimm[4$\\vert$6$\\vert$8:7$\\vert$5]", + "c_nzimm18hi": "nzimm[17]", + "c_nzimm18lo": "nzimm[16:12]", + "c_imm12": "imm[11$\\vert$4$\\vert$9:8$\\vert$10$\\vert$6$\\vert$7$\\vert$3:1$\\vert$5]", + "c_bimm9lo": "imm[7:6$\\vert$2:1$\\vert$5]", + "c_bimm9hi": "imm[8$\\vert$4:3]", + "c_nzuimm5": "nzuimm[4:0]", + "c_nzuimm6lo": "nzuimm[4:0]", + "c_nzuimm6hi": "nzuimm[5]", + "c_uimm8splo": "uimm[4:2$\\vert$7:6]", + "c_uimm8sphi": "uimm[5]", + "c_uimm8sp_s": "uimm[5:2$\\vert$7:6]", + "c_uimm10splo": "uimm[4$\\vert$9:6]", + "c_uimm10sphi": "uimm[5]", + "c_uimm9splo": "uimm[4:3$\\vert$8:6]", + "c_uimm9sphi": "uimm[5]", + "c_uimm10sp_s": "uimm[5:4$\\vert$9:6]", + "c_uimm9sp_s": "uimm[5:3$\\vert$8:6]", + "rd_p_e": "rd\\,$'$, even values only", + "rs2_p_e": "rs2\\,$'$, even values only", + "rd_n0_e": "rd$\\neq$0, even values only", + "c_rs2_e": "rs2, even values only", + "rd_e": "rd, even values only", + "rs2_e": "rs2, even values only", +} + + +# created a dummy instruction-dictionary like dictionary for all the instruction +# types so that the same logic can be used to create their tables +latex_inst_type = { + "R-type": { + "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct7"], + }, + "R4-type": { + "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct2", "rs3"], + }, + "I-type": { + "variable_fields": ["opcode", "rd", "funct3", "rs1", "imm12"], + }, + "S-type": { + "variable_fields": ["opcode", "imm12lo", "funct3", "rs1", "rs2", "imm12hi"], + }, + "B-type": { + "variable_fields": ["opcode", "bimm12lo", "funct3", "rs1", "rs2", "bimm12hi"], + }, + "U-type": { + "variable_fields": ["opcode", "rd", "imm20"], + }, + "J-type": { + "variable_fields": ["opcode", "rd", "jimm20"], + }, +} +latex_fixed_fields = [ + (31, 25), + (24, 20), + (19, 15), + (14, 12), + (11, 7), + (6, 0), +] + +# Pseudo-ops present in the generated encodings. 
+# By default pseudo-ops are not listed as they are considered aliases +# of their base instruction. +emitted_pseudo_ops = [ + "pause", + "prefetch_i", + "prefetch_r", + "prefetch_w", + "rstsa16", + "rstsa32", + "srli32_u", + "slli_rv32", + "srai_rv32", + "srli_rv32", + "umax32", + "c_mop_1", + "c_sspush_x1", + "c_mop_3", + "c_mop_5", + "c_sspopchk_x5", + "c_mop_7", + "c_mop_9", + "c_mop_11", + "c_mop_13", + "c_mop_15", + "mop_r_0", + "mop_r_1", + "mop_r_2", + "mop_r_3", + "mop_r_4", + "mop_r_5", + "mop_r_6", + "mop_r_7", + "mop_r_8", + "mop_r_9", + "mop_r_10", + "mop_r_11", + "mop_r_12", + "mop_r_13", + "mop_r_14", + "mop_r_15", + "mop_r_16", + "mop_r_17", + "mop_r_18", + "mop_r_19", + "mop_r_20", + "mop_r_21", + "mop_r_22", + "mop_r_23", + "mop_r_24", + "mop_r_25", + "mop_r_26", + "mop_r_27", + "mop_r_28", + "sspopchk_x1", + "sspopchk_x5", + "ssrdp", + "mop_r_29", + "mop_r_30", + "mop_r_31", + "mop_r_32", + "mop_rr_0", + "mop_rr_1", + "mop_rr_2", + "mop_rr_3", + "mop_rr_4", + "mop_rr_5", + "mop_rr_6", + "mop_rr_7", + "sspush_x1", + "sspush_x5", + "lpad", + "bclri.rv32", + "bexti.rv32", + "binvi.rv32", + "bseti.rv32", + "zext.h.rv32", + "rev8.h.rv32", + "rori.rv32", +] diff --git a/src/riscv_opcodes/go_utils.py b/src/riscv_opcodes/go_utils.py new file mode 100644 index 0000000..1a6fc33 --- /dev/null +++ b/src/riscv_opcodes/go_utils.py @@ -0,0 +1,64 @@ +import logging +import pprint +import sys + +from .constants import csrs +from .shared_utils import InstrDict, signed + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_go(instr_dict: InstrDict): + + args = " ".join(sys.argv) + prelude = f"""// Code generated by {args}; DO NOT EDIT.""" + + prelude += """ +package riscv + +import "cmd/internal/obj" + +type inst struct { + opcode uint32 + funct3 uint32 + rs1 uint32 + rs2 uint32 + csr int64 + funct7 uint32 +} + +func encode(a obj.As) *inst { + switch a { +""" + + csrs_map_str = """ } + return nil +} + +var csrs = map[uint16]string { +""" + + endoffile = """} +""" + + instr_str = "" + for i in instr_dict: + enc_match = int(instr_dict[i]["match"], 0) + opcode = (enc_match >> 0) & ((1 << 7) - 1) + funct3 = (enc_match >> 12) & ((1 << 3) - 1) + rs1 = (enc_match >> 15) & ((1 << 5) - 1) + rs2 = (enc_match >> 20) & ((1 << 5) - 1) + csr = (enc_match >> 20) & ((1 << 12) - 1) + funct7 = (enc_match >> 25) & ((1 << 7) - 1) + instr_str += f""" case A{i.upper().replace("_","")}: + return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }} +""" + for num, name in sorted(csrs, key=lambda row: row[0]): + csrs_map_str += f'{hex(num)} : "{name.upper()}",\n' + + with open("inst.go", "w", encoding="utf-8") as file: + file.write(prelude) + file.write(instr_str) + file.write(csrs_map_str) + file.write(endoffile) diff --git a/src/riscv_opcodes/latex_utils.py b/src/riscv_opcodes/latex_utils.py new file mode 100644 index 0000000..38f92f8 --- /dev/null +++ b/src/riscv_opcodes/latex_utils.py @@ -0,0 +1,450 @@ +import logging +import pprint +from typing import TextIO + +from .constants import latex_fixed_fields, latex_inst_type, latex_mapping +from .shared_utils import InstrDict, arg_lut, create_inst_dict + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_priv_latex_table(): + type_list = ["R-type", "I-type"] + system_instr = ["_h", "_s", "_system", "_svinval", "64_h", "_svinval_h"] + dataset_list = [(system_instr, 
"Trap-Return Instructions", ["sret", "mret"], False)] + dataset_list.append( + (system_instr, "Interrupt-Management Instructions", ["wfi"], False) + ) + dataset_list.append( + ( + system_instr, + "Supervisor Memory-Management Instructions", + ["sfence_vma"], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Hypervisor Memory-Management Instructions", + ["hfence_vvma", "hfence_gvma"], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Hypervisor Virtual-Machine Load and Store Instructions", + [ + "hlv_b", + "hlv_bu", + "hlv_h", + "hlv_hu", + "hlv_w", + "hlvx_hu", + "hlvx_wu", + "hsv_b", + "hsv_h", + "hsv_w", + ], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Hypervisor Virtual-Machine Load and Store Instructions, RV64 only", + ["hlv_wu", "hlv_d", "hsv_d"], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Svinval Memory-Management Instructions", + [ + "sinval_vma", + "sfence_w_inval", + "sfence_inval_ir", + "hinval_vvma", + "hinval_gvma", + ], + False, + ) + ) + caption = "\\caption{RISC-V Privileged Instructions}" + with open("priv-instr-table.tex", "w", encoding="utf-8") as latex_file: + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + +def make_latex_table(): + """ + This function is mean to create the instr-table.tex that is meant to be used + by the riscv-isa-manual. This function basically creates a single latext + file of multiple tables with each table limited to a single page. Only the + last table is assigned a latex-caption. + + For each table we assign a type-list which capture the different instruction + types (R, I, B, etc) that will be required for the table. Then we select the + list of extensions ('_i, '32_i', etc) whose instructions are required to + populate the table. For each extension or collection of extension we can + assign Title, such that in the end they appear as subheadings within + the table (note these are inlined headings and not captions of the table). + + All of the above information is collected/created and sent to + make_ext_latex_table function to dump out the latex contents into a file. + + The last table only has to be given a caption - as per the policy of the + riscv-isa-manual. + """ + # open the file and use it as a pointer for all further dumps + with open("instr-table.tex", "w", encoding="utf-8") as latex_file: + + # create the rv32i table first. Here we set the caption to empty. We use the + # files rv_i and rv32_i to capture instructions relevant for rv32i + # configuration. The dataset is a list of 4-element tuples : + # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions + # is empty then it indicates that all instructions of the all the extensions + # in list_of_extensions need to be dumped. If not empty, then only the + # instructions listed in list_of_instructions will be dumped into latex. 
+ caption = "" + type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"] + dataset_list: list[tuple[list[str], str, list[str], bool]] = [ + (["_i", "32_i"], "RV32I Base Instruction Set", [], False) + ] + dataset_list.append((["_i"], "", ["fence_tso", "pause"], True)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "I-type", "S-type"] + dataset_list = [ + (["64_i"], "RV64I Base Instruction Set (in addition to RV32I)", [], False) + ] + dataset_list.append( + (["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False) + ) + dataset_list.append( + (["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False) + ) + dataset_list.append((["_m", "32_m"], "RV32M Standard Extension", [], False)) + dataset_list.append( + (["64_m"], "RV64M Standard Extension (in addition to RV32M)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type"] + dataset_list = [(["_a"], "RV32A Standard Extension", [], False)] + dataset_list.append( + (["64_a"], "RV64A Standard Extension (in addition to RV32A)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_f"], "RV32F Standard Extension", [], False)] + dataset_list.append( + (["64_f"], "RV64F Standard Extension (in addition to RV32F)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_d"], "RV32D Standard Extension", [], False)] + dataset_list.append( + (["64_d"], "RV64D Standard Extension (in addition to RV32D)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_q"], "RV32Q Standard Extension", [], False)] + dataset_list.append( + (["64_q"], "RV64Q Standard Extension (in addition to RV32Q)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + caption = "\\caption{Instruction listing for RISC-V}" + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [ + (["_zfh", "_d_zfh", "_q_zfh"], "RV32Zfh Standard Extension", [], False) + ] + dataset_list.append( + ( + ["64_zfh"], + "RV64Zfh Standard Extension (in addition to RV32Zfh)", + [], + False, + ) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + ## The following is demo to show that Compressed instructions can also be + # dumped in the same manner as above + + # type_list = [''] + # dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])] + # dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', [])) + # make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption) + + +def make_ext_latex_table( + type_list: "list[str]", + dataset: "list[tuple[list[str], str, list[str], bool]]", + latex_file: TextIO, + ilen: int, + caption: str, +): + """ + For a given collection of extensions this function dumps out a complete + latex table which includes the encodings of the instructions. + + The ilen input indicates the length of the instruction for which the table + is created. + + The caption input is used to create the latex-table caption. + + The type_list input is a list of instruction types (R, I, B, etc) that are + treated as header for each table. 
Each table will have its own requirements + and type_list must include all the instruction-types that the table needs. + Note, all elements of this list must be present in the latex_inst_type + dictionary defined in constants.py + + The latex_file is a file pointer to which the latex-table will dumped into + + The dataset is a list of 3-element tuples containing: + (list_of_extensions, title, list_of_instructions) + The list_of_extensions must contain all the set of extensions whose + instructions must be populated under a given title. If list_of_instructions + is not empty, then only those instructions mentioned in list_of_instructions + present in the extension will be dumped into the latex-table, other + instructions will be ignored. + + Once the above inputs are received then function first creates table entries + for the instruction types. To simplify things, we maintain a dictionary + called latex_inst_type in constants.py which is created in the same way the + instruction dictionary is created. This allows us to re-use the same logic + to create the instruction types table as well + + Once the header is created, we then parse through every entry in the + dataset. For each list dataset entry we use the create_inst_dict function to + create an exhaustive list of instructions associated with the respective + collection of the extension of that dataset. Then we apply the instruction + filter, if any, indicated by the list_of_instructions of that dataset. + Thereon, for each instruction we create a latex table entry. + + Latex table specification for ilen sized instructions: + Each table is created with ilen+1 columns - ilen columns for each bit of the + instruction and one column to hold the name of the instruction. + + For each argument of an instruction we use the arg_lut from constants.py + to identify its position in the encoding, and thus create a multicolumn + entry with the name of the argument as the data. For hardcoded bits, we + do the same where we capture a string of continuous 1s and 0s, identify + the position and assign the same string as the data of the + multicolumn entry in the table. 
+ + """ + column_size = "".join(["p{0.002in}"] * (ilen + 1)) + + type_entries = ( + """ + \\multicolumn{3}{l}{31} & + \\multicolumn{2}{r}{27} & + \\multicolumn{1}{c}{26} & + \\multicolumn{1}{r}{25} & + \\multicolumn{3}{l}{24} & + \\multicolumn{2}{r}{20} & + \\multicolumn{3}{l}{19} & + \\multicolumn{2}{r}{15} & + \\multicolumn{2}{l}{14} & + \\multicolumn{1}{r}{12} & + \\multicolumn{4}{l}{11} & + \\multicolumn{1}{r}{7} & + \\multicolumn{6}{l}{6} & + \\multicolumn{1}{r}{0} \\\\ + \\cline{2-33}\n&\n\n +""" + if ilen == 32 + else """ + \\multicolumn{1}{c}{15} & + \\multicolumn{1}{c}{14} & + \\multicolumn{1}{c}{13} & + \\multicolumn{1}{c}{12} & + \\multicolumn{1}{c}{11} & + \\multicolumn{1}{c}{10} & + \\multicolumn{1}{c}{9} & + \\multicolumn{1}{c}{8} & + \\multicolumn{1}{c}{7} & + \\multicolumn{1}{c}{6} & + \\multicolumn{1}{c}{5} & + \\multicolumn{1}{c}{4} & + \\multicolumn{1}{c}{3} & + \\multicolumn{1}{c}{2} & + \\multicolumn{1}{c}{1} & + \\multicolumn{1}{c}{0} \\\\ + \\cline{2-17}\n&\n\n +""" + ) + + # depending on the type_list input we create a subset dictionary of + # latex_inst_type dictionary present in constants.py + type_dict = { + key: value for key, value in latex_inst_type.items() if key in type_list + } + + # iterate ovr each instruction type and create a table entry + for t in type_dict: + fields: list[tuple[int, int, str]] = [] + + # first capture all "arguments" of the type (funct3, funct7, rd, etc) + # and capture their positions using arg_lut. + for f in type_dict[t]["variable_fields"]: + (msb, lsb) = arg_lut[f] + name = f if f not in latex_mapping else latex_mapping[f] + fields.append((msb, lsb, name)) + + # iterate through the 32 bits, starting from the msb, and assign + # argument names to the relevant portions of the instructions. This + # information is stored as a 3-element tuple containing the msb, lsb + # position of the arugment and the name of the argument. + msb = ilen - 1 + y = "" + for r in range(0, ilen): + if y != "": + fields.append((msb, ilen - 1 - r + 1, y)) + y = "" + msb = ilen - 1 - r - 1 + if r == 31: + if y != "": + fields.append((msb, 0, y)) + y = "" + + # sort the arguments in decreasing order of msb position + fields.sort(key=lambda y: y[0], reverse=True) + + # for each argument/string of 1s or 0s, create a multicolumn latex table + # entry + entry = "" + for r, (msb, lsb, name) in enumerate(fields): + if r == len(fields) - 1: + entry += ( + f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n" + ) + elif r == 0: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n" + else: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n" + entry += f"\\cline{{2-{ilen+1}}}\n&\n\n" + type_entries += entry + + # for each entry in the dataset create a table + content = "" + for ext_list, title, filter_list, include_pseudo in dataset: + instr_dict: InstrDict = {} + + # for all extensions list in ext_list, create a dictionary of + # instructions associated with those extensions. + for e in ext_list: + instr_dict.update(create_inst_dict(["rv" + e], include_pseudo)) + + # if filter_list is not empty then use that as the official set of + # instructions that need to be dumped into the latex table + inst_list = list(instr_dict.keys()) if not filter_list else filter_list + + # for each instruction create an latex table entry just like how we did + # above with the instruction-type table. 
+ instr_entries = "" + for inst in inst_list: + if inst not in instr_dict: + logging.error( + f"in make_ext_latex_table: Instruction: {inst} not found in instr_dict" + ) + raise SystemExit(1) + fields = [] + + # only if the argument is available in arg_lut we consume it, else + # throw error. + for f in instr_dict[inst]["variable_fields"]: + if f not in arg_lut: + logging.error( + f"Found variable {f} in instruction {inst} whose mapping is not available" + ) + raise SystemExit(1) + (msb, lsb) = arg_lut[f] + name = ( + f.replace("_", ".") if f not in latex_mapping else latex_mapping[f] + ) + fields.append((msb, lsb, name)) + + msb = ilen - 1 + y = "" + if ilen == 16: + encoding = instr_dict[inst]["encoding"][16:] + else: + encoding = instr_dict[inst]["encoding"] + for r in range(0, ilen): + x = encoding[r] + if (msb, ilen - 1 - r + 1) in latex_fixed_fields: + fields.append((msb, ilen - 1 - r + 1, y)) + msb = ilen - 1 - r + y = "" + if x == "-": + if y != "": + fields.append((msb, ilen - 1 - r + 1, y)) + y = "" + msb = ilen - 1 - r - 1 + else: + y += str(x) + if r == ilen - 1: + if y != "": + fields.append((msb, 0, y)) + y = "" + + fields.sort(key=lambda y: y[0], reverse=True) + entry = "" + for r, (msb, lsb, name) in enumerate(fields): + if r == len(fields) - 1: + entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n' + elif r == 0: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n" + else: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n" + entry += f"\\cline{{2-{ilen+1}}}\n&\n\n" + instr_entries += entry + + # once an entry of the dataset is completed we create the whole table + # with the title of that dataset as sub-heading (sort-of) + if title != "": + content += f""" + +\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\ +\\multicolumn{{{ilen}}}{{c}}{{\\bfseries {title} }} & \\\\ +\\cline{{2-{ilen+1}}} + + & +{instr_entries} +""" + else: + content += f""" +{instr_entries} +""" + + header = f""" +\\newpage + +\\begin{{table}}[p] +\\begin{{small}} +\\begin{{center}} + \\begin{{tabular}} {{{column_size}l}} + {" ".join(['&']*ilen)} \\\\ + + & +{type_entries} +""" + endtable = f""" + +\\end{{tabular}} +\\end{{center}} +\\end{{small}} +{caption} +\\end{{table}} +""" + # dump the contents and return + latex_file.write(header + content + endtable) diff --git a/src/riscv_opcodes/parse.py b/src/riscv_opcodes/parse.py new file mode 100644 index 0000000..d78f232 --- /dev/null +++ b/src/riscv_opcodes/parse.py @@ -0,0 +1,121 @@ +import argparse +import json +import logging +import pprint + +from .c_utils import make_c +from .chisel_utils import make_chisel +from .constants import emitted_pseudo_ops +from .go_utils import make_go +from .latex_utils import make_latex_table, make_priv_latex_table +from .rust_utils import make_rust +from .shared_utils import add_segmented_vls_insn, create_inst_dict +from .sverilog_utils import make_sverilog +from .svg_utils import make_svg + +LOG_FORMAT = "%(levelname)s:: %(message)s" +LOG_LEVEL = logging.INFO + +pretty_printer = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) + + +def generate_extensions( + extensions: list[str], + include_pseudo: bool, + c: bool, + chisel: bool, + spinalhdl: bool, + sverilog: bool, + rust: bool, + go: bool, + latex: bool, + svg: bool, +): + instr_dict = create_inst_dict(extensions, include_pseudo) + instr_dict = dict(sorted(instr_dict.items())) + instr_dict_with_segment = add_segmented_vls_insn(instr_dict) + + with 
open("instr_dict.json", "w", encoding="utf-8") as outfile: + json.dump(instr_dict_with_segment, outfile, indent=2) + + if c: + instr_dict_c = create_inst_dict( + extensions, False, include_pseudo_ops=emitted_pseudo_ops + ) + instr_dict_c = dict(sorted(instr_dict_c.items())) + make_c(instr_dict_c) + logging.info("encoding.out.h generated successfully") + + if chisel: + make_chisel(instr_dict) + logging.info("inst.chisel generated successfully") + + if spinalhdl: + make_chisel(instr_dict, True) + logging.info("inst.spinalhdl generated successfully") + + if sverilog: + make_sverilog(instr_dict) + logging.info("inst.sverilog generated successfully") + + if rust: + make_rust(instr_dict) + logging.info("inst.rs generated successfully") + + if go: + make_go(instr_dict_with_segment) + logging.info("inst.go generated successfully") + + if latex: + make_latex_table() + logging.info("instr-table.tex generated successfully") + make_priv_latex_table() + logging.info("priv-instr-table.tex generated successfully") + + if svg: + make_svg(instr_dict) + logging.info("inst.svg generated successfully") + + +def main(): + parser = argparse.ArgumentParser(description="Generate RISC-V constants headers") + parser.add_argument( + "-pseudo", action="store_true", help="Include pseudo-instructions" + ) + parser.add_argument("-c", action="store_true", help="Generate output for C") + parser.add_argument( + "-chisel", action="store_true", help="Generate output for Chisel" + ) + parser.add_argument( + "-spinalhdl", action="store_true", help="Generate output for SpinalHDL" + ) + parser.add_argument( + "-sverilog", action="store_true", help="Generate output for SystemVerilog" + ) + parser.add_argument("-rust", action="store_true", help="Generate output for Rust") + parser.add_argument("-go", action="store_true", help="Generate output for Go") + parser.add_argument("-latex", action="store_true", help="Generate output for Latex") + parser.add_argument("-svg", action="store_true", help="Generate .svg output") + parser.add_argument( + "extensions", + nargs="*", + help="Extensions to use. This is a glob of the rv_.. files, e.g. 'rv*' will give all extensions.", + ) + + args = parser.parse_args() + + print(f"Extensions selected : {args.extensions}") + + generate_extensions( + args.extensions, + args.pseudo, + args.c, + args.chisel, + args.spinalhdl, + args.sverilog, + args.rust, + args.go, + args.latex, + args.svg, + ) diff --git a/src/riscv_opcodes/resources.py b/src/riscv_opcodes/resources.py new file mode 100644 index 0000000..e9398ec --- /dev/null +++ b/src/riscv_opcodes/resources.py @@ -0,0 +1,39 @@ +import sys +from importlib.resources import files +from typing import IO + +if sys.version_info < (3, 12): + # This was deprecated in Python 3.12. + from importlib.abc import Traversable +else: + from importlib.resources.abc import Traversable + + +def resource_root() -> Traversable: + """ + Return the root directory as a traversable that can + be used to load the `extensions`, `*.csv` and `encoding.h` + files. For historical reasons these are not stored inside + the `src/riscv_opcodes` directory in the source distribution + but they are moved there when generating the binary wheel. + This means we need to check in both places. + """ + assert __package__ is not None + package_root = files(__package__) + if (package_root / "extensions").is_dir(): + return package_root + return package_root / ".." / ".." + + +def read_text_resource(path_relative_to_root: str) -> str: + """ + Read a text file relative to the root of this repo. 
+ """ + return resource_root().joinpath(path_relative_to_root).read_text(encoding="utf-8") + + +def open_text_resource(path_relative_to_root: str) -> IO[str]: + """ + Open a text file relative to the root of this repo. + """ + return resource_root().joinpath(path_relative_to_root).open("r", encoding="utf-8") diff --git a/src/riscv_opcodes/rust_utils.py b/src/riscv_opcodes/rust_utils.py new file mode 100644 index 0000000..74e17eb --- /dev/null +++ b/src/riscv_opcodes/rust_utils.py @@ -0,0 +1,28 @@ +import logging +import pprint + +from .constants import causes, csrs, csrs32 +from .shared_utils import InstrDict + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_rust(instr_dict: InstrDict): + mask_match_str = "" + for i in instr_dict: + mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n' + mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n' + for num, name in csrs + csrs32: + mask_match_str += f"const CSR_{name.upper()}: u16 = {hex(num)};\n" + for num, name in causes: + mask_match_str += ( + f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n' + ) + with open("inst.rs", "w", encoding="utf-8") as rust_file: + rust_file.write( + f""" +/* Automatically generated by parse_opcodes */ +{mask_match_str} +""" + ) diff --git a/src/riscv_opcodes/rv_colors.py b/src/riscv_opcodes/rv_colors.py new file mode 100644 index 0000000..76e53a5 --- /dev/null +++ b/src/riscv_opcodes/rv_colors.py @@ -0,0 +1,12 @@ +palette = { + "Berkeley Blue": "#003262", + "California Gold": "#FDB515", + "Dark Blue": "#011e41", + "Teal": "#0a6b7c", + "Magenta": "#cb007b", + "Purple": "#60269e", + "Light Gold": "#fdda64", + "Light Teal": "#62cbc9", + "Pink": "#fe9bb1", + "Lavender": "#c2a6e1", +} diff --git a/src/riscv_opcodes/shared_utils.py b/src/riscv_opcodes/shared_utils.py new file mode 100644 index 0000000..3a1a3bc --- /dev/null +++ b/src/riscv_opcodes/shared_utils.py @@ -0,0 +1,641 @@ +import copy +import logging +import os +import pprint +import re +from fnmatch import fnmatch +from io import StringIO +from itertools import chain +from typing import Dict, NoReturn, Optional, TypedDict + +from .constants import ( + arg_lut, + fixed_ranges, + imported_regex, + overlapping_extensions, + overlapping_instructions, + pseudo_regex, + single_fixed, +) +from .resources import open_text_resource, resource_root + +LOG_FORMAT = "%(levelname)s:: %(message)s" +LOG_LEVEL = logging.INFO + +pretty_printer = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) + + +# Log an error message +def log_and_exit(message: str) -> NoReturn: + """Log an error message and exit the program.""" + logging.error(message) + raise SystemExit(1) + + +# Initialize encoding to 32-bit '-' values +def initialize_encoding(bits: int = 32) -> "list[str]": + """Initialize encoding with '-' to represent don't care bits.""" + return ["-"] * bits + + +# Validate bit range and value +def validate_bit_range(msb: int, lsb: int, entry_value: int, line: str): + """Validate the bit range and entry value.""" + if msb < lsb: + log_and_exit( + f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in its encoding' + ) + + if entry_value >= (1 << (msb - lsb + 1)): + log_and_exit( + f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb}' + ) + + +# Split the instruction line into name and remaining 
part +def parse_instruction_line(line: str) -> "tuple[str, str]": + """Parse the instruction name and the remaining encoding details.""" + name, remaining = line.replace("\t", " ").split(" ", 1) + name = name.replace(".", "_") # Replace dots for compatibility + remaining = remaining.lstrip() # Remove leading whitespace + return name, remaining + + +# Verify Overlapping Bits +def check_overlapping_bits(encoding: "list[str]", ind: int, line: str): + """Check for overlapping bits in the encoding.""" + if encoding[31 - ind] != "-": + log_and_exit( + f'{line.split(" ")[0]:<10} has {ind} bit overlapping in its opcodes' + ) + + +# Update encoding for fixed ranges +def update_encoding_for_fixed_range( + encoding: "list[str]", msb: int, lsb: int, entry_value: int, line: str +): + """ + Update encoding bits for a given bit range. + Checks for overlapping bits and assigns the value accordingly. + """ + for ind in range(lsb, msb + 1): + check_overlapping_bits(encoding, ind, line) + bit = str((entry_value >> (ind - lsb)) & 1) + encoding[31 - ind] = bit + + +# Process fixed bit patterns +def process_fixed_ranges(remaining: str, encoding: "list[str]", line: str): + """Process fixed bit ranges in the encoding.""" + for s2, s1, entry in fixed_ranges.findall(remaining): + msb, lsb, entry_value = int(s2), int(s1), int(entry, 0) + + # Validate bit range and entry value + validate_bit_range(msb, lsb, entry_value, line) + update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line) + + return fixed_ranges.sub(" ", remaining) + + +# Process single bit assignments +def process_single_fixed(remaining: str, encoding: "list[str]", line: str): + """Process single fixed assignments in the encoding.""" + for lsb, value, _drop in single_fixed.findall(remaining): + lsb = int(lsb, 0) + value = int(value, 0) + + check_overlapping_bits(encoding, lsb, line) + encoding[31 - lsb] = str(value) + + +# Main function to check argument look-up table +def check_arg_lut(args: "list[str]", encoding_args: "list[str]", name: str): + """Check if arguments are present in arg_lut.""" + for arg in args: + if arg not in arg_lut: + arg = handle_arg_lut_mapping(arg, name) + msb, lsb = arg_lut[arg] + update_encoding_args(encoding_args, arg, msb, lsb) + + +# Handle missing argument mappings +def handle_arg_lut_mapping(arg: str, name: str): + """Handle cases where an argument needs to be mapped to an existing one.""" + parts = arg.split("=") + if len(parts) == 2: + existing_arg, _new_arg = parts + if existing_arg in arg_lut: + arg_lut[arg] = arg_lut[existing_arg] + else: + log_and_exit( + f" Found field {existing_arg} in variable {arg} in instruction {name} " + f"whose mapping in arg_lut does not exist" + ) + else: + log_and_exit( + f" Found variable {arg} in instruction {name} " + f"whose mapping in arg_lut does not exist" + ) + return arg + + +# Update encoding args with variables +def update_encoding_args(encoding_args: "list[str]", arg: str, msb: int, lsb: int): + """Update encoding arguments and ensure no overlapping.""" + for ind in range(lsb, msb + 1): + check_overlapping_bits(encoding_args, ind, arg) + encoding_args[31 - ind] = arg + + +# Compute match and mask +def convert_encoding_to_match_mask(encoding: "list[str]") -> "tuple[str, str]": + """Convert the encoding list to match and mask strings.""" + match = "".join(encoding).replace("-", "0") + mask = "".join(encoding).replace("0", "1").replace("-", "0") + return hex(int(match, 2)), hex(int(mask, 2)) + + +class SingleInstr(TypedDict): + encoding: str + variable_fields: 
"list[str]" + extension: "list[str]" + match: str + mask: str + + +InstrDict = Dict[str, SingleInstr] + + +# Processing main function for a line in the encoding file +def process_enc_line(line: str, ext: str) -> "tuple[str, SingleInstr]": + """ + This function processes each line of the encoding files (rv*). As part of + the processing, the function ensures that the encoding is legal through the + following checks:: + - there is no over specification (same bits assigned different values) + - there is no under specification (some bits not assigned values) + - bit ranges are in the format hi..lo=val where hi > lo + - value assigned is representable in the bit range + - also checks that the mapping of arguments of an instruction exists in + arg_lut. + If the above checks pass, then the function returns a tuple of the name and + a dictionary containing basic information of the instruction which includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + reprsent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representin the bits that need to be masked to extract + the value required for matching. + """ + encoding = initialize_encoding() + + # Parse the instruction line + name, remaining = parse_instruction_line(line) + + # Process fixed ranges + remaining = process_fixed_ranges(remaining, encoding, line) + + # Process single fixed assignments + process_single_fixed(remaining, encoding, line) + + # Convert the list of encodings into a match and mask + match, mask = convert_encoding_to_match_mask(encoding) + + # Check arguments in arg_lut + args = single_fixed.sub(" ", remaining).split() + encoding_args = encoding.copy() + + check_arg_lut(args, encoding_args, name) + + # Return single_dict + return name, { + "encoding": "".join(encoding), + "variable_fields": args, + "extension": [os.path.basename(ext)], + "match": match, + "mask": mask, + } + + +# Extract ISA Type +def extract_isa_type(ext_name: str) -> str: + """Extracts the ISA type from the extension name.""" + return ext_name.split("_")[0] + + +# Verify the types for RV* +def is_rv_variant(type1: str, type2: str) -> bool: + """Checks if the types are RV variants (rv32/rv64).""" + return (type2 == "rv" and type1 in {"rv32", "rv64"}) or ( + type1 == "rv" and type2 in {"rv32", "rv64"} + ) + + +# Check for same base ISA +def has_same_base_isa(type1: str, type2: str) -> bool: + """Determines if the two ISA types share the same base.""" + return type1 == type2 or is_rv_variant(type1, type2) + + +# Compare the base ISA type of a given extension name against a list of extension names +def same_base_isa(ext_name: str, ext_name_list: "list[str]") -> bool: + """Checks if the base ISA type of ext_name matches any in ext_name_list.""" + type1 = extract_isa_type(ext_name) + return any(has_same_base_isa(type1, extract_isa_type(ext)) for ext in ext_name_list) + + +# Pad two strings to equal length +def pad_to_equal_length(str1: str, str2: str, pad_char: str = "-") -> "tuple[str, str]": + """Pads two strings to equal length using the given padding character.""" + max_len = max(len(str1), len(str2)) + return str1.rjust(max_len, pad_char), str2.rjust(max_len, pad_char) + + +# 
Check compatibility for two characters +def has_no_conflict(char1: str, char2: str) -> bool: + """Checks if two characters are compatible (either matching or don't-care).""" + return char1 == "-" or char2 == "-" or char1 == char2 + + +# Conflict check between two encoded strings +def overlaps(x: str, y: str) -> bool: + """Checks if two encoded strings overlap without conflict.""" + x, y = pad_to_equal_length(x, y) + return all(has_no_conflict(x[i], y[i]) for i in range(len(x))) + + +# Check presence of keys in dictionary. +def is_in_nested_dict(a: "dict[str, set[str]]", key1: str, key2: str) -> bool: + """Checks if key2 exists in the dictionary under key1.""" + return key1 in a and key2 in a[key1] + + +# Overlap allowance +def overlap_allowed(a: "dict[str, set[str]]", x: str, y: str) -> bool: + """Determines if overlap is allowed between x and y based on nested dictionary checks""" + return is_in_nested_dict(a, x, y) or is_in_nested_dict(a, y, x) + + +# Check overlap allowance between extensions +def extension_overlap_allowed(x: str, y: str) -> bool: + """Checks if overlap is allowed between two extensions using the overlapping_extensions dictionary.""" + return overlap_allowed(overlapping_extensions, x, y) + + +# Check overlap allowance between instructions +def instruction_overlap_allowed(x: str, y: str) -> bool: + """Checks if overlap is allowed between two instructions using the overlapping_instructions dictionary.""" + return overlap_allowed(overlapping_instructions, x, y) + + +# Check 'nf' field +def is_segmented_instruction(instruction: SingleInstr) -> bool: + """Checks if an instruction contains the 'nf' field.""" + return "nf" in instruction["variable_fields"] + + +# Expand 'nf' fields +def update_with_expanded_instructions( + updated_dict: InstrDict, key: str, value: SingleInstr +): + """Expands 'nf' fields in the instruction dictionary and updates it with new instructions.""" + for new_key, new_value in expand_nf_field(key, value): + updated_dict[new_key] = new_value + + +# Process instructions, expanding segmented ones and updating the dictionary +def add_segmented_vls_insn(instr_dict: InstrDict) -> InstrDict: + """Processes instructions, expanding segmented ones and updating the dictionary.""" + # Use dictionary comprehension for efficiency + return dict( + chain.from_iterable( + ( + expand_nf_field(key, value) + if is_segmented_instruction(value) + else [(key, value)] + ) + for key, value in instr_dict.items() + ) + ) + + +# Expand the 'nf' field in the instruction dictionary +def expand_nf_field( + name: str, single_dict: SingleInstr +) -> "list[tuple[str, SingleInstr]]": + """Validate and prepare the instruction dictionary.""" + validate_nf_field(single_dict, name) + remove_nf_field(single_dict) + update_mask(single_dict) + + name_expand_index = name.find("e") + + # Pre compute the base match value and encoding prefix + base_match = int(single_dict["match"], 16) + encoding_prefix = single_dict["encoding"][3:] + + expanded_instructions = [ + create_expanded_instruction( + name, single_dict, nf, name_expand_index, base_match, encoding_prefix + ) + for nf in range(8) # Range of 0 to 7 + ] + + return expanded_instructions + + +# Validate the presence of 'nf' +def validate_nf_field(single_dict: SingleInstr, name: str): + """Validates the presence of 'nf' in variable fields before expansion.""" + if "nf" not in single_dict["variable_fields"]: + log_and_exit(f"Cannot expand nf field for instruction {name}") + + +# Remove 'nf' from variable fields +def remove_nf_field(single_dict: 
SingleInstr): + """Removes 'nf' from variable fields in the instruction dictionary.""" + single_dict["variable_fields"].remove("nf") + + +# Update the mask to include the 'nf' field +def update_mask(single_dict: SingleInstr): + """Updates the mask to include the 'nf' field in the instruction dictionary.""" + single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29) + + +# Create an expanded instruction +def create_expanded_instruction( + name: str, + single_dict: SingleInstr, + nf: int, + name_expand_index: int, + base_match: int, + encoding_prefix: str, +) -> "tuple[str, SingleInstr]": + """Creates an expanded instruction based on 'nf' value.""" + new_single_dict = copy.deepcopy(single_dict) + + # Update match value in one step + new_single_dict["match"] = hex(base_match | (nf << 29)) + new_single_dict["encoding"] = format(nf, "03b") + encoding_prefix + + # Construct new instruction name + new_name = ( + name + if nf == 0 + else f"{name[:name_expand_index]}seg{nf + 1}{name[name_expand_index:]}" + ) + + return (new_name, new_single_dict) + + +def read_lines(file: str) -> "list[str]": + """ + Reads lines from a file and returns non-blank, non-comment lines. + The file must be a resource relative to the root of this repo. + """ + with open_text_resource(file) as fp: + lines = (line.rstrip() for line in fp) + return [line for line in lines if line and not line.startswith("#")] + + +# Update the instruction dictionary +def process_standard_instructions( + lines: "list[str]", instr_dict: InstrDict, file_name: str +): + """Processes standard instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if "$import" in line or "$pseudo" in line: + continue + logging.debug(f"Processing line: {line}") + name, single_dict = process_enc_line(line, file_name) + ext_name = os.path.basename(file_name) + + if name in instr_dict: + var = instr_dict[name]["extension"] + if same_base_isa(ext_name, var): + log_and_exit( + f"Instruction {name} from {ext_name} is already added from {var} in same base ISA" + ) + elif instr_dict[name]["encoding"] != single_dict["encoding"]: + log_and_exit( + f"Instruction {name} from {ext_name} has different encodings in different base ISAs" + ) + + instr_dict[name]["extension"].extend(single_dict["extension"]) + else: + for key, item in instr_dict.items(): + if ( + overlaps(item["encoding"], single_dict["encoding"]) + and not extension_overlap_allowed(ext_name, item["extension"][0]) + and not instruction_overlap_allowed(name, key) + and same_base_isa(ext_name, item["extension"]) + ): + log_and_exit( + f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}' + ) + + instr_dict[name] = single_dict + + +# Incorporate pseudo instructions into the instruction dictionary based on given conditions +def process_pseudo_instructions( + lines: "list[str]", + instr_dict: InstrDict, + file_name: str, + include_pseudo: bool, + include_pseudo_ops: "list[str]", +): + """Processes pseudo instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if "$pseudo" not in line: + continue + logging.debug(f"Processing pseudo line: {line}") + ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0] + ext_file = read_extension_file(ext) + + validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst) + + name, single_dict = process_enc_line(f"{pseudo_inst} {line_content}", file_name) + if ( + orig_inst.replace(".", "_") not in instr_dict + or 
include_pseudo + or name in include_pseudo_ops + ): + if name not in instr_dict: + instr_dict[name] = single_dict + logging.debug(f"Including pseudo_op: {name}") + else: + if single_dict["match"] != instr_dict[name]["match"]: + instr_dict[f"{name}_pseudo"] = single_dict + # TODO: This expression is always false since both sides are list[str]. + elif single_dict["extension"] not in instr_dict[name]["extension"]: # type: ignore + instr_dict[name]["extension"].extend(single_dict["extension"]) + + +# Integrate imported instructions into the instruction dictionary +def process_imported_instructions( + lines: "list[str]", instr_dict: InstrDict, file_name: str +): + """Processes imported instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if "$import" not in line: + continue + logging.debug(f"Processing imported line: {line}") + import_ext, reg_instr = imported_regex.findall(line)[0] + ext_file = read_extension_file(import_ext) + + validate_instruction_in_extension(reg_instr, ext_file, file_name, line) + + for oline in StringIO(ext_file): + if re.findall(f"^\\s*{reg_instr}\\s+", oline): + name, single_dict = process_enc_line(oline, file_name) + if name in instr_dict: + if instr_dict[name]["encoding"] != single_dict["encoding"]: + log_and_exit( + f"Imported instruction {name} from {os.path.basename(file_name)} has different encodings" + ) + instr_dict[name]["extension"].extend(single_dict["extension"]) + else: + instr_dict[name] = single_dict + break + + +def read_extension_file(ext: str) -> str: + """ + Read the extension file path, considering the unratified directory if necessary. + """ + file = resource_root() / "extensions" / ext + if file.is_file(): + return file.read_text(encoding="utf-8") + file = resource_root() / "extensions" / "unratified" / ext + if file.is_file(): + return file.read_text(encoding="utf-8") + + log_and_exit(f"Extension {ext} not found.") + + +# Confirm the presence of an original instruction in the corresponding extension file. +def validate_instruction_in_extension( + inst: str, ext_file: str, file_name: str, pseudo_inst: str +): + """Validates if the original instruction exists in the dependent extension.""" + found = False + + for oline in StringIO(ext_file): + if re.findall(f"^\\s*{inst}\\s+", oline): + found = True + break + if not found: + log_and_exit( + f"Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}" + ) + + +# Construct a dictionary of instructions filtered by specified criteria +def create_inst_dict( + file_filter: "list[str]", + include_pseudo: bool = False, + include_pseudo_ops: "Optional[list[str]]" = None, +) -> InstrDict: + """ + Creates a dictionary of instructions based on the provided file filters. + + This function return a dictionary containing all instructions associated + with an extension defined by the file_filter input. + Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc. + Each node of the dictionary will correspond to an instruction which again is + a dictionary. 
+        - variables: list of arguments used by the instruction whose mapping
+          exists in the arg_lut dictionary
+        - encoding: this contains the 32-bit encoding of the instruction where
+          '-' is used to represent the positions of arguments and 1/0 is used to
+          represent the static encoding of the bits
+        - extension: this field contains the rv* filename from which this
+          instruction was included
+        - match: hex value representing the bits that need to match to detect
+          this instruction
+        - mask: hex value representing the bits that need to be masked to extract
+          the value required for matching.
+    In order to build this dictionary, the function does 2 passes over the same
+    rv<file_filter> file:
+        - First pass: extracts all standard instructions, skipping pseudo ops
+          and imported instructions. For each selected line, the `process_enc_line`
+          function is called to create the dictionary contents of the instruction.
+          Checks are performed to ensure that the same instruction is not added
+          twice to the overall dictionary.
+        - Second pass: parses only pseudo_ops. For each pseudo_op, the function:
+            - Checks if the dependent extension and instruction exist.
+            - Adds the pseudo_op to the dictionary if the dependent instruction
+              is not already present; otherwise, it is skipped.
+    """
+    if include_pseudo_ops is None:
+        include_pseudo_ops = []
+
+    instr_dict: InstrDict = {}
+
+    ratified_file_filters = [
+        fil for fil in file_filter if not fil.startswith("unratified/")
+    ]
+    unratified_file_filters = [
+        fil.removeprefix("unratified/")
+        for fil in file_filter
+        if fil.startswith("unratified/")
+    ]
+
+    # Extension file name, "extensions[/unratified]/rv_foo".
+    file_names: list[str] = []
+
+    for file in (resource_root() / "extensions").iterdir():
+        if file.is_file() and any(
+            fnmatch(file.name, fil) for fil in ratified_file_filters
+        ):
+            file_names.append("extensions/" + file.name)
+    for file in (resource_root() / "extensions" / "unratified").iterdir():
+        if file.is_file() and any(
+            fnmatch(file.name, fil) for fil in unratified_file_filters
+        ):
+            file_names.append("extensions/unratified/" + file.name)
+
+    logging.debug("Collecting standard instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for standard instructions")
+        lines = read_lines(file_name)
+        process_standard_instructions(lines, instr_dict, file_name)
+
+    logging.debug("Collecting pseudo instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for pseudo instructions")
+        lines = read_lines(file_name)
+        process_pseudo_instructions(
+            lines,
+            instr_dict,
+            file_name,
+            include_pseudo,
+            include_pseudo_ops,
+        )
+
+    logging.debug("Collecting imported instructions")
+
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for imported instructions")
+        lines = read_lines(file_name)
+        process_imported_instructions(lines, instr_dict, file_name)
+
+    return instr_dict
+
+
+# Extracts the extensions used in an instruction dictionary
+def instr_dict_2_extensions(instr_dict: InstrDict) -> "list[str]":
+    return list({item["extension"][0] for item in instr_dict.values()})
+
+
+# Returns signed interpretation of a value within a given width
+def signed(value: int, width: int) -> int:
+    return value if 0 <= value < (1 << (width - 1)) else value - (1 << width)
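The two-pass construction described in the docstring above produces a plain dictionary keyed by instruction name. As a quick, hypothetical illustration (not part of this commit), the sketch below assumes the wheel built from this change is installed and that `create_inst_dict` remains importable from `riscv_opcodes.shared_utils` with the signature shown in the diff:

```python
# Illustrative sketch only; field names follow the docstring above.
from riscv_opcodes.shared_utils import create_inst_dict

# The filter is a glob over extension file names under extensions/, e.g. "rv_i".
instr_dict = create_inst_dict(["rv_i"], include_pseudo=False)

entry = instr_dict["addi"]            # one node of the dictionary
print(entry["encoding"])              # 32-character string of '1', '0' and '-'
print(entry["variable_fields"])       # argument names that resolve through arg_lut
print(entry["match"], entry["mask"])  # hex match/mask pair
```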
diff --git a/src/riscv_opcodes/sverilog_utils.py b/src/riscv_opcodes/sverilog_utils.py
new file mode 100644
index 0000000..c17be9f
--- /dev/null
+++ b/src/riscv_opcodes/sverilog_utils.py
@@ -0,0 +1,30 @@
+import logging
+import pprint
+from pathlib import Path
+
+from .constants import csrs, csrs32
+from .shared_utils import InstrDict
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_sverilog(instr_dict: InstrDict):
+    names_str = ""
+    for i in instr_dict:
+        names_str += f"  localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n"
+    names_str += "  /* CSR Addresses */\n"
+    for num, name in csrs + csrs32:
+        names_str += (
+            f"  localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n"
+        )
+
+    Path("inst.sverilog").write_text(
+        f"""
+/* Automatically generated by parse_opcodes */
+package riscv_instr;
+{names_str}
+endpackage
+""",
+        encoding="utf-8",
+    )
diff --git a/src/riscv_opcodes/svg_utils.py b/src/riscv_opcodes/svg_utils.py
new file mode 100644
index 0000000..4126ad6
--- /dev/null
+++ b/src/riscv_opcodes/svg_utils.py
@@ -0,0 +1,284 @@
+import logging
+import pprint
+from typing import Dict, List, NamedTuple
+
+from .rv_colors import palette
+from .shared_utils import InstrDict, instr_dict_2_extensions
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+class RectangleDimensions(NamedTuple):
+    x: float
+    y: float
+    w: float
+    h: float
+
+
+class InstrRectangle(NamedTuple):
+    dims: RectangleDimensions
+    extension: str
+    label: str
+
+
+InstrDimsDict = Dict[str, RectangleDimensions]
+
+
+def encoding_to_rect(encoding: str) -> RectangleDimensions:
+    """Convert a binary encoding string to rectangle dimensions."""
+
+    def calculate_size(free_bits: int, tick: float) -> float:
+        """Calculate size based on number of free bits and tick value."""
+        return 2**free_bits * tick
+
+    instr_length = len(encoding)
+    # starting position
+    x = 0
+    y = 0
+    x_tick = 1 / (2 ** (0.5 * instr_length))
+    y_tick = 1 / (2 ** (0.5 * instr_length))
+    x_free_bits = 0
+    y_free_bits = 0
+    even = encoding[0::2]
+    odd = encoding[1::2]
+    # Process bits from least significant to most significant
+    for i, bit in enumerate(encoding):
+        if bit == "1":
+            offset = 0.5 / (2 ** int(i / 2))
+            if i % 2 == 0:
+                y += offset
+            else:
+                x += offset
+        elif bit == "0":
+            pass
+            # position not adjusted on 0
+
+    x_free_bits = odd.count("-")
+    y_free_bits = even.count("-")
+    x_size = calculate_size(x_free_bits, x_tick)
+    y_size = calculate_size(y_free_bits, y_tick)
+
+    # If we came here, encoding can be visualized with a single rectangle
+    rectangle = RectangleDimensions(x=x, y=y, w=x_size, h=y_size)
+    return rectangle
+
+
+FIGSIZE = 128
+
+
+def plot_image(
+    instr_dict: InstrDict,
+    instr_dims_dict: InstrDimsDict,
+    extension_sizes: Dict[str, float],
+) -> None:
+    """Plot the instruction rectangles using matplotlib."""
+
+    from matplotlib import patches
+    from matplotlib import pyplot as plt
+
+    def get_readable_font_color(bg_hex: str) -> str:
+        """Determine readable font color based on background color."""
+
+        def hex_to_rgb(hex_color: str) -> tuple[int, int, int]:
+            """Convert hex color string to RGB tuple."""
+            hex_color = hex_color.lstrip("#")
+            r = int(hex_color[0:2], 16)
+            g = int(hex_color[2:4], 16)
+            b = int(hex_color[4:6], 16)
+
+            return (r, g, b)
+
+        r, g, b = hex_to_rgb(bg_hex)
+        luminance = 0.299 * r + 0.587 * g + 0.114 * b
+        return "#000000" if luminance > 186 else "#FFFFFF"
+
+    def plot_with_matplotlib(
+        rectangles: list[InstrRectangle],
+        colors: list[str],
+        hatches: list[str],
+        extensions: list[str],
+    ) -> None:
+        """Plot rectangles with matplotlib using specified styles."""
+
+        _, ax = plt.subplots(figsize=(FIGSIZE, FIGSIZE), facecolor="none")  # type: ignore
+        ax.set_facecolor("none")  # type: ignore
+        linewidth = FIGSIZE / 100
+        for dims, ext, label in rectangles:
+            x, y, w, h = dims
+            ext_idx = extensions.index(ext)
+            color = colors[ext_idx]
+            hatch = hatches[ext_idx]
+            rect = patches.Rectangle(
+                (x, y),
+                w,
+                h,
+                linewidth=linewidth,
+                edgecolor="black",
+                facecolor=color,
+                hatch=hatch,
+                alpha=1.0,
+            )
+            ax.add_patch(rect)
+
+            if w >= h:
+                base_dim = w
+                rotation = 0
+            else:
+                base_dim = h
+                rotation = 90
+
+            # Scale font size based on base dimension and label length
+            n_chars = len(label)
+            font_size = (
+                base_dim / n_chars * 90 * FIGSIZE
+            )  # Adjust scaling factor as needed
+            if font_size > 1:
+                fontdict = {
+                    "fontsize": font_size,
+                    "color": get_readable_font_color(color),
+                    "family": "DejaVu Sans Mono",
+                }
+                ax.text(  # type: ignore
+                    x + w / 2,
+                    y + h / 2,
+                    label,
+                    ha="center",
+                    va="center",
+                    fontdict=fontdict,
+                    rotation=rotation,
+                )
+
+        plt.axis("off")  # type: ignore
+        plt.tight_layout()  # type: ignore
+        plt.savefig("inst.svg", format="svg")  # type: ignore
+        plt.show()  # type: ignore
+
+    extensions: List[str] = sorted(
+        extension_sizes.keys(), key=lambda k: extension_sizes[k], reverse=True
+    )
+
+    rectangles: List[InstrRectangle] = []
+    for instr in instr_dict:
+        dims = instr_dims_dict[instr]
+        rectangles.append(
+            InstrRectangle(
+                dims=dims,
+                extension=instr_dict[instr]["extension"][0],
+                label=instr.replace("_", "."),
+            )
+        )
+
+    # sort rectangles so that small ones are in the foreground
+    # An overlap occurs e.g. for pseudo ops, and these should be on top of the encodings they reuse
+    rectangles = sorted(rectangles, key=lambda x: x.dims.w * x.dims.h, reverse=True)
+
+    colors, hatches = generate_styles(extensions)
+
+    plot_with_matplotlib(rectangles, colors, hatches, extensions)
+
+
+def generate_styles(extensions: list[str]) -> tuple[list[str], list[str]]:
+    """Generate color and hatch styles for extensions."""
+    n_colors = len(palette)
+    colors = [""] * len(extensions)
+    hatches = [""] * len(extensions)
+    hatch_options = ["", "/", "\\", "|", "-", "+", "x", ".", "*"]
+    color_options = list(palette.values())
+
+    for i in range(len(extensions)):
+        colors[i] = color_options[i % n_colors]
+        hatches[i] = hatch_options[int(i / n_colors) % len(hatch_options)]
+
+    return colors, hatches
+
+
+def defragment_encodings(
+    encodings: list[str], length: int = 32, offset: int = 0
+) -> list[str]:
+    """Defragment a list of binary encodings by reordering bits."""
+    # determine bit position which has the most fixed bits
+    fixed_encodings = ["0", "1"]
+    fixed_bits = [0] * length
+    fixed_encoding_indeces: Dict[str, List[int]] = {
+        value: [] for value in fixed_encodings
+    }
+    for index, encoding in enumerate(encodings):
+        for position, value in enumerate(encoding):
+            if position > offset:
+                if value != "-":
+                    fixed_bits[position] += 1
+
+    # find bit position with most fixed bits, starting with the LSB to favor the opcode field
+    max_fixed_bits = max(fixed_bits)
+    if max_fixed_bits == 0:
+        # fully defragmented
+        return encodings
+    max_fixed_position = len(fixed_bits) - 1 - fixed_bits[::-1].index(max_fixed_bits)
+
+    # move bit position with the most fixed bits to the front
+    for index, encoding in enumerate(encodings):
+        encodings[index] = (
+            encoding[0:offset]
+            + encoding[max_fixed_position]
+            + encoding[offset:max_fixed_position]
+            + encoding[max_fixed_position + 1 :]
+        )
+
+        if encoding[max_fixed_position] in fixed_encodings:
+            fixed_encoding_indeces[encoding[max_fixed_position]].append(index)
+        else:
+            # No more fixed bits in this encoding
+            pass
+
+    if offset < length:
+        # continue to defragment starting from the next offset
+        offset = offset + 1
+
+        # separate encodings
+        sep_encodings: Dict[str, List[str]] = {}
+        for fixed_encoding in fixed_encodings:
+            sep_encodings[fixed_encoding] = [
+                encodings[i] for i in fixed_encoding_indeces[fixed_encoding]
+            ]
+            sep_encodings[fixed_encoding] = defragment_encodings(
+                sep_encodings[fixed_encoding], length=length, offset=offset
+            )
+
+            # join encodings
+            for new_index, orig_index in enumerate(
+                fixed_encoding_indeces[fixed_encoding]
+            ):
+                encodings[orig_index] = sep_encodings[fixed_encoding][new_index]
+
+    return encodings
+
+
+def defragment_encoding_dict(instr_dict: InstrDict) -> InstrDict:
+    """Apply defragmentation to the encoding dictionary."""
+    encodings = [instr["encoding"] for instr in instr_dict.values()]
+    encodings_defragemented = defragment_encodings(encodings, length=32, offset=0)
+    for index, instr in enumerate(instr_dict):
+        instr_dict[instr]["encoding"] = encodings_defragemented[index]
+    return instr_dict
+
+
+def make_svg(instr_dict: InstrDict) -> None:
+    """Generate an SVG image from instruction encodings."""
+    extensions = instr_dict_2_extensions(instr_dict)
+    extension_size: Dict[str, float] = {}
+
+    instr_dict = defragment_encoding_dict(instr_dict)
+    instr_dims_dict: InstrDimsDict = {}
+
+    for ext in extensions:
+        extension_size[ext] = 0
+
+    for instr in instr_dict:
+        dims = encoding_to_rect(instr_dict[instr]["encoding"])
+
+        extension_size[instr_dict[instr]["extension"][0]] += dims.h * dims.w
+
+        instr_dims_dict[instr] = dims
+
+    plot_image(instr_dict, instr_dims_dict, extension_size)
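For completeness, here is a hypothetical end-to-end sketch of the SVG path added above. It is not part of the commit: it assumes the installed package exposes the modules as laid out in this diff and that matplotlib is available in the environment (`plot_image` imports it lazily):

```python
# Hypothetical usage sketch for the new svg_utils module.
from riscv_opcodes.shared_utils import create_inst_dict
from riscv_opcodes.svg_utils import make_svg

# Build encodings for all ratified extensions, then let make_svg defragment
# them, map each encoding to a rectangle, and write inst.svg via matplotlib.
instr_dict = create_inst_dict(["rv*"])
make_svg(instr_dict)
```

The same instruction dictionary feeds the other generators in this diff, so swapping `make_svg` for another `make_*` function should follow the same pattern.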