aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/riscv_opcodes/__init__.py2
-rw-r--r--src/riscv_opcodes/__main__.py10
-rw-r--r--src/riscv_opcodes/c_utils.py79
-rw-r--r--src/riscv_opcodes/chisel_utils.py82
-rw-r--r--src/riscv_opcodes/constants.py271
-rw-r--r--src/riscv_opcodes/go_utils.py64
-rw-r--r--src/riscv_opcodes/latex_utils.py450
-rw-r--r--src/riscv_opcodes/parse.py121
-rw-r--r--src/riscv_opcodes/resources.py39
-rw-r--r--src/riscv_opcodes/rust_utils.py28
-rw-r--r--src/riscv_opcodes/rv_colors.py12
-rw-r--r--src/riscv_opcodes/shared_utils.py641
-rw-r--r--src/riscv_opcodes/sverilog_utils.py30
-rw-r--r--src/riscv_opcodes/svg_utils.py284
14 files changed, 2113 insertions, 0 deletions
diff --git a/src/riscv_opcodes/__init__.py b/src/riscv_opcodes/__init__.py
new file mode 100644
index 0000000..e8fd9d4
--- /dev/null
+++ b/src/riscv_opcodes/__init__.py
@@ -0,0 +1,2 @@
+# Mark this directory as a package. This is not actually needed by
+# Python but Pylint gets confused about relative imports without it.
diff --git a/src/riscv_opcodes/__main__.py b/src/riscv_opcodes/__main__.py
new file mode 100644
index 0000000..456cddd
--- /dev/null
+++ b/src/riscv_opcodes/__main__.py
@@ -0,0 +1,10 @@
+"""
+This allows running as a module, i.e. `python3 -m riscv_opcodes` which
+we wouldn't normally need, but the `coverage` tool doesn't work on
+installed scripts - you can't do `coverage run riscv_opcodes` because it
+looks for a Python file called `riscv_opcodes` in the current directory.
+"""
+
+from .parse import main
+
+main()
diff --git a/src/riscv_opcodes/c_utils.py b/src/riscv_opcodes/c_utils.py
new file mode 100644
index 0000000..198a37f
--- /dev/null
+++ b/src/riscv_opcodes/c_utils.py
@@ -0,0 +1,79 @@
+import logging
+import os
+import pprint
+
+from .constants import causes, csrs, csrs32
+from .resources import read_text_resource
+from .shared_utils import InstrDict, arg_lut
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
def make_c(instr_dict: InstrDict):
    """
    Generate `encoding.out.h`, the C header used by riscv tools.

    The header wraps the static `encoding.h` resource and adds, per
    instruction, `MATCH_*`/`MASK_*` macros and `DECLARE_INSN` entries;
    per CSR, `CSR_*` macros and `DECLARE_CSR` entries; per exception
    cause, `CAUSE_*` macros and `DECLARE_CAUSE` entries; and one
    `INSN_FIELD_*` bit-mask per entry of `arg_lut`.
    """
    # MATCH/MASK #defines plus the DECLARE_INSN X-macro list.
    match_mask_lines = []
    declare_insn_lines = []
    for name, data in instr_dict.items():
        c_name = name.replace(".", "_")
        macro = c_name.upper()
        match_mask_lines.append(f"#define MATCH_{macro} {data['match']}\n")
        match_mask_lines.append(f"#define MASK_{macro} {data['mask']}\n")
        declare_insn_lines.append(f"DECLARE_INSN({c_name}, MATCH_{macro}, MASK_{macro})\n")
    mask_match_str = "".join(match_mask_lines)
    declare_insn_str = "".join(declare_insn_lines)

    # CSR numbers (common + RV32-only) and the DECLARE_CSR X-macro list.
    csr_lines = []
    declare_csr_lines = []
    for num, name in csrs + csrs32:
        csr_lines.append(f"#define CSR_{name.upper()} {hex(num)}\n")
        declare_csr_lines.append(f"DECLARE_CSR({name}, CSR_{name.upper()})\n")
    csr_names_str = "".join(csr_lines)
    declare_csr_str = "".join(declare_csr_lines)

    # Exception-cause numbers and the DECLARE_CAUSE X-macro list.
    cause_lines = []
    declare_cause_lines = []
    for num, name in causes:
        cause_macro = name.upper().replace(" ", "_")
        cause_lines.append(f"#define CAUSE_{cause_macro} {hex(num)}\n")
        declare_cause_lines.append(f'DECLARE_CAUSE("{name}", CAUSE_{cause_macro})\n')
    causes_str = "".join(cause_lines)
    declare_cause_str = "".join(declare_cause_lines)

    # One bit-mask per instruction field, built from its (msb, lsb) range.
    field_lines = []
    for name, (end, begin) in arg_lut.items():
        sanitized_name = name.replace(" ", "_").replace("=", "_eq_")
        mask = ((1 << (end - begin + 1)) - 1) << begin
        field_lines.append(f"#define INSN_FIELD_{sanitized_name.upper()} {hex(mask)}\n")
    arg_str = "".join(field_lines)

    enc_header = read_text_resource("encoding.h")

    # Record the generating commit in the file banner.
    commit = os.popen('git log -1 --format="format:%h"').read()

    output_str = f"""/* SPDX-License-Identifier: BSD-3-Clause */

/* Copyright (c) 2023 RISC-V International */

/*
 * This file is auto-generated by running 'make' in
 * https://github.com/riscv/riscv-opcodes ({commit})
 */

{enc_header}
/* Automatically generated by parse_opcodes. */
#ifndef RISCV_ENCODING_H
#define RISCV_ENCODING_H
{mask_match_str}
{csr_names_str}
{causes_str}
{arg_str}#endif
#ifdef DECLARE_INSN
{declare_insn_str}#endif
#ifdef DECLARE_CSR
{declare_csr_str}#endif
#ifdef DECLARE_CAUSE
{declare_cause_str}#endif
"""

    with open("encoding.out.h", "w", encoding="utf-8") as enc_file:
        enc_file.write(output_str)
diff --git a/src/riscv_opcodes/chisel_utils.py b/src/riscv_opcodes/chisel_utils.py
new file mode 100644
index 0000000..46cb0b6
--- /dev/null
+++ b/src/riscv_opcodes/chisel_utils.py
@@ -0,0 +1,82 @@
+import logging
+import pprint
+
+from .constants import causes, csrs, csrs32
+from .shared_utils import InstrDict, instr_dict_2_extensions
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
def make_chisel(instr_dict: InstrDict, spinal_hdl: bool = False):
    """
    Emit Scala constant definitions for the given instructions.

    Writes `inst.spinalhdl` when `spinal_hdl` is True, otherwise
    `inst.chisel`.  The generated file defines three objects:

    * `Instructions` - SpinalHDL masked literals (`M"b..."`), or Chisel
      `BitPat` maps grouped by extension,
    * `Causes`       - one value per exception cause plus an `all` array,
    * `CSRs`         - one value per CSR plus `all` and `all32` arrays.
    """

    chisel_names = ""
    cause_names_str = ""
    csr_names_str = ""
    for i in instr_dict:
        if spinal_hdl:
            # SpinalHDL masked literal; don't-care bits stay as '-'.
            # NOTE(review): replace("-","-") is a no-op, apparently kept
            # for symmetry with the BitPat variant below — confirm.
            chisel_names += f'  def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n'
        # else:
        #     chisel_names += f'  def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n'
    if not spinal_hdl:
        extensions = instr_dict_2_extensions(instr_dict)
        for e in extensions:
            # Derive the Scala identifier prefix from the extension name,
            # e.g. rv64_i -> I64, rv32_i -> I32, rv_i -> I.
            if "rv64_" in e:
                e_format = e.replace("rv64_", "").upper() + "64"
            elif "rv32_" in e:
                e_format = e.replace("rv32_", "").upper() + "32"
            elif "rv_" in e:
                e_format = e.replace("rv_", "").upper()
            else:
                e_format = e.upper()
            chisel_names += f'  val {e_format+"Type"} = Map(\n'
            for instr_name, instr in instr_dict.items():
                # Group each instruction under its primary extension only.
                if instr["extension"][0] == e:
                    tmp_instr_name = '"' + instr_name.upper().replace(".", "_") + '"'
                    chisel_names += f'    {tmp_instr_name:<18s} -> BitPat("b{instr["encoding"].replace("-","?")}"),\n'
            chisel_names += "  )\n"

    # Cause values, followed by an `all` array collecting every value.
    for num, name in causes:
        cause_names_str += f'  val {name.lower().replace(" ","_")} = {hex(num)}\n'
    cause_names_str += """  val all = {
    val res = collection.mutable.ArrayBuffer[Int]()
"""
    for num, name in causes:
        cause_names_str += f'    res += {name.lower().replace(" ","_")}\n'
    cause_names_str += """    res.toArray
  }"""

    # CSR values; `all` holds the common CSRs, `all32` additionally the
    # RV32-only ones.
    for num, name in csrs + csrs32:
        csr_names_str += f"  val {name} = {hex(num)}\n"
    csr_names_str += """  val all = {
    val res = collection.mutable.ArrayBuffer[Int]()
"""
    for num, name in csrs:
        csr_names_str += f"""    res += {name}\n"""
    csr_names_str += """    res.toArray
  }
  val all32 = {
    val res = collection.mutable.ArrayBuffer(all:_*)
"""
    for num, name in csrs32:
        csr_names_str += f"""    res += {name}\n"""
    csr_names_str += """    res.toArray
  }"""

    with open(
        "inst.spinalhdl" if spinal_hdl else "inst.chisel", "w", encoding="utf-8"
    ) as chisel_file:
        chisel_file.write(
            f"""
/* Automatically generated by parse_opcodes */
object Instructions {{
{chisel_names}
}}
object Causes {{
{cause_names_str}
}}
object CSRs {{
{csr_names_str}
}}
"""
        )
diff --git a/src/riscv_opcodes/constants.py b/src/riscv_opcodes/constants.py
new file mode 100644
index 0000000..fb67d70
--- /dev/null
+++ b/src/riscv_opcodes/constants.py
@@ -0,0 +1,271 @@
+import csv
+import re
+
+from .resources import open_text_resource
+
# TODO: The constants in this file should be in all caps.

# Extension pairs whose encodings intentionally overlap — presumably
# consulted by the encoding-conflict checks in shared_utils; confirm.
overlapping_extensions = {
    "rv_zcmt": {"rv_c_d"},
    "rv_zcmp": {"rv_c_d"},
    "rv_c": {"rv_zcmop"},
}

# Instruction pairs whose encodings intentionally overlap (e.g. c.nop is
# a special case of c.addi).
overlapping_instructions = {
    "c_addi": {"c_nop"},
    "c_lui": {"c_addi16sp"},
    "c_mv": {"c_jr"},
    "c_jalr": {"c_ebreak"},
    "c_add": {"c_ebreak", "c_jalr"},
}

# regex matching a complete ISA string: base (RV32/64/128 + letters)
# followed by at most one inline Z-extension and an ordered sequence of
# optional underscore-separated Z-extensions.
isa_regex = re.compile(
    "^RV(32|64|128)[IE]+[ABCDEFGHJKLMNPQSTUVX]*(Zicsr|Zifencei|Zihintpause|Zam|Ztso|Zkne|Zknd|Zknh|Zkse|Zksh|Zkg|Zkb|Zkr|Zks|Zkn|Zba|Zbc|Zbb|Zbp|Zbr|Zbm|Zbs|Zbe|Zbf|Zbt|Zmmul|Zbpbo|Zca|Zcf|Zcd|Zcb|Zcmp|Zcmt){,1}(_Zicsr){,1}(_Zifencei){,1}(_Zihintpause){,1}(_Zmmul){,1}(_Zam){,1}(_Zba){,1}(_Zbb){,1}(_Zbc){,1}(_Zbe){,1}(_Zbf){,1}(_Zbm){,1}(_Zbp){,1}(_Zbpbo){,1}(_Zbr){,1}(_Zbs){,1}(_Zbt){,1}(_Zkb){,1}(_Zkg){,1}(_Zkr){,1}(_Zks){,1}(_Zkn){,1}(_Zknd){,1}(_Zkne){,1}(_Zknh){,1}(_Zkse){,1}(_Zksh){,1}(_Ztso){,1}(_Zca){,1}(_Zcf){,1}(_Zcd){,1}(_Zcb){,1}(_Zcmp){,1}(_Zcmt){,1}$"
)

# regex to find <msb>..<lsb>=<val> patterns in instruction
fixed_ranges = re.compile(
    r"\s*(?P<msb>\d+.?)\.\.(?P<lsb>\d+.?)\s*=\s*(?P<val>\d[\w]*)[\s$]*", re.M
)

# regex to find <lsb>=<val> patterns in instructions
# single_fixed = re.compile('\s+(?P<lsb>\d+)=(?P<value>[\w\d]*)[\s$]*', re.M)
single_fixed = re.compile(r"(?:^|[\s])(?P<lsb>\d+)=(?P<value>[\w]*)((?=\s|$))", re.M)

# regex to find the overloading condition variable
var_regex = re.compile(r"(?P<var>[a-zA-Z][\w\d]*)\s*=\s*.*?[\s$]*", re.M)

# regex for pseudo op instructions returns the dependent filename, dependent
# instruction, the pseudo op name and the encoding string
pseudo_regex = re.compile(
    r"^\$pseudo_op\s+(?P<filename>rv[\d]*_[\w].*)::\s*(?P<orig_inst>.*?)\s+(?P<pseudo_inst>.*?)\s+(?P<overload>.*)$",
    re.M,
)

# regex for "$import <extension> :: <instruction>" lines.
imported_regex = re.compile(
    r"^\s*\$import\s*(?P<extension>.*)\s*::\s*(?P<instruction>.*)", re.M
)
+
+
def read_int_map_csv(filename: str) -> "list[tuple[int, str]]":
    """
    Parse a two-column CSV resource into (number, name) pairs.

    The first column is parsed as an integer with its base inferred from
    the text (so `0x...` hex values work); the second column is kept as
    a string.

    Args:
        filename (str): The name of the CSV resource to read.

    Returns:
        list of tuple: (int, str) pairs, one per CSV row.
    """
    with open_text_resource(filename) as stream:
        return [
            (int(row[0], 0), row[1])
            for row in csv.reader(stream, skipinitialspace=True)
        ]
+
+
# Exception-cause and CSR number tables, loaded from packaged CSV
# resources.  `csrs32` presumably holds RV32-only CSRs (it is emitted
# separately from `csrs` by the generators) — confirm against csrs32.csv.
causes = read_int_map_csv("causes.csv")
csrs = read_int_map_csv("csrs.csv")
csrs32 = read_int_map_csv("csrs32.csv")
+
+
def read_arg_lut_csv(filename: str) -> "dict[str, tuple[int, int]]":
    """
    Load the argument lookup table from a CSV resource.

    Each row is `name, msb, lsb`; the result maps the argument name to
    its (msb, lsb) bit positions within the instruction word.
    """
    lut: "dict[str, tuple[int, int]]" = {}
    with open_text_resource(filename) as stream:
        for row in csv.reader(stream, skipinitialspace=True):
            lut[row[0]] = (int(row[1]), int(row[2]))
    return lut
+
+
# Argument name -> (msb, lsb) bit positions, loaded from the packaged CSV.
arg_lut = read_arg_lut_csv("arg_lut.csv")

# for mop: extra fields for the May-Be-Operation (Zimop/Zcmop) encodings,
# which are not present in arg_lut.csv.
arg_lut["mop_r_t_30"] = (30, 30)
arg_lut["mop_r_t_27_26"] = (27, 26)
arg_lut["mop_r_t_21_20"] = (21, 20)
arg_lut["mop_rr_t_30"] = (30, 30)
arg_lut["mop_rr_t_27_26"] = (27, 26)
arg_lut["c_mop_t"] = (10, 8)

# dictionary containing the mapping of each argument to what the fields in
# the latex table should be
latex_mapping = {
    "imm12": "imm[11:0]",
    "rs1": "rs1",
    "rs2": "rs2",
    "rd": "rd",
    "imm20": "imm[31:12]",
    "bimm12hi": "imm[12$\\vert$10:5]",
    "bimm12lo": "imm[4:1$\\vert$11]",
    "imm12hi": "imm[11:5]",
    "imm12lo": "imm[4:0]",
    "jimm20": "imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]",
    "zimm": "uimm",
    "shamtw": "shamt",
    "shamtd": "shamt",
    "shamtq": "shamt",
    "rd_p": "rd\\,$'$",
    "rs1_p": "rs1\\,$'$",
    "rs2_p": "rs2\\,$'$",
    "rd_rs1_n0": "rd/rs$\\neq$0",
    "rd_rs1_p": "rs1\\,$'$/rs2\\,$'$",
    "c_rs2": "rs2",
    "c_rs2_n0": "rs2$\\neq$0",
    "rd_n0": "rd$\\neq$0",
    "rs1_n0": "rs1$\\neq$0",
    "c_rs1_n0": "rs1$\\neq$0",
    "rd_rs1": "rd/rs1",
    "zimm6hi": "uimm[5]",
    "zimm6lo": "uimm[4:0]",
    "c_nzuimm10": "nzuimm[5:4$\\vert$9:6$\\vert$2$\\vert$3]",
    "c_uimm7lo": "uimm[2$\\vert$6]",
    "c_uimm7hi": "uimm[5:3]",
    "c_uimm8lo": "uimm[7:6]",
    "c_uimm8hi": "uimm[5:3]",
    "c_uimm9lo": "uimm[7:6]",
    "c_uimm9hi": "uimm[5:4$\\vert$8]",
    "c_nzimm6lo": "nzimm[4:0]",
    "c_nzimm6hi": "nzimm[5]",
    "c_imm6lo": "imm[4:0]",
    "c_imm6hi": "imm[5]",
    "c_nzimm10hi": "nzimm[9]",
    "c_nzimm10lo": "nzimm[4$\\vert$6$\\vert$8:7$\\vert$5]",
    "c_nzimm18hi": "nzimm[17]",
    "c_nzimm18lo": "nzimm[16:12]",
    "c_imm12": "imm[11$\\vert$4$\\vert$9:8$\\vert$10$\\vert$6$\\vert$7$\\vert$3:1$\\vert$5]",
    "c_bimm9lo": "imm[7:6$\\vert$2:1$\\vert$5]",
    "c_bimm9hi": "imm[8$\\vert$4:3]",
    "c_nzuimm5": "nzuimm[4:0]",
    "c_nzuimm6lo": "nzuimm[4:0]",
    "c_nzuimm6hi": "nzuimm[5]",
    "c_uimm8splo": "uimm[4:2$\\vert$7:6]",
    "c_uimm8sphi": "uimm[5]",
    "c_uimm8sp_s": "uimm[5:2$\\vert$7:6]",
    "c_uimm10splo": "uimm[4$\\vert$9:6]",
    "c_uimm10sphi": "uimm[5]",
    "c_uimm9splo": "uimm[4:3$\\vert$8:6]",
    "c_uimm9sphi": "uimm[5]",
    "c_uimm10sp_s": "uimm[5:4$\\vert$9:6]",
    "c_uimm9sp_s": "uimm[5:3$\\vert$8:6]",
    "rd_p_e": "rd\\,$'$, even values only",
    "rs2_p_e": "rs2\\,$'$, even values only",
    "rd_n0_e": "rd$\\neq$0, even values only",
    "c_rs2_e": "rs2, even values only",
    "rd_e": "rd, even values only",
    "rs2_e": "rs2, even values only",
}
+
+
# created a dummy instruction-dictionary like dictionary for all the instruction
# types so that the same logic can be used to create their tables
latex_inst_type = {
    "R-type": {
        "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct7"],
    },
    "R4-type": {
        "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct2", "rs3"],
    },
    "I-type": {
        "variable_fields": ["opcode", "rd", "funct3", "rs1", "imm12"],
    },
    "S-type": {
        "variable_fields": ["opcode", "imm12lo", "funct3", "rs1", "rs2", "imm12hi"],
    },
    "B-type": {
        "variable_fields": ["opcode", "bimm12lo", "funct3", "rs1", "rs2", "bimm12hi"],
    },
    "U-type": {
        "variable_fields": ["opcode", "rd", "imm20"],
    },
    "J-type": {
        "variable_fields": ["opcode", "rd", "jimm20"],
    },
}
# (msb, lsb) boundaries of the standard 32-bit instruction fields
# (funct7 | rs2 | rs1 | funct3 | rd | opcode), used to split latex rows.
latex_fixed_fields = [
    (31, 25),
    (24, 20),
    (19, 15),
    (14, 12),
    (11, 7),
    (6, 0),
]

# Pseudo-ops present in the generated encodings.
# By default pseudo-ops are not listed as they are considered aliases
# of their base instruction.
emitted_pseudo_ops = [
    "pause",
    "prefetch_i",
    "prefetch_r",
    "prefetch_w",
    "rstsa16",
    "rstsa32",
    "srli32_u",
    "slli_rv32",
    "srai_rv32",
    "srli_rv32",
    "umax32",
    "c_mop_1",
    "c_sspush_x1",
    "c_mop_3",
    "c_mop_5",
    "c_sspopchk_x5",
    "c_mop_7",
    "c_mop_9",
    "c_mop_11",
    "c_mop_13",
    "c_mop_15",
    "mop_r_0",
    "mop_r_1",
    "mop_r_2",
    "mop_r_3",
    "mop_r_4",
    "mop_r_5",
    "mop_r_6",
    "mop_r_7",
    "mop_r_8",
    "mop_r_9",
    "mop_r_10",
    "mop_r_11",
    "mop_r_12",
    "mop_r_13",
    "mop_r_14",
    "mop_r_15",
    "mop_r_16",
    "mop_r_17",
    "mop_r_18",
    "mop_r_19",
    "mop_r_20",
    "mop_r_21",
    "mop_r_22",
    "mop_r_23",
    "mop_r_24",
    "mop_r_25",
    "mop_r_26",
    "mop_r_27",
    "mop_r_28",
    "sspopchk_x1",
    "sspopchk_x5",
    "ssrdp",
    "mop_r_29",
    "mop_r_30",
    "mop_r_31",
    "mop_r_32",
    "mop_rr_0",
    "mop_rr_1",
    "mop_rr_2",
    "mop_rr_3",
    "mop_rr_4",
    "mop_rr_5",
    "mop_rr_6",
    "mop_rr_7",
    "sspush_x1",
    "sspush_x5",
    "lpad",
    "bclri.rv32",
    "bexti.rv32",
    "binvi.rv32",
    "bseti.rv32",
    "zext.h.rv32",
    "rev8.h.rv32",
    "rori.rv32",
]
diff --git a/src/riscv_opcodes/go_utils.py b/src/riscv_opcodes/go_utils.py
new file mode 100644
index 0000000..1a6fc33
--- /dev/null
+++ b/src/riscv_opcodes/go_utils.py
@@ -0,0 +1,64 @@
+import logging
+import pprint
+import sys
+
+from .constants import csrs
+from .shared_utils import InstrDict, signed
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
def make_go(instr_dict: InstrDict):
    """
    Generate `inst.go` for the Go assembler.

    The file contains an `encode` switch that maps each instruction's
    `obj.As` constant to its fixed encoding fields (opcode, funct3, rs1,
    rs2, csr, funct7), followed by a map from CSR number to CSR name.
    """

    invocation = " ".join(sys.argv)
    prelude = f"""// Code generated by {invocation}; DO NOT EDIT."""

    prelude += """
package riscv

import "cmd/internal/obj"

type inst struct {
	opcode uint32
	funct3 uint32
	rs1 uint32
	rs2 uint32
	csr int64
	funct7 uint32
}

func encode(a obj.As) *inst {
	switch a {
"""

    csrs_map_str = """	}
	return nil
}

var csrs = map[uint16]string {
"""

    endoffile = """}
"""

    # One `case` per instruction: slice the fixed fields out of the
    # match value at their standard bit positions.
    case_lines = []
    for name, data in instr_dict.items():
        match_value = int(data["match"], 0)
        opcode = (match_value >> 0) & ((1 << 7) - 1)
        funct3 = (match_value >> 12) & ((1 << 3) - 1)
        rs1 = (match_value >> 15) & ((1 << 5) - 1)
        rs2 = (match_value >> 20) & ((1 << 5) - 1)
        csr = (match_value >> 20) & ((1 << 12) - 1)
        funct7 = (match_value >> 25) & ((1 << 7) - 1)
        case_lines.append(
            f"""	case A{name.upper().replace("_","")}:
		return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }}
"""
        )
    instr_str = "".join(case_lines)

    # CSR name table, sorted by CSR number.
    for num, name in sorted(csrs, key=lambda entry: entry[0]):
        csrs_map_str += f'{hex(num)} : "{name.upper()}",\n'

    with open("inst.go", "w", encoding="utf-8") as out:
        out.write(prelude + instr_str + csrs_map_str + endoffile)
diff --git a/src/riscv_opcodes/latex_utils.py b/src/riscv_opcodes/latex_utils.py
new file mode 100644
index 0000000..38f92f8
--- /dev/null
+++ b/src/riscv_opcodes/latex_utils.py
@@ -0,0 +1,450 @@
+import logging
+import pprint
+from typing import TextIO
+
+from .constants import latex_fixed_fields, latex_inst_type, latex_mapping
+from .shared_utils import InstrDict, arg_lut, create_inst_dict
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
def make_priv_latex_table():
    """
    Write `priv-instr-table.tex`, the LaTeX encoding table of the
    privileged instructions (trap-return, interrupt management,
    memory management, and hypervisor load/store groups).
    """
    type_list = ["R-type", "I-type"]
    # Every dataset row draws from the same collection of privileged
    # extension files.
    system_instr = ["_h", "_s", "_system", "_svinval", "64_h", "_svinval_h"]
    dataset_list = [
        (system_instr, "Trap-Return Instructions", ["sret", "mret"], False),
        (system_instr, "Interrupt-Management Instructions", ["wfi"], False),
        (
            system_instr,
            "Supervisor Memory-Management Instructions",
            ["sfence_vma"],
            False,
        ),
        (
            system_instr,
            "Hypervisor Memory-Management Instructions",
            ["hfence_vvma", "hfence_gvma"],
            False,
        ),
        (
            system_instr,
            "Hypervisor Virtual-Machine Load and Store Instructions",
            [
                "hlv_b",
                "hlv_bu",
                "hlv_h",
                "hlv_hu",
                "hlv_w",
                "hlvx_hu",
                "hlvx_wu",
                "hsv_b",
                "hsv_h",
                "hsv_w",
            ],
            False,
        ),
        (
            system_instr,
            "Hypervisor Virtual-Machine Load and Store Instructions, RV64 only",
            ["hlv_wu", "hlv_d", "hsv_d"],
            False,
        ),
        (
            system_instr,
            "Svinval Memory-Management Instructions",
            [
                "sinval_vma",
                "sfence_w_inval",
                "sfence_inval_ir",
                "hinval_vvma",
                "hinval_gvma",
            ],
            False,
        ),
    ]
    caption = "\\caption{RISC-V Privileged Instructions}"
    with open("priv-instr-table.tex", "w", encoding="utf-8") as latex_file:
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+
def make_latex_table():
    """
    This function is meant to create the instr-table.tex that is used by
    the riscv-isa-manual. It creates a single latex file containing
    multiple tables, with each table limited to a single page. Only the
    last table is assigned a latex caption.

    For each table we assign a type-list which captures the different
    instruction types (R, I, B, etc) that will be required for the table.
    Then we select the list of extensions ('_i', '32_i', etc) whose
    instructions are required to populate the table. For each extension or
    collection of extensions we can assign a title, such that in the end
    they appear as subheadings within the table (note these are inlined
    headings and not captions of the table).

    All of the above information is collected/created and sent to the
    make_ext_latex_table function to dump out the latex contents into a
    file.

    The last table only has to be given a caption - as per the policy of
    the riscv-isa-manual.
    """
    # open the file and use it as a pointer for all further dumps
    with open("instr-table.tex", "w", encoding="utf-8") as latex_file:

        # create the rv32i table first. Here we set the caption to empty. We use the
        # files rv_i and rv32_i to capture instructions relevant for rv32i
        # configuration. The dataset is a list of 4-element tuples:
        # (list_of_extensions, title, list_of_instructions, include_pseudo_ops).
        # If list_of_instructions is empty then it indicates that all
        # instructions of all the extensions in list_of_extensions need to be
        # dumped. If not empty, then only the instructions listed in
        # list_of_instructions will be dumped into latex.
        caption = ""
        type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"]
        dataset_list: list[tuple[list[str], str, list[str], bool]] = [
            (["_i", "32_i"], "RV32I Base Instruction Set", [], False)
        ]
        dataset_list.append((["_i"], "", ["fence_tso", "pause"], True))
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ["R-type", "I-type", "S-type"]
        dataset_list = [
            (["64_i"], "RV64I Base Instruction Set (in addition to RV32I)", [], False)
        ]
        dataset_list.append(
            (["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False)
        )
        dataset_list.append(
            (["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False)
        )
        dataset_list.append((["_m", "32_m"], "RV32M Standard Extension", [], False))
        dataset_list.append(
            (["64_m"], "RV64M Standard Extension (in addition to RV32M)", [], False)
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ["R-type"]
        dataset_list = [(["_a"], "RV32A Standard Extension", [], False)]
        dataset_list.append(
            (["64_a"], "RV64A Standard Extension (in addition to RV32A)", [], False)
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ["R-type", "R4-type", "I-type", "S-type"]
        dataset_list = [(["_f"], "RV32F Standard Extension", [], False)]
        dataset_list.append(
            (["64_f"], "RV64F Standard Extension (in addition to RV32F)", [], False)
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ["R-type", "R4-type", "I-type", "S-type"]
        dataset_list = [(["_d"], "RV32D Standard Extension", [], False)]
        dataset_list.append(
            (["64_d"], "RV64D Standard Extension (in addition to RV32D)", [], False)
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ["R-type", "R4-type", "I-type", "S-type"]
        dataset_list = [(["_q"], "RV32Q Standard Extension", [], False)]
        dataset_list.append(
            (["64_q"], "RV64Q Standard Extension (in addition to RV32Q)", [], False)
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        # only the final table carries the overall caption
        caption = "\\caption{Instruction listing for RISC-V}"
        type_list = ["R-type", "R4-type", "I-type", "S-type"]
        dataset_list = [
            (["_zfh", "_d_zfh", "_q_zfh"], "RV32Zfh Standard Extension", [], False)
        ]
        dataset_list.append(
            (
                ["64_zfh"],
                "RV64Zfh Standard Extension (in addition to RV32Zfh)",
                [],
                False,
            )
        )
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        ## The following is a demo to show that Compressed instructions can also be
        # dumped in the same manner as above

        # type_list = ['']
        # dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])]
        # dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', []))
        # make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption)
+
+
def make_ext_latex_table(
    type_list: "list[str]",
    dataset: "list[tuple[list[str], str, list[str], bool]]",
    latex_file: TextIO,
    ilen: int,
    caption: str,
):
    """
    For a given collection of extensions this function dumps out a complete
    latex table which includes the encodings of the instructions.

    The ilen input indicates the length of the instruction for which the table
    is created.

    The caption input is used to create the latex-table caption.

    The type_list input is a list of instruction types (R, I, B, etc) that are
    treated as header for each table. Each table will have its own requirements
    and type_list must include all the instruction-types that the table needs.
    Note, all elements of this list must be present in the latex_inst_type
    dictionary defined in constants.py

    The latex_file is a file pointer to which the latex table will be dumped.

    The dataset is a list of 4-element tuples containing:
    (list_of_extensions, title, list_of_instructions, include_pseudo_ops)
    The list_of_extensions must contain all the set of extensions whose
    instructions must be populated under a given title. If list_of_instructions
    is not empty, then only those instructions mentioned in list_of_instructions
    present in the extension will be dumped into the latex table, other
    instructions will be ignored.

    Once the above inputs are received the function first creates table entries
    for the instruction types. To simplify things, we maintain a dictionary
    called latex_inst_type in constants.py which is created in the same way the
    instruction dictionary is created. This allows us to re-use the same logic
    to create the instruction types table as well

    Once the header is created, we then parse through every entry in the
    dataset. For each dataset entry we use the create_inst_dict function to
    create an exhaustive list of instructions associated with the respective
    collection of the extension of that dataset. Then we apply the instruction
    filter, if any, indicated by the list_of_instructions of that dataset.
    Thereon, for each instruction we create a latex table entry.

    Latex table specification for ilen sized instructions:
    Each table is created with ilen+1 columns - ilen columns for each bit of the
    instruction and one column to hold the name of the instruction.

    For each argument of an instruction we use the arg_lut from constants.py
    to identify its position in the encoding, and thus create a multicolumn
    entry with the name of the argument as the data. For hardcoded bits, we
    do the same where we capture a string of continuous 1s and 0s, identify
    the position and assign the same string as the data of the
    multicolumn entry in the table.

    """
    # one (very narrow) column per instruction bit, plus one for the name
    column_size = "".join(["p{0.002in}"] * (ilen + 1))

    # bit-position ruler row for the table header; the 32-bit ruler groups
    # positions at the standard field boundaries, the 16-bit one labels
    # every bit.
    type_entries = (
        """
    \\multicolumn{3}{l}{31} &
    \\multicolumn{2}{r}{27} &
    \\multicolumn{1}{c}{26} &
    \\multicolumn{1}{r}{25} &
    \\multicolumn{3}{l}{24} &
    \\multicolumn{2}{r}{20} &
    \\multicolumn{3}{l}{19} &
    \\multicolumn{2}{r}{15} &
    \\multicolumn{2}{l}{14} &
    \\multicolumn{1}{r}{12} &
    \\multicolumn{4}{l}{11} &
    \\multicolumn{1}{r}{7} &
    \\multicolumn{6}{l}{6} &
    \\multicolumn{1}{r}{0} \\\\
    \\cline{2-33}\n&\n\n
"""
        if ilen == 32
        else """
    \\multicolumn{1}{c}{15} &
    \\multicolumn{1}{c}{14} &
    \\multicolumn{1}{c}{13} &
    \\multicolumn{1}{c}{12} &
    \\multicolumn{1}{c}{11} &
    \\multicolumn{1}{c}{10} &
    \\multicolumn{1}{c}{9} &
    \\multicolumn{1}{c}{8} &
    \\multicolumn{1}{c}{7} &
    \\multicolumn{1}{c}{6} &
    \\multicolumn{1}{c}{5} &
    \\multicolumn{1}{c}{4} &
    \\multicolumn{1}{c}{3} &
    \\multicolumn{1}{c}{2} &
    \\multicolumn{1}{c}{1} &
    \\multicolumn{1}{c}{0} \\\\
    \\cline{2-17}\n&\n\n
"""
    )

    # depending on the type_list input we create a subset dictionary of
    # latex_inst_type dictionary present in constants.py
    type_dict = {
        key: value for key, value in latex_inst_type.items() if key in type_list
    }

    # iterate over each instruction type and create a table entry
    for t in type_dict:
        fields: list[tuple[int, int, str]] = []

        # first capture all "arguments" of the type (funct3, funct7, rd, etc)
        # and capture their positions using arg_lut.
        for f in type_dict[t]["variable_fields"]:
            (msb, lsb) = arg_lut[f]
            name = f if f not in latex_mapping else latex_mapping[f]
            fields.append((msb, lsb, name))

        # iterate through the 32 bits, starting from the msb, and assign
        # argument names to the relevant portions of the instructions. This
        # information is stored as a 3-element tuple containing the msb, lsb
        # position of the argument and the name of the argument.
        msb = ilen - 1
        y = ""
        for r in range(0, ilen):
            if y != "":
                fields.append((msb, ilen - 1 - r + 1, y))
                y = ""
            msb = ilen - 1 - r - 1
            # NOTE(review): this end-of-word flush only fires for 32-bit
            # tables (r == 31); for ilen == 16 it is dead code. The
            # instruction loop below uses `r == ilen - 1` instead — and in
            # this loop `y` is never extended, so the flush never triggers
            # anyway. Confirm whether this branch is intentional.
            if r == 31:
                if y != "":
                    fields.append((msb, 0, y))
                    y = ""

        # sort the arguments in decreasing order of msb position
        fields.sort(key=lambda y: y[0], reverse=True)

        # for each argument/string of 1s or 0s, create a multicolumn latex table
        # entry
        entry = ""
        for r, (msb, lsb, name) in enumerate(fields):
            if r == len(fields) - 1:
                entry += (
                    f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n"
                )
            elif r == 0:
                entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n"
            else:
                entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n"
        entry += f"\\cline{{2-{ilen+1}}}\n&\n\n"
        type_entries += entry

    # for each entry in the dataset create a table
    content = ""
    for ext_list, title, filter_list, include_pseudo in dataset:
        instr_dict: InstrDict = {}

        # for all extensions list in ext_list, create a dictionary of
        # instructions associated with those extensions.
        for e in ext_list:
            instr_dict.update(create_inst_dict(["rv" + e], include_pseudo))

        # if filter_list is not empty then use that as the official set of
        # instructions that need to be dumped into the latex table
        inst_list = list(instr_dict.keys()) if not filter_list else filter_list

        # for each instruction create a latex table entry just like how we did
        # above with the instruction-type table.
        instr_entries = ""
        for inst in inst_list:
            if inst not in instr_dict:
                logging.error(
                    f"in make_ext_latex_table: Instruction: {inst} not found in instr_dict"
                )
                raise SystemExit(1)
            fields = []

            # only if the argument is available in arg_lut we consume it, else
            # throw error.
            for f in instr_dict[inst]["variable_fields"]:
                if f not in arg_lut:
                    logging.error(
                        f"Found variable {f} in instruction {inst} whose mapping is not available"
                    )
                    raise SystemExit(1)
                (msb, lsb) = arg_lut[f]
                name = (
                    f.replace("_", ".") if f not in latex_mapping else latex_mapping[f]
                )
                fields.append((msb, lsb, name))

            msb = ilen - 1
            y = ""
            # 16-bit encodings are stored right-aligned in a 32-char string;
            # keep only the low half.
            if ilen == 16:
                encoding = instr_dict[inst]["encoding"][16:]
            else:
                encoding = instr_dict[inst]["encoding"]
            for r in range(0, ilen):
                x = encoding[r]
                # split accumulated hardcoded bits at the standard field
                # boundaries so fixed bits line up with the type header
                if (msb, ilen - 1 - r + 1) in latex_fixed_fields:
                    fields.append((msb, ilen - 1 - r + 1, y))
                    msb = ilen - 1 - r
                    y = ""
                if x == "-":
                    # a don't-care bit ends the current run of fixed bits
                    if y != "":
                        fields.append((msb, ilen - 1 - r + 1, y))
                        y = ""
                    msb = ilen - 1 - r - 1
                else:
                    y += str(x)
                # flush any trailing run of fixed bits at the last position
                if r == ilen - 1:
                    if y != "":
                        fields.append((msb, 0, y))
                        y = ""

            fields.sort(key=lambda y: y[0], reverse=True)
            entry = ""
            for r, (msb, lsb, name) in enumerate(fields):
                if r == len(fields) - 1:
                    entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n'
                elif r == 0:
                    entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n"
                else:
                    entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n"
            entry += f"\\cline{{2-{ilen+1}}}\n&\n\n"
            instr_entries += entry

        # once an entry of the dataset is completed we create the whole table
        # with the title of that dataset as sub-heading (sort-of)
        if title != "":
            content += f"""

\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\
\\multicolumn{{{ilen}}}{{c}}{{\\bfseries {title} }} & \\\\
\\cline{{2-{ilen+1}}}

 &
{instr_entries}
"""
        else:
            content += f"""
{instr_entries}
"""

    header = f"""
\\newpage

\\begin{{table}}[p]
\\begin{{small}}
\\begin{{center}}
    \\begin{{tabular}} {{{column_size}l}}
    {" ".join(['&']*ilen)} \\\\

    &
{type_entries}
"""
    endtable = f"""

\\end{{tabular}}
\\end{{center}}
\\end{{small}}
{caption}
\\end{{table}}
"""
    # dump the contents and return
    latex_file.write(header + content + endtable)
diff --git a/src/riscv_opcodes/parse.py b/src/riscv_opcodes/parse.py
new file mode 100644
index 0000000..d78f232
--- /dev/null
+++ b/src/riscv_opcodes/parse.py
@@ -0,0 +1,121 @@
+import argparse
+import json
+import logging
+import pprint
+
+from .c_utils import make_c
+from .chisel_utils import make_chisel
+from .constants import emitted_pseudo_ops
+from .go_utils import make_go
+from .latex_utils import make_latex_table, make_priv_latex_table
+from .rust_utils import make_rust
+from .shared_utils import add_segmented_vls_insn, create_inst_dict
+from .sverilog_utils import make_sverilog
+from .svg_utils import make_svg
+
+LOG_FORMAT = "%(levelname)s:: %(message)s"
+LOG_LEVEL = logging.INFO
+
+pretty_printer = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)
+
+
+def generate_extensions(
+    extensions: list[str],
+    include_pseudo: bool,
+    c: bool,
+    chisel: bool,
+    spinalhdl: bool,
+    sverilog: bool,
+    rust: bool,
+    go: bool,
+    latex: bool,
+    svg: bool,
+):
+    """
+    Build the instruction dictionary for `extensions` and run every
+    generator whose boolean flag is set. Always writes
+    `instr_dict.json` (with segmented vector loads/stores expanded)
+    to the current directory as a side effect.
+    """
+    instr_dict = create_inst_dict(extensions, include_pseudo)
+    instr_dict = dict(sorted(instr_dict.items()))
+    instr_dict_with_segment = add_segmented_vls_insn(instr_dict)
+
+    with open("instr_dict.json", "w", encoding="utf-8") as outfile:
+        json.dump(instr_dict_with_segment, outfile, indent=2)
+
+    if c:
+        # The C header additionally includes a fixed list of pseudo-ops.
+        instr_dict_c = create_inst_dict(
+            extensions, False, include_pseudo_ops=emitted_pseudo_ops
+        )
+        instr_dict_c = dict(sorted(instr_dict_c.items()))
+        make_c(instr_dict_c)
+        logging.info("encoding.out.h generated successfully")
+
+    if chisel:
+        make_chisel(instr_dict)
+        logging.info("inst.chisel generated successfully")
+
+    if spinalhdl:
+        # SpinalHDL output reuses the Chisel generator in SpinalHDL mode.
+        make_chisel(instr_dict, True)
+        logging.info("inst.spinalhdl generated successfully")
+
+    if sverilog:
+        make_sverilog(instr_dict)
+        logging.info("inst.sverilog generated successfully")
+
+    if rust:
+        make_rust(instr_dict)
+        logging.info("inst.rs generated successfully")
+
+    if go:
+        # Go output uses the segment-expanded dictionary.
+        make_go(instr_dict_with_segment)
+        logging.info("inst.go generated successfully")
+
+    if latex:
+        make_latex_table()
+        logging.info("instr-table.tex generated successfully")
+        make_priv_latex_table()
+        logging.info("priv-instr-table.tex generated successfully")
+
+    if svg:
+        make_svg(instr_dict)
+        logging.info("inst.svg generated successfully")
+
+
+def main():
+    """Parse command-line flags and invoke `generate_extensions`."""
+    parser = argparse.ArgumentParser(description="Generate RISC-V constants headers")
+    parser.add_argument(
+        "-pseudo", action="store_true", help="Include pseudo-instructions"
+    )
+    parser.add_argument("-c", action="store_true", help="Generate output for C")
+    parser.add_argument(
+        "-chisel", action="store_true", help="Generate output for Chisel"
+    )
+    parser.add_argument(
+        "-spinalhdl", action="store_true", help="Generate output for SpinalHDL"
+    )
+    parser.add_argument(
+        "-sverilog", action="store_true", help="Generate output for SystemVerilog"
+    )
+    parser.add_argument("-rust", action="store_true", help="Generate output for Rust")
+    parser.add_argument("-go", action="store_true", help="Generate output for Go")
+    parser.add_argument("-latex", action="store_true", help="Generate output for Latex")
+    parser.add_argument("-svg", action="store_true", help="Generate .svg output")
+    parser.add_argument(
+        "extensions",
+        nargs="*",
+        help="Extensions to use. This is a glob of the rv_.. files, e.g. 'rv*' will give all extensions.",
+    )
+
+    args = parser.parse_args()
+
+    print(f"Extensions selected : {args.extensions}")
+
+    generate_extensions(
+        args.extensions,
+        args.pseudo,
+        args.c,
+        args.chisel,
+        args.spinalhdl,
+        args.sverilog,
+        args.rust,
+        args.go,
+        args.latex,
+        args.svg,
+    )
diff --git a/src/riscv_opcodes/resources.py b/src/riscv_opcodes/resources.py
new file mode 100644
index 0000000..e9398ec
--- /dev/null
+++ b/src/riscv_opcodes/resources.py
@@ -0,0 +1,39 @@
+import sys
+from importlib.resources import files
+from typing import IO
+
+if sys.version_info < (3, 12):
+ # This was deprecated in Python 3.12.
+ from importlib.abc import Traversable
+else:
+ from importlib.resources.abc import Traversable
+
+
+def resource_root() -> Traversable:
+ """
+ Return the root directory as a traversable that can
+ be used to load the `extensions`, `*.csv` and `encoding.h`
+ files. For historical reasons these are not stored inside
+ the `src/riscv_opcodes` directory in the source distribution
+ but they are moved there when generating the binary wheel.
+ This means we need to check in both places.
+ """
+ assert __package__ is not None
+ package_root = files(__package__)
+ if (package_root / "extensions").is_dir():
+ return package_root
+ return package_root / ".." / ".."
+
+
+def read_text_resource(path_relative_to_root: str) -> str:
+ """
+ Read a text file relative to the root of this repo.
+ """
+ return resource_root().joinpath(path_relative_to_root).read_text(encoding="utf-8")
+
+
+def open_text_resource(path_relative_to_root: str) -> IO[str]:
+ """
+ Open a text file relative to the root of this repo.
+ """
+ return resource_root().joinpath(path_relative_to_root).open("r", encoding="utf-8")
diff --git a/src/riscv_opcodes/rust_utils.py b/src/riscv_opcodes/rust_utils.py
new file mode 100644
index 0000000..74e17eb
--- /dev/null
+++ b/src/riscv_opcodes/rust_utils.py
@@ -0,0 +1,28 @@
+import logging
+import pprint
+
+from .constants import causes, csrs, csrs32
+from .shared_utils import InstrDict
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_rust(instr_dict: InstrDict):
+    """
+    Write `inst.rs`: MATCH_*/MASK_* u32 constants for every instruction,
+    CSR_* address constants, and CAUSE_* trap-cause constants.
+    """
+    mask_match_str = ""
+    for i in instr_dict:
+        mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n'
+        mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n'
+    for num, name in csrs + csrs32:
+        mask_match_str += f"const CSR_{name.upper()}: u16 = {hex(num)};\n"
+    for num, name in causes:
+        mask_match_str += (
+            f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n'
+        )
+    with open("inst.rs", "w", encoding="utf-8") as rust_file:
+        rust_file.write(
+            f"""
+/* Automatically generated by parse_opcodes */
+{mask_match_str}
+"""
+        )
diff --git a/src/riscv_opcodes/rv_colors.py b/src/riscv_opcodes/rv_colors.py
new file mode 100644
index 0000000..76e53a5
--- /dev/null
+++ b/src/riscv_opcodes/rv_colors.py
@@ -0,0 +1,12 @@
+palette = {
+ "Berkeley Blue": "#003262",
+ "California Gold": "#FDB515",
+ "Dark Blue": "#011e41",
+ "Teal": "#0a6b7c",
+ "Magenta": "#cb007b",
+ "Purple": "#60269e",
+ "Light Gold": "#fdda64",
+ "Light Teal": "#62cbc9",
+ "Pink": "#fe9bb1",
+ "Lavender": "#c2a6e1",
+}
diff --git a/src/riscv_opcodes/shared_utils.py b/src/riscv_opcodes/shared_utils.py
new file mode 100644
index 0000000..3a1a3bc
--- /dev/null
+++ b/src/riscv_opcodes/shared_utils.py
@@ -0,0 +1,641 @@
+import copy
+import logging
+import os
+import pprint
+import re
+from fnmatch import fnmatch
+from io import StringIO
+from itertools import chain
+from typing import Dict, NoReturn, Optional, TypedDict
+
+from .constants import (
+ arg_lut,
+ fixed_ranges,
+ imported_regex,
+ overlapping_extensions,
+ overlapping_instructions,
+ pseudo_regex,
+ single_fixed,
+)
+from .resources import open_text_resource, resource_root
+
+LOG_FORMAT = "%(levelname)s:: %(message)s"
+LOG_LEVEL = logging.INFO
+
+pretty_printer = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)
+
+
+# Log an error message
+def log_and_exit(message: str) -> NoReturn:
+ """Log an error message and exit the program."""
+ logging.error(message)
+ raise SystemExit(1)
+
+
+# Initialize encoding to 32-bit '-' values
+def initialize_encoding(bits: int = 32) -> "list[str]":
+ """Initialize encoding with '-' to represent don't care bits."""
+ return ["-"] * bits
+
+
+# Validate bit range and value
+def validate_bit_range(msb: int, lsb: int, entry_value: int, line: str):
+ """Validate the bit range and entry value."""
+ if msb < lsb:
+ log_and_exit(
+ f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in its encoding'
+ )
+
+ if entry_value >= (1 << (msb - lsb + 1)):
+ log_and_exit(
+ f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb}'
+ )
+
+
+# Split the instruction line into name and remaining part
+def parse_instruction_line(line: str) -> "tuple[str, str]":
+ """Parse the instruction name and the remaining encoding details."""
+ name, remaining = line.replace("\t", " ").split(" ", 1)
+ name = name.replace(".", "_") # Replace dots for compatibility
+ remaining = remaining.lstrip() # Remove leading whitespace
+ return name, remaining
+
+
+# Verify Overlapping Bits
+def check_overlapping_bits(encoding: "list[str]", ind: int, line: str):
+ """Check for overlapping bits in the encoding."""
+ if encoding[31 - ind] != "-":
+ log_and_exit(
+ f'{line.split(" ")[0]:<10} has {ind} bit overlapping in its opcodes'
+ )
+
+
+# Update encoding for fixed ranges
+def update_encoding_for_fixed_range(
+ encoding: "list[str]", msb: int, lsb: int, entry_value: int, line: str
+):
+ """
+ Update encoding bits for a given bit range.
+ Checks for overlapping bits and assigns the value accordingly.
+ """
+ for ind in range(lsb, msb + 1):
+ check_overlapping_bits(encoding, ind, line)
+ bit = str((entry_value >> (ind - lsb)) & 1)
+ encoding[31 - ind] = bit
+
+
+# Process fixed bit patterns
+def process_fixed_ranges(remaining: str, encoding: "list[str]", line: str):
+ """Process fixed bit ranges in the encoding."""
+ for s2, s1, entry in fixed_ranges.findall(remaining):
+ msb, lsb, entry_value = int(s2), int(s1), int(entry, 0)
+
+ # Validate bit range and entry value
+ validate_bit_range(msb, lsb, entry_value, line)
+ update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line)
+
+ return fixed_ranges.sub(" ", remaining)
+
+
+# Process single bit assignments
+def process_single_fixed(remaining: str, encoding: "list[str]", line: str):
+ """Process single fixed assignments in the encoding."""
+ for lsb, value, _drop in single_fixed.findall(remaining):
+ lsb = int(lsb, 0)
+ value = int(value, 0)
+
+ check_overlapping_bits(encoding, lsb, line)
+ encoding[31 - lsb] = str(value)
+
+
+# Main function to check argument look-up table
+def check_arg_lut(args: "list[str]", encoding_args: "list[str]", name: str):
+ """Check if arguments are present in arg_lut."""
+ for arg in args:
+ if arg not in arg_lut:
+ arg = handle_arg_lut_mapping(arg, name)
+ msb, lsb = arg_lut[arg]
+ update_encoding_args(encoding_args, arg, msb, lsb)
+
+
+# Handle missing argument mappings
+def handle_arg_lut_mapping(arg: str, name: str):
+ """Handle cases where an argument needs to be mapped to an existing one."""
+ parts = arg.split("=")
+ if len(parts) == 2:
+ existing_arg, _new_arg = parts
+ if existing_arg in arg_lut:
+ arg_lut[arg] = arg_lut[existing_arg]
+ else:
+ log_and_exit(
+ f" Found field {existing_arg} in variable {arg} in instruction {name} "
+ f"whose mapping in arg_lut does not exist"
+ )
+ else:
+ log_and_exit(
+ f" Found variable {arg} in instruction {name} "
+ f"whose mapping in arg_lut does not exist"
+ )
+ return arg
+
+
+# Update encoding args with variables
+def update_encoding_args(encoding_args: "list[str]", arg: str, msb: int, lsb: int):
+ """Update encoding arguments and ensure no overlapping."""
+ for ind in range(lsb, msb + 1):
+ check_overlapping_bits(encoding_args, ind, arg)
+ encoding_args[31 - ind] = arg
+
+
+# Compute match and mask
+def convert_encoding_to_match_mask(encoding: "list[str]") -> "tuple[str, str]":
+ """Convert the encoding list to match and mask strings."""
+ match = "".join(encoding).replace("-", "0")
+ mask = "".join(encoding).replace("0", "1").replace("-", "0")
+ return hex(int(match, 2)), hex(int(mask, 2))
+
+
+class SingleInstr(TypedDict):
+ encoding: str
+ variable_fields: "list[str]"
+ extension: "list[str]"
+ match: str
+ mask: str
+
+
+InstrDict = Dict[str, SingleInstr]
+
+
+# Processing main function for a line in the encoding file
+def process_enc_line(line: str, ext: str) -> "tuple[str, SingleInstr]":
+    """
+    This function processes each line of the encoding files (rv*). As part of
+    the processing, the function ensures that the encoding is legal through the
+    following checks::
+        - there is no over specification (same bits assigned different values)
+        - there is no under specification (some bits not assigned values)
+        - bit ranges are in the format hi..lo=val where hi > lo
+        - value assigned is representable in the bit range
+        - also checks that the mapping of arguments of an instruction exists in
+          arg_lut.
+    If the above checks pass, then the function returns a tuple of the name and
+    a dictionary containing basic information of the instruction which includes:
+        - variables: list of arguments used by the instruction whose mapping
+          exists in the arg_lut dictionary
+        - encoding: this contains the 32-bit encoding of the instruction where
+          '-' is used to represent position of arguments and 1/0 is used to
+          represent the static encoding of the bits
+        - extension: this field contains the rv* filename from which this
+          instruction was included
+        - match: hex value representing the bits that need to match to detect
+          this instruction
+        - mask: hex value representing the bits that need to be masked to extract
+          the value required for matching.
+    """
+    encoding = initialize_encoding()
+
+    # Parse the instruction line
+    name, remaining = parse_instruction_line(line)
+
+    # Process fixed ranges
+    remaining = process_fixed_ranges(remaining, encoding, line)
+
+    # Process single fixed assignments
+    process_single_fixed(remaining, encoding, line)
+
+    # Convert the list of encodings into a match and mask
+    match, mask = convert_encoding_to_match_mask(encoding)
+
+    # Check arguments in arg_lut
+    args = single_fixed.sub(" ", remaining).split()
+    encoding_args = encoding.copy()
+
+    check_arg_lut(args, encoding_args, name)
+
+    # Return single_dict
+    return name, {
+        "encoding": "".join(encoding),
+        "variable_fields": args,
+        "extension": [os.path.basename(ext)],
+        "match": match,
+        "mask": mask,
+    }
+
+
+# Extract ISA Type
+def extract_isa_type(ext_name: str) -> str:
+ """Extracts the ISA type from the extension name."""
+ return ext_name.split("_")[0]
+
+
+# Verify the types for RV*
+def is_rv_variant(type1: str, type2: str) -> bool:
+ """Checks if the types are RV variants (rv32/rv64)."""
+ return (type2 == "rv" and type1 in {"rv32", "rv64"}) or (
+ type1 == "rv" and type2 in {"rv32", "rv64"}
+ )
+
+
+# Check for same base ISA
+def has_same_base_isa(type1: str, type2: str) -> bool:
+ """Determines if the two ISA types share the same base."""
+ return type1 == type2 or is_rv_variant(type1, type2)
+
+
+# Compare the base ISA type of a given extension name against a list of extension names
+def same_base_isa(ext_name: str, ext_name_list: "list[str]") -> bool:
+ """Checks if the base ISA type of ext_name matches any in ext_name_list."""
+ type1 = extract_isa_type(ext_name)
+ return any(has_same_base_isa(type1, extract_isa_type(ext)) for ext in ext_name_list)
+
+
+# Pad two strings to equal length
+def pad_to_equal_length(str1: str, str2: str, pad_char: str = "-") -> "tuple[str, str]":
+ """Pads two strings to equal length using the given padding character."""
+ max_len = max(len(str1), len(str2))
+ return str1.rjust(max_len, pad_char), str2.rjust(max_len, pad_char)
+
+
+# Check compatibility for two characters
+def has_no_conflict(char1: str, char2: str) -> bool:
+ """Checks if two characters are compatible (either matching or don't-care)."""
+ return char1 == "-" or char2 == "-" or char1 == char2
+
+
+# Conflict check between two encoded strings
+def overlaps(x: str, y: str) -> bool:
+ """Checks if two encoded strings overlap without conflict."""
+ x, y = pad_to_equal_length(x, y)
+ return all(has_no_conflict(x[i], y[i]) for i in range(len(x)))
+
+
+# Check presence of keys in dictionary.
+def is_in_nested_dict(a: "dict[str, set[str]]", key1: str, key2: str) -> bool:
+ """Checks if key2 exists in the dictionary under key1."""
+ return key1 in a and key2 in a[key1]
+
+
+# Overlap allowance
+def overlap_allowed(a: "dict[str, set[str]]", x: str, y: str) -> bool:
+ """Determines if overlap is allowed between x and y based on nested dictionary checks"""
+ return is_in_nested_dict(a, x, y) or is_in_nested_dict(a, y, x)
+
+
+# Check overlap allowance between extensions
+def extension_overlap_allowed(x: str, y: str) -> bool:
+ """Checks if overlap is allowed between two extensions using the overlapping_extensions dictionary."""
+ return overlap_allowed(overlapping_extensions, x, y)
+
+
+# Check overlap allowance between instructions
+def instruction_overlap_allowed(x: str, y: str) -> bool:
+ """Checks if overlap is allowed between two instructions using the overlapping_instructions dictionary."""
+ return overlap_allowed(overlapping_instructions, x, y)
+
+
+# Check 'nf' field
+def is_segmented_instruction(instruction: SingleInstr) -> bool:
+ """Checks if an instruction contains the 'nf' field."""
+ return "nf" in instruction["variable_fields"]
+
+
+# Expand 'nf' fields
+def update_with_expanded_instructions(
+ updated_dict: InstrDict, key: str, value: SingleInstr
+):
+ """Expands 'nf' fields in the instruction dictionary and updates it with new instructions."""
+ for new_key, new_value in expand_nf_field(key, value):
+ updated_dict[new_key] = new_value
+
+
+# Process instructions, expanding segmented ones and updating the dictionary
+def add_segmented_vls_insn(instr_dict: InstrDict) -> InstrDict:
+ """Processes instructions, expanding segmented ones and updating the dictionary."""
+ # Use dictionary comprehension for efficiency
+ return dict(
+ chain.from_iterable(
+ (
+ expand_nf_field(key, value)
+ if is_segmented_instruction(value)
+ else [(key, value)]
+ )
+ for key, value in instr_dict.items()
+ )
+ )
+
+
+# Expand the 'nf' field in the instruction dictionary
+def expand_nf_field(
+    name: str, single_dict: SingleInstr
+) -> "list[tuple[str, SingleInstr]]":
+    """
+    Expand a segmented vector load/store on its 'nf' field into the eight
+    variants nf = 0..7 (nf == 0 keeps the original name; the others insert
+    "seg<nf+1>" before the first 'e' of the name).
+
+    NOTE(review): `single_dict` is mutated in place ('nf' is removed from
+    variable_fields and the mask is widened) before the per-nf deep copies
+    are taken, so a caller that keeps the original dict sees the changes
+    too — confirm this side effect is intended.
+    """
+    validate_nf_field(single_dict, name)
+    remove_nf_field(single_dict)
+    update_mask(single_dict)
+
+    # Position of the first 'e' in the name; "seg<n>" is inserted there.
+    name_expand_index = name.find("e")
+
+    # Precompute the base match value and encoding prefix
+    base_match = int(single_dict["match"], 16)
+    encoding_prefix = single_dict["encoding"][3:]
+
+    expanded_instructions = [
+        create_expanded_instruction(
+            name, single_dict, nf, name_expand_index, base_match, encoding_prefix
+        )
+        for nf in range(8)  # Range of 0 to 7
+    ]
+
+    return expanded_instructions
+
+
+# Validate the presence of 'nf'
+def validate_nf_field(single_dict: SingleInstr, name: str):
+ """Validates the presence of 'nf' in variable fields before expansion."""
+ if "nf" not in single_dict["variable_fields"]:
+ log_and_exit(f"Cannot expand nf field for instruction {name}")
+
+
+# Remove 'nf' from variable fields
+def remove_nf_field(single_dict: SingleInstr):
+ """Removes 'nf' from variable fields in the instruction dictionary."""
+ single_dict["variable_fields"].remove("nf")
+
+
+# Update the mask to include the 'nf' field
+def update_mask(single_dict: SingleInstr):
+    """Updates the mask to include the 'nf' field in the instruction dictionary."""
+    # The 3-bit 'nf' field occupies bits 31:29, hence 0b111 << 29.
+    single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29)
+
+
+# Create an expanded instruction
+def create_expanded_instruction(
+ name: str,
+ single_dict: SingleInstr,
+ nf: int,
+ name_expand_index: int,
+ base_match: int,
+ encoding_prefix: str,
+) -> "tuple[str, SingleInstr]":
+ """Creates an expanded instruction based on 'nf' value."""
+ new_single_dict = copy.deepcopy(single_dict)
+
+ # Update match value in one step
+ new_single_dict["match"] = hex(base_match | (nf << 29))
+ new_single_dict["encoding"] = format(nf, "03b") + encoding_prefix
+
+ # Construct new instruction name
+ new_name = (
+ name
+ if nf == 0
+ else f"{name[:name_expand_index]}seg{nf + 1}{name[name_expand_index:]}"
+ )
+
+ return (new_name, new_single_dict)
+
+
+def read_lines(file: str) -> "list[str]":
+ """
+ Reads lines from a file and returns non-blank, non-comment lines.
+ The file must be a resource relative to the root of this repo.
+ """
+ with open_text_resource(file) as fp:
+ lines = (line.rstrip() for line in fp)
+ return [line for line in lines if line and not line.startswith("#")]
+
+
+# Update the instruction dictionary
+def process_standard_instructions(
+ lines: "list[str]", instr_dict: InstrDict, file_name: str
+):
+ """Processes standard instructions from the given lines and updates the instruction dictionary."""
+ for line in lines:
+ if "$import" in line or "$pseudo" in line:
+ continue
+ logging.debug(f"Processing line: {line}")
+ name, single_dict = process_enc_line(line, file_name)
+ ext_name = os.path.basename(file_name)
+
+ if name in instr_dict:
+ var = instr_dict[name]["extension"]
+ if same_base_isa(ext_name, var):
+ log_and_exit(
+ f"Instruction {name} from {ext_name} is already added from {var} in same base ISA"
+ )
+ elif instr_dict[name]["encoding"] != single_dict["encoding"]:
+ log_and_exit(
+ f"Instruction {name} from {ext_name} has different encodings in different base ISAs"
+ )
+
+ instr_dict[name]["extension"].extend(single_dict["extension"])
+ else:
+ for key, item in instr_dict.items():
+ if (
+ overlaps(item["encoding"], single_dict["encoding"])
+ and not extension_overlap_allowed(ext_name, item["extension"][0])
+ and not instruction_overlap_allowed(name, key)
+ and same_base_isa(ext_name, item["extension"])
+ ):
+ log_and_exit(
+ f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}'
+ )
+
+ instr_dict[name] = single_dict
+
+
+# Incorporate pseudo instructions into the instruction dictionary based on given conditions
+def process_pseudo_instructions(
+ lines: "list[str]",
+ instr_dict: InstrDict,
+ file_name: str,
+ include_pseudo: bool,
+ include_pseudo_ops: "list[str]",
+):
+ """Processes pseudo instructions from the given lines and updates the instruction dictionary."""
+ for line in lines:
+ if "$pseudo" not in line:
+ continue
+ logging.debug(f"Processing pseudo line: {line}")
+ ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0]
+ ext_file = read_extension_file(ext)
+
+ validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst)
+
+ name, single_dict = process_enc_line(f"{pseudo_inst} {line_content}", file_name)
+ if (
+ orig_inst.replace(".", "_") not in instr_dict
+ or include_pseudo
+ or name in include_pseudo_ops
+ ):
+ if name not in instr_dict:
+ instr_dict[name] = single_dict
+ logging.debug(f"Including pseudo_op: {name}")
+ else:
+ if single_dict["match"] != instr_dict[name]["match"]:
+ instr_dict[f"{name}_pseudo"] = single_dict
+ # TODO: This expression is always false since both sides are list[str].
+ elif single_dict["extension"] not in instr_dict[name]["extension"]: # type: ignore
+ instr_dict[name]["extension"].extend(single_dict["extension"])
+
+
+# Integrate imported instructions into the instruction dictionary
+def process_imported_instructions(
+ lines: "list[str]", instr_dict: InstrDict, file_name: str
+):
+ """Processes imported instructions from the given lines and updates the instruction dictionary."""
+ for line in lines:
+ if "$import" not in line:
+ continue
+ logging.debug(f"Processing imported line: {line}")
+ import_ext, reg_instr = imported_regex.findall(line)[0]
+ ext_file = read_extension_file(import_ext)
+
+ validate_instruction_in_extension(reg_instr, ext_file, file_name, line)
+
+ for oline in StringIO(ext_file):
+ if re.findall(f"^\\s*{reg_instr}\\s+", oline):
+ name, single_dict = process_enc_line(oline, file_name)
+ if name in instr_dict:
+ if instr_dict[name]["encoding"] != single_dict["encoding"]:
+ log_and_exit(
+ f"Imported instruction {name} from {os.path.basename(file_name)} has different encodings"
+ )
+ instr_dict[name]["extension"].extend(single_dict["extension"])
+ else:
+ instr_dict[name] = single_dict
+ break
+
+
+def read_extension_file(ext: str) -> str:
+ """
+ Read the extension file path, considering the unratified directory if necessary.
+ """
+ file = resource_root() / "extensions" / ext
+ if file.is_file():
+ return file.read_text(encoding="utf-8")
+ file = resource_root() / "extensions" / "unratified" / ext
+ if file.is_file():
+ return file.read_text(encoding="utf-8")
+
+ log_and_exit(f"Extension {ext} not found.")
+
+
+# Confirm the presence of an original instruction in the corresponding extension file.
+def validate_instruction_in_extension(
+ inst: str, ext_file: str, file_name: str, pseudo_inst: str
+):
+ """Validates if the original instruction exists in the dependent extension."""
+ found = False
+
+ for oline in StringIO(ext_file):
+ if re.findall(f"^\\s*{inst}\\s+", oline):
+ found = True
+ break
+ if not found:
+ log_and_exit(
+ f"Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}"
+ )
+
+
+# Construct a dictionary of instructions filtered by specified criteria
+def create_inst_dict(
+    file_filter: "list[str]",
+    include_pseudo: bool = False,
+    include_pseudo_ops: "Optional[list[str]]" = None,
+) -> InstrDict:
+    """
+    Creates a dictionary of instructions based on the provided file filters.
+
+    This function returns a dictionary containing all instructions associated
+    with an extension defined by the file_filter input.
+    Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc.
+    Each node of the dictionary will correspond to an instruction which again is
+    a dictionary. The dictionary contents of each instruction includes:
+        - variables: list of arguments used by the instruction whose mapping
+          exists in the arg_lut dictionary
+        - encoding: this contains the 32-bit encoding of the instruction where
+          '-' is used to represent position of arguments and 1/0 is used to
+          represent the static encoding of the bits
+        - extension: this field contains the rv* filename from which this
+          instruction was included
+        - match: hex value representing the bits that need to match to detect
+          this instruction
+        - mask: hex value representing the bits that need to be masked to extract
+          the value required for matching.
+    In order to build this dictionary, the function does 2 passes over the same
+    rv<file_filter> file:
+        - First pass: extracts all standard instructions, skipping pseudo ops
+          and imported instructions. For each selected line, the `process_enc_line`
+          function is called to create the dictionary contents of the instruction.
+          Checks are performed to ensure that the same instruction is not added
+          twice to the overall dictionary.
+        - Second pass: parses only pseudo_ops. For each pseudo_op, the function:
+            - Checks if the dependent extension and instruction exist.
+            - Adds the pseudo_op to the dictionary if the dependent instruction
+              is not already present; otherwise, it is skipped.
+    """
+    if include_pseudo_ops is None:
+        include_pseudo_ops = []
+
+    instr_dict: InstrDict = {}
+
+    ratified_file_filters = [
+        fil for fil in file_filter if not fil.startswith("unratified/")
+    ]
+    unratified_file_filters = [
+        fil.removeprefix("unratified/")
+        for fil in file_filter
+        if fil.startswith("unratified/")
+    ]
+
+    # Extension file name, "extensions[/unratified]/rv_foo".
+    file_names: list[str] = []
+
+    for file in (resource_root() / "extensions").iterdir():
+        if file.is_file() and any(
+            fnmatch(file.name, fil) for fil in ratified_file_filters
+        ):
+            file_names.append("extensions/" + file.name)
+    for file in (resource_root() / "extensions" / "unratified").iterdir():
+        if file.is_file() and any(
+            fnmatch(file.name, fil) for fil in unratified_file_filters
+        ):
+            file_names.append("extensions/unratified/" + file.name)
+
+    logging.debug("Collecting standard instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for standard instructions")
+        lines = read_lines(file_name)
+        process_standard_instructions(lines, instr_dict, file_name)
+
+    logging.debug("Collecting pseudo instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for pseudo instructions")
+        lines = read_lines(file_name)
+        process_pseudo_instructions(
+            lines,
+            instr_dict,
+            file_name,
+            include_pseudo,
+            include_pseudo_ops,
+        )
+
+    logging.debug("Collecting imported instructions")
+
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for imported instructions")
+        lines = read_lines(file_name)
+        process_imported_instructions(lines, instr_dict, file_name)
+
+    return instr_dict
+
+
+# Extracts the extensions used in an instruction dictionary
+def instr_dict_2_extensions(instr_dict: InstrDict) -> "list[str]":
+    """Return the distinct first-listed extension of every instruction (unordered)."""
+    return list({item["extension"][0] for item in instr_dict.values()})
+
+
+# Returns signed interpretation of a value within a given width
+def signed(value: int, width: int) -> int:
+    """Reinterpret the unsigned `width`-bit pattern `value` as a two's-complement signed int."""
+    return value if 0 <= value < (1 << (width - 1)) else value - (1 << width)
diff --git a/src/riscv_opcodes/sverilog_utils.py b/src/riscv_opcodes/sverilog_utils.py
new file mode 100644
index 0000000..c17be9f
--- /dev/null
+++ b/src/riscv_opcodes/sverilog_utils.py
@@ -0,0 +1,30 @@
+import logging
+import pprint
+from pathlib import Path
+
+from .constants import csrs, csrs32
+from .shared_utils import InstrDict
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_sverilog(instr_dict: InstrDict):
+    """
+    Write `inst.sverilog`: a `riscv_instr` package with one 32-bit
+    localparam pattern per instruction (don't-care '-' bits become
+    SystemVerilog '?' wildcards) plus CSR address localparams.
+    """
+    names_str = ""
+    for i in instr_dict:
+        names_str += f"  localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n"
+    names_str += "  /* CSR Addresses */\n"
+    for num, name in csrs + csrs32:
+        names_str += (
+            f"  localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n"
+        )
+
+    Path("inst.sverilog").write_text(
+        f"""
+/* Automatically generated by parse_opcodes */
+package riscv_instr;
+{names_str}
+endpackage
+""",
+        encoding="utf-8",
+    )
diff --git a/src/riscv_opcodes/svg_utils.py b/src/riscv_opcodes/svg_utils.py
new file mode 100644
index 0000000..4126ad6
--- /dev/null
+++ b/src/riscv_opcodes/svg_utils.py
@@ -0,0 +1,284 @@
+import logging
+import pprint
+from typing import Dict, List, NamedTuple
+
+from .rv_colors import palette
+from .shared_utils import InstrDict, instr_dict_2_extensions
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
class RectangleDimensions(NamedTuple):
    """Axis-aligned rectangle in unit-square coordinates: origin (x, y)
    plus width ``w`` and height ``h``."""

    # origin
    x: float
    y: float
    # extents
    w: float
    h: float
+
+
class InstrRectangle(NamedTuple):
    """One drawable instruction rectangle: its dimensions, the extension it
    is colored by, and the text label rendered inside it."""

    dims: RectangleDimensions
    extension: str
    label: str
+
+
+InstrDimsDict = Dict[str, RectangleDimensions]
+
+
def encoding_to_rect(encoding: str) -> RectangleDimensions:
    """Convert a binary encoding string to rectangle dimensions.

    Bits at even string positions drive the y axis and bits at odd
    positions drive the x axis, mapping each encoding onto a distinct
    sub-rectangle of the unit square:

    * a "1" bit shifts the origin along its axis by half of the span
      still controlled by that bit,
    * a "0" bit leaves the origin unchanged,
    * a "-" (don't-care) bit doubles the rectangle's extent on its axis.

    Returns the rectangle as (x, y, w, h) in unit-square coordinates.
    """
    instr_length = len(encoding)
    # Extent contributed by a fully fixed axis (same for x and y).
    tick = 1 / (2 ** (0.5 * instr_length))

    # Origin: every "1" bit adds a progressively smaller offset.
    x = 0
    y = 0
    for i, bit in enumerate(encoding):
        if bit == "1":
            offset = 0.5 / (2 ** (i // 2))
            if i % 2 == 0:
                y += offset
            else:
                x += offset

    # Size: each free ("-") bit doubles the extent along its axis.
    x_free_bits = encoding[1::2].count("-")  # odd positions drive x
    y_free_bits = encoding[0::2].count("-")  # even positions drive y
    x_size = 2**x_free_bits * tick
    y_size = 2**y_free_bits * tick

    # Any encoding maps onto exactly one axis-aligned rectangle.
    return RectangleDimensions(x=x, y=y, w=x_size, h=y_size)
+
+
+FIGSIZE = 128
+
+
def plot_image(
    instr_dict: InstrDict,
    instr_dims_dict: InstrDimsDict,
    extension_sizes: Dict[str, float],
) -> None:
    """Plot the instruction rectangles using matplotlib.

    Draws one colored/hatched rectangle per instruction (color keyed on the
    instruction's first extension), labels rectangles whose computed font
    size exceeds 1, saves the result to "inst.svg" and opens an interactive
    window via plt.show().

    matplotlib is imported lazily here so the rest of the module does not
    require it.
    """

    from matplotlib import patches
    from matplotlib import pyplot as plt

    def get_readable_font_color(bg_hex: str) -> str:
        """Determine readable font color based on background color."""

        def hex_to_rgb(hex_color: str) -> tuple[int, int, int]:
            """Convert hex color string to RGB tuple."""
            hex_color = hex_color.lstrip("#")
            r = int(hex_color[0:2], 16)
            g = int(hex_color[2:4], 16)
            b = int(hex_color[4:6], 16)

            return (r, g, b)

        r, g, b = hex_to_rgb(bg_hex)
        # Weighted (perceptual) luminance; threshold picks black text on
        # light backgrounds and white text on dark ones.
        luminance = 0.299 * r + 0.587 * g + 0.114 * b
        return "#000000" if luminance > 186 else "#FFFFFF"

    def plot_with_matplotlib(
        rectangles: list[InstrRectangle],
        colors: list[str],
        hatches: list[str],
        extensions: list[str],
    ) -> None:
        """Plot rectangles with matplotlib using specified styles.

        colors/hatches are parallel to extensions: a rectangle's style is
        looked up via the index of its extension in `extensions`.
        """

        _, ax = plt.subplots(figsize=(FIGSIZE, FIGSIZE), facecolor="none")  # type: ignore
        ax.set_facecolor("none")  # type: ignore
        linewidth = FIGSIZE / 100
        for dims, ext, label in rectangles:
            x, y, w, h = dims
            ext_idx = extensions.index(ext)
            color = colors[ext_idx]
            hatch = hatches[ext_idx]
            rect = patches.Rectangle(
                (x, y),
                w,
                h,
                linewidth=linewidth,
                edgecolor="black",
                facecolor=color,
                hatch=hatch,
                alpha=1.0,
            )
            ax.add_patch(rect)

            # Write the label along the longer side of the rectangle.
            if w >= h:
                base_dim = w
                rotation = 0
            else:
                base_dim = h
                rotation = 90

            # Scale font size based on base dimension and label length
            n_chars = len(label)
            font_size = (
                base_dim / n_chars * 90 * FIGSIZE
            )  # Adjust scaling factor as needed
            # Skip labels that would be rendered too small to read.
            if font_size > 1:
                fontdict = {
                    "fontsize": font_size,
                    "color": get_readable_font_color(color),
                    "family": "DejaVu Sans Mono",
                }
                ax.text(  # type: ignore
                    x + w / 2,
                    y + h / 2,
                    label,
                    ha="center",
                    va="center",
                    fontdict=fontdict,
                    rotation=rotation,
                )

        plt.axis("off")  # type: ignore
        plt.tight_layout()  # type: ignore
        plt.savefig("inst.svg", format="svg")  # type: ignore
        plt.show()  # type: ignore

    # Largest extensions first so their style indices are stable/prominent.
    extensions: List[str] = sorted(
        extension_sizes.keys(), key=lambda k: extension_sizes[k], reverse=True
    )

    rectangles: List[InstrRectangle] = []
    for instr in instr_dict:
        dims = instr_dims_dict[instr]
        rectangles.append(
            InstrRectangle(
                dims=dims,
                extension=instr_dict[instr]["extension"][0],
                label=instr.replace("_", "."),
            )
        )

    # sort rectangles so that small ones are in the foreground
    # An overlap occurs e.g. for pseudo ops, and these should be on top of the encoding it reuses
    rectangles = sorted(rectangles, key=lambda x: x.dims.w * x.dims.h, reverse=True)

    colors, hatches = generate_styles(extensions)

    plot_with_matplotlib(rectangles, colors, hatches, extensions)
+
+
def generate_styles(extensions: list[str]) -> tuple[list[str], list[str]]:
    """Assign a (color, hatch) pair to each extension.

    Colors cycle through the palette; the hatch pattern advances once per
    full palette cycle so styles stay distinguishable past len(palette)
    extensions.
    """
    hatch_options = ["", "/", "\\", "|", "-", "+", "x", ".", "*"]
    color_options = list(palette.values())
    n_colors = len(palette)

    colors: list[str] = []
    hatches: list[str] = []
    for idx in range(len(extensions)):
        colors.append(color_options[idx % n_colors])
        hatches.append(hatch_options[(idx // n_colors) % len(hatch_options)])

    return colors, hatches
+
+
def defragment_encodings(
    encodings: list[str], length: int = 32, offset: int = 0
) -> list[str]:
    """Defragment a list of binary encodings by reordering bits.

    Recursively permutes the bit order so that fixed ("0"/"1") bits are
    moved toward the front of each encoding string.  Every encoding in a
    group is permuted identically, so the strings stay mutually comparable
    when they are later turned into rectangles.

    NOTE(review): `encodings` is mutated in place and also returned;
    callers must not rely on the original bit order afterwards.
    """
    # determine bit position which has the most fixed bits
    fixed_encodings = ["0", "1"]
    fixed_bits = [0] * length
    fixed_encoding_indeces: Dict[str, List[int]] = {
        value: [] for value in fixed_encodings
    }
    for index, encoding in enumerate(encodings):
        for position, value in enumerate(encoding):
            # Positions up to `offset` were placed by outer recursion
            # levels, so only count fixed bits beyond it.
            if position > offset:
                if value != "-":
                    fixed_bits[position] += 1

    # find bit position with most fixed bits, starting with the LSB to favor the opcode field
    max_fixed_bits = max(fixed_bits)
    if max_fixed_bits == 0:
        # fully defragmented: no fixed bits remain past `offset`
        return encodings
    max_fixed_position = len(fixed_bits) - 1 - fixed_bits[::-1].index(max_fixed_bits)

    # move bit position with the most fixed bits to the front
    for index, encoding in enumerate(encodings):
        encodings[index] = (
            encoding[0:offset]
            + encoding[max_fixed_position]
            + encoding[offset:max_fixed_position]
            + encoding[max_fixed_position + 1 :]
        )

        # Group encodings by the value of the bit just moved to the front
        # so each group can be defragmented independently below.
        if encoding[max_fixed_position] in fixed_encodings:
            fixed_encoding_indeces[encoding[max_fixed_position]].append(index)
        else:
            # No more fixed bits in this encoding
            pass

    if offset < length:
        # continue to defragment starting from the next offset
        offset = offset + 1

    # separate encodings
    sep_encodings: Dict[str, List[str]] = {}
    for fixed_encoding in fixed_encodings:
        sep_encodings[fixed_encoding] = [
            encodings[i] for i in fixed_encoding_indeces[fixed_encoding]
        ]
        sep_encodings[fixed_encoding] = defragment_encodings(
            sep_encodings[fixed_encoding], length=length, offset=offset
        )

        # join encodings: write each group's defragmented strings back to
        # their original slots
        for new_index, orig_index in enumerate(
            fixed_encoding_indeces[fixed_encoding]
        ):
            encodings[orig_index] = sep_encodings[fixed_encoding][new_index]

    return encodings
+
+
def defragment_encoding_dict(instr_dict: InstrDict) -> InstrDict:
    """Apply defragmentation to the encoding dictionary.

    Rewrites every entry's "encoding" with the defragmented bit order and
    returns the (mutated) dictionary.
    """
    original_encodings = [entry["encoding"] for entry in instr_dict.values()]
    reordered = defragment_encodings(original_encodings, length=32, offset=0)
    for name, new_encoding in zip(instr_dict, reordered):
        instr_dict[name]["encoding"] = new_encoding
    return instr_dict
+
+
def make_svg(instr_dict: InstrDict) -> None:
    """Generate an SVG image from instruction encodings.

    Defragments the encodings, converts each into a unit-square rectangle,
    accumulates the covered area per (first) extension, and hands everything
    to plot_image, which writes "inst.svg".
    """
    ext_names = instr_dict_2_extensions(instr_dict)
    instr_dict = defragment_encoding_dict(instr_dict)

    # Area covered by each extension, used to order the legend styles.
    extension_size: Dict[str, float] = {name: 0 for name in ext_names}
    instr_dims_dict: InstrDimsDict = {}

    for mnemonic, entry in instr_dict.items():
        rect = encoding_to_rect(entry["encoding"])
        extension_size[entry["extension"][0]] += rect.h * rect.w
        instr_dims_dict[mnemonic] = rect

    plot_image(instr_dict, instr_dims_dict, extension_size)