135 files changed, 2275 insertions, 1411 deletions
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 86d5f0a..0ba3ac0 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -9,22 +9,40 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.10', '3.11', '3.12','3.13']
+
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python 3.8
-      uses: actions/setup-python@v2
+    - uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
       with:
-        python-version: 3.8
-    - name: Install PyYAML
-      run: |
-         pip3 install -r requirements.txt
-         pip3 install coverage
+        python-version: ${{ matrix.python-version }}
+
+    - name: Cache pre-commit environment
+      uses: actions/cache@v3
+      with:
+        path: ~/.cache/pre-commit
+        key: ${{ runner.os }}-pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}
+        restore-keys: |
+          ${{ runner.os }}-pre-commit-
+
+    - name: Install dependencies
+      run: python3 -m pip install pre-commit coverage
+
+    - name: Run pre-commit
+      run: pre-commit run --all-files
+
     - name: Generate
       run: coverage run ./parse.py -c -chisel -sverilog -rust -latex -spinalhdl -go "rv*" "unratified/rv*"
+
     - name: Check C output
-      run: cat encoding.out.h  | cpp
+      run: cat encoding.out.h | cpp
+
     - name: Generate coverage
       run: coverage xml
+
     - name: Upload coverage
-      uses: codecov/codecov-action@v2
-          
+      uses: codecov/codecov-action@v4
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 9357293..7871786 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -9,20 +9,35 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.10', '3.11', '3.12','3.13']
+
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python 3.8
-      uses: actions/setup-python@v2
+    - uses: actions/checkout@v4
+
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+
+    - name: Cache pip dependencies
+      uses: actions/cache@v3
       with:
-        python-version: 3.8
-    - name: Install PyYAML
+        path: ~/.cache/pip
+        key: ${{ runner.os }}-pip-${{ matrix.python-version }}-coverage
+        restore-keys: |
+          ${{ runner.os }}-pip-${{ matrix.python-version }}-
+
+    - name: Install dependencies
       run: |
-         pip3 install -r requirements.txt
          pip3 install coverage
+
     - name: Test error outputs
       run: coverage run -m unittest -b
-    - name: Generate coverage
+
+    - name: Generate coverage report
       run: coverage xml
-    - name: Upload coverage
-      uses: codecov/codecov-action@v2
-          
+
+    - name: Upload coverage report
+      uses: codecov/codecov-action@v4
diff --git a/.gitignore b/.gitignore
index 4ddba6b..1380615 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,6 @@ priv-instr-table.tex
 inst.rs
 inst.spinalhdl
 inst.sverilog
-instr_dict.yaml
+instr_dict.json
 
 __pycache__/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..dc25bc1
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,32 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: check-merge-conflict
+      - id: check-symlinks
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+
+  - repo: https://github.com/pycqa/isort
+    rev: 5.13.2
+    hooks:
+      - id: isort
+        # Configure isort to use Black's import style to prevent formatting
+        # conflicts; otherwise isort and Black may repeatedly reformat the
+        # same code, causing pre-commit to fail.
+        args: ["--profile", "black"]
+
+  - repo: https://github.com/psf/black
+    rev: 24.8.0
+    hooks:
+      - id: black
+
+  - repo: https://github.com/PyCQA/pylint
+    rev: v3.3.1
+    hooks:
+      - id: pylint
+
+  - repo: https://github.com/RobertCraigie/pyright-python
+    rev: v1.1.383
+    hooks:
+      - id: pyright
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..33ae3c3
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,31 @@
+[MAIN]
+py-version = 3.9.0
+disable=
+    # Allow 'TODO:' in code.
+    fixme,
+    # Overly zealous duplicate code detection.
+    duplicate-code,
+    # These debatable style lints are quite annoying, and often push
+    # you into mixing up small changes (adding one statement to a function)
+    # with large refactors (splitting the function up into shorter functions).
+    too-few-public-methods,
+    too-many-arguments,
+    too-many-positional-arguments,
+    too-many-branches,
+    too-many-instance-attributes,
+    too-many-locals,
+    too-many-return-statements,
+    too-many-statements,
+    # Handled by Black.
+    line-too-long,
+    # This is technically correct but not that important.
+    logging-fstring-interpolation,
+    # TODO: These should be enabled but writing documentation for
+    # all of the code is not feasible in one go.
+    missing-module-docstring,
+    missing-function-docstring,
+    missing-class-docstring,
+
+# These names are fine when used sensibly. Without listing them here
+# Pylint will complain they are too short.
+good-names=c,i,j,k,id,pc
diff --git a/Makefile b/Makefile
index cce19cd..a6ea5db 100644
--- a/Makefile
+++ b/Makefile
@@ -4,51 +4,49 @@ PK_H := ../riscv-pk/machine/encoding.h
 ENV_H := ../riscv-tests/env/encoding.h
 OPENOCD_H := ../riscv-openocd/src/target/riscv/encoding.h
 INSTALL_HEADER_FILES := $(ISASIM_H) $(PK_H) $(ENV_H) $(OPENOCD_H)
+PSEUDO_FLAG := $(if $(PSEUDO),-pseudo,)
+
 
 default: everything
 
-.PHONY : everything
+.PHONY: everything encoding.out.h inst.chisel inst.go latex inst.sverilog inst.rs clean install instr-table.tex priv-instr-table.tex inst.spinalhdl pseudo
+
+pseudo:
+	@$(MAKE) PSEUDO=1 everything
+
 everything:
-	@./parse.py -c -go -chisel -sverilog -rust -latex -spinalhdl $(EXTENSIONS)
+	@./parse.py  $(PSEUDO_FLAG) -c -go -chisel -sverilog -rust -latex -spinalhdl $(EXTENSIONS)
 
-.PHONY : encoding.out.h
 encoding.out.h:
-	@./parse.py -c rv* unratified/rv_* unratified/rv32* unratified/rv64*
+	@./parse.py -c $(PSEUDO_FLAG) rv* unratified/rv_* unratified/rv32* unratified/rv64*
 
-.PHONY : inst.chisel
 inst.chisel:
-	@./parse.py -chisel $(EXTENSIONS)
+	@./parse.py -chisel $(PSEUDO_FLAG) $(EXTENSIONS)
 
-.PHONY : inst.go
 inst.go:
-	@./parse.py -go $(EXTENSIONS)
+	@./parse.py -go $(PSEUDO_FLAG) $(EXTENSIONS)
 
-.PHONY : latex
 latex:
-	@./parse.py -latex $(EXTENSIONS)
+	@./parse.py -latex $(PSEUDO_FLAG) $(EXTENSIONS)
 
-.PHONY : inst.sverilog
 inst.sverilog:
-	@./parse.py -sverilog $(EXTENSIONS)
+	@./parse.py -sverilog $(PSEUDO_FLAG) $(EXTENSIONS)
 
-.PHONY : inst.rs
 inst.rs:
-	@./parse.py -rust $(EXTENSIONS)
+	@./parse.py -rust $(PSEUDO_FLAG) $(EXTENSIONS)
 
-.PHONY : clean
 clean:
 	rm -f inst* priv-instr-table.tex encoding.out.h
 
-.PHONY : install
 install: everything
-	set -e; for FILE in $(INSTALL_HEADER_FILES); do cp -f encoding.out.h $$FILE; done
+	set -e; \
+	for FILE in $(INSTALL_HEADER_FILES); do \
+	    cp -f encoding.out.h $$FILE; \
+	done
 
-.PHONY: instr-table.tex
 instr-table.tex: latex
 
-.PHONY: priv-instr-table.tex
 priv-instr-table.tex: latex
 
-.PHONY: inst.spinalhdl
 inst.spinalhdl:
-	@./parse.py -spinalhdl $(EXTENSIONS)
+	@./parse.py -spinalhdl $(PSEUDO_FLAG) $(EXTENSIONS)
diff --git a/README.md b/README.md
index f1a71d3..73c0715 100644
--- a/README.md
+++ b/README.md
@@ -4,8 +4,8 @@ This repo enumerates standard RISC-V instruction opcodes and control and
 status registers.  It also contains a script to convert them into several
 formats (C, Scala, LaTeX).
 
-Artifacts (encoding.h, latex-tables, etc) from this repo are used in other 
-tools and projects like Spike, PK, RISC-V Manual, etc. 
+Artifacts (encoding.h, latex-tables, etc) from this repo are used in other
+tools and projects like Spike, PK, RISC-V Manual, etc.
 
 ## Project Structure
 
@@ -43,7 +43,7 @@ The encoding syntax uses `$` to indicate keywords. As of now 2 keywords have bee
 
 Instruction syntaxes used in this project are broadly categorized into three:
 
-- **regular instructions** :- these are instructions which hold a unique opcode in the encoding space. A very generic syntax guideline 
+- **regular instructions** :- these are instructions which hold a unique opcode in the encoding space. A very generic syntax guideline
   for these instructions is as follows:
   ```
   <instruction name> <arguments>
@@ -55,19 +55,19 @@ Instruction syntaxes used in this project are broadly categorized into three:
   lui     rd imm20 6..2=0x0D 1..0=3
   beq     bimm12hi rs1 rs2 bimm12lo 14..12=0 6..2=0x18 1..0=3
   ```
-  The bit encodings are usually of 2 types: 
+  The bit encodings are usually of 2 types:
     - *single bit assignment* : here the value of a single bit is assigned using syntax `<bit-position>=<value>`. For e.g. `6=1` means bit 6 should be 1. Here the value must be 1 or 0.
-    - *range assignment*: here a range of bits is assigned a value using syntax: `<msb>..<lsb>=<val>`. For e.g. `31..24=0xab`. The value here can be either unsigned integer, hex (0x) or binary (0b). 
+    - *range assignment*: here a range of bits is assigned a value using syntax: `<msb>..<lsb>=<val>`. For e.g. `31..24=0xab`. The value here can be either unsigned integer, hex (0x) or binary (0b).
 
-- **pseudo_instructions** (a.k.a pseudo\_ops) - These are instructions which are aliases of regular instructions. Their encodings force 
+- **pseudo_instructions** (a.k.a pseudo\_ops) - These are instructions which are aliases of regular instructions. Their encodings force
   certain restrictions over the regular instruction. The syntax for such instructions uses the `$pseudo_op` keyword as follows:
   ```
   $pseudo_op <extension>::<base-instruction> <instruction name> <instruction args> <bit-encodings>
   ```
-  Here the `<extension>` specifies the extension which contains the base instruction. `<base-instruction>` indicates the name of the instruction 
-  this pseudo-instruction is an alias of. The remaining fields are the same as the regular instruction syntax, where all the args and the fields 
+  Here the `<extension>` specifies the extension which contains the base instruction. `<base-instruction>` indicates the name of the instruction
+  this pseudo-instruction is an alias of. The remaining fields are the same as the regular instruction syntax, where all the args and the fields
   of the pseudo instruction are specified.
-  
+
   Example:
   ```
   $pseudo_op rv_zicsr::csrrs frflags rd 19..15=0 31..20=0x001 14..12=2 6..2=0x1C 1..0=3
@@ -78,7 +78,7 @@ Instruction syntaxes used in this project are broadly categorized into three:
   define the new instruction as a pseudo\_op of the unratified regular
   instruction, as this avoids existence of overlapping opcodes for users who are
   experimenting with unratified extensions as well.
-  
+
 - **imported_instructions** - these are instructions which are borrowed from an extension into a new/different extension/sub-extension. Only regular instructions can be imported. Pseudo-op or already imported instructions cannot be imported. Example:
   ```
   $import rv32_zkne::aes32esmi
@@ -96,15 +96,15 @@ Following are the restrictions one should keep in mind while defining $pseudo\_o
 
 The `parse.py` python file is used to perform checks on the current set of instruction encodings and also generates multiple artifacts : latex tables, encoding.h header file, etc. This section will provide a brief overview of the flow within the python file.
 
-To start with, `parse.py` creates a list of all `rv*` files currently checked into the repo (including those inside the `unratified` directory as well). 
-It then starts parsing each file line by line. In the first pass, we only capture regular instructions and ignore the imported or pseudo instructions. 
+To start with, `parse.py` creates a list of all `rv*` files currently checked into the repo (including those inside the `unratified` directory as well).
+It then starts parsing each file line by line. In the first pass, we only capture regular instructions and ignore the imported or pseudo instructions.
 For each regular instruction, the following checks are performed :
 
   - for range-assignment syntax, the *msb* position must be higher than the *lsb* position
   - for range-assignment syntax, the value of the range must representable in the space identified by *msb* and *lsb*
   - values for the same bit positions should not be defined multiple times.
   - All bit positions must be accounted for (either as args or constant value fields)
- 
+
 Once the above checks are passed for a regular instruction, we then create a dictionary for this instruction which contains the following fields:
   - encoding : contains a 32-bit string defining the encoding of the instruction. Here `-` is used to represent instruction argument fields
   - extension : string indicating which extension/filename this instruction was picked from
@@ -112,23 +112,25 @@ Once the above checks are passed for a regular instruction, we then create a dic
   - match : a 32-bit hex value indicating the values the encoding must take for the bits which are set as 1 in the mask above
   - variable_fields : This is list of args required by the instruction
 
-The above dictionary elements are added to a main `instr_dict` dictionary under the instruction node. This process continues until all regular 
-instructions have been processed. In the second pass, we now process the `$pseudo_op` instructions. Here, we first check if the *base-instruction* of 
-this pseudo instruction exists in the relevant extension/filename or not. If it is present, the the remaining part of the syntax undergoes the same 
-checks as above. Once the checks pass and if the *base-instruction* is not already added to the main `instr_dict` then the pseudo-instruction is added to 
+The above dictionary elements are added to a main `instr_dict` dictionary under the instruction node. This process continues until all regular
+instructions have been processed. In the second pass, we now process the `$pseudo_op` instructions. Here, we first check if the *base-instruction* of
+this pseudo instruction exists in the relevant extension/filename or not. If it is present, the remaining part of the syntax undergoes the same
+checks as above. Once the checks pass and if the *base-instruction* is not already added to the main `instr_dict` then the pseudo-instruction is added to
 the list. In the third, and final, pass we process the imported instructions.
 
-The case where the *base-instruction* for a pseudo-instruction may not be present in the main `instr_dict` after the first pass is if the only a subset 
-of extensions are being processed such that the *base-instruction* is not included. 
+The case where the *base-instruction* for a pseudo-instruction may not be present in the main `instr_dict` after the first pass is if only a subset
+of extensions are being processed such that the *base-instruction* is not included.
 
 
 ## Artifact Generation and Usage
 
 The following artifacts can be generated using parse.py:
 
-- instr\_dict.yaml : This is file generated always by parse.py and contains the
-  entire main dictionary `instr\_dict` in YAML format. Note, in this yaml the
-  *dots* in an instruction are replaced with *underscores*
+- instr\_dict.json : This is always generated by parse.py and contains the
+  entire main dictionary `instr_dict` in JSON format. Note, in this file the
+  *dots* in an instruction are replaced with *underscores*. In previous
+  versions of this project the generated file was instr\_dict.yaml. Note that
+  JSON is a subset of YAML so the file can still be read by any YAML parser.
 - encoding.out.h : this is the header file that is used by tools like spike, pk, etc
 - instr-table.tex : the latex table of instructions used in the riscv-unpriv spec
 - priv-instr-table.tex : the latex table of instruction used in the riscv-priv spec
@@ -138,14 +140,6 @@ The following artifacts can be generated using parse.py:
 - inst.spinalhdl : spinalhdl code to decode instructions
 - inst.go : go code to decode instructions
 
-Make sure you install the required python pre-requisites are installed by executing the following
-command:
-
-```
-sudo apt-get install python-pip3
-pip3 install -r requirements.txt
-```
-
 To generate all the above artifacts for all instructions currently checked in, simply run `make` from the root-directory. This should print the following log on the command-line:
 
 ```
@@ -165,24 +159,43 @@ By default all extensions are enabled. To select only a subset of extensions you
 For example if you want only the I and M extensions you can do the following:
 
 ```bash
-make EXTENSIONS='rv*_i rv*_m' 
+make EXTENSIONS='rv*_i rv*_m'
 ```
 
 Which will print the following log:
 
 ```
-Running with args : ['./parse.py', '-c', '-chisel', '-sverilog', '-rust', '-latex', 'rv32_i', 'rv64_i', 'rv_i', 'rv64_m', 'rv_m']
+Running with args : ['./parse.py', '-c', '-go', '-chisel', '-sverilog', '-rust', '-latex', '-spinalhdl', 'rv32_i', 'rv64_i', 'rv_i', 'rv64_m', 'rv_m']
 Extensions selected : ['rv32_i', 'rv64_i', 'rv_i', 'rv64_m', 'rv_m']
 INFO:: encoding.out.h generated successfully
 INFO:: inst.chisel generated successfully
+INFO:: inst.spinalhdl generated successfully
 INFO:: inst.sverilog generated successfully
 INFO:: inst.rs generated successfully
+INFO:: inst.go generated successfully
 INFO:: instr-table.tex generated successfully
 INFO:: priv-instr-table.tex generated successfully
 ```
 
-If you only want a specific artifact you can use one or more of the following targets : `c`, `rust`, `chisel`, `sverilog`, `latex`
+If you only want a specific artifact you can use one or more of the following targets : `c`, `rust`, `chisel`, `sverilog`, `latex`.
+For example, if you want to generate the `c` based artifact with extensions as shown earlier, you can use the following command:
 
+```bash
+./parse.py -c rv*_i rv*_m
+```
+Which will print the following log:
+
+```
+Running with args : ['./parse.py', '-c', 'rv32_i', 'rv64_i', 'rv_i', 'rv64_m', 'rv_m']
+Extensions selected : ['rv32_i', 'rv64_i', 'rv_i', 'rv64_m', 'rv_m']
+INFO:: encoding.out.h generated successfully
+```
+
+or you can also use the `make` command as:
+
+```bash
+make encoding.out.h  EXTENSIONS='rv*_i rv*_m'
+```
 You can use the `clean` target to remove all artifacts.
 
 ## Adding a new extension
@@ -204,7 +217,7 @@ Create a PR for review.
 
 ## Enabling Debug logs in parse.py
 
-To enable debug logs in parse.py change `level=logging.INFO` to `level=logging.DEBUG` and run the python command. You will now see debug statements on 
+To enable debug logs in parse.py change `level=logging.INFO` to `level=logging.DEBUG` and run the python command. You will now see debug statements on
 the terminal like below:
 ```
 DEBUG:: Collecting standard instructions first
@@ -220,7 +233,6 @@ DEBUG::      Processing line: bne     bimm12hi rs1 rs2 bimm12lo 14..12=1 6..2=0x
 ## How do I find where an instruction is defined?
 
 You can use `grep "^\s*<instr-name>" rv* unratified/rv*` OR run `make` and open
-`instr_dict.yaml` and search of the instruction you are looking for. Within that
-instruction the `extension` field will indicate which file the instruction was
-picked from.
-
+`instr_dict.json` and search for the instruction you are looking for. Within
+that instruction the `extension` field will indicate which file the
+instruction was picked from.
diff --git a/arg_lut.csv b/arg_lut.csv
index b143650..ed30b11 100644
--- a/arg_lut.csv
+++ b/arg_lut.csv
@@ -32,7 +32,6 @@
 "imm4", 23, 20
 "imm5", 24, 20
 "imm6", 25, 20
-"zimm", 19, 15
 "opcode", 6,0
 "funct7", 31,25
 "vd", 11, 7
diff --git a/c_utils.py b/c_utils.py
new file mode 100644
index 0000000..8aa5138
--- /dev/null
+++ b/c_utils.py
@@ -0,0 +1,79 @@
+import logging
+import os
+import pprint
+
+from constants import causes, csrs, csrs32
+from shared_utils import InstrDict, arg_lut
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_c(instr_dict: InstrDict):
+    mask_match_str = ""
+    declare_insn_str = ""
+    for i in instr_dict:
+        mask_match_str += (
+            f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n'
+        )
+        mask_match_str += (
+            f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n'
+        )
+        declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n'
+
+    csr_names_str = ""
+    declare_csr_str = ""
+    for num, name in csrs + csrs32:
+        csr_names_str += f"#define CSR_{name.upper()} {hex(num)}\n"
+        declare_csr_str += f"DECLARE_CSR({name}, CSR_{name.upper()})\n"
+
+    causes_str = ""
+    declare_cause_str = ""
+    for num, name in causes:
+        causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n"
+        declare_cause_str += (
+            f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n"
+        )
+
+    arg_str = ""
+    for name, rng in arg_lut.items():
+        sanitized_name = name.replace(" ", "_").replace("=", "_eq_")
+        begin = rng[1]
+        end = rng[0]
+        mask = ((1 << (end - begin + 1)) - 1) << begin
+        arg_str += f"#define INSN_FIELD_{sanitized_name.upper()} {hex(mask)}\n"
+
+    with open(f"{os.path.dirname(__file__)}/encoding.h", "r", encoding="utf-8") as file:
+        enc_header = file.read()
+
+    commit = os.popen('git log -1 --format="format:%h"').read()
+
+    # Generate the output as a string
+    output_str = f"""/* SPDX-License-Identifier: BSD-3-Clause */
+
+/* Copyright (c) 2023 RISC-V International */
+
+/*
+ * This file is auto-generated by running 'make' in
+ * https://github.com/riscv/riscv-opcodes ({commit})
+ */
+
+{enc_header}
+/* Automatically generated by parse_opcodes. */
+#ifndef RISCV_ENCODING_H
+#define RISCV_ENCODING_H
+{mask_match_str}
+{csr_names_str}
+{causes_str}
+{arg_str}#endif
+#ifdef DECLARE_INSN
+{declare_insn_str}#endif
+#ifdef DECLARE_CSR
+{declare_csr_str}#endif
+#ifdef DECLARE_CAUSE
+{declare_cause_str}#endif
+"""
+
+    # Write the modified output to the file
+    with open("encoding.out.h", "w", encoding="utf-8") as enc_file:
+        enc_file.write(output_str)
diff --git a/causes.csv b/causes.csv
index 0fda1a4..42e51c3 100644
--- a/causes.csv
+++ b/causes.csv
@@ -13,6 +13,7 @@
 0x0C, "fetch page fault"
 0x0D, "load page fault"
 0x0F, "store page fault"
+0x10, "double trap"
 0x12, "software check fault"
 0x13, "hardware error fault"
 0x14, "fetch guest page fault"
diff --git a/chisel_utils.py b/chisel_utils.py
new file mode 100644
index 0000000..34f6ac3
--- /dev/null
+++ b/chisel_utils.py
@@ -0,0 +1,82 @@
+import logging
+import pprint
+
+from constants import causes, csrs, csrs32
+from shared_utils import InstrDict, instr_dict_2_extensions
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_chisel(instr_dict: InstrDict, spinal_hdl: bool = False):
+
+    chisel_names = ""
+    cause_names_str = ""
+    csr_names_str = ""
+    for i in instr_dict:
+        if spinal_hdl:
+            chisel_names += f'  def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n'
+        # else:
+        #     chisel_names += f'  def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n'
+    if not spinal_hdl:
+        extensions = instr_dict_2_extensions(instr_dict)
+        for e in extensions:
+            if "rv64_" in e:
+                e_format = e.replace("rv64_", "").upper() + "64"
+            elif "rv32_" in e:
+                e_format = e.replace("rv32_", "").upper() + "32"
+            elif "rv_" in e:
+                e_format = e.replace("rv_", "").upper()
+            else:
+                e_format = e.upper()
+            chisel_names += f'  val {e_format+"Type"} = Map(\n'
+            for instr_name, instr in instr_dict.items():
+                if instr["extension"][0] == e:
+                    tmp_instr_name = '"' + instr_name.upper().replace(".", "_") + '"'
+                    chisel_names += f'   {tmp_instr_name:<18s} -> BitPat("b{instr["encoding"].replace("-","?")}"),\n'
+            chisel_names += "  )\n"
+
+    for num, name in causes:
+        cause_names_str += f'  val {name.lower().replace(" ","_")} = {hex(num)}\n'
+    cause_names_str += """  val all = {
+    val res = collection.mutable.ArrayBuffer[Int]()
+"""
+    for num, name in causes:
+        cause_names_str += f'    res += {name.lower().replace(" ","_")}\n'
+    cause_names_str += """    res.toArray
+  }"""
+
+    for num, name in csrs + csrs32:
+        csr_names_str += f"  val {name} = {hex(num)}\n"
+    csr_names_str += """  val all = {
+    val res = collection.mutable.ArrayBuffer[Int]()
+"""
+    for num, name in csrs:
+        csr_names_str += f"""    res += {name}\n"""
+    csr_names_str += """    res.toArray
+  }
+  val all32 = {
+    val res = collection.mutable.ArrayBuffer(all:_*)
+"""
+    for num, name in csrs32:
+        csr_names_str += f"""    res += {name}\n"""
+    csr_names_str += """    res.toArray
+  }"""
+
+    with open(
+        "inst.spinalhdl" if spinal_hdl else "inst.chisel", "w", encoding="utf-8"
+    ) as chisel_file:
+        chisel_file.write(
+            f"""
+/* Automatically generated by parse_opcodes */
+object Instructions {{
+{chisel_names}
+}}
+object Causes {{
+{cause_names_str}
+}}
+object CSRs {{
+{csr_names_str}
+}}
+"""
+        )
diff --git a/constants.py b/constants.py
index cb3e689..fa59aa7 100644
--- a/constants.py
+++ b/constants.py
@@ -1,242 +1,262 @@
-import re
 import csv
+import re
 
-
+# TODO: The constants in this file should be in all caps.
 overlapping_extensions = {
-    'rv_zcmt': {'rv_c_d'},
-    'rv_zcmp': {'rv_c_d'},
-    'rv_c': {'rv_zcmop'},
+    "rv_zcmt": {"rv_c_d"},
+    "rv_zcmp": {"rv_c_d"},
+    "rv_c": {"rv_zcmop"},
 }
 
 overlapping_instructions = {
-    'c_addi': {'c_nop'},
-    'c_lui': {'c_addi16sp'},
-    'c_mv': {'c_jr'},
-    'c_jalr': {'c_ebreak'},
-    'c_add': {'c_ebreak', 'c_jalr'},
+    "c_addi": {"c_nop"},
+    "c_lui": {"c_addi16sp"},
+    "c_mv": {"c_jr"},
+    "c_jalr": {"c_ebreak"},
+    "c_add": {"c_ebreak", "c_jalr"},
 }
 
-isa_regex = \
-re.compile("^RV(32|64|128)[IE]+[ABCDEFGHJKLMNPQSTUVX]*(Zicsr|Zifencei|Zihintpause|Zam|Ztso|Zkne|Zknd|Zknh|Zkse|Zksh|Zkg|Zkb|Zkr|Zks|Zkn|Zba|Zbc|Zbb|Zbp|Zbr|Zbm|Zbs|Zbe|Zbf|Zbt|Zmmul|Zbpbo|Zca|Zcf|Zcd|Zcb|Zcmp|Zcmt){,1}(_Zicsr){,1}(_Zifencei){,1}(_Zihintpause){,1}(_Zmmul){,1}(_Zam){,1}(_Zba){,1}(_Zbb){,1}(_Zbc){,1}(_Zbe){,1}(_Zbf){,1}(_Zbm){,1}(_Zbp){,1}(_Zbpbo){,1}(_Zbr){,1}(_Zbs){,1}(_Zbt){,1}(_Zkb){,1}(_Zkg){,1}(_Zkr){,1}(_Zks){,1}(_Zkn){,1}(_Zknd){,1}(_Zkne){,1}(_Zknh){,1}(_Zkse){,1}(_Zksh){,1}(_Ztso){,1}(_Zca){,1}(_Zcf){,1}(_Zcd){,1}(_Zcb){,1}(_Zcmp){,1}(_Zcmt){,1}$")
+isa_regex = re.compile(
+    "^RV(32|64|128)[IE]+[ABCDEFGHJKLMNPQSTUVX]*(Zicsr|Zifencei|Zihintpause|Zam|Ztso|Zkne|Zknd|Zknh|Zkse|Zksh|Zkg|Zkb|Zkr|Zks|Zkn|Zba|Zbc|Zbb|Zbp|Zbr|Zbm|Zbs|Zbe|Zbf|Zbt|Zmmul|Zbpbo|Zca|Zcf|Zcd|Zcb|Zcmp|Zcmt){,1}(_Zicsr){,1}(_Zifencei){,1}(_Zihintpause){,1}(_Zmmul){,1}(_Zam){,1}(_Zba){,1}(_Zbb){,1}(_Zbc){,1}(_Zbe){,1}(_Zbf){,1}(_Zbm){,1}(_Zbp){,1}(_Zbpbo){,1}(_Zbr){,1}(_Zbs){,1}(_Zbt){,1}(_Zkb){,1}(_Zkg){,1}(_Zkr){,1}(_Zks){,1}(_Zkn){,1}(_Zknd){,1}(_Zkne){,1}(_Zknh){,1}(_Zkse){,1}(_Zksh){,1}(_Ztso){,1}(_Zca){,1}(_Zcf){,1}(_Zcd){,1}(_Zcb){,1}(_Zcmp){,1}(_Zcmt){,1}$"
+)
 
 # regex to find <msb>..<lsb>=<val> patterns in instruction
 fixed_ranges = re.compile(
-    '\s*(?P<msb>\d+.?)\.\.(?P<lsb>\d+.?)\s*=\s*(?P<val>\d[\w]*)[\s$]*', re.M)
+    r"\s*(?P<msb>\d+.?)\.\.(?P<lsb>\d+.?)\s*=\s*(?P<val>\d[\w]*)[\s$]*", re.M
+)
 
 # regex to find <lsb>=<val> patterns in instructions
-#single_fixed = re.compile('\s+(?P<lsb>\d+)=(?P<value>[\w\d]*)[\s$]*', re.M)
-single_fixed = re.compile('(?:^|[\s])(?P<lsb>\d+)=(?P<value>[\w]*)((?=\s|$))', re.M)
+# single_fixed = re.compile('\s+(?P<lsb>\d+)=(?P<value>[\w\d]*)[\s$]*', re.M)
+single_fixed = re.compile(r"(?:^|[\s])(?P<lsb>\d+)=(?P<value>[\w]*)((?=\s|$))", re.M)
 
 # regex to find the overloading condition variable
-var_regex = re.compile('(?P<var>[a-zA-Z][\w\d]*)\s*=\s*.*?[\s$]*', re.M)
+var_regex = re.compile(r"(?P<var>[a-zA-Z][\w\d]*)\s*=\s*.*?[\s$]*", re.M)
 
 # regex for pseudo op instructions returns the dependent filename, dependent
 # instruction, the pseudo op name and the encoding string
 pseudo_regex = re.compile(
-    '^\$pseudo_op\s+(?P<filename>rv[\d]*_[\w].*)::\s*(?P<orig_inst>.*?)\s+(?P<pseudo_inst>.*?)\s+(?P<overload>.*)$'
-, re.M)
-
-imported_regex = re.compile('^\s*\$import\s*(?P<extension>.*)\s*::\s*(?P<instruction>.*)', re.M)
-
-causes = []
-with open("causes.csv") as f:
-    csv_reader = csv.reader(f, skipinitialspace=True)
-    for row in csv_reader:
-        causes.append((int(row[0], 0), row[1]))
-csrs = []
-with open("csrs.csv") as f:
-    csv_reader = csv.reader(f, skipinitialspace=True)
-    for row in csv_reader:
-        csrs.append((int(row[0], 0), row[1]))
-csrs32 = []
-with open("csrs32.csv") as f:
-    csv_reader = csv.reader(f, skipinitialspace=True)
-    for row in csv_reader:
-        csrs32.append((int(row[0], 0), row[1]))
-arg_lut = {}
-with open("arg_lut.csv") as f:
-    csv_reader = csv.reader(f, skipinitialspace=True)
-    for row in csv_reader:
-        k = row[0]
-        v = (int(row[1]), int(row[2]))
-        arg_lut[k] = v
+    r"^\$pseudo_op\s+(?P<filename>rv[\d]*_[\w].*)::\s*(?P<orig_inst>.*?)\s+(?P<pseudo_inst>.*?)\s+(?P<overload>.*)$",
+    re.M,
+)
+
+imported_regex = re.compile(
+    r"^\s*\$import\s*(?P<extension>.*)\s*::\s*(?P<instruction>.*)", re.M
+)
+
+
+def read_int_map_csv(filename: str) -> "list[tuple[int, str]]":
+    """
+    Read a two-column CSV file into a list of (number, name) tuples.
+    The first column is parsed with int(value, 0), so the base follows the literal's prefix (e.g. 0x for hex).
+
+    Args:
+        filename (str): Path of the CSV file; opened as UTF-8, with spaces after delimiters skipped.
+
+    Returns:
+        list of tuple: One (int, str) tuple per CSV row, in file order; the string is the second column.
+    """
+    with open(filename, encoding="utf-8") as f:
+        csv_reader = csv.reader(f, skipinitialspace=True)
+        return [(int(row[0], 0), row[1]) for row in csv_reader]
+
+
+causes = read_int_map_csv("causes.csv")
+csrs = read_int_map_csv("csrs.csv")
+csrs32 = read_int_map_csv("csrs32.csv")
+
+
+def read_arg_lut_csv(filename: str) -> "dict[str, tuple[int, int]]":
+    """
+    Load the argument lookup table from a CSV file: column 1 is the argument name; columns 2 and 3 are parsed as base-10 ints and paired into its bit-position tuple.
+    """
+    with open(filename, encoding="utf-8") as f:
+        csv_reader = csv.reader(f, skipinitialspace=True)
+        return {row[0]: (int(row[1]), int(row[2])) for row in csv_reader}
+
+
+arg_lut = read_arg_lut_csv("arg_lut.csv")
 
 # for mop
-arg_lut['mop_r_t_30'] = (30,30)
-arg_lut['mop_r_t_27_26'] = (27,26)
-arg_lut['mop_r_t_21_20'] = (21, 20)
-arg_lut['mop_rr_t_30'] = (30,30)
-arg_lut['mop_rr_t_27_26'] = (27, 26)
-arg_lut['c_mop_t'] = (10,8)
+arg_lut["mop_r_t_30"] = (30, 30)
+arg_lut["mop_r_t_27_26"] = (27, 26)
+arg_lut["mop_r_t_21_20"] = (21, 20)
+arg_lut["mop_rr_t_30"] = (30, 30)
+arg_lut["mop_rr_t_27_26"] = (27, 26)
+arg_lut["c_mop_t"] = (10, 8)
 
 # dictionary containing the mapping of the argument to the what the fields in
 # the latex table should be
-latex_mapping = {}
-latex_mapping['imm12'] = 'imm[11:0]'
-latex_mapping['rs1'] = 'rs1'
-latex_mapping['rs2'] = 'rs2'
-latex_mapping['rd'] = 'rd'
-latex_mapping['imm20'] = 'imm[31:12]'
-latex_mapping['bimm12hi'] = 'imm[12$\\vert$10:5]'
-latex_mapping['bimm12lo'] = 'imm[4:1$\\vert$11]'
-latex_mapping['imm12hi'] = 'imm[11:5]'
-latex_mapping['imm12lo'] = 'imm[4:0]'
-latex_mapping['jimm20'] = 'imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]'
-latex_mapping['zimm'] = 'uimm'
-latex_mapping['shamtw'] = 'shamt'
-latex_mapping['shamtd'] = 'shamt'
-latex_mapping['shamtq'] = 'shamt'
-latex_mapping['rd_p'] = "rd\\,$'$"
-latex_mapping['rs1_p'] = "rs1\\,$'$"
-latex_mapping['rs2_p'] = "rs2\\,$'$"
-latex_mapping['rd_rs1_n0'] = 'rd/rs$\\neq$0'
-latex_mapping['rd_rs1_p'] = "rs1\\,$'$/rs2\\,$'$"
-latex_mapping['c_rs2'] = 'rs2'
-latex_mapping['c_rs2_n0'] = 'rs2$\\neq$0'
-latex_mapping['rd_n0'] = 'rd$\\neq$0'
-latex_mapping['rs1_n0'] = 'rs1$\\neq$0'
-latex_mapping['c_rs1_n0'] = 'rs1$\\neq$0'
-latex_mapping['rd_rs1'] = 'rd/rs1'
-latex_mapping['zimm6hi'] = 'uimm[5]'
-latex_mapping['zimm6lo'] = 'uimm[4:0]'
-latex_mapping['c_nzuimm10'] = "nzuimm[5:4$\\vert$9:6$\\vert$2$\\vert$3]"
-latex_mapping['c_uimm7lo'] = 'uimm[2$\\vert$6]'
-latex_mapping['c_uimm7hi'] = 'uimm[5:3]'
-latex_mapping['c_uimm8lo'] = 'uimm[7:6]'
-latex_mapping['c_uimm8hi'] = 'uimm[5:3]'
-latex_mapping['c_uimm9lo'] = 'uimm[7:6]'
-latex_mapping['c_uimm9hi'] = 'uimm[5:4$\\vert$8]'
-latex_mapping['c_nzimm6lo'] = 'nzimm[4:0]'
-latex_mapping['c_nzimm6hi'] = 'nzimm[5]'
-latex_mapping['c_imm6lo'] = 'imm[4:0]'
-latex_mapping['c_imm6hi'] = 'imm[5]'
-latex_mapping['c_nzimm10hi'] = 'nzimm[9]'
-latex_mapping['c_nzimm10lo'] = 'nzimm[4$\\vert$6$\\vert$8:7$\\vert$5]'
-latex_mapping['c_nzimm18hi'] = 'nzimm[17]'
-latex_mapping['c_nzimm18lo'] = 'nzimm[16:12]'
-latex_mapping['c_imm12'] = 'imm[11$\\vert$4$\\vert$9:8$\\vert$10$\\vert$6$\\vert$7$\\vert$3:1$\\vert$5]'
-latex_mapping['c_bimm9lo'] = 'imm[7:6$\\vert$2:1$\\vert$5]'
-latex_mapping['c_bimm9hi'] = 'imm[8$\\vert$4:3]'
-latex_mapping['c_nzuimm5'] = 'nzuimm[4:0]'
-latex_mapping['c_nzuimm6lo'] = 'nzuimm[4:0]'
-latex_mapping['c_nzuimm6hi'] = 'nzuimm[5]'
-latex_mapping['c_uimm8splo'] = 'uimm[4:2$\\vert$7:6]'
-latex_mapping['c_uimm8sphi'] = 'uimm[5]'
-latex_mapping['c_uimm8sp_s'] = 'uimm[5:2$\\vert$7:6]'
-latex_mapping['c_uimm10splo'] = 'uimm[4$\\vert$9:6]'
-latex_mapping['c_uimm10sphi'] = 'uimm[5]'
-latex_mapping['c_uimm9splo'] = 'uimm[4:3$\\vert$8:6]'
-latex_mapping['c_uimm9sphi'] = 'uimm[5]'
-latex_mapping['c_uimm10sp_s'] = 'uimm[5:4$\\vert$9:6]'
-latex_mapping['c_uimm9sp_s'] = 'uimm[5:3$\\vert$8:6]'
+latex_mapping = {
+    "imm12": "imm[11:0]",
+    "rs1": "rs1",
+    "rs2": "rs2",
+    "rd": "rd",
+    "imm20": "imm[31:12]",
+    "bimm12hi": "imm[12$\\vert$10:5]",
+    "bimm12lo": "imm[4:1$\\vert$11]",
+    "imm12hi": "imm[11:5]",
+    "imm12lo": "imm[4:0]",
+    "jimm20": "imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]",
+    "zimm": "uimm",
+    "shamtw": "shamt",
+    "shamtd": "shamt",
+    "shamtq": "shamt",
+    "rd_p": "rd\\,$'$",
+    "rs1_p": "rs1\\,$'$",
+    "rs2_p": "rs2\\,$'$",
+    "rd_rs1_n0": "rd/rs$\\neq$0",
+    "rd_rs1_p": "rs1\\,$'$/rs2\\,$'$",
+    "c_rs2": "rs2",
+    "c_rs2_n0": "rs2$\\neq$0",
+    "rd_n0": "rd$\\neq$0",
+    "rs1_n0": "rs1$\\neq$0",
+    "c_rs1_n0": "rs1$\\neq$0",
+    "rd_rs1": "rd/rs1",
+    "zimm6hi": "uimm[5]",
+    "zimm6lo": "uimm[4:0]",
+    "c_nzuimm10": "nzuimm[5:4$\\vert$9:6$\\vert$2$\\vert$3]",
+    "c_uimm7lo": "uimm[2$\\vert$6]",
+    "c_uimm7hi": "uimm[5:3]",
+    "c_uimm8lo": "uimm[7:6]",
+    "c_uimm8hi": "uimm[5:3]",
+    "c_uimm9lo": "uimm[7:6]",
+    "c_uimm9hi": "uimm[5:4$\\vert$8]",
+    "c_nzimm6lo": "nzimm[4:0]",
+    "c_nzimm6hi": "nzimm[5]",
+    "c_imm6lo": "imm[4:0]",
+    "c_imm6hi": "imm[5]",
+    "c_nzimm10hi": "nzimm[9]",
+    "c_nzimm10lo": "nzimm[4$\\vert$6$\\vert$8:7$\\vert$5]",
+    "c_nzimm18hi": "nzimm[17]",
+    "c_nzimm18lo": "nzimm[16:12]",
+    "c_imm12": "imm[11$\\vert$4$\\vert$9:8$\\vert$10$\\vert$6$\\vert$7$\\vert$3:1$\\vert$5]",
+    "c_bimm9lo": "imm[7:6$\\vert$2:1$\\vert$5]",
+    "c_bimm9hi": "imm[8$\\vert$4:3]",
+    "c_nzuimm5": "nzuimm[4:0]",
+    "c_nzuimm6lo": "nzuimm[4:0]",
+    "c_nzuimm6hi": "nzuimm[5]",
+    "c_uimm8splo": "uimm[4:2$\\vert$7:6]",
+    "c_uimm8sphi": "uimm[5]",
+    "c_uimm8sp_s": "uimm[5:2$\\vert$7:6]",
+    "c_uimm10splo": "uimm[4$\\vert$9:6]",
+    "c_uimm10sphi": "uimm[5]",
+    "c_uimm9splo": "uimm[4:3$\\vert$8:6]",
+    "c_uimm9sphi": "uimm[5]",
+    "c_uimm10sp_s": "uimm[5:4$\\vert$9:6]",
+    "c_uimm9sp_s": "uimm[5:3$\\vert$8:6]",
+}
 
 # created a dummy instruction-dictionary like dictionary for all the instruction
 # types so that the same logic can be used to create their tables
-latex_inst_type = {}
-latex_inst_type['R-type'] = {}
-latex_inst_type['R-type']['variable_fields'] = ['opcode', 'rd', 'funct3', \
-        'rs1', 'rs2', 'funct7']
-latex_inst_type['R4-type'] = {}
-latex_inst_type['R4-type']['variable_fields'] = ['opcode', 'rd', 'funct3', \
-        'rs1', 'rs2', 'funct2', 'rs3']
-latex_inst_type['I-type'] = {}
-latex_inst_type['I-type']['variable_fields'] = ['opcode', 'rd', 'funct3', \
-        'rs1', 'imm12']
-latex_inst_type['S-type'] = {}
-latex_inst_type['S-type']['variable_fields'] = ['opcode', 'imm12lo', 'funct3', \
-        'rs1', 'rs2', 'imm12hi']
-latex_inst_type['B-type'] = {}
-latex_inst_type['B-type']['variable_fields'] = ['opcode', 'bimm12lo', 'funct3', \
-        'rs1', 'rs2', 'bimm12hi']
-latex_inst_type['U-type'] = {}
-latex_inst_type['U-type']['variable_fields'] = ['opcode', 'rd', 'imm20']
-latex_inst_type['J-type'] = {}
-latex_inst_type['J-type']['variable_fields'] = ['opcode', 'rd', 'jimm20']
-latex_fixed_fields = []
-latex_fixed_fields.append((31,25))
-latex_fixed_fields.append((24,20))
-latex_fixed_fields.append((19,15))
-latex_fixed_fields.append((14,12))
-latex_fixed_fields.append((11,7))
-latex_fixed_fields.append((6,0))
+latex_inst_type = {
+    "R-type": {
+        "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct7"],
+    },
+    "R4-type": {
+        "variable_fields": ["opcode", "rd", "funct3", "rs1", "rs2", "funct2", "rs3"],
+    },
+    "I-type": {
+        "variable_fields": ["opcode", "rd", "funct3", "rs1", "imm12"],
+    },
+    "S-type": {
+        "variable_fields": ["opcode", "imm12lo", "funct3", "rs1", "rs2", "imm12hi"],
+    },
+    "B-type": {
+        "variable_fields": ["opcode", "bimm12lo", "funct3", "rs1", "rs2", "bimm12hi"],
+    },
+    "U-type": {
+        "variable_fields": ["opcode", "rd", "imm20"],
+    },
+    "J-type": {
+        "variable_fields": ["opcode", "rd", "jimm20"],
+    },
+}
+latex_fixed_fields = [
+    (31, 25),
+    (24, 20),
+    (19, 15),
+    (14, 12),
+    (11, 7),
+    (6, 0),
+]
 
 # Pseudo-ops present in the generated encodings.
 # By default pseudo-ops are not listed as they are considered aliases
 # of their base instruction.
 emitted_pseudo_ops = [
-    'pause',
-    'prefetch_i',
-    'prefetch_r',
-    'prefetch_w',
-    'rstsa16',
-    'rstsa32',
-    'srli32_u',
-    'slli_rv32',
-    'srai_rv32',
-    'srli_rv32',
-    'umax32',
-    'c_mop_1',
-    'c_sspush_x1',
-    'c_mop_3',
-    'c_mop_5',
-    'c_sspopchk_x5',
-    'c_mop_7',
-    'c_mop_9',
-    'c_mop_11',
-    'c_mop_13',
-    'c_mop_15',
-    'mop_r_0',
-    'mop_r_1',
-    'mop_r_2',
-    'mop_r_3',
-    'mop_r_4',
-    'mop_r_5',
-    'mop_r_6',
-    'mop_r_7',
-    'mop_r_8',
-    'mop_r_9',
-    'mop_r_10',
-    'mop_r_11',
-    'mop_r_12',
-    'mop_r_13',
-    'mop_r_14',
-    'mop_r_15',
-    'mop_r_16',
-    'mop_r_17',
-    'mop_r_18',
-    'mop_r_19',
-    'mop_r_20',
-    'mop_r_21',
-    'mop_r_22',
-    'mop_r_23',
-    'mop_r_24',
-    'mop_r_25',
-    'mop_r_26',
-    'mop_r_27',
-    'mop_r_28',
-    'sspopchk_x1',
-    'sspopchk_x5',
-    'ssrdp',
-    'mop_r_29',
-    'mop_r_30',
-    'mop_r_31',
-    'mop_r_32',
-    'mop_rr_0',
-    'mop_rr_1',
-    'mop_rr_2',
-    'mop_rr_3',
-    'mop_rr_4',
-    'mop_rr_5',
-    'mop_rr_6',
-    'mop_rr_7',
-    'sspush_x1',
-    'sspush_x5',
-    'lpad',
-    'bclri.rv32',
-    'bexti.rv32',
-    'binvi.rv32',
-    'bseti.rv32',
-    'zext.h.rv32',
-    'rev8.h.rv32',
-    'rori.rv32',
+    "pause",
+    "prefetch_i",
+    "prefetch_r",
+    "prefetch_w",
+    "rstsa16",
+    "rstsa32",
+    "srli32_u",
+    "slli_rv32",
+    "srai_rv32",
+    "srli_rv32",
+    "umax32",
+    "c_mop_1",
+    "c_sspush_x1",
+    "c_mop_3",
+    "c_mop_5",
+    "c_sspopchk_x5",
+    "c_mop_7",
+    "c_mop_9",
+    "c_mop_11",
+    "c_mop_13",
+    "c_mop_15",
+    "mop_r_0",
+    "mop_r_1",
+    "mop_r_2",
+    "mop_r_3",
+    "mop_r_4",
+    "mop_r_5",
+    "mop_r_6",
+    "mop_r_7",
+    "mop_r_8",
+    "mop_r_9",
+    "mop_r_10",
+    "mop_r_11",
+    "mop_r_12",
+    "mop_r_13",
+    "mop_r_14",
+    "mop_r_15",
+    "mop_r_16",
+    "mop_r_17",
+    "mop_r_18",
+    "mop_r_19",
+    "mop_r_20",
+    "mop_r_21",
+    "mop_r_22",
+    "mop_r_23",
+    "mop_r_24",
+    "mop_r_25",
+    "mop_r_26",
+    "mop_r_27",
+    "mop_r_28",
+    "sspopchk_x1",
+    "sspopchk_x5",
+    "ssrdp",
+    "mop_r_29",
+    "mop_r_30",
+    "mop_r_31",
+    "mop_r_32",
+    "mop_rr_0",
+    "mop_rr_1",
+    "mop_rr_2",
+    "mop_rr_3",
+    "mop_rr_4",
+    "mop_rr_5",
+    "mop_rr_6",
+    "mop_rr_7",
+    "sspush_x1",
+    "sspush_x5",
+    "lpad",
+    "bclri.rv32",
+    "bexti.rv32",
+    "binvi.rv32",
+    "bseti.rv32",
+    "zext.h.rv32",
+    "rev8.h.rv32",
+    "rori.rv32",
 ]
diff --git a/csrs32.csv b/csrs32.csv
index 9223eb2..f8c62d9 100644
--- a/csrs32.csv
+++ b/csrs32.csv
@@ -4,6 +4,7 @@
 0x214, "vsieh"
 0x254, "vsiph"
 0x25D, "vstimecmph"
+0x612, "hedelegh"
 0x615, "htimedeltah"
 0x613, "hidelegh"
 0x618, "hvienh"
diff --git a/encoding.h b/encoding.h
index a85f5a3..76aac0d 100644
--- a/encoding.h
+++ b/encoding.h
@@ -66,13 +66,15 @@
 #define HSTATUS_SPV         0x00000080
 #define HSTATUS_GVA         0x00000040
 #define HSTATUS_VSBE        0x00000020
+#define HSTATUS_HUPMM       0x0003000000000000
 
 #define USTATUS_UIE         0x00000001
 #define USTATUS_UPIE        0x00000010
 
 #define MNSTATUS_NMIE       0x00000008
-#define MNSTATUS_MNPP       0x00001800
 #define MNSTATUS_MNPV       0x00000080
+#define MNSTATUS_MNPELP     0x00000200
+#define MNSTATUS_MNPP       0x00001800
 
 #define DCSR_XDEBUGVER      (15U<<28)
 #define DCSR_EXTCAUSE       (7<<24)
@@ -100,6 +102,9 @@
 #define DCSR_CAUSE_STEP     4
 #define DCSR_CAUSE_HALT     5
 #define DCSR_CAUSE_GROUP    6
+#define DCSR_CAUSE_EXTCAUSE 7
+
+#define DCSR_EXTCAUSE_CRITERR 0
 
 #define MCONTROL_TYPE(xlen)    (0xfULL<<((xlen)-4))
 #define MCONTROL_DMODE(xlen)   (1ULL<<((xlen)-5))
@@ -107,7 +112,7 @@
 
 #define MCONTROL_SELECT     (1<<19)
 #define MCONTROL_TIMING     (1<<18)
-#define MCONTROL_ACTION     (0x3f<<12)
+#define MCONTROL_ACTION     (0xf<<12)
 #define MCONTROL_CHAIN      (1<<11)
 #define MCONTROL_MATCH      (0xf<<7)
 #define MCONTROL_M          (1<<6)
@@ -166,12 +171,15 @@
 #define MENVCFG_CBIE  0x00000030
 #define MENVCFG_CBCFE 0x00000040
 #define MENVCFG_CBZE  0x00000080
+#define MENVCFG_PMM   0x0000000300000000
 #define MENVCFG_DTE   0x0800000000000000
+#define MENVCFG_CDE   0x1000000000000000
 #define MENVCFG_ADUE  0x2000000000000000
 #define MENVCFG_PBMTE 0x4000000000000000
 #define MENVCFG_STCE  0x8000000000000000
 
 #define MENVCFGH_DTE   0x08000000
+#define MENVCFGH_CDE   0x10000000
 #define MENVCFGH_ADUE  0x20000000
 #define MENVCFGH_PBMTE 0x40000000
 #define MENVCFGH_STCE  0x80000000
@@ -180,7 +188,9 @@
 #define MSTATEEN0_FCSR     0x00000002
 #define MSTATEEN0_JVT      0x00000004
 #define MSTATEEN0_CTR      0x0040000000000000
+#define MSTATEEN0_PRIV113  0x0100000000000000
 #define MSTATEEN0_PRIV114  0x0080000000000000
+#define MSTATEEN0_IMSIC    0x0400000000000000
 #define MSTATEEN0_HCONTEXT 0x0200000000000000
 #define MSTATEEN0_AIA      0x0800000000000000
 #define MSTATEEN0_CSRIND   0x1000000000000000
@@ -188,7 +198,9 @@
 #define MSTATEEN_HSTATEEN  0x8000000000000000
 
 #define MSTATEEN0H_CTR      0x00400000
+#define MSTATEEN0H_PRIV113  0x01000000
 #define MSTATEEN0H_PRIV114  0x00800000
+#define MSTATEEN0H_IMSIC    0x04000000
 #define MSTATEEN0H_HCONTEXT 0x02000000
 #define MSTATEEN0H_AIA      0x08000000
 #define MSTATEEN0H_CSRIND   0x10000000
@@ -209,12 +221,24 @@
 #define MHPMEVENTH_MINH  0x40000000
 #define MHPMEVENTH_OF    0x80000000
 
+#define MCOUNTEREN_CY_SHIFT    0
+#define MCOUNTEREN_TIME_SHIFT  1
+#define MCOUNTEREN_IR_SHIFT    2
+
+#define MCOUNTEREN_CY      (1U << MCOUNTEREN_CY_SHIFT)
+#define MCOUNTEREN_TIME    (1U << MCOUNTEREN_TIME_SHIFT)
+#define MCOUNTEREN_IR      (1U << MCOUNTEREN_IR_SHIFT)
+
+#define MCOUNTINHIBIT_CY    MCOUNTEREN_CY
+#define MCOUNTINHIBIT_IR    MCOUNTEREN_IR
+
 #define HENVCFG_FIOM  0x00000001
 #define HENVCFG_LPE   0x00000004
 #define HENVCFG_SSE   0x00000008
 #define HENVCFG_CBIE  0x00000030
 #define HENVCFG_CBCFE 0x00000040
 #define HENVCFG_CBZE  0x00000080
+#define HENVCFG_PMM   0x0000000300000000
 #define HENVCFG_DTE   0x0800000000000000
 #define HENVCFG_ADUE  0x2000000000000000
 #define HENVCFG_PBMTE 0x4000000000000000
@@ -237,11 +261,25 @@
 #define SISELECT_SMCDELEG_HPMEVENT_3    0x43
 #define SISELECT_SMCDELEG_END           0x5f
 
+#define MISELECT_IPRIO     0x30
+#define MISELECT_IPRIO_TOP 0x3f
+#define MISELECT_IMSIC     0x70
+#define MISELECT_IMSIC_TOP 0xff
+
+#define SISELECT_IPRIO     0x30
+#define SISELECT_IPRIO_TOP 0x3f
+#define SISELECT_IMSIC     0x70
+#define SISELECT_IMSIC_TOP 0xff
+
+#define VSISELECT_IMSIC     0x70
+#define VSISELECT_IMSIC_TOP 0xff
+
 #define HSTATEEN0_CS       0x00000001
 #define HSTATEEN0_FCSR     0x00000002
 #define HSTATEEN0_JVT      0x00000004
 #define HSTATEEN0_CTR      0x0040000000000000
 #define HSTATEEN0_SCONTEXT 0x0200000000000000
+#define HSTATEEN0_IMSIC    0x0400000000000000
 #define HSTATEEN0_AIA      0x0800000000000000
 #define HSTATEEN0_CSRIND   0x1000000000000000
 #define HSTATEEN0_SENVCFG  0x4000000000000000
@@ -249,6 +287,7 @@
 
 #define HSTATEEN0H_CTR      0x00400000
 #define HSTATEEN0H_SCONTEXT 0x02000000
+#define HSTATEEN0H_IMSIC    0x04000000
 #define HSTATEEN0H_AIA      0x08000000
 #define HSTATEEN0H_CSRIND   0x10000000
 #define HSTATEEN0H_SENVCFG  0x40000000
@@ -260,6 +299,7 @@
 #define SENVCFG_CBIE  0x00000030
 #define SENVCFG_CBCFE 0x00000040
 #define SENVCFG_CBZE  0x00000080
+#define SENVCFG_PMM   0x0000000300000000
 
 #define SSTATEEN0_CS   0x00000001
 #define SSTATEEN0_FCSR 0x00000002
@@ -271,6 +311,7 @@
 #define MSECCFG_USEED  0x00000100
 #define MSECCFG_SSEED  0x00000200
 #define MSECCFG_MLPE   0x00000400
+#define MSECCFG_PMM    0x0000000300000000
 
 /* jvt fields */
 #define JVT_MODE   0x3F
diff --git a/rv32_c b/extensions/rv32_c
index 6b94d84..f8ce289 100644
--- a/rv32_c
+++ b/extensions/rv32_c
@@ -2,7 +2,7 @@
 c.jal c_imm12              1..0=1 15..13=1
 $pseudo_op rv64_c::c.srli c.srli rd_rs1_p c_nzuimm5  1..0=1 15..13=4 12..10=0
 $pseudo_op rv64_c::c.srai c.srai rd_rs1_p c_nzuimm5  1..0=1 15..13=4 12..10=1
-$pseudo_op rv64_c::c.slli c.slli rd_rs1_n0 c_nzuimm6lo  1..0=2 15..12=0 
+$pseudo_op rv64_c::c.slli c.slli rd_rs1_n0 c_nzuimm6lo  1..0=2 15..12=0
 $pseudo_op rv64_c::c.srli c.srli_rv32 rd_rs1_p c_nzuimm5  1..0=1 15..13=4 12..10=0
 $pseudo_op rv64_c::c.srai c.srai_rv32 rd_rs1_p c_nzuimm5  1..0=1 15..13=4 12..10=1
-$pseudo_op rv64_c::c.slli c.slli_rv32 rd_rs1_n0 c_nzuimm6lo  1..0=2 15..12=0 
+$pseudo_op rv64_c::c.slli c.slli_rv32 rd_rs1_n0 c_nzuimm6lo  1..0=2 15..12=0
diff --git a/rv32_c_f b/extensions/rv32_c_f
index 8487c9a..3b735e6 100644
--- a/rv32_c_f
+++ b/extensions/rv32_c_f
@@ -5,4 +5,3 @@ c.fsw rs1_p rs2_p c_uimm7lo c_uimm7hi    1..0=0 15..13=7
 #quadrant 2
 c.flwsp rd c_uimm8sphi c_uimm8splo       1..0=2 15..13=3
 c.fswsp c_rs2 c_uimm8sp_s                1..0=2 15..13=7
-
diff --git a/rv32_d_zfa b/extensions/rv32_d_zfa
index 8a543e7..8a543e7 100644
--- a/rv32_d_zfa
+++ b/extensions/rv32_d_zfa
diff --git a/rv32_i b/extensions/rv32_i
index 59e79da..59e79da 100644
--- a/rv32_i
+++ b/extensions/rv32_i
diff --git a/rv32_zbb b/extensions/rv32_zbb
index bc23350..bc23350 100644
--- a/rv32_zbb
+++ b/extensions/rv32_zbb
diff --git a/rv32_zbkb b/extensions/rv32_zbkb
index e302531..55b7be9 100644
--- a/rv32_zbkb
+++ b/extensions/rv32_zbkb
@@ -1,4 +1,4 @@
-$pseudo_op rv64_zbp::shfli    zip rd rs1 31..25=4 24..20=15 14..12=1 6..2=4 1..0=3 
-$pseudo_op rv64_zbp::unshfli  unzip rd rs1 31..25=4 24..20=15 14..12=5 6..2=4 1..0=3 
+$pseudo_op rv64_zbp::shfli    zip rd rs1 31..25=4 24..20=15 14..12=1 6..2=4 1..0=3
+$pseudo_op rv64_zbp::unshfli  unzip rd rs1 31..25=4 24..20=15 14..12=5 6..2=4 1..0=3
 $pseudo_op rv64_zbb::rori     rori.rv32 rd rs1   31..25=0x30 shamtw 14..12=5 6..2=0x04 1..0=3
 $pseudo_op rv64_zbp::grevi    rev8.rv32 rd rs1   31..20=0x698 14..12=5 6..0=0x13
diff --git a/rv32_zbs b/extensions/rv32_zbs
index a7ff752..fe3c8a2 100644
--- a/rv32_zbs
+++ b/extensions/rv32_zbs
@@ -2,4 +2,3 @@ $pseudo_op rv64_zbs::bclri bclri.rv32 rd rs1 31..25=0x24 shamtw 14..12=1 6..2=0x
 $pseudo_op rv64_zbs::bexti bexti.rv32 rd rs1 31..25=0x24 shamtw 14..12=5 6..2=0x04 1..0=3
 $pseudo_op rv64_zbs::binvi binvi.rv32 rd rs1 31..25=0x34 shamtw 14..12=1 6..2=0x04 1..0=3
 $pseudo_op rv64_zbs::bseti bseti.rv32 rd rs1 31..25=0x14 shamtw 14..12=1 6..2=0x04 1..0=3
-
diff --git a/extensions/rv32_zicntr b/extensions/rv32_zicntr
new file mode 100644
index 0000000..6df98c3
--- /dev/null
+++ b/extensions/rv32_zicntr
@@ -0,0 +1,3 @@
+$pseudo_op rv_zicsr::csrrs  rdcycleh   rd 19..15=0 31..20=0xC80 14..12=2 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrs  rdtimeh    rd 19..15=0 31..20=0xC81 14..12=2 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrs  rdinstreth rd 19..15=0 31..20=0xC82 14..12=2 6..2=0x1C 1..0=3
diff --git a/rv32_zk b/extensions/rv32_zk
index 0bd9443..b4b1330 100644
--- a/rv32_zk
+++ b/extensions/rv32_zk
@@ -1,6 +1,6 @@
 #import zbkb
-$pseudo_op rv64_zbp::shfli    zip rd rs1 31..25=4 24..20=15 14..12=1 6..2=4 1..0=3 
-$pseudo_op rv64_zbp::unshfli  unzip rd rs1 31..25=4 24..20=15 14..12=5 6..2=4 1..0=3 
+$pseudo_op rv64_zbp::shfli    zip rd rs1 31..25=4 24..20=15 14..12=1 6..2=4 1..0=3
+$pseudo_op rv64_zbp::unshfli  unzip rd rs1 31..25=4 24..20=15 14..12=5 6..2=4 1..0=3
 $pseudo_op rv64_zbb::rori     rori.rv32 rd rs1   31..25=0x30 shamtw 14..12=5 6..2=0x04 1..0=3
 $pseudo_op rv64_zbp::grevi    rev8.rv32 rd rs1   31..20=0x698 14..12=5 6..0=0x13
 
@@ -22,4 +22,3 @@ $import rv32_zknh::sha512sig0l
 $import rv32_zknh::sha512sig0h
 $import rv32_zknh::sha512sig1l
 $import rv32_zknh::sha512sig1h
-
diff --git a/rv32_zkn b/extensions/rv32_zkn
index 0bd9443..b4b1330 100644
--- a/rv32_zkn
+++ b/extensions/rv32_zkn
@@ -1,6 +1,6 @@
 #import zbkb
-$pseudo_op rv64_zbp::shfli    zip rd rs1 31..25=4 24..20=15 14..12=1 6..2=4 1..0=3 
-$pseudo_op rv64_zbp::unshfli  unzip rd rs1 31..25=4 24..20=15 14..12=5 6..2=4 1..0=3 
+$pseudo_op rv64_zbp::shfli    zip rd rs1 31..25=4 24..20=15 14..12=1 6..2=4 1..0=3
+$pseudo_op rv64_zbp::unshfli  unzip rd rs1 31..25=4 24..20=15 14..12=5 6..2=4 1..0=3
 $pseudo_op rv64_zbb::rori     rori.rv32 rd rs1   31..25=0x30 shamtw 14..12=5 6..2=0x04 1..0=3
 $pseudo_op rv64_zbp::grevi    rev8.rv32 rd rs1   31..20=0x698 14..12=5 6..0=0x13
 
@@ -22,4 +22,3 @@ $import rv32_zknh::sha512sig0l
 $import rv32_zknh::sha512sig0h
 $import rv32_zknh::sha512sig1l
 $import rv32_zknh::sha512sig1h
-
diff --git a/rv32_zknd b/extensions/rv32_zknd
index f367d5e..aa4bf23 100644
--- a/rv32_zknd
+++ b/extensions/rv32_zknd
@@ -1,4 +1,3 @@
 # Scalar AES - RV32
 aes32dsmi     rd rs1 rs2 bs          29..25=0b10111 14..12=0 6..0=0x33
 aes32dsi      rd rs1 rs2 bs          29..25=0b10101 14..12=0 6..0=0x33
-
diff --git a/rv32_zkne b/extensions/rv32_zkne
index 72bd617..c6ac67d 100644
--- a/rv32_zkne
+++ b/extensions/rv32_zkne
@@ -2,4 +2,3 @@
 
 aes32esmi     rd rs1 rs2 bs          29..25=0b10011 14..12=0 6..0=0x33
 aes32esi      rd rs1 rs2 bs          29..25=0b10001 14..12=0 6..0=0x33
-
diff --git a/rv32_zknh b/extensions/rv32_zknh
index 675bf54..501f665 100644
--- a/rv32_zknh
+++ b/extensions/rv32_zknh
@@ -5,4 +5,3 @@ sha512sig0l   rd rs1 rs2    31..30=1 29..25=0b01010 14..12=0 6..0=0x33
 sha512sig0h   rd rs1 rs2    31..30=1 29..25=0b01110 14..12=0 6..0=0x33
 sha512sig1l   rd rs1 rs2    31..30=1 29..25=0b01011 14..12=0 6..0=0x33
 sha512sig1h   rd rs1 rs2    31..30=1 29..25=0b01111 14..12=0 6..0=0x33
-
diff --git a/rv32_zks b/extensions/rv32_zks
index b1be874..a422a9b 100644
--- a/rv32_zks
+++ b/extensions/rv32_zks
@@ -1,6 +1,5 @@
 #import zbkb
-$pseudo_op rv64_zbp::shfli    zip rd rs1 31..25=4 24..20=15 14..12=1 6..2=4 1..0=3 
-$pseudo_op rv64_zbp::unshfli  unzip rd rs1 31..25=4 24..20=15 14..12=5 6..2=4 1..0=3 
+$pseudo_op rv64_zbp::shfli    zip rd rs1 31..25=4 24..20=15 14..12=1 6..2=4 1..0=3
+$pseudo_op rv64_zbp::unshfli  unzip rd rs1 31..25=4 24..20=15 14..12=5 6..2=4 1..0=3
 $pseudo_op rv64_zbb::rori     rori.rv32 rd rs1   31..25=0x30 shamtw 14..12=5 6..2=0x04 1..0=3
 $pseudo_op rv64_zbp::grevi    rev8.rv32 rd rs1   31..20=0x698 14..12=5 6..0=0x13
-
diff --git a/rv64_a b/extensions/rv64_a
index fe208e9..fe208e9 100644
--- a/rv64_a
+++ b/extensions/rv64_a
diff --git a/rv64_c b/extensions/rv64_c
index 7c7494b..7c7494b 100644
--- a/rv64_c
+++ b/extensions/rv64_c
diff --git a/rv64_d b/extensions/rv64_d
index d8c8299..d8c8299 100644
--- a/rv64_d
+++ b/extensions/rv64_d
diff --git a/rv64_f b/extensions/rv64_f
index 787677c..64f02d6 100644
--- a/rv64_f
+++ b/extensions/rv64_f
@@ -4,4 +4,3 @@ fcvt.l.s  rd rs1 24..20=2 31..27=0x18 rm       26..25=0 6..2=0x14 1..0=3
 fcvt.lu.s rd rs1 24..20=3 31..27=0x18 rm       26..25=0 6..2=0x14 1..0=3
 fcvt.s.l  rd rs1 24..20=2 31..27=0x1A rm       26..25=0 6..2=0x14 1..0=3
 fcvt.s.lu rd rs1 24..20=3 31..27=0x1A rm       26..25=0 6..2=0x14 1..0=3
-
diff --git a/rv64_h b/extensions/rv64_h
index 488dcd4..75589e1 100644
--- a/rv64_h
+++ b/extensions/rv64_h
@@ -2,4 +2,3 @@
 hlv.wu           rd rs1 24..20=0x1 31..25=0x34  14..12=4 6..2=0x1C 1..0=3
 hlv.d            rd rs1 24..20=0x0 31..25=0x36  14..12=4 6..2=0x1C 1..0=3
 hsv.d       11..7=0 rs1        rs2 31..25=0x37  14..12=4 6..2=0x1C 1..0=3
-
diff --git a/rv64_i b/extensions/rv64_i
index 3fad043..dea3d38 100644
--- a/rv64_i
+++ b/extensions/rv64_i
@@ -18,3 +18,5 @@ subw    rd rs1 rs2 31..25=32 14..12=0 6..2=0x0E 1..0=3
 sllw    rd rs1 rs2 31..25=0  14..12=1 6..2=0x0E 1..0=3
 srlw    rd rs1 rs2 31..25=0  14..12=5 6..2=0x0E 1..0=3
 sraw    rd rs1 rs2 31..25=32 14..12=5 6..2=0x0E 1..0=3
+
+$pseudo_op rv64_i::addiw sext.w rd rs1 31..20=0 14..12=0 6..2=0x06 1..0=3
diff --git a/rv64_m b/extensions/rv64_m
index cfac0b1..cfac0b1 100644
--- a/rv64_m
+++ b/extensions/rv64_m
diff --git a/rv64_q b/extensions/rv64_q
index 32019aa..571edf1 100644
--- a/rv64_q
+++ b/extensions/rv64_q
@@ -5,4 +5,3 @@ fcvt.lu.q rd rs1 24..20=3 31..27=0x18 rm       26..25=3 6..2=0x14 1..0=3
 
 fcvt.q.l  rd rs1 24..20=2 31..27=0x1A rm       26..25=3 6..2=0x14 1..0=3
 fcvt.q.lu rd rs1 24..20=3 31..27=0x1A rm       26..25=3 6..2=0x14 1..0=3
-
diff --git a/rv64_q_zfa b/extensions/rv64_q_zfa
index be33e4e..be33e4e 100644
--- a/rv64_q_zfa
+++ b/extensions/rv64_q_zfa
diff --git a/rv64_zacas b/extensions/rv64_zacas
index 089fbd6..089fbd6 100644
--- a/rv64_zacas
+++ b/extensions/rv64_zacas
diff --git a/rv64_zba b/extensions/rv64_zba
index 5378e52..3a1186a 100644
--- a/rv64_zba
+++ b/extensions/rv64_zba
@@ -3,3 +3,5 @@ sh1add.uw  rd rs1 rs2 31..25=16 14..12=2 6..2=0x0E 1..0=3
 sh2add.uw  rd rs1 rs2 31..25=16 14..12=4 6..2=0x0E 1..0=3
 sh3add.uw  rd rs1 rs2 31..25=16 14..12=6 6..2=0x0E 1..0=3
 slli.uw    rd rs1 31..26=2 shamtd 14..12=1 6..2=0x06 1..0=3
+
+$pseudo_op rv64_zba::add.uw zext.w rd rs1 31..25=4 24..20=0 14..12=0 6..2=0x0E 1..0=3
diff --git a/rv64_zbb b/extensions/rv64_zbb
index c7e4e8c..653827f 100644
--- a/rv64_zbb
+++ b/extensions/rv64_zbb
@@ -5,5 +5,5 @@ rolw  rd rs1 rs2                            31..25=0x30 14..12=1 6..2=0x0E 1..0=
 rorw  rd rs1 rs2                            31..25=0x30 14..12=5 6..2=0x0E 1..0=3
 roriw rd rs1                                31..25=0x30 shamtw 14..12=5 6..2=0x06 1..0=3
 rori  rd rs1                                31..26=0x18 shamtd 14..12=5 6..2=0x04 1..0=3
-$pseudo_op rv64_zbkb::packw zext.h rd rs1    31..25=0x04 24..20=0 14..12=0x4 6..2=0xE 1..0=0x3 
+$pseudo_op rv64_zbkb::packw zext.h rd rs1    31..25=0x04 24..20=0 14..12=0x4 6..2=0xE 1..0=0x3
 $pseudo_op rv64_zbp::grevi rev8 rd rs1      31..20=0x6B8 14..12=5 6..0=0x13
diff --git a/rv64_zbkb b/extensions/rv64_zbkb
index b5e0606..b5e0606 100644
--- a/rv64_zbkb
+++ b/extensions/rv64_zbkb
diff --git a/rv64_zbs b/extensions/rv64_zbs
index 27e6360..4e15b66 100644
--- a/rv64_zbs
+++ b/extensions/rv64_zbs
@@ -2,4 +2,3 @@ bclri       rd rs1 31..26=0x12 shamtd 14..12=1 6..2=0x04 1..0=3
 bexti       rd rs1 31..26=0x12 shamtd 14..12=5 6..2=0x04 1..0=3
 binvi       rd rs1 31..26=0x1a shamtd 14..12=1 6..2=0x04 1..0=3
 bseti       rd rs1 31..26=0x0a shamtd 14..12=1 6..2=0x04 1..0=3
-
diff --git a/extensions/rv64_zcb b/extensions/rv64_zcb
new file mode 100644
index 0000000..8ce4429
--- /dev/null
+++ b/extensions/rv64_zcb
@@ -0,0 +1,3 @@
+c.zext.w  rd_rs1_p 1..0=1 15..13=4 12..10=7 6..5=3 4..2=4
+
+$pseudo_op rv64_c::c.addiw c.sext.w  rd_rs1_n0 15..13=1 12=0 6..2=0 1..0=1
diff --git a/rv64_zfh b/extensions/rv64_zfh
index 5cc9f25..5cc9f25 100644
--- a/rv64_zfh
+++ b/extensions/rv64_zfh
diff --git a/rv64_zk b/extensions/rv64_zk
index 891f48a..b59326f 100644
--- a/rv64_zk
+++ b/extensions/rv64_zk
@@ -25,4 +25,3 @@ $import rv64_zknh::sha512sum0
 $import rv64_zknh::sha512sum1
 $import rv64_zknh::sha512sig0
 $import rv64_zknh::sha512sig1
-
diff --git a/rv64_zkn b/extensions/rv64_zkn
index 891f48a..b59326f 100644
--- a/rv64_zkn
+++ b/extensions/rv64_zkn
@@ -25,4 +25,3 @@ $import rv64_zknh::sha512sum0
 $import rv64_zknh::sha512sum1
 $import rv64_zknh::sha512sig0
 $import rv64_zknh::sha512sig1
-
diff --git a/rv64_zknd b/extensions/rv64_zknd
index f1507d6..b276658 100644
--- a/rv64_zknd
+++ b/extensions/rv64_zknd
@@ -4,4 +4,3 @@ aes64ds    rd rs1 rs2  31..30=0 29..25=0b11101          14..12=0b000 6..0=0x33
 aes64ks1i  rd rs1 rnum 31..30=0 29..25=0b11000 24=1     14..12=0b001 6..0=0x13
 aes64im    rd rs1      31..30=0 29..25=0b11000 24..20=0b0000 14..12=0b001 6..0=0x13
 aes64ks2   rd rs1 rs2  31..30=1 29..25=0b11111          14..12=0b000 6..0=0x33
-
diff --git a/rv64_zkne b/extensions/rv64_zkne
index 3323b7f..3323b7f 100644
--- a/rv64_zkne
+++ b/extensions/rv64_zkne
diff --git a/rv64_zknh b/extensions/rv64_zknh
index 431a1bc..468d526 100644
--- a/rv64_zknh
+++ b/extensions/rv64_zknh
@@ -3,4 +3,3 @@ sha512sum0 rd rs1  31..30=0 29..25=0b01000 24..20=0b00100 14..12=1 6..0=0x13
 sha512sum1 rd rs1  31..30=0 29..25=0b01000 24..20=0b00101 14..12=1 6..0=0x13
 sha512sig0 rd rs1  31..30=0 29..25=0b01000 24..20=0b00110 14..12=1 6..0=0x13
 sha512sig1 rd rs1  31..30=0 29..25=0b01000 24..20=0b00111 14..12=1 6..0=0x13
-
diff --git a/rv64_zks b/extensions/rv64_zks
index 848a283..848a283 100644
--- a/rv64_zks
+++ b/extensions/rv64_zks
diff --git a/rv_a b/extensions/rv_a
index 1a70e40..1a70e40 100644
--- a/rv_a
+++ b/extensions/rv_a
diff --git a/rv_c b/extensions/rv_c
index 4b571a4..6fda454 100644
--- a/rv_c
+++ b/extensions/rv_c
@@ -26,7 +26,3 @@ c.ebreak                                 1..0=2 15..13=4 12=1 11..2=0
 c.jalr c_rs1_n0                          1..0=2 15..13=4 12=1 6..2=0
 c.add rd_rs1_n0 c_rs2_n0                 1..0=2 15..13=4 12=1
 c.swsp c_rs2 c_uimm8sp_s                 1..0=2 15..13=6
-
-
-
-
diff --git a/rv_c_d b/extensions/rv_c_d
index cd49b44..66d1ad8 100644
--- a/rv_c_d
+++ b/extensions/rv_c_d
@@ -5,4 +5,3 @@ c.fsd rs1_p rs2_p c_uimm8lo c_uimm8hi   1..0=0 15..13=5
 #quadrant 2
 c.fldsp rd c_uimm9sphi c_uimm9splo      1..0=2 15..13=1
 c.fsdsp c_rs2 c_uimm9sp_s               1..0=2 15..13=5
-
diff --git a/unratified/rv_c_zicfiss b/extensions/rv_c_zicfiss
index 83431c6..83431c6 100644
--- a/unratified/rv_c_zicfiss
+++ b/extensions/rv_c_zicfiss
diff --git a/rv_c_zihintntl b/extensions/rv_c_zihintntl
index f31177b..f31177b 100644
--- a/rv_c_zihintntl
+++ b/extensions/rv_c_zihintntl
diff --git a/rv_d b/extensions/rv_d
index 8c3a3d3..94cf863 100644
--- a/rv_d
+++ b/extensions/rv_d
@@ -24,3 +24,8 @@ fcvt.w.d  rd rs1 24..20=0 31..27=0x18 rm       26..25=1 6..2=0x14 1..0=3
 fcvt.wu.d rd rs1 24..20=1 31..27=0x18 rm       26..25=1 6..2=0x14 1..0=3
 fcvt.d.w  rd rs1 24..20=0 31..27=0x1A rm       26..25=1 6..2=0x14 1..0=3
 fcvt.d.wu rd rs1 24..20=1 31..27=0x1A rm       26..25=1 6..2=0x14 1..0=3
+
+#pseudoinstructions
+$pseudo_op rv_d::fsgnj.d  fmv.d  rd rs1 rs2=rs1 31..27=0x04 14..12=0 26..25=1 6..2=0x14 1..0=3
+$pseudo_op rv_d::fsgnjx.d fabs.d rd rs1 rs2=rs1 31..27=0x04 14..12=2 26..25=1 6..2=0x14 1..0=3
+$pseudo_op rv_d::fsgnjn.d fneg.d rd rs1 rs2=rs1 31..27=0x04 14..12=1 26..25=1 6..2=0x14 1..0=3
diff --git a/rv_d_zfa b/extensions/rv_d_zfa
index 7158eef..7158eef 100644
--- a/rv_d_zfa
+++ b/extensions/rv_d_zfa
diff --git a/rv_d_zfh b/extensions/rv_d_zfh
index 80d3765..80d3765 100644
--- a/rv_d_zfh
+++ b/extensions/rv_d_zfh
diff --git a/rv_f b/extensions/rv_f
index d94547b..8f37053 100644
--- a/rv_f
+++ b/extensions/rv_f
@@ -29,3 +29,17 @@ fmv.w.x   rd rs1 24..20=0 31..27=0x1E 14..12=0 26..25=0 6..2=0x14 1..0=3
 $pseudo_op rv_f::fmv.x.w fmv.x.s   rd rs1 24..20=0 31..27=0x1C 14..12=0 26..25=0 6..2=0x14 1..0=3
 $pseudo_op rv_f::fmv.w.x fmv.s.x   rd rs1 24..20=0 31..27=0x1E 14..12=0 26..25=0 6..2=0x14 1..0=3
 
+#pseudoinstructions
+$pseudo_op rv_f::fsgnj.s  fmv.s   rd rs1 rs2=rs1 31..27=0x04 14..12=0 26..25=0 6..2=0x14 1..0=3
+$pseudo_op rv_f::fsgnjx.s fabs.s  rd rs1 rs2=rs1 31..27=0x04 14..12=2 26..25=0 6..2=0x14 1..0=3
+$pseudo_op rv_f::fsgnjn.s fneg.s  rd rs1 rs2=rs1 31..27=0x04 14..12=1 26..25=0 6..2=0x14 1..0=3
+
+#CSRs
+$pseudo_op rv_zicsr::csrrs  frflags    rd 19..15=0 31..20=0x001 14..12=2 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrw  fsflags    rd rs1      31..20=0x001 14..12=1 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrwi fsflagsi   rd zimm5     31..20=0x001 14..12=5 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrs  frrm       rd 19..15=0 31..20=0x002 14..12=2 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrw  fsrm       rd rs1      31..20=0x002 14..12=1 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrwi fsrmi      rd zimm5     31..20=0x002 14..12=5 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrw  fscsr      rd rs1      31..20=0x003 14..12=1 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrs  frcsr      rd 19..15=0 31..20=0x003 14..12=2 6..2=0x1C 1..0=3
diff --git a/rv_f_zfa b/extensions/rv_f_zfa
index 045fc27..045fc27 100644
--- a/rv_f_zfa
+++ b/extensions/rv_f_zfa
diff --git a/rv_h b/extensions/rv_h
index 63b9efc..84a361d 100644
--- a/rv_h
+++ b/extensions/rv_h
@@ -12,4 +12,3 @@ hlvx.wu          rd rs1 24..20=0x3 31..25=0x34  14..12=4 6..2=0x1C 1..0=3
 hsv.b       11..7=0 rs1        rs2 31..25=0x31  14..12=4 6..2=0x1C 1..0=3
 hsv.h       11..7=0 rs1        rs2 31..25=0x33  14..12=4 6..2=0x1C 1..0=3
 hsv.w       11..7=0 rs1        rs2 31..25=0x35  14..12=4 6..2=0x1C 1..0=3
-
diff --git a/rv_i b/extensions/rv_i
index 1cf8b58..6d1fd38 100644
--- a/rv_i
+++ b/extensions/rv_i
@@ -45,3 +45,32 @@ ebreak   31..20=0x001 19..7=0 6..2=0x1C 1..0=3
 $pseudo_op rv_i::ecall scall     11..7=0 19..15=0 31..20=0x000 14..12=0 6..2=0x1C 1..0=3
 $pseudo_op rv_i::ebreak sbreak    11..7=0 19..15=0 31..20=0x001 14..12=0 6..2=0x1C 1..0=3
 
+
+#pseudoinstructions from asm manual
+$pseudo_op rv_i::addi   mv rd rs1 31..20=0 14..12=0 6..2=0x04 1..0=3
+$pseudo_op rv_i::sub    neg rd rs1 31..25=32 24..20=0x0 14..12=0 6..2=0x0C 1..0=3
+$pseudo_op rv_i::addi   nop 31..20=0 19..15=0 14..12=0 11..7=0 6..2=0x04 1..0=3
+$pseudo_op rv_i::andi   zext.b rd rs1 31..20=0xff 14..12=7 6..2=0x04 1..0=3
+
+$pseudo_op rv_i::jalr ret 31..20=0 19..15=0x01 14..12=0 11..7=0 6..2=0x19 1..0=3
+
+$pseudo_op rv_i::bgeu   bleu bimm12hi rs2 rs1 bimm12lo 14..12=7 6..2=0x18 1..0=3
+$pseudo_op rv_i::bltu   bgtu bimm12hi rs2 rs1 bimm12lo 14..12=6 6..2=0x18 1..0=3
+$pseudo_op rv_i::bge    ble  bimm12hi rs2 rs1 bimm12lo 14..12=5 6..2=0x18 1..0=3
+$pseudo_op rv_i::bge    bgez bimm12hi rs1     bimm12lo 24..20=0x0 14..12=5 6..2=0x18 1..0=3
+$pseudo_op rv_i::bge    blez bimm12hi rs2     bimm12lo 19..15=0x0 14..12=5 6..2=0x18 1..0=3
+$pseudo_op rv_i::blt    bgt  bimm12hi rs2 rs1 bimm12lo 14..12=4 6..2=0x18 1..0=3
+$pseudo_op rv_i::blt    bgtz bimm12hi rs2     bimm12lo 19..15=0x0 14..12=4 6..2=0x18 1..0=3
+$pseudo_op rv_i::blt    bltz bimm12hi rs1     bimm12lo 24..20=0x0 14..12=4 6..2=0x18 1..0=3
+$pseudo_op rv_i::bne    bnez bimm12hi rs1     bimm12lo 24..20=0x0 14..12=1 6..2=0x18 1..0=3
+$pseudo_op rv_i::beq    beqz bimm12hi rs1     bimm12lo 24..20=0x0 14..12=0 6..2=0x18 1..0=3
+
+$pseudo_op rv_i::sltiu  seqz rd rs1 31..20=1 14..12=3 6..2=0x04 1..0=3
+$pseudo_op rv_i::sltu   snez rd rs2 31..25=0 19..15=0x0 14..12=3 6..2=0x0C 1..0=3
+$pseudo_op rv_i::slt    sltz rd rs1 31..25=0 24..20=0x0 14..12=2 6..2=0x0C 1..0=3
+$pseudo_op rv_i::slt    sgtz rd rs2 31..25=0 19..15=0x0 14..12=2 6..2=0x0C 1..0=3
+
+$pseudo_op rv_i::jalr   jalr rs1    31..20=0 14..12=0 11..7=0x01 6..2=0x19 1..0=3
+$pseudo_op rv_i::jalr   jr   rs1    31..20=0 14..12=0 11..7=0x0  6..2=0x19 1..0=3
+$pseudo_op rv_i::jal    jal  jimm20                   11..7=0x01 6..2=0x1b 1..0=3
+$pseudo_op rv_i::jal    j    jimm20                   11..7=0x0  6..2=0x1b 1..0=3
diff --git a/rv_m b/extensions/rv_m
index 51e6786..51e6786 100644
--- a/rv_m
+++ b/extensions/rv_m
diff --git a/rv_q b/extensions/rv_q
index 298ae87..9e02a60 100644
--- a/rv_q
+++ b/extensions/rv_q
@@ -26,3 +26,9 @@ fcvt.w.q  rd rs1 24..20=0 31..27=0x18 rm       26..25=3 6..2=0x14 1..0=3
 fcvt.wu.q rd rs1 24..20=1 31..27=0x18 rm       26..25=3 6..2=0x14 1..0=3
 fcvt.q.w  rd rs1 24..20=0 31..27=0x1A rm       26..25=3 6..2=0x14 1..0=3
 fcvt.q.wu rd rs1 24..20=1 31..27=0x1A rm       26..25=3 6..2=0x14 1..0=3
+
+
+#pseudoinstructions
+$pseudo_op rv_q::fsgnj.q  fmv.q  rd rs1 rs2=rs1 31..27=0x04 14..12=0 26..25=3 6..2=0x14 1..0=3
+$pseudo_op rv_q::fsgnjx.q fabs.q rd rs1 rs2=rs1 31..27=0x04 14..12=2 26..25=3 6..2=0x14 1..0=3
+$pseudo_op rv_q::fsgnjn.q fneg.q rd rs1 rs2=rs1 31..27=0x04 14..12=1 26..25=3 6..2=0x14 1..0=3
diff --git a/rv_q_zfa b/extensions/rv_q_zfa
index da45f9d..da45f9d 100644
--- a/rv_q_zfa
+++ b/extensions/rv_q_zfa
diff --git a/rv_q_zfh b/extensions/rv_q_zfh
index 24548d5..24548d5 100644
--- a/rv_q_zfh
+++ b/extensions/rv_q_zfh
diff --git a/rv_s b/extensions/rv_s
index 25f3532..8f871a2 100644
--- a/rv_s
+++ b/extensions/rv_s
@@ -1,3 +1,2 @@
 sfence.vma 11..7=0 rs1 rs2 31..25=0x09  14..12=0 6..2=0x1C 1..0=3
 sret      11..7=0 19..15=0 31..20=0x102 14..12=0 6..2=0x1C 1..0=3
-
diff --git a/rv_sdext b/extensions/rv_sdext
index ea1c3ef..ea1c3ef 100644
--- a/rv_sdext
+++ b/extensions/rv_sdext
diff --git a/unratified/rv_smrnmi b/extensions/rv_smrnmi
index db714a3..db714a3 100644
--- a/unratified/rv_smrnmi
+++ b/extensions/rv_smrnmi
diff --git a/unratified/rv_smdbltrp b/extensions/rv_ssctr
index 49dd9a0..49dd9a0 100644
--- a/unratified/rv_smdbltrp
+++ b/extensions/rv_ssctr
diff --git a/rv_svinval b/extensions/rv_svinval
index b35ae7c..cb74e35 100644
--- a/rv_svinval
+++ b/extensions/rv_svinval
@@ -2,6 +2,3 @@
 sinval.vma      11..7=0 rs1        rs2        31..25=0x0b  14..12=0 6..2=0x1C 1..0=3
 sfence.w.inval  11..7=0 19..15=0x0 24..20=0x0 31..25=0x0c  14..12=0 6..2=0x1C 1..0=3
 sfence.inval.ir 11..7=0 19..15=0x0 24..20=0x1 31..25=0x0c  14..12=0 6..2=0x1C 1..0=3
-hinval.vvma     11..7=0 rs1        rs2        31..25=0x13  14..12=0 6..2=0x1C 1..0=3
-hinval.gvma     11..7=0 rs1        rs2        31..25=0x33  14..12=0 6..2=0x1C 1..0=3
-
diff --git a/extensions/rv_svinval_h b/extensions/rv_svinval_h
new file mode 100644
index 0000000..07085f9
--- /dev/null
+++ b/extensions/rv_svinval_h
@@ -0,0 +1,3 @@
+# Svinval
+hinval.vvma     11..7=0 rs1        rs2        31..25=0x13  14..12=0 6..2=0x1C 1..0=3
+hinval.gvma     11..7=0 rs1        rs2        31..25=0x33  14..12=0 6..2=0x1C 1..0=3
diff --git a/rv_system b/extensions/rv_system
index 24ab8fb..f21aa34 100644
--- a/rv_system
+++ b/extensions/rv_system
@@ -1,4 +1,3 @@
 # SYSTEM
 mret      11..7=0 19..15=0 31..20=0x302 14..12=0 6..2=0x1C 1..0=3
 wfi       11..7=0 19..15=0 31..20=0x105 14..12=0 6..2=0x1C 1..0=3
-
diff --git a/rv_v b/extensions/rv_v
index 960336a..b40b860 100644
--- a/rv_v
+++ b/extensions/rv_v
@@ -8,7 +8,7 @@
 
 # configuration setting
 # https://github.com/riscv/riscv-v-spec/blob/master/vcfg-format.adoc
-vsetivli     31=1 30=1 zimm10    zimm 14..12=0x7 rd 6..0=0x57
+vsetivli     31=1 30=1 zimm10    zimm5 14..12=0x7 rd 6..0=0x57
 vsetvli      31=0 zimm11          rs1 14..12=0x7 rd 6..0=0x57
 vsetvl       31=1 30..25=0x0 rs2  rs1 14..12=0x7 rd 6..0=0x57
 
diff --git a/rv_v_aliases b/extensions/rv_v_aliases
index 0f7aaa6..0f7aaa6 100644
--- a/rv_v_aliases
+++ b/extensions/rv_v_aliases
diff --git a/rv_zabha b/extensions/rv_zabha
index b3a6f79..b3a6f79 100644
--- a/rv_zabha
+++ b/extensions/rv_zabha
diff --git a/rv_zacas b/extensions/rv_zacas
index f9bb1e8..f9bb1e8 100644
--- a/rv_zacas
+++ b/extensions/rv_zacas
diff --git a/rv_zawrs b/extensions/rv_zawrs
index 00c260d..00c260d 100644
--- a/rv_zawrs
+++ b/extensions/rv_zawrs
diff --git a/rv_zba b/extensions/rv_zba
index 65eb420..65eb420 100644
--- a/rv_zba
+++ b/extensions/rv_zba
diff --git a/rv_zbb b/extensions/rv_zbb
index 9f384f6..9f384f6 100644
--- a/rv_zbb
+++ b/extensions/rv_zbb
diff --git a/rv_zbc b/extensions/rv_zbc
index c2494bd..821518b 100644
--- a/rv_zbc
+++ b/extensions/rv_zbc
@@ -1,4 +1,3 @@
 clmul      rd rs1 rs2 31..25=5 14..12=1 6..2=0x0C 1..0=3
 clmulr     rd rs1 rs2 31..25=5 14..12=2 6..2=0x0C 1..0=3
 clmulh     rd rs1 rs2 31..25=5 14..12=3 6..2=0x0C 1..0=3
-
diff --git a/rv_zbkb b/extensions/rv_zbkb
index d3f2f8d..d3f2f8d 100644
--- a/rv_zbkb
+++ b/extensions/rv_zbkb
diff --git a/rv_zbkc b/extensions/rv_zbkc
index b82588f..b82588f 100644
--- a/rv_zbkc
+++ b/extensions/rv_zbkc
diff --git a/rv_zbkx b/extensions/rv_zbkx
index 12bc0b4..12bc0b4 100644
--- a/rv_zbkx
+++ b/extensions/rv_zbkx
diff --git a/rv_zbs b/extensions/rv_zbs
index 1949072..3dd77eb 100644
--- a/rv_zbs
+++ b/extensions/rv_zbs
@@ -2,4 +2,3 @@ bclr rd rs1 rs2 31..25=0x24 14..12=1 6..2=0x0C 1..0=3
 bext rd rs1 rs2 31..25=36 14..12=5 6..2=0x0C 1..0=3
 binv rd rs1 rs2 31..25=52 14..12=1 6..2=0x0C 1..0=3
 bset rd rs1 rs2 31..25=20 14..12=1 6..2=0x0C 1..0=3
-
diff --git a/rv_zcb b/extensions/rv_zcb
index 2e65437..2e65437 100644
--- a/rv_zcb
+++ b/extensions/rv_zcb
diff --git a/rv_zcmop b/extensions/rv_zcmop
index 742a3d8..742a3d8 100644
--- a/rv_zcmop
+++ b/extensions/rv_zcmop
diff --git a/rv_zcmp b/extensions/rv_zcmp
index c72d1de..c72d1de 100644
--- a/rv_zcmp
+++ b/extensions/rv_zcmp
diff --git a/rv_zcmt b/extensions/rv_zcmt
index fab3dc9..fab3dc9 100644
--- a/rv_zcmt
+++ b/extensions/rv_zcmt
diff --git a/unratified/rv_zfbfmin b/extensions/rv_zfbfmin
index 003c7b9..003c7b9 100644
--- a/unratified/rv_zfbfmin
+++ b/extensions/rv_zfbfmin
diff --git a/rv_zfh b/extensions/rv_zfh
index 532dde5..c2767fd 100644
--- a/rv_zfh
+++ b/extensions/rv_zfh
@@ -28,3 +28,7 @@ fcvt.h.w  rd rs1 24..20=0 31..27=0x1A rm       26..25=2 6..2=0x14 1..0=3
 fcvt.h.wu rd rs1 24..20=1 31..27=0x1A rm       26..25=2 6..2=0x14 1..0=3
 fmv.h.x   rd rs1 24..20=0 31..27=0x1E 14..12=0 26..25=2 6..2=0x14 1..0=3
 
+#pseudoinstructions
+$pseudo_op rv_zfh::fsgnj.h  fmv.h  rd rs1 rs2=rs1 31..27=0x04 14..12=0 26..25=2 6..2=0x14 1..0=3
+$pseudo_op rv_zfh::fsgnjx.h fabs.h rd rs1 rs2=rs1 31..27=0x04 14..12=2 26..25=2 6..2=0x14 1..0=3
+$pseudo_op rv_zfh::fsgnjn.h fneg.h rd rs1 rs2=rs1 31..27=0x04 14..12=1 26..25=2 6..2=0x14 1..0=3
diff --git a/rv_zfh_zfa b/extensions/rv_zfh_zfa
index f92d7a9..f92d7a9 100644
--- a/rv_zfh_zfa
+++ b/extensions/rv_zfh_zfa
diff --git a/rv_zicbo b/extensions/rv_zicbo
index 65a4567..65a4567 100644
--- a/rv_zicbo
+++ b/extensions/rv_zicbo
diff --git a/unratified/rv_zicfilp b/extensions/rv_zicfilp
index 1ef4844..1ef4844 100644
--- a/unratified/rv_zicfilp
+++ b/extensions/rv_zicfilp
diff --git a/unratified/rv_zicfiss b/extensions/rv_zicfiss
index e46b64d..7e4fa47 100644
--- a/unratified/rv_zicfiss
+++ b/extensions/rv_zicfiss
@@ -11,4 +11,3 @@ $pseudo_op rv_zimop::mop.r.N  ssrdp rd_n0 30=1 27=1 26=1 21=0 20=0 31=1 29..28=0
 # sspush x1/x5 -> mop.rr.7 rd=x0, rs2=x1/x5, rs1=x0
 $pseudo_op rv_zimop::mop.rr.N sspush.x1   30=1 27=1 26=1 31=1 29..28=0 25=1 24..20=1      19..15=0 14..12=4 11..7=0 6..2=0x1C 1..0=3
 $pseudo_op rv_zimop::mop.rr.N sspush.x5   30=1 27=1 26=1 31=1 29..28=0 25=1 24..20=5      19..15=0 14..12=4 11..7=0 6..2=0x1C 1..0=3
-
diff --git a/extensions/rv_zicntr b/extensions/rv_zicntr
new file mode 100644
index 0000000..bedae43
--- /dev/null
+++ b/extensions/rv_zicntr
@@ -0,0 +1,4 @@
+#rv_zicntr instructions
+$pseudo_op rv_zicsr::csrrs  rdcycle    rd 19..15=0 31..20=0xC00 14..12=2 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrs  rdtime     rd 19..15=0 31..20=0xC01 14..12=2 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrs  rdinstret  rd 19..15=0 31..20=0xC02 14..12=2 6..2=0x1C 1..0=3
diff --git a/rv_zicond b/extensions/rv_zicond
index 88e67bf..1676f05 100644
--- a/rv_zicond
+++ b/extensions/rv_zicond
@@ -1,3 +1,2 @@
 czero.eqz   rd rs1 rs2 31..25=7 14..12=5 6..2=0x0C 1..0=3
 czero.nez   rd rs1 rs2 31..25=7 14..12=7 6..2=0x0C 1..0=3
-
diff --git a/extensions/rv_zicsr b/extensions/rv_zicsr
new file mode 100644
index 0000000..9541556
--- /dev/null
+++ b/extensions/rv_zicsr
@@ -0,0 +1,15 @@
+csrrw     rd rs1 csr        14..12=1 6..2=0x1C 1..0=3
+csrrs     rd rs1 csr        14..12=2 6..2=0x1C 1..0=3
+csrrc     rd rs1 csr        14..12=3 6..2=0x1C 1..0=3
+csrrwi    rd csr zimm5       14..12=5 6..2=0x1C 1..0=3
+csrrsi    rd csr zimm5       14..12=6 6..2=0x1C 1..0=3
+csrrci    rd csr zimm5       14..12=7 6..2=0x1C 1..0=3
+
+#pseudoinstructions
+$pseudo_op rv_zicsr::csrrs csrr     rd csr      19..15=0x0 14..12=2 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrw csrw     rs1 csr     14..12=1 11..7=0x0 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrs csrs     rs1 csr     14..12=2 11..7=0x0 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrc csrc     rs1 csr     14..12=3 11..7=0x0 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrwi csrwi   csr zimm5    14..12=5 11..7=0x0 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrsi csrsi   csr zimm5    14..12=6 11..7=0x0 6..2=0x1C 1..0=3
+$pseudo_op rv_zicsr::csrrci csrci   csr zimm5    14..12=7 11..7=0x0 6..2=0x1C 1..0=3
diff --git a/rv_zifencei b/extensions/rv_zifencei
index 8f9ec85..5f6fd7e 100644
--- a/rv_zifencei
+++ b/extensions/rv_zifencei
@@ -1,2 +1 @@
 fence.i     imm12                       rs1 14..12=1 rd 6..2=0x03 1..0=3
-
diff --git a/rv_zihintntl b/extensions/rv_zihintntl
index 56de9ea..56de9ea 100644
--- a/rv_zihintntl
+++ b/extensions/rv_zihintntl
diff --git a/rv_zimop b/extensions/rv_zimop
index c1dcea0..c1dcea0 100644
--- a/rv_zimop
+++ b/extensions/rv_zimop
diff --git a/rv_zk b/extensions/rv_zk
index dc60ee5..dc60ee5 100644
--- a/rv_zk
+++ b/extensions/rv_zk
diff --git a/rv_zkn b/extensions/rv_zkn
index dc60ee5..dc60ee5 100644
--- a/rv_zkn
+++ b/extensions/rv_zkn
diff --git a/rv_zknh b/extensions/rv_zknh
index 2079628..2079628 100644
--- a/rv_zknh
+++ b/extensions/rv_zknh
diff --git a/rv_zks b/extensions/rv_zks
index 0a57115..c78c215 100644
--- a/rv_zks
+++ b/extensions/rv_zks
@@ -23,4 +23,3 @@ $import rv_zksed::sm4ks
 # Scalar SM3 - RV32, RV64
 $import rv_zksh::sm3p0
 $import rv_zksh::sm3p1
-
diff --git a/rv_zksed b/extensions/rv_zksed
index 92e17c5..7975b5d 100644
--- a/rv_zksed
+++ b/extensions/rv_zksed
@@ -1,4 +1,3 @@
 # Scalar SM4 - RV32, RV64
 sm4ed         rd rs1 rs2 bs 29..25=0b11000 14..12=0 6..0=0x33
 sm4ks         rd rs1 rs2 bs 29..25=0b11010 14..12=0 6..0=0x33
-
diff --git a/rv_zksh b/extensions/rv_zksh
index f21eaa8..24e5f70 100644
--- a/rv_zksh
+++ b/extensions/rv_zksh
@@ -1,4 +1,3 @@
 # Scalar SM3 - RV32, RV64
 sm3p0         rd rs1 31..30=0 29..25=0b01000 24..20=0b01000 14..12=1 6..0=0x13
 sm3p1         rd rs1 31..30=0 29..25=0b01000 24..20=0b01001 14..12=1 6..0=0x13
-
diff --git a/rv_zvbb b/extensions/rv_zvbb
index dc48ee2..dc48ee2 100644
--- a/rv_zvbb
+++ b/extensions/rv_zvbb
diff --git a/rv_zvbc b/extensions/rv_zvbc
index 95bf431..95bf431 100644
--- a/rv_zvbc
+++ b/extensions/rv_zvbc
diff --git a/unratified/rv_zvfbfmin b/extensions/rv_zvfbfmin
index 8908959..8908959 100644
--- a/unratified/rv_zvfbfmin
+++ b/extensions/rv_zvfbfmin
diff --git a/unratified/rv_zvfbfwma b/extensions/rv_zvfbfwma
index 27b4ae6..27b4ae6 100644
--- a/unratified/rv_zvfbfwma
+++ b/extensions/rv_zvfbfwma
diff --git a/rv_zvkg b/extensions/rv_zvkg
index ed7bf32..0b99b5b 100644
--- a/rv_zvkg
+++ b/extensions/rv_zvkg
@@ -5,4 +5,3 @@ vgmul.vv 31..26=0x28 25=1 vs2 19..15=0x11 14..12=0x2 vd 6..0=0x77
 
 # Vector Add-Multiply over GHASH Galois-Field
 vghsh.vv 31..26=0x2C 25=1 vs2 vs1 14..12=0x2 vd 6..0=0x77
-
diff --git a/rv_zvkn b/extensions/rv_zvkn
index 5a17e6d..5a17e6d 100644
--- a/rv_zvkn
+++ b/extensions/rv_zvkn
diff --git a/rv_zvkned b/extensions/rv_zvkned
index 572b465..572b465 100644
--- a/rv_zvkned
+++ b/extensions/rv_zvkned
diff --git a/rv_zvknha b/extensions/rv_zvknha
index a09a36c..a09a36c 100644
--- a/rv_zvknha
+++ b/extensions/rv_zvknha
diff --git a/rv_zvknhb b/extensions/rv_zvknhb
index c0b0d8f..c0b0d8f 100644
--- a/rv_zvknhb
+++ b/extensions/rv_zvknhb
diff --git a/rv_zvks b/extensions/rv_zvks
index b5448bf..b5448bf 100644
--- a/rv_zvks
+++ b/extensions/rv_zvks
diff --git a/rv_zvksed b/extensions/rv_zvksed
index b0b3037..b0b3037 100644
--- a/rv_zvksed
+++ b/extensions/rv_zvksed
diff --git a/rv_zvksh b/extensions/rv_zvksh
index 2dc6f6c..2dc6f6c 100644
--- a/rv_zvksh
+++ b/extensions/rv_zvksh
diff --git a/unratified/rv64_zbp b/extensions/unratified/rv64_zbp
index 98d0b2e..98d0b2e 100644
--- a/unratified/rv64_zbp
+++ b/extensions/unratified/rv64_zbp
diff --git a/unratified/rv_zalasr b/extensions/unratified/rv_zalasr
index 43af470..43af470 100644
--- a/unratified/rv_zalasr
+++ b/extensions/unratified/rv_zalasr
diff --git a/unratified/rv_zbp b/extensions/unratified/rv_zbp
index bd95dd2..bd95dd2 100644
--- a/unratified/rv_zbp
+++ b/extensions/unratified/rv_zbp
diff --git a/extensions/unratified/rv_zvfofp8min b/extensions/unratified/rv_zvfofp8min
new file mode 100644
index 0000000..5ea7959
--- /dev/null
+++ b/extensions/unratified/rv_zvfofp8min
@@ -0,0 +1,3 @@
+vfncvtbf16.f.f.q     31..26=0x12 vm vs2 19..15=0x19 14..12=0x1 vd 6..0=0x57
+vfncvtbf16.sat.f.f.q 31..26=0x12 vm vs2 19..15=0x1B 14..12=0x1 vd 6..0=0x57
+vfncvtbf16.sat.f.f.w 31..26=0x12 vm vs2 19..15=0x1F 14..12=0x1 vd 6..0=0x57
diff --git a/extensions/unratified/rv_zvqdotq b/extensions/unratified/rv_zvqdotq
new file mode 100644
index 0000000..94782e4
--- /dev/null
+++ b/extensions/unratified/rv_zvqdotq
@@ -0,0 +1,7 @@
+vqdot.vv         31..26=0x2c vm vs2 vs1 14..12=0x2 vd 6..0=0x57
+vqdot.vx         31..26=0x2c vm vs2 rs1 14..12=0x6 vd 6..0=0x57
+vqdotu.vv        31..26=0x28 vm vs2 vs1 14..12=0x2 vd 6..0=0x57
+vqdotu.vx        31..26=0x28 vm vs2 rs1 14..12=0x6 vd 6..0=0x57
+vqdotsu.vv       31..26=0x2a vm vs2 vs1 14..12=0x2 vd 6..0=0x57
+vqdotsu.vx       31..26=0x2a vm vs2 rs1 14..12=0x6 vd 6..0=0x57
+vqdotus.vx       31..26=0x2e vm vs2 rs1 14..12=0x6 vd 6..0=0x57
diff --git a/go_utils.py b/go_utils.py
new file mode 100644
index 0000000..19a5955
--- /dev/null
+++ b/go_utils.py
@@ -0,0 +1,64 @@
+import logging
+import pprint
+import sys
+
+from constants import csrs
+from shared_utils import InstrDict, signed
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_go(instr_dict: InstrDict):
+
+    args = " ".join(sys.argv)
+    prelude = f"""// Code generated by {args}; DO NOT EDIT."""
+
+    prelude += """
+package riscv
+
+import "cmd/internal/obj"
+
+type inst struct {
+	opcode uint32
+	funct3 uint32
+	rs1    uint32
+	rs2    uint32
+	csr    int64
+	funct7 uint32
+}
+
+func encode(a obj.As) *inst {
+	switch a {
+"""
+
+    csrs_map_str = """  }
+	return nil
+}
+
+var csrs = map[uint16]string {
+"""
+
+    endoffile = """}
+"""
+
+    instr_str = ""
+    for i in instr_dict:
+        enc_match = int(instr_dict[i]["match"], 0)
+        opcode = (enc_match >> 0) & ((1 << 7) - 1)
+        funct3 = (enc_match >> 12) & ((1 << 3) - 1)
+        rs1 = (enc_match >> 15) & ((1 << 5) - 1)
+        rs2 = (enc_match >> 20) & ((1 << 5) - 1)
+        csr = (enc_match >> 20) & ((1 << 12) - 1)
+        funct7 = (enc_match >> 25) & ((1 << 7) - 1)
+        instr_str += f"""  case A{i.upper().replace("_","")}:
+    return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }}
+"""
+    for num, name in sorted(csrs, key=lambda row: row[0]):
+        csrs_map_str += f'{hex(num)} : "{name.upper()}",\n'
+
+    with open("inst.go", "w", encoding="utf-8") as file:
+        file.write(prelude)
+        file.write(instr_str)
+        file.write(csrs_map_str)
+        file.write(endoffile)
diff --git a/latex_utils.py b/latex_utils.py
new file mode 100644
index 0000000..e421c6b
--- /dev/null
+++ b/latex_utils.py
@@ -0,0 +1,450 @@
+import logging
+import pprint
+from typing import TextIO
+
+from constants import latex_fixed_fields, latex_inst_type, latex_mapping
+from shared_utils import InstrDict, arg_lut, create_inst_dict
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_priv_latex_table():
+    type_list = ["R-type", "I-type"]
+    system_instr = ["_h", "_s", "_system", "_svinval", "64_h", "_svinval_h"]
+    dataset_list = [(system_instr, "Trap-Return Instructions", ["sret", "mret"], False)]
+    dataset_list.append(
+        (system_instr, "Interrupt-Management Instructions", ["wfi"], False)
+    )
+    dataset_list.append(
+        (
+            system_instr,
+            "Supervisor Memory-Management Instructions",
+            ["sfence_vma"],
+            False,
+        )
+    )
+    dataset_list.append(
+        (
+            system_instr,
+            "Hypervisor Memory-Management Instructions",
+            ["hfence_vvma", "hfence_gvma"],
+            False,
+        )
+    )
+    dataset_list.append(
+        (
+            system_instr,
+            "Hypervisor Virtual-Machine Load and Store Instructions",
+            [
+                "hlv_b",
+                "hlv_bu",
+                "hlv_h",
+                "hlv_hu",
+                "hlv_w",
+                "hlvx_hu",
+                "hlvx_wu",
+                "hsv_b",
+                "hsv_h",
+                "hsv_w",
+            ],
+            False,
+        )
+    )
+    dataset_list.append(
+        (
+            system_instr,
+            "Hypervisor Virtual-Machine Load and Store Instructions, RV64 only",
+            ["hlv_wu", "hlv_d", "hsv_d"],
+            False,
+        )
+    )
+    dataset_list.append(
+        (
+            system_instr,
+            "Svinval Memory-Management Instructions",
+            [
+                "sinval_vma",
+                "sfence_w_inval",
+                "sfence_inval_ir",
+                "hinval_vvma",
+                "hinval_gvma",
+            ],
+            False,
+        )
+    )
+    caption = "\\caption{RISC-V Privileged Instructions}"
+    with open("priv-instr-table.tex", "w", encoding="utf-8") as latex_file:
+        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+
+def make_latex_table():
+    """
+    This function is meant to create the instr-table.tex that is meant to be used
+    by the riscv-isa-manual. This function basically creates a single latex
+    file of multiple tables with each table limited to a single page. Only the
+    last table is assigned a latex-caption.
+
+    For each table we assign a type-list which capture the different instruction
+    types (R, I, B, etc) that will be required for the table. Then we select the
+    list of extensions ('_i, '32_i', etc) whose instructions are required to
+    populate the table. For each extension or collection of extension we can
+    assign Title, such that in the end they appear as subheadings within
+    the table (note these are inlined headings and not captions of the table).
+
+    All of the above information is collected/created and sent to
+    make_ext_latex_table function to dump out the latex contents into a file.
+
+    The last table only has to be given a caption - as per the policy of the
+    riscv-isa-manual.
+    """
+    # open the file and use it as a pointer for all further dumps
+    with open("instr-table.tex", "w", encoding="utf-8") as latex_file:
+
+        # create the rv32i table first. Here we set the caption to empty. We use the
+        # files rv_i and rv32_i to capture instructions relevant for rv32i
+        # configuration. The dataset is a list of 4-element tuples :
+        # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions
+        # is empty then it indicates that all instructions of all the extensions
+        # in list_of_extensions need to be dumped. If not empty, then only the
+        # instructions listed in list_of_instructions will be dumped into latex.
+        caption = ""
+        type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"]
+        dataset_list: list[tuple[list[str], str, list[str], bool]] = [
+            (["_i", "32_i"], "RV32I Base Instruction Set", [], False)
+        ]
+        dataset_list.append((["_i"], "", ["fence_tso", "pause"], True))
+        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+        type_list = ["R-type", "I-type", "S-type"]
+        dataset_list = [
+            (["64_i"], "RV64I Base Instruction Set (in addition to RV32I)", [], False)
+        ]
+        dataset_list.append(
+            (["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False)
+        )
+        dataset_list.append(
+            (["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False)
+        )
+        dataset_list.append((["_m", "32_m"], "RV32M Standard Extension", [], False))
+        dataset_list.append(
+            (["64_m"], "RV64M Standard Extension (in addition to RV32M)", [], False)
+        )
+        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+        type_list = ["R-type"]
+        dataset_list = [(["_a"], "RV32A Standard Extension", [], False)]
+        dataset_list.append(
+            (["64_a"], "RV64A Standard Extension (in addition to RV32A)", [], False)
+        )
+        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+        type_list = ["R-type", "R4-type", "I-type", "S-type"]
+        dataset_list = [(["_f"], "RV32F Standard Extension", [], False)]
+        dataset_list.append(
+            (["64_f"], "RV64F Standard Extension (in addition to RV32F)", [], False)
+        )
+        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+        type_list = ["R-type", "R4-type", "I-type", "S-type"]
+        dataset_list = [(["_d"], "RV32D Standard Extension", [], False)]
+        dataset_list.append(
+            (["64_d"], "RV64D Standard Extension (in addition to RV32D)", [], False)
+        )
+        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+        type_list = ["R-type", "R4-type", "I-type", "S-type"]
+        dataset_list = [(["_q"], "RV32Q Standard Extension", [], False)]
+        dataset_list.append(
+            (["64_q"], "RV64Q Standard Extension (in addition to RV32Q)", [], False)
+        )
+        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+        caption = "\\caption{Instruction listing for RISC-V}"
+        type_list = ["R-type", "R4-type", "I-type", "S-type"]
+        dataset_list = [
+            (["_zfh", "_d_zfh", "_q_zfh"], "RV32Zfh Standard Extension", [], False)
+        ]
+        dataset_list.append(
+            (
+                ["64_zfh"],
+                "RV64Zfh Standard Extension (in addition to RV32Zfh)",
+                [],
+                False,
+            )
+        )
+        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+        ## The following is a demo to show that Compressed instructions can also be
+        # dumped in the same manner as above
+
+        # type_list = ['']
+        # dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])]
+        # dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', []))
+        # make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption)
+
+
+def make_ext_latex_table(
+    type_list: "list[str]",
+    dataset: "list[tuple[list[str], str, list[str], bool]]",
+    latex_file: TextIO,
+    ilen: int,
+    caption: str,
+):
+    """
+    For a given collection of extensions this function dumps out a complete
+    latex table which includes the encodings of the instructions.
+
+    The ilen input indicates the length of the instruction for which the table
+    is created.
+
+    The caption input is used to create the latex-table caption.
+
+    The type_list input is a list of instruction types (R, I, B, etc) that are
+    treated as header for each table. Each table will have its own requirements
+    and type_list must include all the instruction-types that the table needs.
+    Note, all elements of this list must be present in the latex_inst_type
+    dictionary defined in constants.py
+
+    The latex_file is a file pointer to which the latex-table will be dumped
+
+    The dataset is a list of 4-element tuples containing:
+        (list_of_extensions, title, list_of_instructions, include_pseudo_ops)
+    The list_of_extensions must contain all the set of extensions whose
+    instructions must be populated under a given title. If list_of_instructions
+    is not empty, then only those instructions mentioned in list_of_instructions
+    present in the extension will be dumped into the latex-table, other
+    instructions will be ignored.
+
+    Once the above inputs are received, the function first creates table entries
+    for the instruction types. To simplify things, we maintain a dictionary
+    called latex_inst_type in constants.py which is created in the same way the
+    instruction dictionary is created. This allows us to re-use the same logic
+    to create the instruction types table as well
+
+    Once the header is created, we then parse through every entry in the
+    dataset. For each dataset entry we use the create_inst_dict function to
+    create an exhaustive list of instructions associated with the respective
+    collection of the extension of that dataset. Then we apply the instruction
+    filter, if any, indicated by the list_of_instructions of that dataset.
+    Thereon, for each instruction we create a latex table entry.
+
+    Latex table specification for ilen sized instructions:
+        Each table is created with ilen+1 columns - ilen columns for each bit of the
+        instruction and one column to hold the name of the instruction.
+
+        For each argument of an instruction we use the arg_lut from constants.py
+        to identify its position in the encoding, and thus create a multicolumn
+        entry with the name of the argument as the data. For hardcoded bits, we
+        do the same where we capture a string of continuous 1s and 0s, identify
+        the position and assign the same string as the data of the
+        multicolumn entry in the table.
+
+    """
+    column_size = "".join(["p{0.002in}"] * (ilen + 1))
+
+    type_entries = (
+        """
+    \\multicolumn{3}{l}{31} &
+    \\multicolumn{2}{r}{27} &
+    \\multicolumn{1}{c}{26} &
+    \\multicolumn{1}{r}{25} &
+    \\multicolumn{3}{l}{24} &
+    \\multicolumn{2}{r}{20} &
+    \\multicolumn{3}{l}{19} &
+    \\multicolumn{2}{r}{15} &
+    \\multicolumn{2}{l}{14} &
+    \\multicolumn{1}{r}{12} &
+    \\multicolumn{4}{l}{11} &
+    \\multicolumn{1}{r}{7} &
+    \\multicolumn{6}{l}{6} &
+    \\multicolumn{1}{r}{0} \\\\
+    \\cline{2-33}\n&\n\n
+"""
+        if ilen == 32
+        else """
+    \\multicolumn{1}{c}{15} &
+    \\multicolumn{1}{c}{14} &
+    \\multicolumn{1}{c}{13} &
+    \\multicolumn{1}{c}{12} &
+    \\multicolumn{1}{c}{11} &
+    \\multicolumn{1}{c}{10} &
+    \\multicolumn{1}{c}{9} &
+    \\multicolumn{1}{c}{8} &
+    \\multicolumn{1}{c}{7} &
+    \\multicolumn{1}{c}{6} &
+    \\multicolumn{1}{c}{5} &
+    \\multicolumn{1}{c}{4} &
+    \\multicolumn{1}{c}{3} &
+    \\multicolumn{1}{c}{2} &
+    \\multicolumn{1}{c}{1} &
+    \\multicolumn{1}{c}{0} \\\\
+    \\cline{2-17}\n&\n\n
+"""
+    )
+
+    # depending on the type_list input we create a subset dictionary of
+    # latex_inst_type dictionary present in constants.py
+    type_dict = {
+        key: value for key, value in latex_inst_type.items() if key in type_list
+    }
+
+    # iterate over each instruction type and create a table entry
+    for t in type_dict:
+        fields: list[tuple[int, int, str]] = []
+
+        # first capture all "arguments" of the type (funct3, funct7, rd, etc)
+        # and capture their positions using arg_lut.
+        for f in type_dict[t]["variable_fields"]:
+            (msb, lsb) = arg_lut[f]
+            name = f if f not in latex_mapping else latex_mapping[f]
+            fields.append((msb, lsb, name))
+
+        # iterate through the ilen bits, starting from the msb, and assign
+        # argument names to the relevant portions of the instructions. This
+        # information is stored as a 3-element tuple containing the msb, lsb
+        # position of the argument and the name of the argument.
+        msb = ilen - 1
+        y = ""
+        for r in range(0, ilen):
+            if y != "":
+                fields.append((msb, ilen - 1 - r + 1, y))
+                y = ""
+            msb = ilen - 1 - r - 1
+            if r == 31:
+                if y != "":
+                    fields.append((msb, 0, y))
+                y = ""
+
+        # sort the arguments in decreasing order of msb position
+        fields.sort(key=lambda y: y[0], reverse=True)
+
+        # for each argument/string of 1s or 0s, create a multicolumn latex table
+        # entry
+        entry = ""
+        for r, (msb, lsb, name) in enumerate(fields):
+            if r == len(fields) - 1:
+                entry += (
+                    f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n"
+                )
+            elif r == 0:
+                entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n"
+            else:
+                entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n"
+        entry += f"\\cline{{2-{ilen+1}}}\n&\n\n"
+        type_entries += entry
+
+    # for each entry in the dataset create a table
+    content = ""
+    for ext_list, title, filter_list, include_pseudo in dataset:
+        instr_dict: InstrDict = {}
+
+        # for all extensions listed in ext_list, create a dictionary of
+        # instructions associated with those extensions.
+        for e in ext_list:
+            instr_dict.update(create_inst_dict(["rv" + e], include_pseudo))
+
+        # if filter_list is not empty then use that as the official set of
+        # instructions that need to be dumped into the latex table
+        inst_list = list(instr_dict.keys()) if not filter_list else filter_list
+
+        # for each instruction create a latex table entry just like how we did
+        # above with the instruction-type table.
+        instr_entries = ""
+        for inst in inst_list:
+            if inst not in instr_dict:
+                logging.error(
+                    f"in make_ext_latex_table: Instruction: {inst} not found in instr_dict"
+                )
+                raise SystemExit(1)
+            fields = []
+
+            # only if the argument is available in arg_lut we consume it, else
+            # throw error.
+            for f in instr_dict[inst]["variable_fields"]:
+                if f not in arg_lut:
+                    logging.error(
+                        f"Found variable {f} in instruction {inst} whose mapping is not available"
+                    )
+                    raise SystemExit(1)
+                (msb, lsb) = arg_lut[f]
+                name = (
+                    f.replace("_", ".") if f not in latex_mapping else latex_mapping[f]
+                )
+                fields.append((msb, lsb, name))
+
+            msb = ilen - 1
+            y = ""
+            if ilen == 16:
+                encoding = instr_dict[inst]["encoding"][16:]
+            else:
+                encoding = instr_dict[inst]["encoding"]
+            for r in range(0, ilen):
+                x = encoding[r]
+                if (msb, ilen - 1 - r + 1) in latex_fixed_fields:
+                    fields.append((msb, ilen - 1 - r + 1, y))
+                    msb = ilen - 1 - r
+                    y = ""
+                if x == "-":
+                    if y != "":
+                        fields.append((msb, ilen - 1 - r + 1, y))
+                        y = ""
+                    msb = ilen - 1 - r - 1
+                else:
+                    y += str(x)
+                if r == ilen - 1:
+                    if y != "":
+                        fields.append((msb, 0, y))
+                    y = ""
+
+            fields.sort(key=lambda y: y[0], reverse=True)
+            entry = ""
+            for r, (msb, lsb, name) in enumerate(fields):
+                if r == len(fields) - 1:
+                    entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n'
+                elif r == 0:
+                    entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n"
+                else:
+                    entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n"
+            entry += f"\\cline{{2-{ilen+1}}}\n&\n\n"
+            instr_entries += entry
+
+        # once an entry of the dataset is completed we create the whole table
+        # with the title of that dataset as sub-heading (sort-of)
+        if title != "":
+            content += f"""
+
+\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\
+\\multicolumn{{{ilen}}}{{c}}{{\\bfseries {title} }} & \\\\
+\\cline{{2-{ilen+1}}}
+
+            &
+{instr_entries}
+"""
+        else:
+            content += f"""
+{instr_entries}
+"""
+
+    header = f"""
+\\newpage
+
+\\begin{{table}}[p]
+\\begin{{small}}
+\\begin{{center}}
+    \\begin{{tabular}} {{{column_size}l}}
+    {" ".join(['&']*ilen)} \\\\
+
+            &
+{type_entries}
+"""
+    endtable = f"""
+
+\\end{{tabular}}
+\\end{{center}}
+\\end{{small}}
+{caption}
+\\end{{table}}
+"""
+    # dump the contents and return
+    latex_file.write(header + content + endtable)
diff --git a/parse.py b/parse.py
index 4553e06..636ea61 100755
--- a/parse.py
+++ b/parse.py
@@ -1,1039 +1,118 @@
 #!/usr/bin/env python3
 
-from constants import *
-import re
-import glob
-import os
-import pprint
+import argparse
+import json
 import logging
-import collections
-import yaml
-import sys
-
-pp = pprint.PrettyPrinter(indent=2)
-logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s')
-
-def process_enc_line(line, ext):
-    '''
-    This function processes each line of the encoding files (rv*). As part of
-    the processing, the function ensures that the encoding is legal through the
-    following checks::
-
-        - there is no over specification (same bits assigned different values)
-        - there is no under specification (some bits not assigned values)
-        - bit ranges are in the format hi..lo=val where hi > lo
-        - value assigned is representable in the bit range
-        - also checks that the mapping of arguments of an instruction exists in
-          arg_lut.
-
-    If the above checks pass, then the function returns a tuple of the name and
-    a dictionary containing basic information of the instruction which includes:
-        - variables: list of arguments used by the instruction whose mapping
-          exists in the arg_lut dictionary
-        - encoding: this contains the 32-bit encoding of the instruction where
-          '-' is used to represent position of arguments and 1/0 is used to
-          reprsent the static encoding of the bits
-        - extension: this field contains the rv* filename from which this
-          instruction was included
-        - match: hex value representing the bits that need to match to detect
-          this instruction
-        - mask: hex value representin the bits that need to be masked to extract
-          the value required for matching.
-    '''
-    single_dict = {}
-
-    # fill all bits with don't care. we use '-' to represent don't care
-    # TODO: hardcoded for 32-bits.
-    encoding = ['-'] * 32
-
-    # get the name of instruction by splitting based on the first space
-    [name, remaining] = line.split(' ', 1)
-
-    # replace dots with underscores as dot doesn't work with C/Sverilog, etc
-    name = name.replace('.', '_')
-
-    # remove leading whitespaces
-    remaining = remaining.lstrip()
-
-    # check each field for it's length and overlapping bits
-    # ex: 1..0=5 will result in an error --> x<y
-    # ex: 5..0=0 2..1=2 --> overlapping bits
-    for (s2, s1, entry) in fixed_ranges.findall(remaining):
-        msb = int(s2)
-        lsb = int(s1)
-
-        # check msb < lsb
-        if msb < lsb:
-            logging.error(
-                f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in it\'s encoding'
-            )
-            raise SystemExit(1)
-
-        # illegal value assigned as per bit width
-        entry_value = int(entry, 0)
-        if entry_value >= (1 << (msb - lsb + 1)):
-            logging.error(
-                f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb}'
-            )
-            raise SystemExit(1)
-
-        for ind in range(lsb, msb + 1):
-            # overlapping bits
-            if encoding[31 - ind] != '-':
-                logging.error(
-                    f'{line.split(" ")[0]:<10} has {ind} bit overlapping in it\'s opcodes'
-                )
-                raise SystemExit(1)
-            bit = str((entry_value >> (ind - lsb)) & 1)
-            encoding[31 - ind] = bit
-
-    # extract bit pattern assignments of the form hi..lo=val
-    remaining = fixed_ranges.sub(' ', remaining)
-
-    # do the same as above but for <lsb>=<val> pattern. single_fixed is a regex
-    # expression present in constants.py
-    for (lsb, value, drop) in single_fixed.findall(remaining):
-        lsb = int(lsb, 0)
-        value = int(value, 0)
-        if encoding[31 - lsb] != '-':
-            logging.error(
-                f'{line.split(" ")[0]:<10} has {lsb} bit overlapping in it\'s opcodes'
-            )
-            raise SystemExit(1)
-        encoding[31 - lsb] = str(value)
-
-    # convert the list of encodings into a single string for match and mask
-    match = "".join(encoding).replace('-','0')
-    mask = "".join(encoding).replace('0','1').replace('-','0')
-
-    # check if all args of the instruction are present in arg_lut present in
-    # constants.py
-    args = single_fixed.sub(' ', remaining).split()
-    encoding_args = encoding.copy()
-    for a in args:
-        if a not in arg_lut:
-            logging.error(f' Found variable {a} in instruction {name} whose mapping in arg_lut does not exist')
-            raise SystemExit(1)
-        else:
-            (msb, lsb) = arg_lut[a]
-            for ind in range(lsb, msb + 1):
-                # overlapping bits
-                if encoding_args[31 - ind] != '-':
-                    logging.error(f' Found variable {a} in instruction {name} overlapping {encoding_args[31 - ind]} variable in bit {ind}')
-                    raise SystemExit(1)
-                encoding_args[31 - ind] = a
-
-    # update the fields of the instruction as a dict and return back along with
-    # the name of the instruction
-    single_dict['encoding'] = "".join(encoding)
-    single_dict['variable_fields'] = args
-    single_dict['extension'] = [ext.split('/')[-1]]
-    single_dict['match']=hex(int(match,2))
-    single_dict['mask']=hex(int(mask,2))
-
-    return (name, single_dict)
-
-def same_base_isa(ext_name, ext_name_list):
-    type1 = ext_name.split("_")[0]
-    for ext_name1 in ext_name_list:
-        type2 = ext_name1.split("_")[0]
-        # "rv" mean insn for rv32 and rv64
-        if (type1 == type2 or
-            (type2 == "rv" and (type1 == "rv32" or type1 == "rv64")) or
-            (type1 == "rv" and (type2 == "rv32" or type2 == "rv64"))):
-            return True
-    return False
-
-def overlaps(x, y):
-    x = x.rjust(len(y), '-')
-    y = y.rjust(len(x), '-')
-
-    for i in range(0, len(x)):
-        if not (x[i] == '-' or y[i] == '-' or x[i] == y[i]):
-            return False
-
-    return True
-
-def overlap_allowed(a, x, y):
-    return x in a and y in a[x] or \
-           y in a and x in a[y]
-
-def extension_overlap_allowed(x, y):
-    return overlap_allowed(overlapping_extensions, x, y)
-
-def instruction_overlap_allowed(x, y):
-    return overlap_allowed(overlapping_instructions, x, y)
-
-def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]):
-    '''
-    This function return a dictionary containing all instructions associated
-    with an extension defined by the file_filter input. The file_filter input
-    needs to be rv* file name with out the 'rv' prefix i.e. '_i', '32_i', etc.
-
-    Each node of the dictionary will correspond to an instruction which again is
-    a dictionary. The dictionary contents of each instruction includes:
-        - variables: list of arguments used by the instruction whose mapping
-          exists in the arg_lut dictionary
-        - encoding: this contains the 32-bit encoding of the instruction where
-          '-' is used to represent position of arguments and 1/0 is used to
-          reprsent the static encoding of the bits
-        - extension: this field contains the rv* filename from which this
-          instruction was included
-        - match: hex value representing the bits that need to match to detect
-          this instruction
-        - mask: hex value representin the bits that need to be masked to extract
-          the value required for matching.
-
-    In order to build this dictionary, the function does 2 passes over the same
-    rv<file_filter> file. The first pass is to extract all standard
-    instructions. In this pass, all pseudo ops and imported instructions are
-    skipped. For each selected line of the file, we call process_enc_line
-    function to create the above mentioned dictionary contents of the
-    instruction. Checks are performed in this function to ensure that the same
-    instruction is not added twice to the overall dictionary.
-
-    In the second pass, this function parses only pseudo_ops. For each pseudo_op
-    this function checks if the dependent extension and instruction, both, exist
-    before parsing it. The pseudo op is only added to the overall dictionary if
-    the dependent instruction is not present in the dictionary, else it is
-    skipped.
-
-
-    '''
-    opcodes_dir = os.path.dirname(os.path.realpath(__file__))
-    instr_dict = {}
-
-    # file_names contains all files to be parsed in the riscv-opcodes directory
-    file_names = []
-    for fil in file_filter:
-        file_names += glob.glob(f'{opcodes_dir}/{fil}')
-    file_names.sort(reverse=True)
-    # first pass if for standard/regular instructions
-    logging.debug('Collecting standard instructions first')
-    for f in file_names:
-        logging.debug(f'Parsing File: {f} for standard instructions')
-        with open(f) as fp:
-            lines = (line.rstrip()
-                     for line in fp)  # All lines including the blank ones
-            lines = list(line for line in lines if line)  # Non-blank lines
-            lines = list(
-                line for line in lines
-                if not line.startswith("#"))  # remove comment lines
-
-        # go through each line of the file
-        for line in lines:
-            # if the an instruction needs to be imported then go to the
-            # respective file and pick the line that has the instruction.
-            # The variable 'line' will now point to the new line from the
-            # imported file
-
-            # ignore all lines starting with $import and $pseudo
-            if '$import' in line or '$pseudo' in line:
-                continue
-            logging.debug(f'     Processing line: {line}')
-
-            # call process_enc_line to get the data about the current
-            # instruction
-            (name, single_dict) = process_enc_line(line, f)
-            ext_name = f.split("/")[-1]
-
-            # if an instruction has already been added to the filtered
-            # instruction dictionary throw an error saying the given
-            # instruction is already imported and raise SystemExit
-            if name in instr_dict:
-                var = instr_dict[name]["extension"]
-                if same_base_isa(ext_name, var):
-                    # disable same names on the same base ISA
-                    err_msg = f'instruction : {name} from '
-                    err_msg += f'{ext_name} is already '
-                    err_msg += f'added from {var} in same base ISA'
-                    logging.error(err_msg)
-                    raise SystemExit(1)
-                elif instr_dict[name]['encoding'] != single_dict['encoding']:
-                    # disable same names with different encodings on different base ISAs
-                    err_msg = f'instruction : {name} from '
-                    err_msg += f'{ext_name} is already '
-                    err_msg += f'added from {var} but each have different encodings in different base ISAs'
-                    logging.error(err_msg)
-                    raise SystemExit(1)
-                instr_dict[name]['extension'].extend(single_dict['extension'])
-            else:
-              for key in instr_dict:
-                  item = instr_dict[key]
-                  if overlaps(item['encoding'], single_dict['encoding']) and \
-                    not extension_overlap_allowed(ext_name, item['extension'][0]) and \
-                    not instruction_overlap_allowed(name, key) and \
-                    same_base_isa(ext_name, item['extension']):
-                      # disable different names with overlapping encodings on the same base ISA
-                      err_msg = f'instruction : {name} in extension '
-                      err_msg += f'{ext_name} overlaps instruction {key} '
-                      err_msg += f'in extension {item["extension"]}'
-                      logging.error(err_msg)
-                      raise SystemExit(1)
-
-            if name not in instr_dict:
-                # update the final dict with the instruction
-                instr_dict[name] = single_dict
-
-    # second pass if for pseudo instructions
-    logging.debug('Collecting pseudo instructions now')
-    for f in file_names:
-        logging.debug(f'Parsing File: {f} for pseudo_ops')
-        with open(f) as fp:
-            lines = (line.rstrip()
-                     for line in fp)  # All lines including the blank ones
-            lines = list(line for line in lines if line)  # Non-blank lines
-            lines = list(
-                line for line in lines
-                if not line.startswith("#"))  # remove comment lines
-
-        # go through each line of the file
-        for line in lines:
-
-            # ignore all lines not starting with $pseudo
-            if '$pseudo' not in line:
-                continue
-            logging.debug(f'     Processing line: {line}')
-
-            # use the regex pseudo_regex from constants.py to find the dependent
-            # extension, dependent instruction, the pseudo_op in question and
-            # its encoding
-            (ext, orig_inst, pseudo_inst, line) = pseudo_regex.findall(line)[0]
-            ext_file = f'{opcodes_dir}/{ext}'
-
-            # check if the file of the dependent extension exist. Throw error if
-            # it doesn't
-            if not os.path.exists(ext_file):
-                ext1_file = f'{opcodes_dir}/unratified/{ext}'
-                if not os.path.exists(ext1_file):
-                    logging.error(f'Pseudo op {pseudo_inst} in {f} depends on {ext} which is not available')
-                    raise SystemExit(1)
-                else:
-                    ext_file = ext1_file
-
-            # check if the dependent instruction exist in the dependent
-            # extension. Else throw error.
-            found = False
-            for oline in open(ext_file):
-                if not re.findall(f'^\s*{orig_inst}\s+',oline):
-                    continue
-                else:
-                    found = True
-                    break
-            if not found:
-                logging.error(f'Orig instruction {orig_inst} not found in {ext}. Required by pseudo_op {pseudo_inst} present in {f}')
-                raise SystemExit(1)
-
-
-            (name, single_dict) = process_enc_line(pseudo_inst + ' ' + line, f)
-            # add the pseudo_op to the dictionary only if the original
-            # instruction is not already in the dictionary.
-            if orig_inst.replace('.','_') not in instr_dict \
-                    or include_pseudo \
-                    or name in include_pseudo_ops:
-
-                # update the final dict with the instruction
-                if name not in instr_dict:
-                    instr_dict[name] = single_dict
-                    logging.debug(f'        including pseudo_ops:{name}')
-            else:
-                logging.debug(f'        Skipping pseudo_op {pseudo_inst} since original instruction {orig_inst} already selected in list')
-
-    # third pass if for imported instructions
-    logging.debug('Collecting imported instructions')
-    for f in file_names:
-        logging.debug(f'Parsing File: {f} for imported ops')
-        with open(f) as fp:
-            lines = (line.rstrip()
-                     for line in fp)  # All lines including the blank ones
-            lines = list(line for line in lines if line)  # Non-blank lines
-            lines = list(
-                line for line in lines
-                if not line.startswith("#"))  # remove comment lines
-
-        # go through each line of the file
-        for line in lines:
-            # if the an instruction needs to be imported then go to the
-            # respective file and pick the line that has the instruction.
-            # The variable 'line' will now point to the new line from the
-            # imported file
-
-            # ignore all lines starting with $import and $pseudo
-            if '$import' not in line :
-                continue
-            logging.debug(f'     Processing line: {line}')
-
-            (import_ext, reg_instr) = imported_regex.findall(line)[0]
-            import_ext_file = f'{opcodes_dir}/{import_ext}'
-
-            # check if the file of the dependent extension exist. Throw error if
-            # it doesn't
-            if not os.path.exists(import_ext_file):
-                ext1_file = f'{opcodes_dir}/unratified/{import_ext}'
-                if not os.path.exists(ext1_file):
-                    logging.error(f'Instruction {reg_instr} in {f} cannot be imported from {import_ext}')
-                    raise SystemExit(1)
-                else:
-                    ext_file = ext1_file
-            else:
-                ext_file = import_ext_file
-
-            # check if the dependent instruction exist in the dependent
-            # extension. Else throw error.
-            found = False
-            for oline in open(ext_file):
-                if not re.findall(f'^\s*{reg_instr}\s+',oline):
-                    continue
-                else:
-                    found = True
-                    break
-            if not found:
-                logging.error(f'imported instruction {reg_instr} not found in {ext_file}. Required by {line} present in {f}')
-                logging.error(f'Note: you cannot import pseudo/imported ops.')
-                raise SystemExit(1)
-
-            # call process_enc_line to get the data about the current
-            # instruction
-            (name, single_dict) = process_enc_line(oline, f)
-
-            # if an instruction has already been added to the filtered
-            # instruction dictionary throw an error saying the given
-            # instruction is already imported and raise SystemExit
-            if name in instr_dict:
-                var = instr_dict[name]["extension"]
-                if instr_dict[name]['encoding'] != single_dict['encoding']:
-                    err_msg = f'imported instruction : {name} in '
-                    err_msg += f'{f.split("/")[-1]} is already '
-                    err_msg += f'added from {var} but each have different encodings for the same instruction'
-                    logging.error(err_msg)
-                    raise SystemExit(1)
-                instr_dict[name]['extension'].extend(single_dict['extension'])
-            else:
-                # update the final dict with the instruction
-                instr_dict[name] = single_dict
-    return instr_dict
-
-def make_priv_latex_table():
-    latex_file = open('priv-instr-table.tex','w')
-    type_list = ['R-type','I-type']
-    system_instr = ['_h','_s','_system','_svinval', '64_h']
-    dataset_list = [ (system_instr, 'Trap-Return Instructions',['sret','mret'], False) ]
-    dataset_list.append((system_instr, 'Interrupt-Management Instructions',['wfi'], False))
-    dataset_list.append((system_instr, 'Supervisor Memory-Management Instructions',['sfence_vma'], False))
-    dataset_list.append((system_instr, 'Hypervisor Memory-Management Instructions',['hfence_vvma', 'hfence_gvma'], False))
-    dataset_list.append((system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions',
-        ['hlv_b','hlv_bu', 'hlv_h','hlv_hu', 'hlv_w', 'hlvx_hu', 'hlvx_wu', 'hsv_b', 'hsv_h','hsv_w'], False))
-    dataset_list.append((system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions, RV64 only', ['hlv_wu','hlv_d','hsv_d'], False))
-    dataset_list.append((system_instr, 'Svinval Memory-Management Instructions', ['sinval_vma', 'sfence_w_inval','sfence_inval_ir', 'hinval_vvma','hinval_gvma'], False))
-    caption = '\\caption{RISC-V Privileged Instructions}'
-    make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
-    latex_file.close()
-
-def make_latex_table():
-    '''
-    This function is mean to create the instr-table.tex that is meant to be used
-    by the riscv-isa-manual. This function basically creates a single latext
-    file of multiple tables with each table limited to a single page. Only the
-    last table is assigned a latex-caption.
-
-    For each table we assign a type-list which capture the different instruction
-    types (R, I, B, etc) that will be required for the table. Then we select the
-    list of extensions ('_i, '32_i', etc) whose instructions are required to
-    populate the table. For each extension or collection of extension we can
-    assign Title, such that in the end they appear as subheadings within
-    the table (note these are inlined headings and not captions of the table).
-
-    All of the above information is collected/created and sent to
-    make_ext_latex_table function to dump out the latex contents into a file.
-
-    The last table only has to be given a caption - as per the policy of the
-    riscv-isa-manual.
-    '''
-    # open the file and use it as a pointer for all further dumps
-    latex_file = open('instr-table.tex','w')
-
-    # create the rv32i table first. Here we set the caption to empty. We use the
-    # files rv_i and rv32_i to capture instructions relevant for rv32i
-    # configuration. The dataset is a list of 4-element tuples :
-    # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions
-    # is empty then it indicates that all instructions of the all the extensions
-    # in list_of_extensions need to be dumped. If not empty, then only the
-    # instructions listed in list_of_instructions will be dumped into latex.
-    caption = ''
-    type_list = ['R-type','I-type','S-type','B-type','U-type','J-type']
-    dataset_list = [(['_i','32_i'], 'RV32I Base Instruction Set', [], False)]
-    dataset_list.append((['_i'], '', ['fence_tso','pause'], True))
-    make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
-    type_list = ['R-type','I-type','S-type']
-    dataset_list = [(['64_i'], 'RV64I Base Instruction Set (in addition to RV32I)', [], False)]
-    dataset_list.append((['_zifencei'], 'RV32/RV64 Zifencei Standard Extension', [], False))
-    dataset_list.append((['_zicsr'], 'RV32/RV64 Zicsr Standard Extension', [], False))
-    dataset_list.append((['_m','32_m'], 'RV32M Standard Extension', [], False))
-    dataset_list.append((['64_m'],'RV64M Standard Extension (in addition to RV32M)', [], False))
-    make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
-    type_list = ['R-type']
-    dataset_list = [(['_a'],'RV32A Standard Extension', [], False)]
-    dataset_list.append((['64_a'],'RV64A Standard Extension (in addition to RV32A)', [], False))
-    make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
-    type_list = ['R-type','R4-type','I-type','S-type']
-    dataset_list = [(['_f'],'RV32F Standard Extension', [], False)]
-    dataset_list.append((['64_f'],'RV64F Standard Extension (in addition to RV32F)', [], False))
-    make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
-    type_list = ['R-type','R4-type','I-type','S-type']
-    dataset_list = [(['_d'],'RV32D Standard Extension', [], False)]
-    dataset_list.append((['64_d'],'RV64D Standard Extension (in addition to RV32D)', [], False))
-    make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
-    type_list = ['R-type','R4-type','I-type','S-type']
-    dataset_list = [(['_q'],'RV32Q Standard Extension', [], False)]
-    dataset_list.append((['64_q'],'RV64Q Standard Extension (in addition to RV32Q)', [], False))
-    make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
-    caption = '\\caption{Instruction listing for RISC-V}'
-    type_list = ['R-type','R4-type','I-type','S-type']
-    dataset_list = [(['_zfh', '_d_zfh','_q_zfh'],'RV32Zfh Standard Extension', [], False)]
-    dataset_list.append((['64_zfh'],'RV64Zfh Standard Extension (in addition to RV32Zfh)', [], False))
-    make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
-    ## The following is demo to show that Compressed instructions can also be
-    # dumped in the same manner as above
-
-    #type_list = ['']
-    #dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])]
-    #dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', []))
-    #make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption)
-
-    latex_file.close()
-
-def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption):
-    '''
-    For a given collection of extensions this function dumps out a complete
-    latex table which includes the encodings of the instructions.
-
-    The ilen input indicates the length of the instruction for which the table
-    is created.
-
-    The caption input is used to create the latex-table caption.
-
-    The type_list input is a list of instruction types (R, I, B, etc) that are
-    treated as header for each table. Each table will have its own requirements
-    and type_list must include all the instruction-types that the table needs.
-    Note, all elements of this list must be present in the latex_inst_type
-    dictionary defined in constants.py
-
-    The latex_file is a file pointer to which the latex-table will dumped into
-
-    The dataset is a list of 3-element tuples containing:
-        (list_of_extensions, title, list_of_instructions)
-    The list_of_extensions must contain all the set of extensions whose
-    instructions must be populated under a given title. If list_of_instructions
-    is not empty, then only those instructions mentioned in list_of_instructions
-    present in the extension will be dumped into the latex-table, other
-    instructions will be ignored.
-
-    Once the above inputs are received then function first creates table entries
-    for the instruction types. To simplify things, we maintain a dictionary
-    called latex_inst_type in constants.py which is created in the same way the
-    instruction dictionary is created. This allows us to re-use the same logic
-    to create the instruction types table as well
-
-    Once the header is created, we then parse through every entry in the
-    dataset. For each list dataset entry we use the create_inst_dict function to
-    create an exhaustive list of instructions associated with the respective
-    collection of the extension of that dataset. Then we apply the instruction
-    filter, if any, indicated by the list_of_instructions of that dataset.
-    Thereon, for each instruction we create a latex table entry.
-
-    Latex table specification for ilen sized instructions:
-        Each table is created with ilen+1 columns - ilen columns for each bit of the
-        instruction and one column to hold the name of the instruction.
-
-        For each argument of an instruction we use the arg_lut from constants.py
-        to identify its position in the encoding, and thus create a multicolumn
-        entry with the name of the argument as the data. For hardcoded bits, we
-        do the same where we capture a string of continuous 1s and 0s, identify
-        the position and assign the same string as the data of the
-        multicolumn entry in the table.
-
-    '''
-    column_size = "".join(['p{0.002in}']*(ilen+1))
-
-    type_entries = '''
-    \\multicolumn{3}{l}{31} &
-    \\multicolumn{2}{r}{27} &
-    \\multicolumn{1}{c}{26} &
-    \\multicolumn{1}{r}{25} &
-    \\multicolumn{3}{l}{24} &
-    \\multicolumn{2}{r}{20} &
-    \\multicolumn{3}{l}{19} &
-    \\multicolumn{2}{r}{15} &
-    \\multicolumn{2}{l}{14} &
-    \\multicolumn{1}{r}{12} &
-    \\multicolumn{4}{l}{11} &
-    \\multicolumn{1}{r}{7} &
-    \\multicolumn{6}{l}{6} &
-    \\multicolumn{1}{r}{0} \\\\
-    \\cline{2-33}\n&\n\n
-''' if ilen == 32 else '''
-    \\multicolumn{1}{c}{15} &
-    \\multicolumn{1}{c}{14} &
-    \\multicolumn{1}{c}{13} &
-    \\multicolumn{1}{c}{12} &
-    \\multicolumn{1}{c}{11} &
-    \\multicolumn{1}{c}{10} &
-    \\multicolumn{1}{c}{9} &
-    \\multicolumn{1}{c}{8} &
-    \\multicolumn{1}{c}{7} &
-    \\multicolumn{1}{c}{6} &
-    \\multicolumn{1}{c}{5} &
-    \\multicolumn{1}{c}{4} &
-    \\multicolumn{1}{c}{3} &
-    \\multicolumn{1}{c}{2} &
-    \\multicolumn{1}{c}{1} &
-    \\multicolumn{1}{c}{0} \\\\
-    \\cline{2-17}\n&\n\n
-'''
-
-    # depending on the type_list input we create a subset dictionary of
-    # latex_inst_type dictionary present in constants.py
-    type_dict = {key: value for key, value in latex_inst_type.items() if key in type_list}
-
-    # iterate ovr each instruction type and create a table entry
-    for t in type_dict:
-        fields = []
-
-        # first capture all "arguments" of the type (funct3, funct7, rd, etc)
-        # and capture their positions using arg_lut.
-        for f in type_dict[t]['variable_fields']:
-            (msb, lsb) = arg_lut[f]
-            name = f if f not in latex_mapping else latex_mapping[f]
-            fields.append((msb, lsb, name))
-
-        # iterate through the 32 bits, starting from the msb, and assign
-        # argument names to the relevant portions of the instructions. This
-        # information is stored as a 3-element tuple containing the msb, lsb
-        # position of the arugment and the name of the argument.
-        msb = ilen - 1
-        y = ''
-        for r in range(0,ilen):
-            if y != '':
-                fields.append((msb,ilen-1-r+1,y))
-                y = ''
-            msb = ilen-1-r-1
-            if r == 31:
-                if y != '':
-                    fields.append((msb, 0, y))
-                y = ''
-
-        # sort the arguments in decreasing order of msb position
-        fields.sort(key=lambda y: y[0], reverse=True)
-
-        # for each argument/string of 1s or 0s, create a multicolumn latex table
-        # entry
-        entry = ''
-        for r in range(len(fields)):
-            (msb, lsb, name) = fields[r]
-            if r == len(fields)-1:
-                entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n'
-            elif r == 0:
-                entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n'
-            else:
-                entry += f'\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n'
-        entry += f'\\cline{{2-{ilen+1}}}\n&\n\n'
-        type_entries += entry
-
-    # for each entry in the dataset create a table
-    content = ''
-    for (ext_list, title, filter_list, include_pseudo) in dataset:
-        instr_dict = {}
-
-        # for all extensions list in ext_list, create a dictionary of
-        # instructions associated with those extensions.
-        for e in ext_list:
-            instr_dict.update(create_inst_dict(['rv'+e], include_pseudo))
-
-        # if filter_list is not empty then use that as the official set of
-        # instructions that need to be dumped into the latex table
-        inst_list = list(instr_dict.keys()) if not filter_list else filter_list
-
-        # for each instruction create an latex table entry just like how we did
-        # above with the instruction-type table.
-        instr_entries = ''
-        for inst in inst_list:
-            if inst not in instr_dict:
-                logging.error(f'in make_ext_latex_table: Instruction: {inst} not found in instr_dict')
-                raise SystemExit(1)
-            fields = []
-
-            # only if the argument is available in arg_lut we consume it, else
-            # throw error.
-            for f in instr_dict[inst]['variable_fields']:
-                if f not in arg_lut:
-                    logging.error(f'Found variable {f} in instruction {inst} whose mapping is not available')
-                    raise SystemExit(1)
-                (msb,lsb) = arg_lut[f]
-                name = f.replace('_','.') if f not in latex_mapping else latex_mapping[f]
-                fields.append((msb, lsb, name))
-
-            msb = ilen -1
-            y = ''
-            if ilen == 16:
-                encoding = instr_dict[inst]['encoding'][16:]
-            else:
-                encoding = instr_dict[inst]['encoding']
-            for r in range(0,ilen):
-                x = encoding [r]
-                if ((msb, ilen-1-r+1)) in latex_fixed_fields:
-                    fields.append((msb,ilen-1-r+1,y))
-                    msb = ilen-1-r
-                    y = ''
-                if x == '-':
-                    if y != '':
-                        fields.append((msb,ilen-1-r+1,y))
-                        y = ''
-                    msb = ilen-1-r-1
-                else:
-                    y += str(x)
-                if r == ilen-1:
-                    if y != '':
-                        fields.append((msb, 0, y))
-                    y = ''
-
-            fields.sort(key=lambda y: y[0], reverse=True)
-            entry = ''
-            for r in range(len(fields)):
-                (msb, lsb, name) = fields[r]
-                if r == len(fields)-1:
-                    entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n'
-                elif r == 0:
-                    entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n'
-                else:
-                    entry += f'\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n'
-            entry += f'\\cline{{2-{ilen+1}}}\n&\n\n'
-            instr_entries += entry
-
-        # once an entry of the dataset is completed we create the whole table
-        # with the title of that dataset as sub-heading (sort-of)
-        if title != '':
-            content += f'''
-
-\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\
-\\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\
-\\cline{{2-{ilen+1}}}
-
-            &
-{instr_entries}
-'''
-        else:
-            content += f'''
-{instr_entries}
-'''
-
-
-    header = f'''
-\\newpage
-
-\\begin{{table}}[p]
-\\begin{{small}}
-\\begin{{center}}
-    \\begin{{tabular}} {{{column_size}l}}
-    {" ".join(['&']*ilen)} \\\\
-
-            &
-{type_entries}
-'''
-    endtable=f'''
-
-\\end{{tabular}}
-\\end{{center}}
-\\end{{small}}
-{caption}
-\\end{{table}}
-'''
-    # dump the contents and return
-    latex_file.write(header+content+endtable)
-
-def instr_dict_2_extensions(instr_dict):
-    extensions = []
-    for item in instr_dict.values():
-        if item['extension'][0] not in extensions:
-            extensions.append(item['extension'][0])
-    return extensions
-
-def make_chisel(instr_dict, spinal_hdl=False):
-
-    chisel_names=''
-    cause_names_str=''
-    csr_names_str = ''
-    for i in instr_dict:
-        if spinal_hdl:
-            chisel_names += f'  def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n'
-        # else:
-        #     chisel_names += f'  def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n'
-    if not spinal_hdl:
-        extensions = instr_dict_2_extensions(instr_dict)
-        for e in extensions:
-            e_instrs = filter(lambda i: instr_dict[i]['extension'][0] == e, instr_dict)
-            if "rv64_" in e:
-                e_format = e.replace("rv64_", "").upper() + "64"
-            elif "rv32_" in e:
-                e_format = e.replace("rv32_", "").upper() + "32"
-            elif "rv_" in e:
-                e_format = e.replace("rv_", "").upper()
-            else:
-                e_format = e.upper
-            chisel_names += f'  val {e_format+"Type"} = Map(\n'
-            for instr in e_instrs:
-                tmp_instr_name = '"'+instr.upper().replace(".","_")+'"'
-                chisel_names += f'   {tmp_instr_name:<18s} -> BitPat("b{instr_dict[instr]["encoding"].replace("-","?")}"),\n'
-            chisel_names += f'  )\n'
-
-    for num, name in causes:
-        cause_names_str += f'  val {name.lower().replace(" ","_")} = {hex(num)}\n'
-    cause_names_str += '''  val all = {
-    val res = collection.mutable.ArrayBuffer[Int]()
-'''
-    for num, name in causes:
-        cause_names_str += f'    res += {name.lower().replace(" ","_")}\n'
-    cause_names_str += '''    res.toArray
-  }'''
-
-    for num, name in csrs+csrs32:
-        csr_names_str += f'  val {name} = {hex(num)}\n'
-    csr_names_str += '''  val all = {
-    val res = collection.mutable.ArrayBuffer[Int]()
-'''
-    for num, name in csrs:
-        csr_names_str += f'''    res += {name}\n'''
-    csr_names_str += '''    res.toArray
-  }
-  val all32 = {
-    val res = collection.mutable.ArrayBuffer(all:_*)
-'''
-    for num, name in csrs32:
-        csr_names_str += f'''    res += {name}\n'''
-    csr_names_str += '''    res.toArray
-  }'''
-
-    if spinal_hdl:
-        chisel_file = open('inst.spinalhdl','w')
-    else:
-        chisel_file = open('inst.chisel','w')
-    chisel_file.write(f'''
-/* Automatically generated by parse_opcodes */
-object Instructions {{
-{chisel_names}
-}}
-object Causes {{
-{cause_names_str}
-}}
-object CSRs {{
-{csr_names_str}
-}}
-''')
-    chisel_file.close()
-
-def make_rust(instr_dict):
-    mask_match_str= ''
-    for i in instr_dict:
-        mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n'
-        mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n'
-    for num, name in csrs+csrs32:
-        mask_match_str += f'const CSR_{name.upper()}: u16 = {hex(num)};\n'
-    for num, name in causes:
-        mask_match_str += f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n'
-    rust_file = open('inst.rs','w')
-    rust_file.write(f'''
-/* Automatically generated by parse_opcodes */
-{mask_match_str}
-''')
-    rust_file.close()
-
-def make_sverilog(instr_dict):
-    names_str = ''
-    for i in instr_dict:
-        names_str += f"  localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n"
-    names_str += '  /* CSR Addresses */\n'
-    for num, name in csrs+csrs32:
-        names_str += f"  localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n"
-
-    sverilog_file = open('inst.sverilog','w')
-    sverilog_file.write(f'''
-/* Automatically generated by parse_opcodes */
-package riscv_instr;
-{names_str}
-endpackage
-''')
-    sverilog_file.close()
-def make_c(instr_dict):
-    mask_match_str = ''
-    declare_insn_str = ''
-    for i in instr_dict:
-        mask_match_str += f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n'
-        mask_match_str += f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n'
-        declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n'
-
-    csr_names_str = ''
-    declare_csr_str = ''
-    for num, name in csrs+csrs32:
-        csr_names_str += f'#define CSR_{name.upper()} {hex(num)}\n'
-        declare_csr_str += f'DECLARE_CSR({name}, CSR_{name.upper()})\n'
-
-    causes_str= ''
-    declare_cause_str = ''
-    for num, name in causes:
-        causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n"
-        declare_cause_str += f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n"
-
-    arg_str = ''
-    for name, rng in arg_lut.items():
-        begin = rng[1]
-        end   = rng[0]
-        mask = ((1 << (end - begin + 1)) - 1) << begin
-        arg_str += f"#define INSN_FIELD_{name.upper().replace(' ', '_')} {hex(mask)}\n"
-
-    with open(f'{os.path.dirname(__file__)}/encoding.h', 'r') as file:
-        enc_header = file.read()
-
-    commit = os.popen('git log -1 --format="format:%h"').read()
-    enc_file = open('encoding.out.h','w')
-    enc_file.write(f'''/* SPDX-License-Identifier: BSD-3-Clause */
-
-/* Copyright (c) 2023 RISC-V International */
-
-/*
- * This file is auto-generated by running 'make' in
- * https://github.com/riscv/riscv-opcodes ({commit})
- */
-
-{enc_header}
-/* Automatically generated by parse_opcodes. */
-#ifndef RISCV_ENCODING_H
-#define RISCV_ENCODING_H
-{mask_match_str}
-{csr_names_str}
-{causes_str}
-{arg_str}#endif
-#ifdef DECLARE_INSN
-{declare_insn_str}#endif
-#ifdef DECLARE_CSR
-{declare_csr_str}#endif
-#ifdef DECLARE_CAUSE
-{declare_cause_str}#endif
-''')
-    enc_file.close()
-
-def make_go(instr_dict):
-
-    args = " ".join(sys.argv)
-    prelude = f'''// Code generated by {args}; DO NOT EDIT.'''
-
-    prelude += '''
-package riscv
-
-import "cmd/internal/obj"
-
-type inst struct {
-	opcode uint32
-	funct3 uint32
-	rs1    uint32
-	rs2    uint32
-	csr    int64
-	funct7 uint32
-}
-
-func encode(a obj.As) *inst {
-	switch a {
-'''
-
-    endoffile = '''  }
-	return nil
-}
-'''
-
-    instr_str = ''
-    for i in instr_dict:
-        enc_match = int(instr_dict[i]['match'],0)
-        opcode = (enc_match >> 0) & ((1<<7)-1)
-        funct3 = (enc_match >> 12) & ((1<<3)-1)
-        rs1 = (enc_match >> 15) & ((1<<5)-1)
-        rs2 = (enc_match >> 20) & ((1<<5)-1)
-        csr = (enc_match >> 20) & ((1<<12)-1)
-        funct7 = (enc_match >> 25) & ((1<<7)-1)
-        instr_str += f'''  case A{i.upper().replace("_","")}:
-    return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }}
-'''
-        
-    with open('inst.go','w') as file:
-        file.write(prelude)
-        file.write(instr_str)
-        file.write(endoffile)
-
-    try:
-        import subprocess
-        subprocess.run(["go", "fmt", "inst.go"])
-    except:
-        pass
-
-def signed(value, width):
-  if 0 <= value < (1<<(width-1)):
-    return value
-  else:
-    return value - (1<<width)
-
-
-if __name__ == "__main__":
-    print(f'Running with args : {sys.argv}')
-
-    extensions = sys.argv[1:]
-    for i in ['-c','-latex','-chisel','-sverilog','-rust', '-go', '-spinalhdl']:
-        if i in extensions:
-            extensions.remove(i)
-    print(f'Extensions selected : {extensions}')
-
-    include_pseudo = False
-    if "-go" in sys.argv[1:]:
-        include_pseudo = True
+import pprint
 
+from c_utils import make_c
+from chisel_utils import make_chisel
+from constants import emitted_pseudo_ops
+from go_utils import make_go
+from latex_utils import make_latex_table, make_priv_latex_table
+from rust_utils import make_rust
+from shared_utils import add_segmented_vls_insn, create_inst_dict
+from sverilog_utils import make_sverilog
+
+LOG_FORMAT = "%(levelname)s:: %(message)s"
+LOG_LEVEL = logging.INFO
+
+pretty_printer = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)
+
+
+def generate_extensions(
+    extensions: list[str],
+    include_pseudo: bool,
+    c: bool,
+    chisel: bool,
+    spinalhdl: bool,
+    sverilog: bool,
+    rust: bool,
+    go: bool,
+    latex: bool,
+):
     instr_dict = create_inst_dict(extensions, include_pseudo)
-    with open('instr_dict.yaml', 'w') as outfile:
-        yaml.dump(instr_dict, outfile, default_flow_style=False)
-    instr_dict = collections.OrderedDict(sorted(instr_dict.items()))
+    instr_dict = dict(sorted(instr_dict.items()))
 
-    if '-c' in sys.argv[1:]:
-        instr_dict_c = create_inst_dict(extensions, False, 
-                                        include_pseudo_ops=emitted_pseudo_ops)
-        instr_dict_c = collections.OrderedDict(sorted(instr_dict_c.items()))
+    with open("instr_dict.json", "w", encoding="utf-8") as outfile:
+        json.dump(add_segmented_vls_insn(instr_dict), outfile, indent=2)
+
+    if c:
+        instr_dict_c = create_inst_dict(
+            extensions, False, include_pseudo_ops=emitted_pseudo_ops
+        )
+        instr_dict_c = dict(sorted(instr_dict_c.items()))
         make_c(instr_dict_c)
-        logging.info('encoding.out.h generated successfully')
+        logging.info("encoding.out.h generated successfully")
 
-    if '-chisel' in sys.argv[1:]:
+    if chisel:
         make_chisel(instr_dict)
-        logging.info('inst.chisel generated successfully')
+        logging.info("inst.chisel generated successfully")
 
-    if '-spinalhdl' in sys.argv[1:]:
+    if spinalhdl:
         make_chisel(instr_dict, True)
-        logging.info('inst.spinalhdl generated successfully')
+        logging.info("inst.spinalhdl generated successfully")
 
-    if '-sverilog' in sys.argv[1:]:
+    if sverilog:
         make_sverilog(instr_dict)
-        logging.info('inst.sverilog generated successfully')
+        logging.info("inst.sverilog generated successfully")
 
-    if '-rust' in sys.argv[1:]:
+    if rust:
         make_rust(instr_dict)
-        logging.info('inst.rs generated successfully')
+        logging.info("inst.rs generated successfully")
 
-    if '-go' in sys.argv[1:]:
+    if go:
         make_go(instr_dict)
-        logging.info('inst.go generated successfully')
+        logging.info("inst.go generated successfully")
 
-    if '-latex' in sys.argv[1:]:
+    if latex:
         make_latex_table()
-        logging.info('instr-table.tex generated successfully')
+        logging.info("instr-table.tex generated successfully")
         make_priv_latex_table()
-        logging.info('priv-instr-table.tex generated successfully')
+        logging.info("priv-instr-table.tex generated successfully")
+
+
+def main():  # CLI entry point: parse the flags below, then run generate_extensions
+    parser = argparse.ArgumentParser(description="Generate RISC-V constants headers")
+    parser.add_argument(
+        "-pseudo", action="store_true", help="Include pseudo-instructions"
+    )
+    parser.add_argument("-c", action="store_true", help="Generate output for C")
+    parser.add_argument(
+        "-chisel", action="store_true", help="Generate output for Chisel"
+    )
+    parser.add_argument(
+        "-spinalhdl", action="store_true", help="Generate output for SpinalHDL"
+    )
+    parser.add_argument(
+        "-sverilog", action="store_true", help="Generate output for SystemVerilog"
+    )
+    parser.add_argument("-rust", action="store_true", help="Generate output for Rust")
+    parser.add_argument("-go", action="store_true", help="Generate output for Go")
+    parser.add_argument("-latex", action="store_true", help="Generate output for Latex")
+    parser.add_argument(
+        "extensions",
+        nargs="*",
+        help="Extensions to use. This is a glob of the rv_.. files, e.g. 'rv*' will give all extensions.",
+    )
+
+    args = parser.parse_args()
+
+    print(f"Extensions selected : {args.extensions}")
+
+    generate_extensions(  # passed positionally: order must match its signature
+        args.extensions,
+        args.pseudo,
+        args.c,
+        args.chisel,
+        args.spinalhdl,
+        args.sverilog,
+        args.rust,
+        args.go,
+        args.latex,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyrightconfig.json b/pyrightconfig.json
new file mode 100644
index 0000000..79a6cbf
--- /dev/null
+++ b/pyrightconfig.json
@@ -0,0 +1,4 @@
+{
+    "typeCheckingMode": "strict",
+    "pythonVersion": "3.9",
+}
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index c3726e8..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-pyyaml
diff --git a/rust_utils.py b/rust_utils.py
new file mode 100644
index 0000000..bc9153b
--- /dev/null
+++ b/rust_utils.py
@@ -0,0 +1,28 @@
+import logging
+import pprint
+
+from constants import causes, csrs, csrs32
+from shared_utils import InstrDict
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_rust(instr_dict: InstrDict):  # writes inst.rs to the working directory
+    mask_match_str = ""
+    for i in instr_dict:  # one MATCH_/MASK_ u32 constant pair per instruction
+        mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n'
+        mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n'
+    for num, name in csrs + csrs32:  # CSR_<NAME> u16 constants, value in hex
+        mask_match_str += f"const CSR_{name.upper()}: u16 = {hex(num)};\n"
+    for num, name in causes:  # CAUSE_<NAME> u8 constants, spaces -> underscores
+        mask_match_str += (
+            f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n'
+        )
+    with open("inst.rs", "w", encoding="utf-8") as rust_file:
+        rust_file.write(
+            f"""
+/* Automatically generated by parse_opcodes */
+{mask_match_str}
+"""
+        )
diff --git a/rv64_zcb b/rv64_zcb
deleted file mode 100644
index ed38047..0000000
--- a/rv64_zcb
+++ /dev/null
@@ -1 +0,0 @@
-c.zext.w  rd_rs1_p 1..0=1 15..13=4 12..10=7 6..5=3 4..2=4
diff --git a/rv_zicsr b/rv_zicsr
deleted file mode 100644
index c58b5bd..0000000
--- a/rv_zicsr
+++ /dev/null
@@ -1,23 +0,0 @@
-csrrw     rd rs1 csr 14..12=1 6..2=0x1C 1..0=3
-csrrs     rd rs1 csr        14..12=2 6..2=0x1C 1..0=3
-csrrc     rd rs1 csr        14..12=3 6..2=0x1C 1..0=3
-csrrwi    rd csr zimm       14..12=5 6..2=0x1C 1..0=3
-csrrsi    rd csr zimm       14..12=6 6..2=0x1C 1..0=3
-csrrci    rd csr zimm       14..12=7 6..2=0x1C 1..0=3
-
-$pseudo_op rv_zicsr::csrrs  frflags    rd 19..15=0 31..20=0x001 14..12=2 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrw  fsflags    rd rs1      31..20=0x001 14..12=1 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrwi fsflagsi   rd zimm     31..20=0x001 14..12=5 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrs  frrm       rd 19..15=0 31..20=0x002 14..12=2 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrw  fsrm       rd rs1      31..20=0x002 14..12=1 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrwi fsrmi      rd zimm     31..20=0x002 14..12=5 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrw  fscsr      rd rs1      31..20=0x003 14..12=1 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrs  frcsr      rd 19..15=0 31..20=0x003 14..12=2 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrs  rdcycle    rd 19..15=0 31..20=0xC00 14..12=2 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrs  rdtime     rd 19..15=0 31..20=0xC01 14..12=2 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrs  rdinstret  rd 19..15=0 31..20=0xC02 14..12=2 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrs  rdcycleh   rd 19..15=0 31..20=0xC80 14..12=2 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrs  rdtimeh    rd 19..15=0 31..20=0xC81 14..12=2 6..2=0x1C 1..0=3
-$pseudo_op rv_zicsr::csrrs  rdinstreth rd 19..15=0 31..20=0xC82 14..12=2 6..2=0x1C 1..0=3
-
-
diff --git a/shared_utils.py b/shared_utils.py
new file mode 100644
index 0000000..9dd82e9
--- /dev/null
+++ b/shared_utils.py
@@ -0,0 +1,623 @@
+#!/usr/bin/env python3
+import copy
+import glob
+import logging
+import os
+import pprint
+import re
+from itertools import chain
+from typing import Dict, Optional, TypedDict
+
+from constants import (
+    arg_lut,
+    fixed_ranges,
+    imported_regex,
+    overlapping_extensions,
+    overlapping_instructions,
+    pseudo_regex,
+    single_fixed,
+)
+
+LOG_FORMAT = "%(levelname)s:: %(message)s"
+LOG_LEVEL = logging.INFO
+
+pretty_printer = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)
+
+
+# Log an error message
+def log_and_exit(message: str):
+    """Log an error message and exit the program."""
+    logging.error(message)
+    raise SystemExit(1)
+
+
+# Initialize encoding to 32-bit '-' values
+def initialize_encoding(bits: int = 32) -> "list[str]":
+    """Return a fresh list of `bits` don't-care markers ('-'), one per encoding bit."""
+    return ["-" for _ in range(bits)]
+
+
+# Validate bit range and value
+def validate_bit_range(msb: int, lsb: int, entry_value: int, line: str):
+    """Abort via log_and_exit if msb < lsb or entry_value needs more than msb - lsb + 1 bits."""
+    if msb < lsb:
+        log_and_exit(
+            f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in its encoding'
+        )
+
+    if entry_value >= (1 << (msb - lsb + 1)):
+        log_and_exit(
+            f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb + 1}'
+        )
+
+
+# Split the instruction line into name and remaining part
+def parse_instruction_line(line: str) -> "tuple[str, str]":
+    """Parse the instruction name and the remaining encoding details."""
+    name, remaining = line.replace("\t", " ").split(" ", 1)
+    name = name.replace(".", "_")  # Replace dots for compatibility
+    remaining = remaining.lstrip()  # Remove leading whitespace
+    return name, remaining
+
+
+# Verify Overlapping Bits
+def check_overlapping_bits(encoding: "list[str]", ind: int, line: str):
+    """Check for overlapping bits in the encoding."""
+    if encoding[31 - ind] != "-":
+        log_and_exit(
+            f'{line.split(" ")[0]:<10} has {ind} bit overlapping in its opcodes'
+        )
+
+
+# Update encoding for fixed ranges
+def update_encoding_for_fixed_range(
+    encoding: "list[str]", msb: int, lsb: int, entry_value: int, line: str
+):
+    """
+    Update encoding bits for a given bit range.
+    Checks for overlapping bits and assigns the value accordingly.
+    """
+    for ind in range(lsb, msb + 1):
+        check_overlapping_bits(encoding, ind, line)
+        bit = str((entry_value >> (ind - lsb)) & 1)
+        encoding[31 - ind] = bit
+
+
+# Process fixed bit patterns
+def process_fixed_ranges(remaining: str, encoding: "list[str]", line: str):
+    """Process fixed bit ranges in the encoding."""
+    for s2, s1, entry in fixed_ranges.findall(remaining):
+        msb, lsb, entry_value = int(s2), int(s1), int(entry, 0)
+
+        # Validate bit range and entry value
+        validate_bit_range(msb, lsb, entry_value, line)
+        update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line)
+
+    return fixed_ranges.sub(" ", remaining)
+
+
+# Process single bit assignments
+def process_single_fixed(remaining: str, encoding: "list[str]", line: str):
+    """Apply each single-bit `pos=value` match in remaining to encoding; values of 2 or more abort."""
+    for lsb, value, _drop in single_fixed.findall(remaining):
+        lsb = int(lsb, 0)
+        value = int(value, 0)
+        validate_bit_range(lsb, lsb, value, line)  # one bit cannot hold a value >= 2
+        check_overlapping_bits(encoding, lsb, line)
+        encoding[31 - lsb] = str(value)
+
+
+# Main function to check argument look-up table
+def check_arg_lut(args: "list[str]", encoding_args: "list[str]", name: str):
+    """Check if arguments are present in arg_lut."""
+    for arg in args:
+        if arg not in arg_lut:
+            arg = handle_arg_lut_mapping(arg, name)
+        msb, lsb = arg_lut[arg]
+        update_encoding_args(encoding_args, arg, msb, lsb)
+
+
+# Handle missing argument mappings
+def handle_arg_lut_mapping(arg: str, name: str):
+    """Handle cases where an argument needs to be mapped to an existing one."""
+    parts = arg.split("=")
+    if len(parts) == 2:
+        existing_arg, _new_arg = parts
+        if existing_arg in arg_lut:
+            arg_lut[arg] = arg_lut[existing_arg]
+        else:
+            log_and_exit(
+                f" Found field {existing_arg} in variable {arg} in instruction {name} "
+                f"whose mapping in arg_lut does not exist"
+            )
+    else:
+        log_and_exit(
+            f" Found variable {arg} in instruction {name} "
+            f"whose mapping in arg_lut does not exist"
+        )
+    return arg
+
+
+# Update encoding args with variables
+def update_encoding_args(encoding_args: "list[str]", arg: str, msb: int, lsb: int):
+    """Update encoding arguments and ensure no overlapping."""
+    for ind in range(lsb, msb + 1):
+        check_overlapping_bits(encoding_args, ind, arg)
+        encoding_args[31 - ind] = arg
+
+
+# Compute match and mask
+def convert_encoding_to_match_mask(encoding: "list[str]") -> "tuple[str, str]":
+    """Return hex (match, mask): match reads '-' as 0; mask is 1 at fixed bits and 0 at '-'."""
+    bits = "".join(encoding)
+    match, mask = bits.translate(str.maketrans("-", "0")), bits.translate(str.maketrans("0-", "10"))
+    return hex(int(match, 2)), hex(int(mask, 2))
+
+
+class SingleInstr(TypedDict):
+    encoding: str
+    variable_fields: "list[str]"
+    extension: "list[str]"
+    match: str
+    mask: str
+
+
+InstrDict = Dict[str, SingleInstr]
+
+
+# Processing main function for a line in the encoding file
+def process_enc_line(line: str, ext: str) -> "tuple[str, SingleInstr]":
+    """
+    Process one line of an encoding file (rv*) into an instruction entry.
+    The helpers called here abort via log_and_exit (SystemExit) when:
+        - a bit is assigned more than once (over specification), either by
+          fixed values or by an argument overlapping an already-used bit
+        - a bit range hi..lo=val has hi < lo
+        - the value assigned is not representable in the bit range
+        - an argument of the instruction has no mapping in arg_lut.
+    NOTE(review): the original text also promised an under-specification
+    check (bits left unassigned); no such check is visible here - confirm.
+    If the above checks pass, then the function returns a tuple of the name and
+    a dictionary containing basic information of the instruction which includes:
+        - variable_fields: list of arguments used by the instruction whose
+          mapping exists in the arg_lut dictionary
+        - encoding: this contains the 32-bit encoding of the instruction where
+          '-' is used to represent position of arguments and 1/0 is used to
+          represent the static encoding of the bits
+        - extension: single-element list holding the basename of the rv* file
+          (the ext argument) from which this instruction was included
+        - match: hex value representing the bits that need to match to detect
+          this instruction
+        - mask: hex value representing the bits that need to be masked to extract
+          the value required for matching.
+    """
+    encoding = initialize_encoding()
+
+    # Parse the instruction line
+    name, remaining = parse_instruction_line(line)
+
+    # Process fixed ranges
+    remaining = process_fixed_ranges(remaining, encoding, line)
+
+    # Process single fixed assignments
+    process_single_fixed(remaining, encoding, line)
+
+    # Convert the list of encodings into a match and mask
+    match, mask = convert_encoding_to_match_mask(encoding)
+
+    # Check arguments in arg_lut
+    args = single_fixed.sub(" ", remaining).split()
+    encoding_args = encoding.copy()
+
+    check_arg_lut(args, encoding_args, name)
+
+    # Return single_dict
+    return name, {
+        "encoding": "".join(encoding),
+        "variable_fields": args,
+        "extension": [os.path.basename(ext)],
+        "match": match,
+        "mask": mask,
+    }
+
+
+# Extract ISA Type
+def extract_isa_type(ext_name: str) -> str:
+    """Return the part of ext_name before its first underscore (the whole name if there is none)."""
+    return ext_name.partition("_")[0]
+
+
+# Verify the types for RV*
+def is_rv_variant(type1: str, type2: str) -> bool:
+    """True when one type is the generic "rv" and the other is "rv32" or "rv64"."""
+    sized_bases = {"rv32", "rv64"}
+    first_is_generic, second_is_generic = type1 == "rv", type2 == "rv"
+    return (second_is_generic and type1 in sized_bases) or (first_is_generic and type2 in sized_bases)
+
+
+# Check for same base ISA
+def has_same_base_isa(type1: str, type2: str) -> bool:
+    """Determines if the two ISA types share the same base."""
+    return type1 == type2 or is_rv_variant(type1, type2)
+
+
+# Compare the base ISA type of a given extension name against a list of extension names
+def same_base_isa(ext_name: str, ext_name_list: "list[str]") -> bool:
+    """Checks if the base ISA type of ext_name matches any in ext_name_list."""
+    type1 = extract_isa_type(ext_name)
+    return any(has_same_base_isa(type1, extract_isa_type(ext)) for ext in ext_name_list)
+
+
+# Pad two strings to equal length
+def pad_to_equal_length(str1: str, str2: str, pad_char: str = "-") -> "tuple[str, str]":
+    """Left-pad the shorter of the two strings with pad_char so both end up the same length."""
+    width = len(str1) if len(str1) >= len(str2) else len(str2)
+    return tuple(text.rjust(width, pad_char) for text in (str1, str2))
+
+
+# Check compatibility for two characters
+def has_no_conflict(char1: str, char2: str) -> bool:
+    """True when the two characters are equal or at least one of them is the don't-care '-'."""
+    return char1 == char2 or "-" in (char1, char2)
+
+
+# Conflict check between two encoded strings
+def overlaps(x: str, y: str) -> bool:
+    """Checks if two encoded strings overlap without conflict."""
+    x, y = pad_to_equal_length(x, y)
+    return all(has_no_conflict(x[i], y[i]) for i in range(len(x)))
+
+
+# Check presence of keys in dictionary.
+def is_in_nested_dict(a: "dict[str, set[str]]", key1: str, key2: str) -> bool:
+    """True when key1 is a key of a and key2 is a member of the collection stored under it."""
+    return key2 in a.get(key1, ())
+
+
+# Overlap allowance
+def overlap_allowed(a: "dict[str, set[str]]", x: str, y: str) -> bool:
+    """Determines if overlap is allowed between x and y based on nested dictionary checks"""
+    return is_in_nested_dict(a, x, y) or is_in_nested_dict(a, y, x)
+
+
+# Check overlap allowance between extensions
+def extension_overlap_allowed(x: str, y: str) -> bool:
+    """Checks if overlap is allowed between two extensions using the overlapping_extensions dictionary."""
+    return overlap_allowed(overlapping_extensions, x, y)
+
+
+# Check overlap allowance between instructions
+def instruction_overlap_allowed(x: str, y: str) -> bool:
+    """Checks if overlap is allowed between two instructions using the overlapping_instructions dictionary."""
+    return overlap_allowed(overlapping_instructions, x, y)
+
+
+# Check 'nf' field
+def is_segmented_instruction(instruction: SingleInstr) -> bool:
+    """Checks if an instruction contains the 'nf' field."""
+    return "nf" in instruction["variable_fields"]
+
+
+# Expand 'nf' fields
+def update_with_expanded_instructions(
+    updated_dict: InstrDict, key: str, value: SingleInstr
+):
+    """Expands 'nf' fields in the instruction dictionary and updates it with new instructions."""
+    for new_key, new_value in expand_nf_field(key, value):
+        updated_dict[new_key] = new_value
+
+
+# Process instructions, expanding segmented ones and updating the dictionary
+def add_segmented_vls_insn(instr_dict: InstrDict) -> InstrDict:
+    """Return a new dict in which every entry with an 'nf' field is replaced by its expand_nf_field variants."""
+    # Each entry yields a list of (name, entry) pairs; chain flattens them and dict() collects them
+    return dict(
+        chain.from_iterable(
+            (
+                expand_nf_field(key, value)
+                if is_segmented_instruction(value)
+                else [(key, value)]
+            )
+            for key, value in instr_dict.items()
+        )
+    )
+
+
+# Expand the 'nf' field in the instruction dictionary
+def expand_nf_field(
+    name: str, single_dict: SingleInstr
+) -> "list[tuple[str, SingleInstr]]":
+    """Expand one instruction into 8 (name, entry) variants, one per nf value; mutates single_dict."""
+    validate_nf_field(single_dict, name)
+    remove_nf_field(single_dict)
+    update_mask(single_dict)
+
+    name_expand_index = name.find("e")  # "seg<N>" is spliced into the name at its first "e"
+
+    # Pre compute the base match value and encoding prefix
+    base_match = int(single_dict["match"], 16)
+    encoding_prefix = single_dict["encoding"][3:]  # encoding without its 3 leading characters
+
+    expanded_instructions = [
+        create_expanded_instruction(
+            name, single_dict, nf, name_expand_index, base_match, encoding_prefix
+        )
+        for nf in range(8)  # Range of 0 to 7
+    ]
+
+    return expanded_instructions
+
+
+# Validate the presence of 'nf'
+def validate_nf_field(single_dict: SingleInstr, name: str):
+    """Validates the presence of 'nf' in variable fields before expansion."""
+    if "nf" not in single_dict["variable_fields"]:
+        log_and_exit(f"Cannot expand nf field for instruction {name}")
+
+
+# Remove 'nf' from variable fields
+def remove_nf_field(single_dict: SingleInstr):
+    """Removes 'nf' from variable fields in the instruction dictionary."""
+    single_dict["variable_fields"].remove("nf")
+
+
+# Update the mask to include the 'nf' field
+def update_mask(single_dict: SingleInstr):
+    """Updates the mask to include the 'nf' field in the instruction dictionary."""
+    single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29)
+
+
+# Create an expanded instruction
+def create_expanded_instruction(
+    name: str,
+    single_dict: SingleInstr,
+    nf: int,
+    name_expand_index: int,
+    base_match: int,
+    encoding_prefix: str,
+) -> "tuple[str, SingleInstr]":
+    """Creates an expanded instruction based on 'nf' value."""
+    new_single_dict = copy.deepcopy(single_dict)
+
+    # Update match value in one step
+    new_single_dict["match"] = hex(base_match | (nf << 29))
+    new_single_dict["encoding"] = format(nf, "03b") + encoding_prefix
+
+    # Construct new instruction name
+    new_name = (
+        name
+        if nf == 0
+        else f"{name[:name_expand_index]}seg{nf + 1}{name[name_expand_index:]}"
+    )
+
+    return (new_name, new_single_dict)
+
+
+# Return a list of relevant lines from the specified file
+def read_lines(file: str) -> "list[str]":
+    """Reads lines from a file and returns non-blank, non-comment lines."""
+    with open(file, encoding="utf-8") as fp:
+        lines = (line.rstrip() for line in fp)
+        return [line for line in lines if line and not line.startswith("#")]
+
+
+# Update the instruction dictionary
+def process_standard_instructions(
+    lines: "list[str]", instr_dict: InstrDict, file_name: str
+):
+    """Processes standard instructions from the given lines and updates the instruction dictionary."""
+    for line in lines:
+        if "$import" in line or "$pseudo" in line:
+            continue
+        logging.debug(f"Processing line: {line}")
+        name, single_dict = process_enc_line(line, file_name)
+        ext_name = os.path.basename(file_name)
+
+        if name in instr_dict:
+            var = instr_dict[name]["extension"]
+            if same_base_isa(ext_name, var):
+                log_and_exit(
+                    f"Instruction {name} from {ext_name} is already added from {var} in same base ISA"
+                )
+            elif instr_dict[name]["encoding"] != single_dict["encoding"]:
+                log_and_exit(
+                    f"Instruction {name} from {ext_name} has different encodings in different base ISAs"
+                )
+
+            instr_dict[name]["extension"].extend(single_dict["extension"])
+        else:
+            for key, item in instr_dict.items():
+                if (
+                    overlaps(item["encoding"], single_dict["encoding"])
+                    and not extension_overlap_allowed(ext_name, item["extension"][0])
+                    and not instruction_overlap_allowed(name, key)
+                    and same_base_isa(ext_name, item["extension"])
+                ):
+                    log_and_exit(
+                        f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}'
+                    )
+
+            instr_dict[name] = single_dict
+
+
+# Incorporate pseudo instructions into the instruction dictionary based on given conditions
+def process_pseudo_instructions(
+    lines: "list[str]",
+    instr_dict: InstrDict,
+    file_name: str,
+    opcodes_dir: str,
+    include_pseudo: bool,
+    include_pseudo_ops: "list[str]",
+):
+    """Add $pseudo_op lines to instr_dict when the base instruction is absent or inclusion is requested."""
+    for line in lines:
+        if "$pseudo" not in line:
+            continue
+        logging.debug(f"Processing pseudo line: {line}")
+        ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0]
+        ext_file = find_extension_file(ext, opcodes_dir)
+        # The base instruction must exist in the extension file the pseudo-op names.
+        validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst)
+
+        name, single_dict = process_enc_line(f"{pseudo_inst} {line_content}", file_name)
+        if (
+            orig_inst.replace(".", "_") not in instr_dict
+            or include_pseudo
+            or name in include_pseudo_ops
+        ):
+            if name not in instr_dict:
+                instr_dict[name] = single_dict
+                logging.debug(f"Including pseudo_op: {name}")
+            else:
+                if single_dict["match"] != instr_dict[name]["match"]:
+                    instr_dict[f"{name}_pseudo"] = single_dict
+                # "extension" holds a single basename here; compare that string, not the list.
+                elif single_dict["extension"][0] not in instr_dict[name]["extension"]:
+                    instr_dict[name]["extension"].extend(single_dict["extension"])
+
+
+# Integrate imported instructions into the instruction dictionary
+def process_imported_instructions(
+    lines: "list[str]", instr_dict: InstrDict, file_name: str, opcodes_dir: str
+):
+    """For each $import line, add the named instruction from its source extension file to instr_dict."""
+    for line in lines:
+        if "$import" not in line:
+            continue
+        logging.debug(f"Processing imported line: {line}")
+        import_ext, reg_instr = imported_regex.findall(line)[0]
+        ext_filename = find_extension_file(import_ext, opcodes_dir)
+
+        validate_instruction_in_extension(reg_instr, ext_filename, file_name, line)
+        # Mnemonics may contain ".", so escape the name before using it as a regex.
+        with open(ext_filename, encoding="utf-8") as ext_file:
+            for oline in ext_file:
+                if re.findall(f"^\\s*{re.escape(reg_instr)}\\s+", oline):
+                    name, single_dict = process_enc_line(oline, file_name)
+                    if name in instr_dict:
+                        if instr_dict[name]["encoding"] != single_dict["encoding"]:
+                            log_and_exit(
+                                f"Imported instruction {name} from {os.path.basename(file_name)} has different encodings"
+                            )
+                        instr_dict[name]["extension"].extend(single_dict["extension"])
+                    else:
+                        instr_dict[name] = single_dict
+                    break
+
+
+# Locate the path of the specified extension file, checking fallback directories
+def find_extension_file(ext: str, opcodes_dir: str):
+    """Return opcodes_dir/ext if it exists, else opcodes_dir/unratified/ext; abort if neither exists."""
+    ext_file = f"{opcodes_dir}/{ext}"
+    if not os.path.exists(ext_file):
+        ext_file = f"{opcodes_dir}/unratified/{ext}"
+        if not os.path.exists(ext_file):
+            log_and_exit(f"Extension {ext} not found.")
+    # At this point ext_file names a path that os.path.exists accepted.
+    return ext_file
+
+
+# Confirm the presence of an original instruction in the corresponding extension file.
+def validate_instruction_in_extension(
+    inst: str, ext_filename: str, file_name: str, pseudo_inst: str
+):
+    """Abort via log_and_exit unless some line of ext_filename starts with the instruction name inst."""
+    found = False
+    with open(ext_filename, encoding="utf-8") as ext_file:
+        for oline in ext_file:
+            if re.findall(f"^\\s*{re.escape(inst)}\\s+", oline):  # escape "." in mnemonics
+                found = True
+                break
+    if not found:
+        log_and_exit(
+            f"Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_filename}"
+        )
+
+
+# Construct a dictionary of instructions filtered by specified criteria
+def create_inst_dict(
+    file_filter: "list[str]",
+    include_pseudo: bool = False,
+    include_pseudo_ops: "Optional[list[str]]" = None,
+) -> InstrDict:
+    """
+    Creates a dictionary of instructions based on the provided file filters.
+
+    Returns a dictionary of all instructions found in the files selected by
+    file_filter. Each file_filter entry is a glob pattern resolved inside the
+    "extensions" directory next to this module (it is passed to glob.glob as-is).
+    Each node of the dictionary will correspond to an instruction which again is
+    a dictionary. The dictionary contents of each instruction includes:
+        - variable_fields: list of arguments used by the instruction whose
+          mapping exists in the arg_lut dictionary
+        - encoding: this contains the 32-bit encoding of the instruction where
+          '-' is used to represent position of arguments and 1/0 is used to
+          represent the static encoding of the bits
+        - extension: list of the rv* filenames from which this instruction
+          was included
+        - match: hex value representing the bits that need to match to detect
+          this instruction
+        - mask: hex value representing the bits that need to be masked to extract
+          the value required for matching.
+    In order to build this dictionary, the function does 3 passes over the
+    selected files:
+        - First pass: extracts all standard instructions, skipping pseudo ops
+          and imported instructions. For each selected line, the `process_enc_line`
+          function is called to create the dictionary contents of the instruction.
+          Checks are performed to ensure that the same instruction is not added
+          twice to the overall dictionary.
+        - Second pass: parses only pseudo_ops, checking that the dependent
+          extension and instruction exist. A pseudo_op is added if the dependent
+          instruction is absent, or include_pseudo / include_pseudo_ops select it.
+        - Third pass: parses only $import lines and adds the imported instructions.
+    """
+    if include_pseudo_ops is None:
+        include_pseudo_ops = []
+
+    opcodes_dir = os.path.dirname(os.path.realpath(__file__)) + "/extensions"
+    instr_dict: InstrDict = {}
+
+    file_names = [
+        file
+        for fil in file_filter
+        for file in sorted(glob.glob(f"{opcodes_dir}/{fil}"), reverse=True)
+    ]
+
+    logging.debug("Collecting standard instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for standard instructions")
+        lines = read_lines(file_name)
+        process_standard_instructions(lines, instr_dict, file_name)
+
+    logging.debug("Collecting pseudo instructions")
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for pseudo instructions")
+        lines = read_lines(file_name)
+        process_pseudo_instructions(
+            lines,
+            instr_dict,
+            file_name,
+            opcodes_dir,
+            include_pseudo,
+            include_pseudo_ops,
+        )
+
+    logging.debug("Collecting imported instructions")
+
+    for file_name in file_names:
+        logging.debug(f"Parsing File: {file_name} for imported instructions")
+        lines = read_lines(file_name)
+        process_imported_instructions(lines, instr_dict, file_name, opcodes_dir)
+
+    return instr_dict
+
+
+def instr_dict_2_extensions(instr_dict: InstrDict) -> "list[str]":
+    """Return the distinct first-listed extension of every instruction, sorted for a stable order."""
+    return sorted({item["extension"][0] for item in instr_dict.values()})
+
+
+def signed(value: int, width: int) -> int:
+    """Return value if 0 <= value < 2**(width - 1), otherwise value - 2**width."""
+    return value - (1 << width) if not 0 <= value < (1 << (width - 1)) else value
diff --git a/sverilog_utils.py b/sverilog_utils.py
new file mode 100644
index 0000000..d2b26b6
--- /dev/null
+++ b/sverilog_utils.py
@@ -0,0 +1,30 @@
+import logging
+import pprint
+from pathlib import Path
+
+from constants import csrs, csrs32
+from shared_utils import InstrDict
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_sverilog(instr_dict: InstrDict):
+    names_str = ""
+    for i in instr_dict:
+        names_str += f"  localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n"
+    names_str += "  /* CSR Addresses */\n"
+    for num, name in csrs + csrs32:
+        names_str += (
+            f"  localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n"
+        )
+
+    Path("inst.sverilog").write_text(
+        f"""
+/* Automatically generated by parse_opcodes */
+package riscv_instr;
+{names_str}
+endpackage
+""",
+        encoding="utf-8",
+    )
diff --git a/test.py b/test.py
index 9ddd072..e8579b9 100644
--- a/test.py
+++ b/test.py
@@ -1,39 +1,257 @@
 #!/usr/bin/env python3
 
-from parse import *
 import logging
 import unittest
+from unittest.mock import Mock, patch
+
+from shared_utils import (
+    InstrDict,
+    check_arg_lut,
+    check_overlapping_bits,
+    extract_isa_type,
+    find_extension_file,
+    handle_arg_lut_mapping,
+    initialize_encoding,
+    is_rv_variant,
+    overlaps,
+    pad_to_equal_length,
+    parse_instruction_line,
+    process_enc_line,
+    process_fixed_ranges,
+    process_standard_instructions,
+    same_base_isa,
+    update_encoding_for_fixed_range,
+    validate_bit_range,
+)
+
+
+class EncodingUtilsTest(unittest.TestCase):
+    """Tests for basic encoding utilities"""
+
+    def setUp(self):
+        self.logger = logging.getLogger()
+        self.logger.disabled = True
+
+    def test_initialize_encoding(self):
+        """Test encoding initialization with different bit lengths"""
+        self.assertEqual(initialize_encoding(32), ["-"] * 32)
+        self.assertEqual(initialize_encoding(16), ["-"] * 16)
+        self.assertEqual(initialize_encoding(), ["-"] * 32)  # default case
+
+    def test_validate_bit_range(self):
+        """Test bit range validation"""
+        # Valid cases
+        validate_bit_range(7, 3, 15, "test_instr")  # 15 fits in 5 bits
+        validate_bit_range(31, 0, 0xFFFFFFFF, "test_instr")  # max 32-bit value
+
+        # Invalid cases
+        with self.assertRaises(SystemExit):
+            validate_bit_range(3, 7, 1, "test_instr")  # msb < lsb
+        with self.assertRaises(SystemExit):
+            validate_bit_range(3, 0, 16, "test_instr")  # value too large for range
+
+    def test_parse_instruction_line(self):
+        """Test instruction line parsing"""
+        name, remaining = parse_instruction_line("add.w r1, r2, r3")
+        self.assertEqual(name, "add_w")
+        self.assertEqual(remaining, "r1, r2, r3")
+
+        name, remaining = parse_instruction_line("lui rd imm20 6..2=0x0D")
+        self.assertEqual(name, "lui")
+        self.assertEqual(remaining, "rd imm20 6..2=0x0D")
+
+
+class BitManipulationTest(unittest.TestCase):
+    """Tests for bit manipulation and checking functions"""
+
+    def setUp(self):
+        self.logger = logging.getLogger()
+        self.logger.disabled = True
+        self.test_encoding = initialize_encoding()
+
+    def test_check_overlapping_bits(self):
+        """Test overlapping bits detection"""
+        # Valid case - no overlap
+        self.test_encoding[31 - 5] = "-"
+        check_overlapping_bits(self.test_encoding, 5, "test_instr")
+
+        # Invalid case - overlap
+        self.test_encoding[31 - 5] = "1"
+        with self.assertRaises(SystemExit):
+            check_overlapping_bits(self.test_encoding, 5, "test_instr")
+
+    def test_update_encoding_for_fixed_range(self):
+        """Test encoding updates for fixed ranges"""
+        encoding = initialize_encoding()
+        update_encoding_for_fixed_range(encoding, 6, 2, 0x0D, "test_instr")
+
+        # Check specific bits are set correctly
+        self.assertEqual(encoding[31 - 6 : 31 - 1], ["0", "1", "1", "0", "1"])
+
+    def test_process_fixed_ranges(self):
+        """Test processing of fixed bit ranges"""
+        encoding = initialize_encoding()
+        remaining = "rd imm20 6..2=0x0D 1..0=3"
+
+        result = process_fixed_ranges(remaining, encoding, "test_instr")
+        self.assertNotIn("6..2=0x0D", result)
+        self.assertNotIn("1..0=3", result)
+
+
+class EncodingArgsTest(unittest.TestCase):
+    """Tests for encoding arguments handling"""
+
+    def setUp(self):
+        self.logger = logging.getLogger()
+        self.logger.disabled = True
+
+    @patch.dict("shared_utils.arg_lut", {"rd": (11, 7), "rs1": (19, 15)})
+    def test_check_arg_lut(self):
+        """Test argument lookup table checking"""
+        encoding_args = initialize_encoding()
+        args = ["rd", "rs1"]
+        check_arg_lut(args, encoding_args, "test_instr")
+
+        # Verify encoding_args has been updated correctly
+        self.assertEqual(encoding_args[31 - 11 : 31 - 6], ["rd"] * 5)
+        self.assertEqual(encoding_args[31 - 19 : 31 - 14], ["rs1"] * 5)
+
+    @patch.dict("shared_utils.arg_lut", {"rs1": (19, 15)})
+    def test_handle_arg_lut_mapping(self):
+        """Test handling of argument mappings"""
+        # Valid mapping
+        result = handle_arg_lut_mapping("rs1=new_arg", "test_instr")
+        self.assertEqual(result, "rs1=new_arg")
+
+        # Invalid mapping
+        with self.assertRaises(SystemExit):
+            handle_arg_lut_mapping("invalid_arg=new_arg", "test_instr")
+
+
+class ISAHandlingTest(unittest.TestCase):
+    """Tests for ISA type handling and validation"""
+
+    def test_extract_isa_type(self):
+        """Test ISA type extraction"""
+        self.assertEqual(extract_isa_type("rv32_i"), "rv32")
+        self.assertEqual(extract_isa_type("rv64_m"), "rv64")
+        self.assertEqual(extract_isa_type("rv_c"), "rv")
+
+    def test_is_rv_variant(self):
+        """Test RV variant checking"""
+        self.assertTrue(is_rv_variant("rv32", "rv"))
+        self.assertTrue(is_rv_variant("rv", "rv64"))
+        self.assertFalse(is_rv_variant("rv32", "rv64"))
+
+    def test_same_base_isa(self):
+        """Test base ISA comparison"""
+        self.assertTrue(same_base_isa("rv32_i", ["rv32_m", "rv32_a"]))
+        self.assertTrue(same_base_isa("rv_i", ["rv32_i", "rv64_i"]))
+        self.assertFalse(same_base_isa("rv32_i", ["rv64_m"]))
+
+
+class StringManipulationTest(unittest.TestCase):
+    """Tests for string manipulation utilities"""
+
+    def test_pad_to_equal_length(self):
+        """Test string padding"""
+        str1, str2 = pad_to_equal_length("101", "1101")
+        self.assertEqual(len(str1), len(str2))
+        self.assertEqual(str1, "-101")
+        self.assertEqual(str2, "1101")
+
+    def test_overlaps(self):
+        """Test string overlap checking"""
+        self.assertTrue(overlaps("1-1", "101"))
+        self.assertTrue(overlaps("---", "101"))
+        self.assertFalse(overlaps("111", "101"))
+
+
+class InstructionProcessingTest(unittest.TestCase):
+    """Tests for instruction processing and validation"""
 
-class EncodingLineTest(unittest.TestCase):
     def setUp(self):
-        logger = logging.getLogger()
-        logger.disabled = True
+        """Disable the root logger and patch ``arg_lut``; cleanups undo both"""
+        self.logger = logging.getLogger()
+        # Registered before disabling so the logger's prior state is restored.
+        self.addCleanup(setattr, self.logger, "disabled", self.logger.disabled)
+        self.logger.disabled = True
+        self.arg_lut_patcher = patch.dict(
+            "shared_utils.arg_lut", {"rd": (11, 7), "imm20": (31, 12)}
+        )
+        self.arg_lut_patcher.start()
+        self.addCleanup(self.arg_lut_patcher.stop)  # replaces explicit tearDown
+
+    @patch("shared_utils.fixed_ranges")
+    @patch("shared_utils.single_fixed")
+    def test_process_enc_line(self, mock_single_fixed: Mock, mock_fixed_ranges: Mock):
+        """Check name, extension and variable fields parsed from a ``lui`` line"""
+        # Canned findall()/sub() results; decorators apply bottom-up (single_fixed first)
+        mock_fixed_ranges.findall.return_value = [(6, 2, "0x0D")]
+        mock_fixed_ranges.sub.return_value = "rd imm20"
+        mock_single_fixed.findall.return_value = []
+        mock_single_fixed.sub.return_value = "rd imm20"  # NOTE: overridden below
+
+        # sub() now returns a Mock whose split() yields the expected field names
+        mock_split = Mock(return_value=["rd", "imm20"])
+        mock_single_fixed.sub.return_value = Mock(split=mock_split)
+
+        name, data = process_enc_line("lui rd imm20 6..2=0x0D", "rv_i")
+
+        self.assertEqual(name, "lui")
+        self.assertEqual(data["extension"], ["rv_i"])
+        self.assertIn("rd", data["variable_fields"])
+        self.assertIn("imm20", data["variable_fields"])
+
+    @patch("os.path.exists")
+    @patch("shared_utils.logging.error")
+    def test_find_extension_file(self, mock_logging: Mock, mock_exists: Mock):
+        """Check lookup in the main and unratified directories, and the failure exit"""
+        # First exists() answer is True: the main-directory path is expected
+        mock_exists.side_effect = [True, False]
+        result = find_extension_file("rv32i", "/path/to/opcodes")
+        self.assertEqual(result, "/path/to/opcodes/rv32i")
+
+        # Only the second exists() answer is True: the unratified path is expected
+        mock_exists.side_effect = [False, True]
+        result = find_extension_file("rv32i", "/path/to/opcodes")
+        self.assertEqual(result, "/path/to/opcodes/unratified/rv32i")
+
+        # Both exists() answers are False: expect SystemExit plus an error log
+        mock_exists.side_effect = [False, False]
+        with self.assertRaises(SystemExit):
+            find_extension_file("rv32i", "/path/to/opcodes")
+        mock_logging.assert_called_with("Extension rv32i not found.")
+
+    def test_process_standard_instructions(self):
+        """$pseudo/$import lines should be skipped; the rest are added to instr_dict"""
+        lines = [
+            "add rd rs1 rs2 31..25=0 14..12=0 6..2=0x0C 1..0=3",
+            "sub rd rs1 rs2 31..25=0x20 14..12=0 6..2=0x0C 1..0=3",
+            "$pseudo add_pseudo rd rs1 rs2",  # Should be skipped
+            "$import rv32i::mul",  # Should be skipped
+        ]
 
-    def assertError(self, string):
-        self.assertRaises(SystemExit, process_enc_line, string, 'rv_i')
+        instr_dict: InstrDict = {}
+        file_name = "rv32i"
 
-    def test_lui(self):
-        name, data = process_enc_line('lui     rd imm20 6..2=0x0D 1=1 0=1', 'rv_i')
-        self.assertEqual(name, 'lui')
-        self.assertEqual(data['extension'], ['rv_i'])
-        self.assertEqual(data['match'], '0x37')
-        self.assertEqual(data['mask'], '0x7f')
+        with patch("shared_utils.process_enc_line") as mock_process_enc:
+            # One canned (name, data) result per non-skipped input line
+            mock_process_enc.side_effect = [
+                ("add", {"extension": ["rv32i"], "encoding": "encoding1"}),
+                ("sub", {"extension": ["rv32i"], "encoding": "encoding2"}),
+            ]
 
-    def test_overlapping(self):
-        self.assertError('jol rd jimm20 6..2=0x00 3..0=7')
-        self.assertError('jol rd jimm20 6..2=0x00 3=1')
-        self.assertError('jol rd jimm20 6..2=0x00 10=1')
-        self.assertError('jol rd jimm20 6..2=0x00 31..10=1')
+            process_standard_instructions(lines, instr_dict, file_name)
 
-    def test_invalid_order(self):
-        self.assertError('jol 2..6=0x1b')
+            # Only the add/sub lines should reach process_enc_line (two calls)
+            self.assertEqual(mock_process_enc.call_count, 2)
 
-    def test_illegal_value(self):
-        self.assertError('jol rd jimm20 2..0=10')
-        self.assertError('jol rd jimm20 2..0=0xB')
+            # instr_dict should now hold exactly the two mocked instructions
+            self.assertEqual(len(instr_dict), 2)
+            self.assertIn("add", instr_dict)
+            self.assertIn("sub", instr_dict)
 
-    def test_overlapping_field(self):
-        self.assertError('jol rd rs1 jimm20 6..2=0x1b 1..0=3')
 
-    def test_illegal_field(self):
-        self.assertError('jol rd jimm128 2..0=3')
+if __name__ == "__main__":
+    unittest.main()  # run this module's TestCase classes when executed directly