-rw-r--r--  c_utils.py          85
-rw-r--r--  chisel_utils.py     94
-rw-r--r--  go_utils.py         69
-rw-r--r--  latex_utils.py     448
-rwxr-xr-x  parse.py          1206
-rw-r--r--  rust_utils.py       39
-rw-r--r--  rv64_zcb             2
-rw-r--r--  shared_utils.py    568
-rw-r--r--  sverilog_utils.py   37
-rw-r--r--  test.py              1
10 files changed, 1358 insertions(+), 1191 deletions(-)
diff --git a/c_utils.py b/c_utils.py
new file mode 100644
index 0000000..cff33dd
--- /dev/null
+++ b/c_utils.py
@@ -0,0 +1,85 @@
+import collections
+import glob
+import logging
+import os
+import pprint
+import re
+import sys
+
+import yaml
+
+# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field
+from shared_utils import *
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_c(instr_dict):
+ mask_match_str = ""
+ declare_insn_str = ""
+ for i in instr_dict:
+ mask_match_str += (
+ f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n'
+ )
+ mask_match_str += (
+ f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n'
+ )
+ declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n'
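+    # for example, for 'add' (opcode 0110011, funct3 000, funct7 0000000)
+    # the loop above emits:
+    #   #define MATCH_ADD 0x33
+    #   #define MASK_ADD 0xfe00707f
+    #   DECLARE_INSN(add, MATCH_ADD, MASK_ADD)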
+
+ csr_names_str = ""
+ declare_csr_str = ""
+ for num, name in csrs + csrs32:
+ csr_names_str += f"#define CSR_{name.upper()} {hex(num)}\n"
+ declare_csr_str += f"DECLARE_CSR({name}, CSR_{name.upper()})\n"
+
+ causes_str = ""
+ declare_cause_str = ""
+ for num, name in causes:
+ causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n"
+ declare_cause_str += (
+ f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n"
+ )
+
+ arg_str = ""
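+    # arg_lut maps each operand name to its (msb, lsb) bit range; the loop
+    # below turns that range into an extraction mask. For example, rs2 sits
+    # in bits 24..20, so ((1 << 5) - 1) << 20 yields:
+    #   #define INSN_FIELD_RS2 0x1f00000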
+ for name, rng in arg_lut.items():
+ sanitized_name = name.replace(" ", "_").replace("=", "_eq_")
+ begin = rng[1]
+ end = rng[0]
+ mask = ((1 << (end - begin + 1)) - 1) << begin
+ arg_str += f"#define INSN_FIELD_{sanitized_name.upper()} {hex(mask)}\n"
+
+ with open(f"{os.path.dirname(__file__)}/encoding.h", "r") as file:
+ enc_header = file.read()
+
+ commit = os.popen('git log -1 --format="format:%h"').read()
+
+ # Generate the output as a string
+ output_str = f"""/* SPDX-License-Identifier: BSD-3-Clause */
+
+/* Copyright (c) 2023 RISC-V International */
+
+/*
+ * This file is auto-generated by running 'make' in
+ * https://github.com/riscv/riscv-opcodes ({commit})
+ */
+
+{enc_header}
+/* Automatically generated by parse_opcodes. */
+#ifndef RISCV_ENCODING_H
+#define RISCV_ENCODING_H
+{mask_match_str}
+{csr_names_str}
+{causes_str}
+{arg_str}#endif
+#ifdef DECLARE_INSN
+{declare_insn_str}#endif
+#ifdef DECLARE_CSR
+{declare_csr_str}#endif
+#ifdef DECLARE_CAUSE
+{declare_cause_str}#endif
+"""
+
+ # Write the modified output to the file
+ with open("encoding.out.h", "w") as enc_file:
+ enc_file.write(output_str)
diff --git a/chisel_utils.py b/chisel_utils.py
new file mode 100644
index 0000000..957e4f8
--- /dev/null
+++ b/chisel_utils.py
@@ -0,0 +1,94 @@
+import collections
+import copy
+import glob
+import logging
+import os
+import pprint
+import re
+import sys
+
+import yaml
+
+from constants import *
+
+# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field
+from shared_utils import *
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_chisel(instr_dict, spinal_hdl=False):
+
+ chisel_names = ""
+ cause_names_str = ""
+ csr_names_str = ""
+ for i in instr_dict:
+ if spinal_hdl:
+            # SpinalHDL masked literals already use '-' for don't-care bits,
+            # so the encoding string is emitted as-is
+            chisel_names += f' def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"]}"\n'
+ # else:
+ # chisel_names += f' def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n'
+ if not spinal_hdl:
+ extensions = instr_dict_2_extensions(instr_dict)
+ for e in extensions:
+ e_instrs = filter(lambda i: instr_dict[i]["extension"][0] == e, instr_dict)
+ if "rv64_" in e:
+ e_format = e.replace("rv64_", "").upper() + "64"
+ elif "rv32_" in e:
+ e_format = e.replace("rv32_", "").upper() + "32"
+ elif "rv_" in e:
+ e_format = e.replace("rv_", "").upper()
+ else:
+                e_format = e.upper()
+ chisel_names += f' val {e_format+"Type"} = Map(\n'
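+            # each extension becomes a Scala Map from mnemonic to BitPat,
+            # with '-' don't-care bits rewritten as '?'; e.g. an rv_i entry
+            # renders (illustrative) as:
+            #   "ADD"              -> BitPat("b0000000??????????000?????0110011"),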
+ for instr in e_instrs:
+ tmp_instr_name = '"' + instr.upper().replace(".", "_") + '"'
+ chisel_names += f' {tmp_instr_name:<18s} -> BitPat("b{instr_dict[instr]["encoding"].replace("-","?")}"),\n'
+ chisel_names += f" )\n"
+
+ for num, name in causes:
+ cause_names_str += f' val {name.lower().replace(" ","_")} = {hex(num)}\n'
+ cause_names_str += """ val all = {
+ val res = collection.mutable.ArrayBuffer[Int]()
+"""
+ for num, name in causes:
+ cause_names_str += f' res += {name.lower().replace(" ","_")}\n'
+ cause_names_str += """ res.toArray
+ }"""
+
+ for num, name in csrs + csrs32:
+ csr_names_str += f" val {name} = {hex(num)}\n"
+ csr_names_str += """ val all = {
+ val res = collection.mutable.ArrayBuffer[Int]()
+"""
+ for num, name in csrs:
+ csr_names_str += f""" res += {name}\n"""
+ csr_names_str += """ res.toArray
+ }
+ val all32 = {
+ val res = collection.mutable.ArrayBuffer(all:_*)
+"""
+ for num, name in csrs32:
+ csr_names_str += f""" res += {name}\n"""
+ csr_names_str += """ res.toArray
+ }"""
+
+ if spinal_hdl:
+ chisel_file = open("inst.spinalhdl", "w")
+ else:
+ chisel_file = open("inst.chisel", "w")
+ chisel_file.write(
+ f"""
+/* Automatically generated by parse_opcodes */
+object Instructions {{
+{chisel_names}
+}}
+object Causes {{
+{cause_names_str}
+}}
+object CSRs {{
+{csr_names_str}
+}}
+"""
+ )
+ chisel_file.close()
diff --git a/go_utils.py b/go_utils.py
new file mode 100644
index 0000000..1f4c94b
--- /dev/null
+++ b/go_utils.py
@@ -0,0 +1,69 @@
+import collections
+import glob
+import logging
+import os
+import pprint
+import re
+import sys
+
+import yaml
+
+# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field
+from shared_utils import *
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_go(instr_dict):
+
+ args = " ".join(sys.argv)
+ prelude = f"""// Code generated by {args}; DO NOT EDIT."""
+
+ prelude += """
+package riscv
+
+import "cmd/internal/obj"
+
+type inst struct {
+ opcode uint32
+ funct3 uint32
+ rs1 uint32
+ rs2 uint32
+ csr int64
+ funct7 uint32
+}
+
+func encode(a obj.As) *inst {
+ switch a {
+"""
+
+ endoffile = """ }
+ return nil
+}
+"""
+
+ instr_str = ""
+ for i in instr_dict:
+ enc_match = int(instr_dict[i]["match"], 0)
+ opcode = (enc_match >> 0) & ((1 << 7) - 1)
+ funct3 = (enc_match >> 12) & ((1 << 3) - 1)
+ rs1 = (enc_match >> 15) & ((1 << 5) - 1)
+ rs2 = (enc_match >> 20) & ((1 << 5) - 1)
+ csr = (enc_match >> 20) & ((1 << 12) - 1)
+ funct7 = (enc_match >> 25) & ((1 << 7) - 1)
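+        # fixed slices of the 32-bit encoding: opcode = bits 6..0, funct3 =
+        # bits 14..12, rs1 = bits 19..15, rs2 = bits 24..20, csr = bits
+        # 31..20 (sign-extended via signed() from shared_utils), and
+        # funct7 = bits 31..25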
+ instr_str += f""" case A{i.upper().replace("_","")}:
+ return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }}
+"""
+
+ with open("inst.go", "w") as file:
+ file.write(prelude)
+ file.write(instr_str)
+ file.write(endoffile)
+
+    try:
+        import subprocess
+
+        subprocess.run(["go", "fmt", "inst.go"])
+    except Exception:
+        # formatting with the go toolchain is best-effort; a bare 'except:'
+        # would also swallow KeyboardInterrupt and SystemExit
+        pass
diff --git a/latex_utils.py b/latex_utils.py
new file mode 100644
index 0000000..ab5f6f9
--- /dev/null
+++ b/latex_utils.py
@@ -0,0 +1,448 @@
+import collections
+import copy
+import glob
+import logging
+import os
+import pprint
+import re
+import sys
+
+import yaml
+
+from constants import *
+from shared_utils import *
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_priv_latex_table():
+ latex_file = open("priv-instr-table.tex", "w")
+ type_list = ["R-type", "I-type"]
+ system_instr = ["_h", "_s", "_system", "_svinval", "64_h", "_svinval_h"]
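+    # each dataset entry is a 4-tuple: (extension files, table subheading,
+    # instruction filter, include_pseudo_ops); see make_ext_latex_table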
+ dataset_list = [(system_instr, "Trap-Return Instructions", ["sret", "mret"], False)]
+ dataset_list.append(
+ (system_instr, "Interrupt-Management Instructions", ["wfi"], False)
+ )
+ dataset_list.append(
+ (
+ system_instr,
+ "Supervisor Memory-Management Instructions",
+ ["sfence_vma"],
+ False,
+ )
+ )
+ dataset_list.append(
+ (
+ system_instr,
+ "Hypervisor Memory-Management Instructions",
+ ["hfence_vvma", "hfence_gvma"],
+ False,
+ )
+ )
+ dataset_list.append(
+ (
+ system_instr,
+ "Hypervisor Virtual-Machine Load and Store Instructions",
+ [
+ "hlv_b",
+ "hlv_bu",
+ "hlv_h",
+ "hlv_hu",
+ "hlv_w",
+ "hlvx_hu",
+ "hlvx_wu",
+ "hsv_b",
+ "hsv_h",
+ "hsv_w",
+ ],
+ False,
+ )
+ )
+ dataset_list.append(
+ (
+ system_instr,
+ "Hypervisor Virtual-Machine Load and Store Instructions, RV64 only",
+ ["hlv_wu", "hlv_d", "hsv_d"],
+ False,
+ )
+ )
+ dataset_list.append(
+ (
+ system_instr,
+ "Svinval Memory-Management Instructions",
+ [
+ "sinval_vma",
+ "sfence_w_inval",
+ "sfence_inval_ir",
+ "hinval_vvma",
+ "hinval_gvma",
+ ],
+ False,
+ )
+ )
+ caption = "\\caption{RISC-V Privileged Instructions}"
+ make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+ latex_file.close()
+
+
+def make_latex_table():
+    """
+    This function is meant to create the instr-table.tex that is used by the
+    riscv-isa-manual. It creates a single latex file containing multiple
+    tables, with each table limited to a single page. Only the last table is
+    assigned a latex caption.
+
+    For each table we assign a type-list which captures the different
+    instruction types (R, I, B, etc.) that will be required for the table.
+    Then we select the list of extensions ('_i', '32_i', etc.) whose
+    instructions are required to populate the table. Each extension or
+    collection of extensions can be assigned a title, which appears as a
+    subheading within the table (note these are inlined headings and not
+    captions of the table).
+
+    All of the above information is collected/created and sent to the
+    make_ext_latex_table function to dump out the latex contents into a file.
+
+    Only the last table has to be given a caption - as per the policy of the
+    riscv-isa-manual.
+    """
+ # open the file and use it as a pointer for all further dumps
+ latex_file = open("instr-table.tex", "w")
+
+    # create the rv32i table first. Here we set the caption to empty. We use
+    # the files rv_i and rv32_i to capture instructions relevant for the
+    # rv32i configuration. The dataset is a list of 4-element tuples:
+    # (list_of_extensions, title, list_of_instructions, include_pseudo_ops).
+    # If list_of_instructions is empty, all instructions of all the
+    # extensions in list_of_extensions are dumped. If not empty, only the
+    # instructions listed in list_of_instructions are dumped into latex.
+ caption = ""
+ type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"]
+ dataset_list = [(["_i", "32_i"], "RV32I Base Instruction Set", [], False)]
+ dataset_list.append((["_i"], "", ["fence_tso", "pause"], True))
+ make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+ type_list = ["R-type", "I-type", "S-type"]
+ dataset_list = [
+ (["64_i"], "RV64I Base Instruction Set (in addition to RV32I)", [], False)
+ ]
+ dataset_list.append(
+ (["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False)
+ )
+ dataset_list.append((["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False))
+ dataset_list.append((["_m", "32_m"], "RV32M Standard Extension", [], False))
+ dataset_list.append(
+ (["64_m"], "RV64M Standard Extension (in addition to RV32M)", [], False)
+ )
+ make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+ type_list = ["R-type"]
+ dataset_list = [(["_a"], "RV32A Standard Extension", [], False)]
+ dataset_list.append(
+ (["64_a"], "RV64A Standard Extension (in addition to RV32A)", [], False)
+ )
+ make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+ type_list = ["R-type", "R4-type", "I-type", "S-type"]
+ dataset_list = [(["_f"], "RV32F Standard Extension", [], False)]
+ dataset_list.append(
+ (["64_f"], "RV64F Standard Extension (in addition to RV32F)", [], False)
+ )
+ make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+ type_list = ["R-type", "R4-type", "I-type", "S-type"]
+ dataset_list = [(["_d"], "RV32D Standard Extension", [], False)]
+ dataset_list.append(
+ (["64_d"], "RV64D Standard Extension (in addition to RV32D)", [], False)
+ )
+ make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+ type_list = ["R-type", "R4-type", "I-type", "S-type"]
+ dataset_list = [(["_q"], "RV32Q Standard Extension", [], False)]
+ dataset_list.append(
+ (["64_q"], "RV64Q Standard Extension (in addition to RV32Q)", [], False)
+ )
+ make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+ caption = "\\caption{Instruction listing for RISC-V}"
+ type_list = ["R-type", "R4-type", "I-type", "S-type"]
+ dataset_list = [
+ (["_zfh", "_d_zfh", "_q_zfh"], "RV32Zfh Standard Extension", [], False)
+ ]
+ dataset_list.append(
+ (["64_zfh"], "RV64Zfh Standard Extension (in addition to RV32Zfh)", [], False)
+ )
+ make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
+
+    ## The following is a demo showing that compressed instructions can also
+    # be dumped in the same manner as above
+
+ # type_list = ['']
+ # dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])]
+ # dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', []))
+ # make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption)
+
+ latex_file.close()
+
+
+def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption):
+ """
+ For a given collection of extensions this function dumps out a complete
+ latex table which includes the encodings of the instructions.
+
+ The ilen input indicates the length of the instruction for which the table
+ is created.
+
+ The caption input is used to create the latex-table caption.
+
+    The type_list input is a list of instruction types (R, I, B, etc.) that
+    are treated as headers for each table. Each table will have its own
+    requirements and type_list must include all the instruction types that
+    the table needs. Note, all elements of this list must be present in the
+    latex_inst_type dictionary defined in constants.py
+
+    The latex_file is a file pointer to which the latex table will be dumped.
+
+    The dataset is a list of 4-element tuples containing:
+        (list_of_extensions, title, list_of_instructions, include_pseudo_ops)
+    The list_of_extensions must contain all the extensions whose instructions
+    must be populated under a given title. If list_of_instructions is not
+    empty, then only those of its instructions that are present in the
+    extension will be dumped into the latex table; other instructions will
+    be ignored.
+
+    Once the above inputs are received, the function first creates table
+    entries for the instruction types. To simplify things, we maintain a
+    dictionary called latex_inst_type in constants.py which is created in
+    the same way the instruction dictionary is created. This allows us to
+    re-use the same logic to create the instruction-types table as well.
+
+    Once the header is created, we then parse through every entry in the
+    dataset. For each dataset entry we use the create_inst_dict function to
+    create an exhaustive list of instructions associated with the respective
+    collection of extensions of that dataset. Then we apply the instruction
+    filter, if any, indicated by the list_of_instructions of that dataset.
+    Thereon, for each instruction we create a latex table entry.
+
+    Latex table specification for ilen-sized instructions:
+    Each table is created with ilen+1 columns - ilen columns for each bit of
+    the instruction and one column to hold the name of the instruction.
+
+    For each argument of an instruction we use the arg_lut from constants.py
+    to identify its position in the encoding, and thus create a multicolumn
+    entry with the name of the argument as the data. For hardcoded bits, we
+    do the same: we capture each contiguous run of 1s and 0s, identify its
+    position, and assign the run itself as the data of the multicolumn
+    entry in the table.
+
+ """
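+    # layout sketch: ilen+1 narrow p-columns (a leading spacer plus one per
+    # instruction bit) followed by an 'l' column for the instruction name;
+    # the \cline{2-...} rules below span only the bit cells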
+ column_size = "".join(["p{0.002in}"] * (ilen + 1))
+
+ type_entries = (
+ """
+ \\multicolumn{3}{l}{31} &
+ \\multicolumn{2}{r}{27} &
+ \\multicolumn{1}{c}{26} &
+ \\multicolumn{1}{r}{25} &
+ \\multicolumn{3}{l}{24} &
+ \\multicolumn{2}{r}{20} &
+ \\multicolumn{3}{l}{19} &
+ \\multicolumn{2}{r}{15} &
+ \\multicolumn{2}{l}{14} &
+ \\multicolumn{1}{r}{12} &
+ \\multicolumn{4}{l}{11} &
+ \\multicolumn{1}{r}{7} &
+ \\multicolumn{6}{l}{6} &
+ \\multicolumn{1}{r}{0} \\\\
+ \\cline{2-33}\n&\n\n
+"""
+ if ilen == 32
+ else """
+ \\multicolumn{1}{c}{15} &
+ \\multicolumn{1}{c}{14} &
+ \\multicolumn{1}{c}{13} &
+ \\multicolumn{1}{c}{12} &
+ \\multicolumn{1}{c}{11} &
+ \\multicolumn{1}{c}{10} &
+ \\multicolumn{1}{c}{9} &
+ \\multicolumn{1}{c}{8} &
+ \\multicolumn{1}{c}{7} &
+ \\multicolumn{1}{c}{6} &
+ \\multicolumn{1}{c}{5} &
+ \\multicolumn{1}{c}{4} &
+ \\multicolumn{1}{c}{3} &
+ \\multicolumn{1}{c}{2} &
+ \\multicolumn{1}{c}{1} &
+ \\multicolumn{1}{c}{0} \\\\
+ \\cline{2-17}\n&\n\n
+"""
+ )
+
+    # depending on the type_list input we create a subset of the
+    # latex_inst_type dictionary present in constants.py
+ type_dict = {
+ key: value for key, value in latex_inst_type.items() if key in type_list
+ }
+
+    # iterate over each instruction type and create a table entry
+ for t in type_dict:
+ fields = []
+
+ # first capture all "arguments" of the type (funct3, funct7, rd, etc)
+ # and capture their positions using arg_lut.
+ for f in type_dict[t]["variable_fields"]:
+ (msb, lsb) = arg_lut[f]
+ name = f if f not in latex_mapping else latex_mapping[f]
+ fields.append((msb, lsb, name))
+
+        # iterate through the ilen bits, starting from the msb, and assign
+        # argument names to the relevant portions of the instructions. This
+        # information is stored as a 3-element tuple containing the msb, lsb
+        # position of the argument and the name of the argument.
+ msb = ilen - 1
+ y = ""
+ for r in range(0, ilen):
+ if y != "":
+ fields.append((msb, ilen - 1 - r + 1, y))
+ y = ""
+ msb = ilen - 1 - r - 1
+            if r == ilen - 1:
+ if y != "":
+ fields.append((msb, 0, y))
+ y = ""
+
+ # sort the arguments in decreasing order of msb position
+ fields.sort(key=lambda y: y[0], reverse=True)
+
+ # for each argument/string of 1s or 0s, create a multicolumn latex table
+ # entry
+ entry = ""
+ for r in range(len(fields)):
+ (msb, lsb, name) = fields[r]
+ if r == len(fields) - 1:
+ entry += (
+ f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n"
+ )
+ elif r == 0:
+ entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n"
+ else:
+ entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n"
+ entry += f"\\cline{{2-{ilen+1}}}\n&\n\n"
+ type_entries += entry
+
+ # for each entry in the dataset create a table
+ content = ""
+ for ext_list, title, filter_list, include_pseudo in dataset:
+ instr_dict = {}
+
+        # for all extensions listed in ext_list, create a dictionary of
+        # instructions associated with those extensions.
+ for e in ext_list:
+ instr_dict.update(create_inst_dict(["rv" + e], include_pseudo))
+
+ # if filter_list is not empty then use that as the official set of
+ # instructions that need to be dumped into the latex table
+ inst_list = list(instr_dict.keys()) if not filter_list else filter_list
+
+        # for each instruction create a latex table entry just like we did
+        # above for the instruction-type table.
+ instr_entries = ""
+ for inst in inst_list:
+ if inst not in instr_dict:
+ logging.error(
+ f"in make_ext_latex_table: Instruction: {inst} not found in instr_dict"
+ )
+ raise SystemExit(1)
+ fields = []
+
+            # consume the argument only if it is available in arg_lut;
+            # otherwise throw an error.
+ for f in instr_dict[inst]["variable_fields"]:
+ if f not in arg_lut:
+ logging.error(
+ f"Found variable {f} in instruction {inst} whose mapping is not available"
+ )
+ raise SystemExit(1)
+ (msb, lsb) = arg_lut[f]
+ name = (
+ f.replace("_", ".") if f not in latex_mapping else latex_mapping[f]
+ )
+ fields.append((msb, lsb, name))
+
+ msb = ilen - 1
+ y = ""
+ if ilen == 16:
+ encoding = instr_dict[inst]["encoding"][16:]
+ else:
+ encoding = instr_dict[inst]["encoding"]
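+            # scan the encoding msb-first: fixed 0/1 bits accumulate in y and
+            # are flushed as a single (msb, lsb, text) field at each '-'
+            # (argument bit) or latex_fixed_fields boundary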
+ for r in range(0, ilen):
+ x = encoding[r]
+ if ((msb, ilen - 1 - r + 1)) in latex_fixed_fields:
+ fields.append((msb, ilen - 1 - r + 1, y))
+ msb = ilen - 1 - r
+ y = ""
+ if x == "-":
+ if y != "":
+ fields.append((msb, ilen - 1 - r + 1, y))
+ y = ""
+ msb = ilen - 1 - r - 1
+ else:
+ y += str(x)
+ if r == ilen - 1:
+ if y != "":
+ fields.append((msb, 0, y))
+ y = ""
+
+ fields.sort(key=lambda y: y[0], reverse=True)
+ entry = ""
+ for r in range(len(fields)):
+ (msb, lsb, name) = fields[r]
+ if r == len(fields) - 1:
+ entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n'
+ elif r == 0:
+ entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n"
+ else:
+ entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n"
+ entry += f"\\cline{{2-{ilen+1}}}\n&\n\n"
+ instr_entries += entry
+
+        # once an entry of the dataset is completed we create the whole table
+        # with the title of that dataset as a sub-heading (of sorts)
+ if title != "":
+ content += f"""
+
+\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\
+\\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\
+\\cline{{2-{ilen+1}}}
+
+ &
+{instr_entries}
+"""
+ else:
+ content += f"""
+{instr_entries}
+"""
+
+ header = f"""
+\\newpage
+
+\\begin{{table}}[p]
+\\begin{{small}}
+\\begin{{center}}
+ \\begin{{tabular}} {{{column_size}l}}
+ {" ".join(['&']*ilen)} \\\\
+
+ &
+{type_entries}
+"""
+ endtable = f"""
+
+\\end{{tabular}}
+\\end{{center}}
+\\end{{small}}
+{caption}
+\\end{{table}}
+"""
+ # dump the contents and return
+ latex_file.write(header + content + endtable)
diff --git a/parse.py b/parse.py
index 72af94d..29f6062 100755
--- a/parse.py
+++ b/parse.py
@@ -1,1205 +1,32 @@
#!/usr/bin/env python3
-
import collections
-import copy
-import glob
import logging
-import os
import pprint
-import re
import sys
import yaml
+from c_utils import *
+from chisel_utils import *
from constants import *
+from go_utils import *
+from latex_utils import *
+from rust_utils import *
+from shared_utils import *
+from sverilog_utils import *
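+
+# parse.py is now just the orchestrator: the make_* backends and the
+# instruction-dictionary helpers live in the *_utils modules imported above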
-pp = pprint.PrettyPrinter(indent=2)
-logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
-
-
-def process_enc_line(line, ext):
- """
- This function processes each line of the encoding files (rv*). As part of
- the processing, the function ensures that the encoding is legal through the
- following checks::
-
- - there is no over specification (same bits assigned different values)
- - there is no under specification (some bits not assigned values)
- - bit ranges are in the format hi..lo=val where hi > lo
- - value assigned is representable in the bit range
- - also checks that the mapping of arguments of an instruction exists in
- arg_lut.
-
- If the above checks pass, then the function returns a tuple of the name and
- a dictionary containing basic information of the instruction which includes:
- - variables: list of arguments used by the instruction whose mapping
- exists in the arg_lut dictionary
- - encoding: this contains the 32-bit encoding of the instruction where
- '-' is used to represent position of arguments and 1/0 is used to
- reprsent the static encoding of the bits
- - extension: this field contains the rv* filename from which this
- instruction was included
- - match: hex value representing the bits that need to match to detect
- this instruction
- - mask: hex value representin the bits that need to be masked to extract
- the value required for matching.
- """
- single_dict = {}
-
- # fill all bits with don't care. we use '-' to represent don't care
- # TODO: hardcoded for 32-bits.
- encoding = ["-"] * 32
-
- # get the name of instruction by splitting based on the first space
- [name, remaining] = line.split(" ", 1)
-
- # replace dots with underscores as dot doesn't work with C/Sverilog, etc
- name = name.replace(".", "_")
-
- # remove leading whitespaces
- remaining = remaining.lstrip()
-
- # check each field for it's length and overlapping bits
- # ex: 1..0=5 will result in an error --> x<y
- # ex: 5..0=0 2..1=2 --> overlapping bits
- for s2, s1, entry in fixed_ranges.findall(remaining):
- msb = int(s2)
- lsb = int(s1)
-
- # check msb < lsb
- if msb < lsb:
- logging.error(
- f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in it\'s encoding'
- )
- raise SystemExit(1)
-
- # illegal value assigned as per bit width
- entry_value = int(entry, 0)
- if entry_value >= (1 << (msb - lsb + 1)):
- logging.error(
- f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb}'
- )
- raise SystemExit(1)
-
- for ind in range(lsb, msb + 1):
- # overlapping bits
- if encoding[31 - ind] != "-":
- logging.error(
- f'{line.split(" ")[0]:<10} has {ind} bit overlapping in it\'s opcodes'
- )
- raise SystemExit(1)
- bit = str((entry_value >> (ind - lsb)) & 1)
- encoding[31 - ind] = bit
-
- # extract bit pattern assignments of the form hi..lo=val
- remaining = fixed_ranges.sub(" ", remaining)
-
- # do the same as above but for <lsb>=<val> pattern. single_fixed is a regex
- # expression present in constants.py
- for lsb, value, drop in single_fixed.findall(remaining):
- lsb = int(lsb, 0)
- value = int(value, 0)
- if encoding[31 - lsb] != "-":
- logging.error(
- f'{line.split(" ")[0]:<10} has {lsb} bit overlapping in it\'s opcodes'
- )
- raise SystemExit(1)
- encoding[31 - lsb] = str(value)
-
- # convert the list of encodings into a single string for match and mask
- match = "".join(encoding).replace("-", "0")
- mask = "".join(encoding).replace("0", "1").replace("-", "0")
-
- # check if all args of the instruction are present in arg_lut present in
- # constants.py
- args = single_fixed.sub(" ", remaining).split()
- encoding_args = encoding.copy()
- for a in args:
- if a not in arg_lut:
- parts = a.split("=")
- if len(parts) == 2:
- existing_arg, new_arg = parts
- if existing_arg in arg_lut:
- arg_lut[a] = arg_lut[existing_arg]
-
- else:
- logging.error(
- f" Found field {existing_arg} in variable {a} in instruction {name} whose mapping in arg_lut does not exist"
- )
- raise SystemExit(1)
- else:
- logging.error(
- f" Found variable {a} in instruction {name} whose mapping in arg_lut does not exist"
- )
- raise SystemExit(1)
- (msb, lsb) = arg_lut[a]
- for ind in range(lsb, msb + 1):
- # overlapping bits
- if encoding_args[31 - ind] != "-":
- logging.error(
- f" Found variable {a} in instruction {name} overlapping {encoding_args[31 - ind]} variable in bit {ind}"
- )
- raise SystemExit(1)
- encoding_args[31 - ind] = a
-
- # update the fields of the instruction as a dict and return back along with
- # the name of the instruction
- single_dict["encoding"] = "".join(encoding)
- single_dict["variable_fields"] = args
- single_dict["extension"] = [os.path.basename(ext)]
- single_dict["match"] = hex(int(match, 2))
- single_dict["mask"] = hex(int(mask, 2))
-
- return (name, single_dict)
-
-
-def same_base_isa(ext_name, ext_name_list):
- type1 = ext_name.split("_")[0]
- for ext_name1 in ext_name_list:
- type2 = ext_name1.split("_")[0]
- # "rv" mean insn for rv32 and rv64
- if (
- type1 == type2
- or (type2 == "rv" and (type1 == "rv32" or type1 == "rv64"))
- or (type1 == "rv" and (type2 == "rv32" or type2 == "rv64"))
- ):
- return True
- return False
-
-
-def overlaps(x, y):
- x = x.rjust(len(y), "-")
- y = y.rjust(len(x), "-")
-
- for i in range(0, len(x)):
- if not (x[i] == "-" or y[i] == "-" or x[i] == y[i]):
- return False
-
- return True
-
-
-def overlap_allowed(a, x, y):
- return x in a and y in a[x] or y in a and x in a[y]
-
-
-def extension_overlap_allowed(x, y):
- return overlap_allowed(overlapping_extensions, x, y)
-
-
-def instruction_overlap_allowed(x, y):
- return overlap_allowed(overlapping_instructions, x, y)
-
-
-def add_segmented_vls_insn(instr_dict):
- updated_dict = {}
- for k, v in instr_dict.items():
- if "nf" in v["variable_fields"]:
- for new_key, new_value in expand_nf_field(k, v):
- updated_dict[new_key] = new_value
- else:
- updated_dict[k] = v
- return updated_dict
-
-
-def expand_nf_field(name, single_dict):
- if "nf" not in single_dict["variable_fields"]:
- logging.error(f"Cannot expand nf field for instruction {name}")
- raise SystemExit(1)
-
- # nf no longer a variable field
- single_dict["variable_fields"].remove("nf")
- # include nf in mask
- single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29)
-
- name_expand_index = name.find("e")
- expanded_instructions = []
- for nf in range(0, 8):
- new_single_dict = copy.deepcopy(single_dict)
- new_single_dict["match"] = hex(int(single_dict["match"], 16) | nf << 29)
- new_single_dict["encoding"] = format(nf, "03b") + single_dict["encoding"][3:]
- new_name = (
- name
- if nf == 0
- else name[:name_expand_index]
- + "seg"
- + str(nf + 1)
- + name[name_expand_index:]
- )
- expanded_instructions.append((new_name, new_single_dict))
- return expanded_instructions
-
-
-def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]):
- """
- This function return a dictionary containing all instructions associated
- with an extension defined by the file_filter input. The file_filter input
- needs to be rv* file name with out the 'rv' prefix i.e. '_i', '32_i', etc.
-
- Each node of the dictionary will correspond to an instruction which again is
- a dictionary. The dictionary contents of each instruction includes:
- - variables: list of arguments used by the instruction whose mapping
- exists in the arg_lut dictionary
- - encoding: this contains the 32-bit encoding of the instruction where
- '-' is used to represent position of arguments and 1/0 is used to
- reprsent the static encoding of the bits
- - extension: this field contains the rv* filename from which this
- instruction was included
- - match: hex value representing the bits that need to match to detect
- this instruction
- - mask: hex value representin the bits that need to be masked to extract
- the value required for matching.
-
- In order to build this dictionary, the function does 2 passes over the same
- rv<file_filter> file. The first pass is to extract all standard
- instructions. In this pass, all pseudo ops and imported instructions are
- skipped. For each selected line of the file, we call process_enc_line
- function to create the above mentioned dictionary contents of the
- instruction. Checks are performed in this function to ensure that the same
- instruction is not added twice to the overall dictionary.
-
- In the second pass, this function parses only pseudo_ops. For each pseudo_op
- this function checks if the dependent extension and instruction, both, exist
- before parsing it. The pseudo op is only added to the overall dictionary if
- the dependent instruction is not present in the dictionary, else it is
- skipped.
-
-
- """
- opcodes_dir = os.path.dirname(os.path.realpath(__file__))
- instr_dict = {}
-
- # file_names contains all files to be parsed in the riscv-opcodes directory
- file_names = []
- for fil in file_filter:
- file_names += glob.glob(f"{opcodes_dir}/{fil}")
- file_names.sort(reverse=True)
- # first pass if for standard/regular instructions
- logging.debug("Collecting standard instructions first")
- for f in file_names:
- logging.debug(f"Parsing File: {f} for standard instructions")
- with open(f) as fp:
- lines = (line.rstrip() for line in fp) # All lines including the blank ones
- lines = list(line for line in lines if line) # Non-blank lines
- lines = list(
- line for line in lines if not line.startswith("#")
- ) # remove comment lines
-
- # go through each line of the file
- for line in lines:
- # if the an instruction needs to be imported then go to the
- # respective file and pick the line that has the instruction.
- # The variable 'line' will now point to the new line from the
- # imported file
-
- # ignore all lines starting with $import and $pseudo
- if "$import" in line or "$pseudo" in line:
- continue
- logging.debug(f" Processing line: {line}")
-
- # call process_enc_line to get the data about the current
- # instruction
- (name, single_dict) = process_enc_line(line, f)
- ext_name = os.path.basename(f)
-
- # if an instruction has already been added to the filtered
- # instruction dictionary throw an error saying the given
- # instruction is already imported and raise SystemExit
- if name in instr_dict:
- var = instr_dict[name]["extension"]
- if same_base_isa(ext_name, var):
- # disable same names on the same base ISA
- err_msg = f"instruction : {name} from "
- err_msg += f"{ext_name} is already "
- err_msg += f"added from {var} in same base ISA"
- logging.error(err_msg)
- raise SystemExit(1)
- elif instr_dict[name]["encoding"] != single_dict["encoding"]:
- # disable same names with different encodings on different base ISAs
- err_msg = f"instruction : {name} from "
- err_msg += f"{ext_name} is already "
- err_msg += f"added from {var} but each have different encodings in different base ISAs"
- logging.error(err_msg)
- raise SystemExit(1)
- instr_dict[name]["extension"].extend(single_dict["extension"])
- else:
- for key in instr_dict:
- item = instr_dict[key]
- if (
- overlaps(item["encoding"], single_dict["encoding"])
- and not extension_overlap_allowed(
- ext_name, item["extension"][0]
- )
- and not instruction_overlap_allowed(name, key)
- and same_base_isa(ext_name, item["extension"])
- ):
- # disable different names with overlapping encodings on the same base ISA
- err_msg = f"instruction : {name} in extension "
- err_msg += f"{ext_name} overlaps instruction {key} "
- err_msg += f'in extension {item["extension"]}'
- logging.error(err_msg)
- raise SystemExit(1)
-
- if name not in instr_dict:
- # update the final dict with the instruction
- instr_dict[name] = single_dict
-
- # second pass if for pseudo instructions
- logging.debug("Collecting pseudo instructions now")
- for f in file_names:
- logging.debug(f"Parsing File: {f} for pseudo_ops")
- with open(f) as fp:
- lines = (line.rstrip() for line in fp) # All lines including the blank ones
- lines = list(line for line in lines if line) # Non-blank lines
- lines = list(
- line for line in lines if not line.startswith("#")
- ) # remove comment lines
-
- # go through each line of the file
- for line in lines:
-
- # ignore all lines not starting with $pseudo
- if "$pseudo" not in line:
- continue
- logging.debug(f" Processing line: {line}")
-
- # use the regex pseudo_regex from constants.py to find the dependent
- # extension, dependent instruction, the pseudo_op in question and
- # its encoding
- (ext, orig_inst, pseudo_inst, line) = pseudo_regex.findall(line)[0]
- ext_file = f"{opcodes_dir}/{ext}"
-
- # check if the file of the dependent extension exist. Throw error if
- # it doesn't
- if not os.path.exists(ext_file):
- ext1_file = f"{opcodes_dir}/unratified/{ext}"
- if not os.path.exists(ext1_file):
- logging.error(
- f"Pseudo op {pseudo_inst} in {f} depends on {ext} which is not available"
- )
- raise SystemExit(1)
- else:
- ext_file = ext1_file
-
- # check if the dependent instruction exist in the dependent
- # extension. Else throw error.
- found = False
- for oline in open(ext_file):
- if not re.findall(f"^\\s*{orig_inst}\\s+", oline):
- continue
- else:
- found = True
- break
- if not found:
- logging.error(
- f"Orig instruction {orig_inst} not found in {ext}. Required by pseudo_op {pseudo_inst} present in {f}"
- )
- raise SystemExit(1)
-
- (name, single_dict) = process_enc_line(pseudo_inst + " " + line, f)
- # add the pseudo_op to the dictionary only if the original
- # instruction is not already in the dictionary.
- if (
- orig_inst.replace(".", "_") not in instr_dict
- or include_pseudo
- or name in include_pseudo_ops
- ):
-
- # update the final dict with the instruction
- if name not in instr_dict:
- instr_dict[name] = single_dict
- logging.debug(f" including pseudo_ops:{name}")
- else:
- if single_dict["match"] != instr_dict[name]["match"]:
- instr_dict[name + "_pseudo"] = single_dict
-
- # if a pseudo instruction has already been added to the filtered
- # instruction dictionary but the extension is not in the current
- # list, add it
- else:
- ext_name = single_dict["extension"]
-
- if (ext_name not in instr_dict[name]["extension"]) & (
- name + "_pseudo" not in instr_dict
- ):
- instr_dict[name]["extension"].extend(ext_name)
- else:
- logging.debug(
- f" Skipping pseudo_op {pseudo_inst} since original instruction {orig_inst} already selected in list"
- )
-
- # third pass if for imported instructions
- logging.debug("Collecting imported instructions")
- for f in file_names:
- logging.debug(f"Parsing File: {f} for imported ops")
- with open(f) as fp:
- lines = (line.rstrip() for line in fp) # All lines including the blank ones
- lines = list(line for line in lines if line) # Non-blank lines
- lines = list(
- line for line in lines if not line.startswith("#")
- ) # remove comment lines
-
- # go through each line of the file
- for line in lines:
- # if the an instruction needs to be imported then go to the
- # respective file and pick the line that has the instruction.
- # The variable 'line' will now point to the new line from the
- # imported file
-
- # ignore all lines starting with $import and $pseudo
- if "$import" not in line:
- continue
- logging.debug(f" Processing line: {line}")
-
- (import_ext, reg_instr) = imported_regex.findall(line)[0]
- import_ext_file = f"{opcodes_dir}/{import_ext}"
-
- # check if the file of the dependent extension exist. Throw error if
- # it doesn't
- if not os.path.exists(import_ext_file):
- ext1_file = f"{opcodes_dir}/unratified/{import_ext}"
- if not os.path.exists(ext1_file):
- logging.error(
- f"Instruction {reg_instr} in {f} cannot be imported from {import_ext}"
- )
- raise SystemExit(1)
- else:
- ext_file = ext1_file
- else:
- ext_file = import_ext_file
-
- # check if the dependent instruction exist in the dependent
- # extension. Else throw error.
- found = False
- for oline in open(ext_file):
- if not re.findall(f"^\\s*{reg_instr}\\s+", oline):
- continue
- else:
- found = True
- break
- if not found:
- logging.error(
- f"imported instruction {reg_instr} not found in {ext_file}. Required by {line} present in {f}"
- )
- logging.error(f"Note: you cannot import pseudo/imported ops.")
- raise SystemExit(1)
-
- # call process_enc_line to get the data about the current
- # instruction
- (name, single_dict) = process_enc_line(oline, f)
-
- # if an instruction has already been added to the filtered
- # instruction dictionary throw an error saying the given
- # instruction is already imported and raise SystemExit
- if name in instr_dict:
- var = instr_dict[name]["extension"]
- if instr_dict[name]["encoding"] != single_dict["encoding"]:
- err_msg = f"imported instruction : {name} in "
- err_msg += f"{os.path.basename(f)} is already "
- err_msg += f"added from {var} but each have different encodings for the same instruction"
- logging.error(err_msg)
- raise SystemExit(1)
- instr_dict[name]["extension"].extend(single_dict["extension"])
- else:
- # update the final dict with the instruction
- instr_dict[name] = single_dict
- return instr_dict
-
-
-def make_priv_latex_table():
- latex_file = open("priv-instr-table.tex", "w")
- type_list = ["R-type", "I-type"]
- system_instr = ["_h", "_s", "_system", "_svinval", "64_h", "_svinval_h"]
- dataset_list = [(system_instr, "Trap-Return Instructions", ["sret", "mret"], False)]
- dataset_list.append(
- (system_instr, "Interrupt-Management Instructions", ["wfi"], False)
- )
- dataset_list.append(
- (
- system_instr,
- "Supervisor Memory-Management Instructions",
- ["sfence_vma"],
- False,
- )
- )
- dataset_list.append(
- (
- system_instr,
- "Hypervisor Memory-Management Instructions",
- ["hfence_vvma", "hfence_gvma"],
- False,
- )
- )
- dataset_list.append(
- (
- system_instr,
- "Hypervisor Virtual-Machine Load and Store Instructions",
- [
- "hlv_b",
- "hlv_bu",
- "hlv_h",
- "hlv_hu",
- "hlv_w",
- "hlvx_hu",
- "hlvx_wu",
- "hsv_b",
- "hsv_h",
- "hsv_w",
- ],
- False,
- )
- )
- dataset_list.append(
- (
- system_instr,
- "Hypervisor Virtual-Machine Load and Store Instructions, RV64 only",
- ["hlv_wu", "hlv_d", "hsv_d"],
- False,
- )
- )
- dataset_list.append(
- (
- system_instr,
- "Svinval Memory-Management Instructions",
- [
- "sinval_vma",
- "sfence_w_inval",
- "sfence_inval_ir",
- "hinval_vvma",
- "hinval_gvma",
- ],
- False,
- )
- )
- caption = "\\caption{RISC-V Privileged Instructions}"
- make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
- latex_file.close()
-
-
-def make_latex_table():
- """
- This function is mean to create the instr-table.tex that is meant to be used
- by the riscv-isa-manual. This function basically creates a single latext
- file of multiple tables with each table limited to a single page. Only the
- last table is assigned a latex-caption.
-
- For each table we assign a type-list which capture the different instruction
- types (R, I, B, etc) that will be required for the table. Then we select the
- list of extensions ('_i, '32_i', etc) whose instructions are required to
- populate the table. For each extension or collection of extension we can
- assign Title, such that in the end they appear as subheadings within
- the table (note these are inlined headings and not captions of the table).
-
- All of the above information is collected/created and sent to
- make_ext_latex_table function to dump out the latex contents into a file.
-
- The last table only has to be given a caption - as per the policy of the
- riscv-isa-manual.
- """
- # open the file and use it as a pointer for all further dumps
- latex_file = open("instr-table.tex", "w")
-
- # create the rv32i table first. Here we set the caption to empty. We use the
- # files rv_i and rv32_i to capture instructions relevant for rv32i
- # configuration. The dataset is a list of 4-element tuples :
- # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions
- # is empty then it indicates that all instructions of the all the extensions
- # in list_of_extensions need to be dumped. If not empty, then only the
- # instructions listed in list_of_instructions will be dumped into latex.
- caption = ""
- type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"]
- dataset_list = [(["_i", "32_i"], "RV32I Base Instruction Set", [], False)]
- dataset_list.append((["_i"], "", ["fence_tso", "pause"], True))
- make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
- type_list = ["R-type", "I-type", "S-type"]
- dataset_list = [
- (["64_i"], "RV64I Base Instruction Set (in addition to RV32I)", [], False)
- ]
- dataset_list.append(
- (["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False)
- )
- dataset_list.append((["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False))
- dataset_list.append((["_m", "32_m"], "RV32M Standard Extension", [], False))
- dataset_list.append(
- (["64_m"], "RV64M Standard Extension (in addition to RV32M)", [], False)
- )
- make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
- type_list = ["R-type"]
- dataset_list = [(["_a"], "RV32A Standard Extension", [], False)]
- dataset_list.append(
- (["64_a"], "RV64A Standard Extension (in addition to RV32A)", [], False)
- )
- make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
- type_list = ["R-type", "R4-type", "I-type", "S-type"]
- dataset_list = [(["_f"], "RV32F Standard Extension", [], False)]
- dataset_list.append(
- (["64_f"], "RV64F Standard Extension (in addition to RV32F)", [], False)
- )
- make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
- type_list = ["R-type", "R4-type", "I-type", "S-type"]
- dataset_list = [(["_d"], "RV32D Standard Extension", [], False)]
- dataset_list.append(
- (["64_d"], "RV64D Standard Extension (in addition to RV32D)", [], False)
- )
- make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
- type_list = ["R-type", "R4-type", "I-type", "S-type"]
- dataset_list = [(["_q"], "RV32Q Standard Extension", [], False)]
- dataset_list.append(
- (["64_q"], "RV64Q Standard Extension (in addition to RV32Q)", [], False)
- )
- make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
- caption = "\\caption{Instruction listing for RISC-V}"
- type_list = ["R-type", "R4-type", "I-type", "S-type"]
- dataset_list = [
- (["_zfh", "_d_zfh", "_q_zfh"], "RV32Zfh Standard Extension", [], False)
- ]
- dataset_list.append(
- (["64_zfh"], "RV64Zfh Standard Extension (in addition to RV32Zfh)", [], False)
- )
- make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)
-
- ## The following is demo to show that Compressed instructions can also be
- # dumped in the same manner as above
-
- # type_list = ['']
- # dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])]
- # dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', []))
- # make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption)
-
- latex_file.close()
-
-
-def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption):
- """
- For a given collection of extensions this function dumps out a complete
- latex table which includes the encodings of the instructions.
-
- The ilen input indicates the length of the instruction for which the table
- is created.
-
- The caption input is used to create the latex-table caption.
-
- The type_list input is a list of instruction types (R, I, B, etc) that are
- treated as header for each table. Each table will have its own requirements
- and type_list must include all the instruction-types that the table needs.
- Note, all elements of this list must be present in the latex_inst_type
- dictionary defined in constants.py
-
- The latex_file is a file pointer to which the latex-table will dumped into
-
- The dataset is a list of 3-element tuples containing:
- (list_of_extensions, title, list_of_instructions)
- The list_of_extensions must contain all the set of extensions whose
- instructions must be populated under a given title. If list_of_instructions
- is not empty, then only those instructions mentioned in list_of_instructions
- present in the extension will be dumped into the latex-table, other
- instructions will be ignored.
-
- Once the above inputs are received then function first creates table entries
- for the instruction types. To simplify things, we maintain a dictionary
- called latex_inst_type in constants.py which is created in the same way the
- instruction dictionary is created. This allows us to re-use the same logic
- to create the instruction types table as well
-
- Once the header is created, we then parse through every entry in the
- dataset. For each list dataset entry we use the create_inst_dict function to
- create an exhaustive list of instructions associated with the respective
- collection of the extension of that dataset. Then we apply the instruction
- filter, if any, indicated by the list_of_instructions of that dataset.
- Thereon, for each instruction we create a latex table entry.
-
- Latex table specification for ilen sized instructions:
- Each table is created with ilen+1 columns - ilen columns for each bit of the
- instruction and one column to hold the name of the instruction.
-
- For each argument of an instruction we use the arg_lut from constants.py
- to identify its position in the encoding, and thus create a multicolumn
- entry with the name of the argument as the data. For hardcoded bits, we
- do the same where we capture a string of continuous 1s and 0s, identify
- the position and assign the same string as the data of the
- multicolumn entry in the table.
-
- """
- column_size = "".join(["p{0.002in}"] * (ilen + 1))
-
- type_entries = (
- """
- \\multicolumn{3}{l}{31} &
- \\multicolumn{2}{r}{27} &
- \\multicolumn{1}{c}{26} &
- \\multicolumn{1}{r}{25} &
- \\multicolumn{3}{l}{24} &
- \\multicolumn{2}{r}{20} &
- \\multicolumn{3}{l}{19} &
- \\multicolumn{2}{r}{15} &
- \\multicolumn{2}{l}{14} &
- \\multicolumn{1}{r}{12} &
- \\multicolumn{4}{l}{11} &
- \\multicolumn{1}{r}{7} &
- \\multicolumn{6}{l}{6} &
- \\multicolumn{1}{r}{0} \\\\
- \\cline{2-33}\n&\n\n
-"""
- if ilen == 32
- else """
- \\multicolumn{1}{c}{15} &
- \\multicolumn{1}{c}{14} &
- \\multicolumn{1}{c}{13} &
- \\multicolumn{1}{c}{12} &
- \\multicolumn{1}{c}{11} &
- \\multicolumn{1}{c}{10} &
- \\multicolumn{1}{c}{9} &
- \\multicolumn{1}{c}{8} &
- \\multicolumn{1}{c}{7} &
- \\multicolumn{1}{c}{6} &
- \\multicolumn{1}{c}{5} &
- \\multicolumn{1}{c}{4} &
- \\multicolumn{1}{c}{3} &
- \\multicolumn{1}{c}{2} &
- \\multicolumn{1}{c}{1} &
- \\multicolumn{1}{c}{0} \\\\
- \\cline{2-17}\n&\n\n
-"""
- )
-
- # depending on the type_list input we create a subset dictionary of
- # latex_inst_type dictionary present in constants.py
- type_dict = {
- key: value for key, value in latex_inst_type.items() if key in type_list
- }
-
- # iterate ovr each instruction type and create a table entry
- for t in type_dict:
- fields = []
-
- # first capture all "arguments" of the type (funct3, funct7, rd, etc)
- # and capture their positions using arg_lut.
- for f in type_dict[t]["variable_fields"]:
- (msb, lsb) = arg_lut[f]
- name = f if f not in latex_mapping else latex_mapping[f]
- fields.append((msb, lsb, name))
-
- # iterate through the 32 bits, starting from the msb, and assign
- # argument names to the relevant portions of the instructions. This
- # information is stored as a 3-element tuple containing the msb, lsb
- # position of the arugment and the name of the argument.
- msb = ilen - 1
- y = ""
- for r in range(0, ilen):
- if y != "":
- fields.append((msb, ilen - 1 - r + 1, y))
- y = ""
- msb = ilen - 1 - r - 1
- if r == 31:
- if y != "":
- fields.append((msb, 0, y))
- y = ""
-
- # sort the arguments in decreasing order of msb position
- fields.sort(key=lambda y: y[0], reverse=True)
-
- # for each argument/string of 1s or 0s, create a multicolumn latex table
- # entry
- entry = ""
- for r in range(len(fields)):
- (msb, lsb, name) = fields[r]
- if r == len(fields) - 1:
- entry += (
- f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n"
- )
- elif r == 0:
- entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n"
- else:
- entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n"
- entry += f"\\cline{{2-{ilen+1}}}\n&\n\n"
- type_entries += entry
-
- # for each entry in the dataset create a table
- content = ""
- for ext_list, title, filter_list, include_pseudo in dataset:
- instr_dict = {}
-
- # for all extensions list in ext_list, create a dictionary of
- # instructions associated with those extensions.
- for e in ext_list:
- instr_dict.update(create_inst_dict(["rv" + e], include_pseudo))
-
- # if filter_list is not empty then use that as the official set of
- # instructions that need to be dumped into the latex table
- inst_list = list(instr_dict.keys()) if not filter_list else filter_list
-
- # for each instruction create an latex table entry just like how we did
- # above with the instruction-type table.
- instr_entries = ""
- for inst in inst_list:
- if inst not in instr_dict:
- logging.error(
- f"in make_ext_latex_table: Instruction: {inst} not found in instr_dict"
- )
- raise SystemExit(1)
- fields = []
-
- # only if the argument is available in arg_lut we consume it, else
- # throw error.
- for f in instr_dict[inst]["variable_fields"]:
- if f not in arg_lut:
- logging.error(
- f"Found variable {f} in instruction {inst} whose mapping is not available"
- )
- raise SystemExit(1)
- (msb, lsb) = arg_lut[f]
- name = (
- f.replace("_", ".") if f not in latex_mapping else latex_mapping[f]
- )
- fields.append((msb, lsb, name))
-
- msb = ilen - 1
- y = ""
- if ilen == 16:
- encoding = instr_dict[inst]["encoding"][16:]
- else:
- encoding = instr_dict[inst]["encoding"]
- for r in range(0, ilen):
- x = encoding[r]
- if ((msb, ilen - 1 - r + 1)) in latex_fixed_fields:
- fields.append((msb, ilen - 1 - r + 1, y))
- msb = ilen - 1 - r
- y = ""
- if x == "-":
- if y != "":
- fields.append((msb, ilen - 1 - r + 1, y))
- y = ""
- msb = ilen - 1 - r - 1
- else:
- y += str(x)
- if r == ilen - 1:
- if y != "":
- fields.append((msb, 0, y))
- y = ""
-
- fields.sort(key=lambda y: y[0], reverse=True)
- entry = ""
- for r in range(len(fields)):
- (msb, lsb, name) = fields[r]
- if r == len(fields) - 1:
- entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n'
- elif r == 0:
- entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n"
- else:
- entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n"
- entry += f"\\cline{{2-{ilen+1}}}\n&\n\n"
- instr_entries += entry
-
- # once an entry of the dataset is completed we create the whole table
- # with the title of that dataset as sub-heading (sort-of)
- if title != "":
- content += f"""
-
-\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\
-\\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\
-\\cline{{2-{ilen+1}}}
-
- &
-{instr_entries}
-"""
- else:
- content += f"""
-{instr_entries}
-"""
-
- header = f"""
-\\newpage
-
-\\begin{{table}}[p]
-\\begin{{small}}
-\\begin{{center}}
- \\begin{{tabular}} {{{column_size}l}}
- {" ".join(['&']*ilen)} \\\\
-
- &
-{type_entries}
-"""
- endtable = f"""
-
-\\end{{tabular}}
-\\end{{center}}
-\\end{{small}}
-{caption}
-\\end{{table}}
-"""
- # dump the contents and return
- latex_file.write(header + content + endtable)
-
-
-def instr_dict_2_extensions(instr_dict):
- extensions = []
- for item in instr_dict.values():
- if item["extension"][0] not in extensions:
- extensions.append(item["extension"][0])
- return extensions
-
-
-def make_chisel(instr_dict, spinal_hdl=False):
-
- chisel_names = ""
- cause_names_str = ""
- csr_names_str = ""
- for i in instr_dict:
- if spinal_hdl:
- chisel_names += f' def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n'
- # else:
- # chisel_names += f' def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n'
- if not spinal_hdl:
- extensions = instr_dict_2_extensions(instr_dict)
- for e in extensions:
- e_instrs = filter(lambda i: instr_dict[i]["extension"][0] == e, instr_dict)
- if "rv64_" in e:
- e_format = e.replace("rv64_", "").upper() + "64"
- elif "rv32_" in e:
- e_format = e.replace("rv32_", "").upper() + "32"
- elif "rv_" in e:
- e_format = e.replace("rv_", "").upper()
- else:
- e_format = e.upper
- chisel_names += f' val {e_format+"Type"} = Map(\n'
- for instr in e_instrs:
- tmp_instr_name = '"' + instr.upper().replace(".", "_") + '"'
- chisel_names += f' {tmp_instr_name:<18s} -> BitPat("b{instr_dict[instr]["encoding"].replace("-","?")}"),\n'
- chisel_names += f" )\n"
-
- for num, name in causes:
- cause_names_str += f' val {name.lower().replace(" ","_")} = {hex(num)}\n'
- cause_names_str += """ val all = {
- val res = collection.mutable.ArrayBuffer[Int]()
-"""
- for num, name in causes:
- cause_names_str += f' res += {name.lower().replace(" ","_")}\n'
- cause_names_str += """ res.toArray
- }"""
-
- for num, name in csrs + csrs32:
- csr_names_str += f" val {name} = {hex(num)}\n"
- csr_names_str += """ val all = {
- val res = collection.mutable.ArrayBuffer[Int]()
-"""
- for num, name in csrs:
- csr_names_str += f""" res += {name}\n"""
- csr_names_str += """ res.toArray
- }
- val all32 = {
- val res = collection.mutable.ArrayBuffer(all:_*)
-"""
- for num, name in csrs32:
- csr_names_str += f""" res += {name}\n"""
- csr_names_str += """ res.toArray
- }"""
-
- if spinal_hdl:
- chisel_file = open("inst.spinalhdl", "w")
- else:
- chisel_file = open("inst.chisel", "w")
- chisel_file.write(
- f"""
-/* Automatically generated by parse_opcodes */
-object Instructions {{
-{chisel_names}
-}}
-object Causes {{
-{cause_names_str}
-}}
-object CSRs {{
-{csr_names_str}
-}}
-"""
- )
- chisel_file.close()
-
-
-def make_rust(instr_dict):
- mask_match_str = ""
- for i in instr_dict:
- mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n'
- mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n'
- for num, name in csrs + csrs32:
- mask_match_str += f"const CSR_{name.upper()}: u16 = {hex(num)};\n"
- for num, name in causes:
- mask_match_str += (
- f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n'
- )
- rust_file = open("inst.rs", "w")
- rust_file.write(
- f"""
-/* Automatically generated by parse_opcodes */
-{mask_match_str}
-"""
- )
- rust_file.close()
-
-
-def make_sverilog(instr_dict):
- names_str = ""
- for i in instr_dict:
- names_str += f" localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n"
- names_str += " /* CSR Addresses */\n"
- for num, name in csrs + csrs32:
- names_str += (
- f" localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n"
- )
-
- sverilog_file = open("inst.sverilog", "w")
- sverilog_file.write(
- f"""
-/* Automatically generated by parse_opcodes */
-package riscv_instr;
-{names_str}
-endpackage
-"""
- )
- sverilog_file.close()
-
-
-def make_c(instr_dict):
- mask_match_str = ""
- declare_insn_str = ""
- for i in instr_dict:
- mask_match_str += (
- f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n'
- )
- mask_match_str += (
- f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n'
- )
- declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n'
-
- csr_names_str = ""
- declare_csr_str = ""
- for num, name in csrs + csrs32:
- csr_names_str += f"#define CSR_{name.upper()} {hex(num)}\n"
- declare_csr_str += f"DECLARE_CSR({name}, CSR_{name.upper()})\n"
-
- causes_str = ""
- declare_cause_str = ""
- for num, name in causes:
- causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n"
- declare_cause_str += (
- f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n"
- )
-
- arg_str = ""
- for name, rng in arg_lut.items():
- sanitized_name = name.replace(" ", "_").replace("=", "_eq_")
- begin = rng[1]
- end = rng[0]
- mask = ((1 << (end - begin + 1)) - 1) << begin
- arg_str += f"#define INSN_FIELD_{sanitized_name.upper()} {hex(mask)}\n"
-
- with open(f"{os.path.dirname(__file__)}/encoding.h", "r") as file:
- enc_header = file.read()
-
- commit = os.popen('git log -1 --format="format:%h"').read()
-
- # Generate the output as a string
- output_str = f"""/* SPDX-License-Identifier: BSD-3-Clause */
-
-/* Copyright (c) 2023 RISC-V International */
-
-/*
- * This file is auto-generated by running 'make' in
- * https://github.com/riscv/riscv-opcodes ({commit})
- */
-
-{enc_header}
-/* Automatically generated by parse_opcodes. */
-#ifndef RISCV_ENCODING_H
-#define RISCV_ENCODING_H
-{mask_match_str}
-{csr_names_str}
-{causes_str}
-{arg_str}#endif
-#ifdef DECLARE_INSN
-{declare_insn_str}#endif
-#ifdef DECLARE_CSR
-{declare_csr_str}#endif
-#ifdef DECLARE_CAUSE
-{declare_cause_str}#endif
-"""
-
- # Write the modified output to the file
- with open("encoding.out.h", "w") as enc_file:
- enc_file.write(output_str)
-
-
-def make_go(instr_dict):
-
- args = " ".join(sys.argv)
- prelude = f"""// Code generated by {args}; DO NOT EDIT."""
-
- prelude += """
-package riscv
-
-import "cmd/internal/obj"
-
-type inst struct {
- opcode uint32
- funct3 uint32
- rs1 uint32
- rs2 uint32
- csr int64
- funct7 uint32
-}
-
-func encode(a obj.As) *inst {
- switch a {
-"""
-
- endoffile = """ }
- return nil
-}
-"""
-
- instr_str = ""
- for i in instr_dict:
- enc_match = int(instr_dict[i]["match"], 0)
- opcode = (enc_match >> 0) & ((1 << 7) - 1)
- funct3 = (enc_match >> 12) & ((1 << 3) - 1)
- rs1 = (enc_match >> 15) & ((1 << 5) - 1)
- rs2 = (enc_match >> 20) & ((1 << 5) - 1)
- csr = (enc_match >> 20) & ((1 << 12) - 1)
- funct7 = (enc_match >> 25) & ((1 << 7) - 1)
- instr_str += f""" case A{i.upper().replace("_","")}:
- return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }}
-"""
-
- with open("inst.go", "w") as file:
- file.write(prelude)
- file.write(instr_str)
- file.write(endoffile)
-
- try:
- import subprocess
-
- subprocess.run(["go", "fmt", "inst.go"])
- except:
- pass
-
-
-def signed(value, width):
- if 0 <= value < (1 << (width - 1)):
- return value
- else:
- return value - (1 << width)
+LOG_FORMAT = "%(levelname)s:: %(message)s"
+LOG_LEVEL = logging.INFO
+pretty_printer = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)
if __name__ == "__main__":
print(f"Running with args : {sys.argv}")
extensions = sys.argv[1:]
- for i in [
+
+ targets = {
"-c",
"-chisel",
"-go",
@@ -1208,13 +35,12 @@ if __name__ == "__main__":
"-rust",
"-spinalhdl",
"-sverilog",
- ]:
- if i in extensions:
- extensions.remove(i)
+ }
+
+ extensions = [ext for ext in extensions if ext not in targets]
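+    # Illustrative example: an invocation such as `./parse.py -c -sverilog 'rv*_i'`
+    # would leave extensions == ["rv*_i"] after this filtering step.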
print(f"Extensions selected : {extensions}")
include_pseudo = False
-
if "-pseudo" in sys.argv[1:]:
include_pseudo = True
diff --git a/rust_utils.py b/rust_utils.py
new file mode 100644
index 0000000..19a47b9
--- /dev/null
+++ b/rust_utils.py
@@ -0,0 +1,39 @@
+import collections
+import copy
+import glob
+import logging
+import os
+import pprint
+import re
+import sys
+
+import yaml
+
+from constants import *
+
+# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field
+from shared_utils import *
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_rust(instr_dict):
+ mask_match_str = ""
+ for i in instr_dict:
+ mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n'
+ mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n'
+ for num, name in csrs + csrs32:
+ mask_match_str += f"const CSR_{name.upper()}: u16 = {hex(num)};\n"
+ for num, name in causes:
+ mask_match_str += (
+ f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n'
+ )
+    with open("inst.rs", "w") as rust_file:
+        rust_file.write(
+            f"""
+/* Automatically generated by parse_opcodes */
+{mask_match_str}
+"""
+        )
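+
+# The generated inst.rs is a flat list of constants; for the base ISA it would
+# contain lines such as (illustrative, not verbatim output):
+#   const MATCH_ADD: u32 = 0x33;
+#   const MASK_ADD: u32 = 0xfe00707f;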
diff --git a/rv64_zcb b/rv64_zcb
index c47d011..8ce4429 100644
--- a/rv64_zcb
+++ b/rv64_zcb
@@ -1,3 +1,3 @@
c.zext.w rd_rs1_p 1..0=1 15..13=4 12..10=7 6..5=3 4..2=4
-$pseudo_op rv64_c::c.addiw c.sext.w rd_rs1_n0 15..13=1 12=0 6..2=0 1..0=1
+$pseudo_op rv64_c::c.addiw c.sext.w rd_rs1_n0 15..13=1 12=0 6..2=0 1..0=1
diff --git a/shared_utils.py b/shared_utils.py
new file mode 100644
index 0000000..5c92515
--- /dev/null
+++ b/shared_utils.py
@@ -0,0 +1,568 @@
+#!/usr/bin/env python3
+import copy
+import glob
+import logging
+import os
+import pprint
+import re
+from itertools import chain
+
+from constants import *
+
+LOG_FORMAT = "%(levelname)s:: %(message)s"
+LOG_LEVEL = logging.INFO
+
+pretty_printer = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT)
+
+
+# Initialize encoding to 32-bit '-' values
+def initialize_encoding(bits=32):
+ """Initialize encoding with '-' to represent don't care bits."""
+ return ["-"] * bits
+
+
+# Validate bit range and value
+def validate_bit_range(msb, lsb, entry_value, line):
+ """Validate the bit range and entry value."""
+ if msb < lsb:
+ logging.error(
+ f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in its encoding'
+ )
+ raise SystemExit(1)
+
+ if entry_value >= (1 << (msb - lsb + 1)):
+ logging.error(
+            f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb + 1}'
+ )
+ raise SystemExit(1)
+
+
+# Split the instruction line into name and remaining part
+def parse_instruction_line(line):
+ """Parse the instruction name and the remaining encoding details."""
+ name, remaining = line.split(" ", 1)
+ name = name.replace(".", "_") # Replace dots for compatibility
+ remaining = remaining.lstrip() # Remove leading whitespace
+ return name, remaining
+
+
+# Verify Overlapping Bits
+def check_overlapping_bits(encoding, ind, line):
+ """Check for overlapping bits in the encoding."""
+ if encoding[31 - ind] != "-":
+ logging.error(
+            f'{line.split(" ")[0]:<10} has bit {ind} overlapping in its opcodes'
+ )
+ raise SystemExit(1)
+
+
+# Update encoding for fixed ranges
+def update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line):
+ """
+ Update encoding bits for a given bit range.
+ Checks for overlapping bits and assigns the value accordingly.
+ """
+ for ind in range(lsb, msb + 1):
+ check_overlapping_bits(encoding, ind, line)
+ bit = str((entry_value >> (ind - lsb)) & 1)
+ encoding[31 - ind] = bit
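+
+# Worked example (illustrative, not executed here): a range token such as
+# 6..2=0x0D writes the five bits 0b01101 into instruction bits 2..6, i.e. into
+# indices 31-6 .. 31-2 of the MSB-first encoding list:
+#
+#     >>> enc = initialize_encoding()
+#     >>> update_encoding_for_fixed_range(enc, 6, 2, 0x0D, "lui ...")
+#     >>> "".join(enc[25:30])
+#     '01101'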
+
+
+# Process fixed bit patterns
+def process_fixed_ranges(remaining, encoding, line):
+ """Process fixed bit ranges in the encoding."""
+ for s2, s1, entry in fixed_ranges.findall(remaining):
+ msb, lsb, entry_value = int(s2), int(s1), int(entry, 0)
+
+ # Validate bit range and entry value
+ validate_bit_range(msb, lsb, entry_value, line)
+ update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line)
+
+ return fixed_ranges.sub(" ", remaining)
+
+
+# Process single bit assignments
+def process_single_fixed(remaining, encoding, line):
+ """Process single fixed assignments in the encoding."""
+    for lsb, value, _ in single_fixed.findall(remaining):
+ lsb = int(lsb, 0)
+ value = int(value, 0)
+
+ check_overlapping_bits(encoding, lsb, line)
+ encoding[31 - lsb] = str(value)
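+
+# For instance (illustrative): a single-bit token such as "12=0" pins
+# instruction bit 12, setting encoding[31 - 12] to "0" after the overlap check.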
+
+
+# Main function to check argument look-up table
+def check_arg_lut(args, encoding_args, name):
+ """Check if arguments are present in arg_lut."""
+ for arg in args:
+ if arg not in arg_lut:
+ arg = handle_arg_lut_mapping(arg, name)
+ msb, lsb = arg_lut[arg]
+ update_encoding_args(encoding_args, arg, msb, lsb)
+
+
+# Handle missing argument mappings
+def handle_arg_lut_mapping(arg, name):
+ """Handle cases where an argument needs to be mapped to an existing one."""
+ parts = arg.split("=")
+ if len(parts) == 2:
+ existing_arg, new_arg = parts
+ if existing_arg in arg_lut:
+ arg_lut[arg] = arg_lut[existing_arg]
+ else:
+ logging.error(
+ f" Found field {existing_arg} in variable {arg} in instruction {name} "
+ f"whose mapping in arg_lut does not exist"
+ )
+ raise SystemExit(1)
+ else:
+ logging.error(
+ f" Found variable {arg} in instruction {name} "
+ f"whose mapping in arg_lut does not exist"
+ )
+ raise SystemExit(1)
+ return arg
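+
+# Hypothetical illustration ("foo" is not a real field name): an argument
+# spelled "rs1=foo" would be registered with rs1's bit range, i.e.
+# arg_lut["rs1=foo"] = arg_lut["rs1"], since "rs1" already exists in arg_lut.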
+
+
+# Update encoding args with variables
+def update_encoding_args(encoding_args, arg, msb, lsb):
+ """Update encoding arguments and ensure no overlapping."""
+ for ind in range(lsb, msb + 1):
+ check_overlapping_bits(encoding_args, ind, arg)
+ encoding_args[31 - ind] = arg
+
+
+# Compute match and mask
+def convert_encoding_to_match_mask(encoding):
+ """Convert the encoding list to match and mask strings."""
+ match = "".join(encoding).replace("-", "0")
+ mask = "".join(encoding).replace("0", "1").replace("-", "0")
+ return hex(int(match, 2)), hex(int(mask, 2))
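+
+# Worked example (illustrative doctest; shortened to 4 bits for brevity, real
+# encodings are 32 entries long):
+#
+#     >>> convert_encoding_to_match_mask(list("1-0-"))
+#     ('0x8', '0xa')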
+
+
+# Processing main function for a line in the encoding file
+def process_enc_line(line, ext):
+ """
+ This function processes each line of the encoding files (rv*). As part of
+ the processing, the function ensures that the encoding is legal through the
+ following checks::
+ - there is no over specification (same bits assigned different values)
+ - there is no under specification (some bits not assigned values)
+ - bit ranges are in the format hi..lo=val where hi > lo
+ - value assigned is representable in the bit range
+ - also checks that the mapping of arguments of an instruction exists in
+ arg_lut.
+ If the above checks pass, then the function returns a tuple of the name and
+ a dictionary containing basic information of the instruction which includes:
+ - variables: list of arguments used by the instruction whose mapping
+ exists in the arg_lut dictionary
+ - encoding: this contains the 32-bit encoding of the instruction where
+ '-' is used to represent position of arguments and 1/0 is used to
+ reprsent the static encoding of the bits
+ - extension: this field contains the rv* filename from which this
+ instruction was included
+ - match: hex value representing the bits that need to match to detect
+ this instruction
+ - mask: hex value representin the bits that need to be masked to extract
+ the value required for matching.
+ """
+ encoding = initialize_encoding()
+
+ # Parse the instruction line
+ name, remaining = parse_instruction_line(line)
+
+ # Process fixed ranges
+ remaining = process_fixed_ranges(remaining, encoding, line)
+
+ # Process single fixed assignments
+ process_single_fixed(remaining, encoding, line)
+
+ # Convert the list of encodings into a match and mask
+ match, mask = convert_encoding_to_match_mask(encoding)
+
+ # Check arguments in arg_lut
+ args = single_fixed.sub(" ", remaining).split()
+ encoding_args = encoding.copy()
+
+ check_arg_lut(args, encoding_args, name)
+
+ # Return single_dict
+ return name, {
+ "encoding": "".join(encoding),
+ "variable_fields": args,
+ "extension": [os.path.basename(ext)],
+ "match": match,
+ "mask": mask,
+ }
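+
+# As a concrete illustration, the canonical rv_i line
+# "lui rd imm20 6..2=0x0D 1..0=3" would come back roughly as
+# ("lui", {"encoding": "-" * 25 + "0110111", "variable_fields": ["rd", "imm20"],
+#          "extension": ["rv_i"], "match": "0x37", "mask": "0x7f"}).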
+
+
+# Extract ISA Type
+def extract_isa_type(ext_name):
+ """Extracts the ISA type from the extension name."""
+ return ext_name.split("_")[0]
+
+
+# Verify the types for RV*
+def is_rv_variant(type1, type2):
+ """Checks if the types are RV variants (rv32/rv64)."""
+ return (type2 == "rv" and type1 in {"rv32", "rv64"}) or (
+ type1 == "rv" and type2 in {"rv32", "rv64"}
+ )
+
+
+# Check for same base ISA
+def has_same_base_isa(type1, type2):
+ """Determines if the two ISA types share the same base."""
+ return type1 == type2 or is_rv_variant(type1, type2)
+
+
+# Compare the base ISA type of a given extension name against a list of extension names
+def same_base_isa(ext_name, ext_name_list):
+ """Checks if the base ISA type of ext_name matches any in ext_name_list."""
+ type1 = extract_isa_type(ext_name)
+ return any(has_same_base_isa(type1, extract_isa_type(ext)) for ext in ext_name_list)
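+
+# For example: same_base_isa("rv32_i", ["rv_f"]) is True, since rv32 and rv
+# share a base, while same_base_isa("rv32_i", ["rv64_m"]) is False.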
+
+
+# Pad two strings to equal length
+def pad_to_equal_length(str1, str2, pad_char="-"):
+ """Pads two strings to equal length using the given padding character."""
+ max_len = max(len(str1), len(str2))
+ return str1.rjust(max_len, pad_char), str2.rjust(max_len, pad_char)
+
+
+# Check compatibility for two characters
+def has_no_conflict(char1, char2):
+ """Checks if two characters are compatible (either matching or don't-care)."""
+ return char1 == "-" or char2 == "-" or char1 == char2
+
+
+# Conflict check between two encoded strings
+def overlaps(x, y):
+ """Checks if two encoded strings overlap without conflict."""
+ x, y = pad_to_equal_length(x, y)
+ return all(has_no_conflict(x[i], y[i]) for i in range(len(x)))
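+
+# Illustration: overlaps("1--0", "10-0") is True, since every position is
+# either equal or a don't-care, while overlaps("1--0", "0--0") is False
+# because the leftmost bits conflict.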
+
+
+# Check presence of keys in dictionary.
+def is_in_nested_dict(a, key1, key2):
+ """Checks if key2 exists in the dictionary under key1."""
+ return key1 in a and key2 in a[key1]
+
+
+# Overlap allowance
+def overlap_allowed(a, x, y):
+ """Determines if overlap is allowed between x and y based on nested dictionary checks"""
+ return is_in_nested_dict(a, x, y) or is_in_nested_dict(a, y, x)
+
+
+# Check overlap allowance between extensions
+def extension_overlap_allowed(x, y):
+ """Checks if overlap is allowed between two extensions using the overlapping_extensions dictionary."""
+ return overlap_allowed(overlapping_extensions, x, y)
+
+
+# Check overlap allowance between instructions
+def instruction_overlap_allowed(x, y):
+ """Checks if overlap is allowed between two instructions using the overlapping_instructions dictionary."""
+ return overlap_allowed(overlapping_instructions, x, y)
+
+
+# Check 'nf' field
+def is_segmented_instruction(instruction):
+ """Checks if an instruction contains the 'nf' field."""
+ return "nf" in instruction["variable_fields"]
+
+
+# Expand 'nf' fields
+def update_with_expanded_instructions(updated_dict, key, value):
+ """Expands 'nf' fields in the instruction dictionary and updates it with new instructions."""
+ for new_key, new_value in expand_nf_field(key, value):
+ updated_dict[new_key] = new_value
+
+
+# Process instructions, expanding segmented ones and updating the dictionary
+def add_segmented_vls_insn(instr_dict):
+ """Processes instructions, expanding segmented ones and updating the dictionary."""
+    # Flatten expanded entries back into a single dictionary
+ return dict(
+ chain.from_iterable(
+ (
+ expand_nf_field(key, value)
+ if is_segmented_instruction(value)
+ else [(key, value)]
+ )
+ for key, value in instr_dict.items()
+ )
+ )
+
+
+# Expand the 'nf' field in the instruction dictionary
+def expand_nf_field(name, single_dict):
+ """Validate and prepare the instruction dictionary."""
+ validate_nf_field(single_dict, name)
+ remove_nf_field(single_dict)
+ update_mask(single_dict)
+
+ name_expand_index = name.find("e")
+
+    # Precompute the base match value and encoding prefix
+ base_match = int(single_dict["match"], 16)
+ encoding_prefix = single_dict["encoding"][3:]
+
+ expanded_instructions = [
+ create_expanded_instruction(
+ name, single_dict, nf, name_expand_index, base_match, encoding_prefix
+ )
+ for nf in range(8) # Range of 0 to 7
+ ]
+
+ return expanded_instructions
+
+
+# Validate the presence of 'nf'
+def validate_nf_field(single_dict, name):
+ """Validates the presence of 'nf' in variable fields before expansion."""
+ if "nf" not in single_dict["variable_fields"]:
+ logging.error(f"Cannot expand nf field for instruction {name}")
+ raise SystemExit(1)
+
+
+# Remove 'nf' from variable fields
+def remove_nf_field(single_dict):
+ """Removes 'nf' from variable fields in the instruction dictionary."""
+ single_dict["variable_fields"].remove("nf")
+
+
+# Update the mask to include the 'nf' field
+def update_mask(single_dict):
+ """Updates the mask to include the 'nf' field in the instruction dictionary."""
+ single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29)
+
+
+# Create an expanded instruction
+def create_expanded_instruction(
+ name, single_dict, nf, name_expand_index, base_match, encoding_prefix
+):
+ """Creates an expanded instruction based on 'nf' value."""
+ new_single_dict = copy.deepcopy(single_dict)
+
+ # Update match value in one step
+ new_single_dict["match"] = hex(base_match | (nf << 29))
+ new_single_dict["encoding"] = format(nf, "03b") + encoding_prefix
+
+ # Construct new instruction name
+ new_name = (
+ name
+ if nf == 0
+ else f"{name[:name_expand_index]}seg{nf + 1}{name[name_expand_index:]}"
+ )
+
+ return (new_name, new_single_dict)
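+
+# Sketch of the expansion (using the real "vle8_v" entry as an example):
+# nf=0 keeps the name "vle8_v", while nf=1..7 yield "vlseg2e8_v" ..
+# "vlseg8e8_v", each with nf OR'd into bits 31..29 of the match and encoding.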
+
+
+# Return a list of relevant lines from the specified file
+def read_lines(file):
+ """Reads lines from a file and returns non-blank, non-comment lines."""
+ with open(file) as fp:
+ lines = (line.rstrip() for line in fp)
+ return [line for line in lines if line and not line.startswith("#")]
+
+
+# Update the instruction dictionary
+def process_standard_instructions(lines, instr_dict, file_name):
+ """Processes standard instructions from the given lines and updates the instruction dictionary."""
+ for line in lines:
+ if "$import" in line or "$pseudo" in line:
+ continue
+ logging.debug(f"Processing line: {line}")
+ name, single_dict = process_enc_line(line, file_name)
+ ext_name = os.path.basename(file_name)
+
+ if name in instr_dict:
+ var = instr_dict[name]["extension"]
+ if same_base_isa(ext_name, var):
+ log_and_exit(
+ f"Instruction {name} from {ext_name} is already added from {var} in same base ISA"
+ )
+ elif instr_dict[name]["encoding"] != single_dict["encoding"]:
+ log_and_exit(
+ f"Instruction {name} from {ext_name} has different encodings in different base ISAs"
+ )
+
+ instr_dict[name]["extension"].extend(single_dict["extension"])
+ else:
+ for key, item in instr_dict.items():
+ if (
+ overlaps(item["encoding"], single_dict["encoding"])
+ and not extension_overlap_allowed(ext_name, item["extension"][0])
+ and not instruction_overlap_allowed(name, key)
+ and same_base_isa(ext_name, item["extension"])
+ ):
+ log_and_exit(
+ f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}'
+ )
+
+ instr_dict[name] = single_dict
+
+
+# Incorporate pseudo instructions into the instruction dictionary based on given conditions
+def process_pseudo_instructions(
+ lines, instr_dict, file_name, opcodes_dir, include_pseudo, include_pseudo_ops
+):
+ """Processes pseudo instructions from the given lines and updates the instruction dictionary."""
+ for line in lines:
+ if "$pseudo" not in line:
+ continue
+ logging.debug(f"Processing pseudo line: {line}")
+ ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0]
+ ext_file = find_extension_file(ext, opcodes_dir)
+
+ validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst)
+
+ name, single_dict = process_enc_line(f"{pseudo_inst} {line_content}", file_name)
+ if (
+ orig_inst.replace(".", "_") not in instr_dict
+ or include_pseudo
+ or name in include_pseudo_ops
+ ):
+ if name not in instr_dict:
+ instr_dict[name] = single_dict
+ logging.debug(f"Including pseudo_op: {name}")
+ else:
+ if single_dict["match"] != instr_dict[name]["match"]:
+ instr_dict[f"{name}_pseudo"] = single_dict
+ elif single_dict["extension"] not in instr_dict[name]["extension"]:
+ instr_dict[name]["extension"].extend(single_dict["extension"])
+
+
+# Integrate imported instructions into the instruction dictionary
+def process_imported_instructions(lines, instr_dict, file_name, opcodes_dir):
+ """Processes imported instructions from the given lines and updates the instruction dictionary."""
+ for line in lines:
+ if "$import" not in line:
+ continue
+ logging.debug(f"Processing imported line: {line}")
+ import_ext, reg_instr = imported_regex.findall(line)[0]
+ ext_file = find_extension_file(import_ext, opcodes_dir)
+
+ validate_instruction_in_extension(reg_instr, ext_file, file_name, line)
+
+        with open(ext_file) as fp:
+            for oline in fp:
+                if not re.findall(f"^\\s*{reg_instr}\\s+", oline):
+                    continue
+                name, single_dict = process_enc_line(oline, file_name)
+                if name in instr_dict:
+                    if instr_dict[name]["encoding"] != single_dict["encoding"]:
+                        log_and_exit(
+                            f"Imported instruction {name} from {os.path.basename(file_name)} has different encodings"
+                        )
+                    instr_dict[name]["extension"].extend(single_dict["extension"])
+                else:
+                    instr_dict[name] = single_dict
+                break
+
+
+# Locate the path of the specified extension file, checking fallback directories
+def find_extension_file(ext, opcodes_dir):
+ """Finds the extension file path, considering the unratified directory if necessary."""
+ ext_file = f"{opcodes_dir}/{ext}"
+ if not os.path.exists(ext_file):
+ ext_file = f"{opcodes_dir}/unratified/{ext}"
+ if not os.path.exists(ext_file):
+ log_and_exit(f"Extension {ext} not found.")
+ return ext_file
+
+
+# Confirm the presence of an original instruction in the corresponding extension file.
+def validate_instruction_in_extension(inst, ext_file, file_name, pseudo_inst):
+ """Validates if the original instruction exists in the dependent extension."""
+    with open(ext_file) as fp:
+        found = any(re.findall(f"^\\s*{inst}\\s+", oline) for oline in fp)
+ if not found:
+ log_and_exit(
+ f"Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}"
+ )
+
+
+# Construct a dictionary of instructions filtered by specified criteria
+def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]):
+ """Creates a dictionary of instructions based on the provided file filters."""
+
+ """
+ This function return a dictionary containing all instructions associated
+ with an extension defined by the file_filter input.
+ Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc.
+ Each node of the dictionary will correspond to an instruction which again is
+ a dictionary. The dictionary contents of each instruction includes:
+ - variables: list of arguments used by the instruction whose mapping
+ exists in the arg_lut dictionary
+ - encoding: this contains the 32-bit encoding of the instruction where
+ '-' is used to represent position of arguments and 1/0 is used to
+ reprsent the static encoding of the bits
+ - extension: this field contains the rv* filename from which this
+ instruction was included
+ - match: hex value representing the bits that need to match to detect
+ this instruction
+ - mask: hex value representin the bits that need to be masked to extract
+ the value required for matching.
+ In order to build this dictionary, the function does 2 passes over the same
+ rv<file_filter> file:
+ - First pass: extracts all standard instructions, skipping pseudo ops
+ and imported instructions. For each selected line, the `process_enc_line`
+ function is called to create the dictionary contents of the instruction.
+ Checks are performed to ensure that the same instruction is not added
+ twice to the overall dictionary.
+ - Second pass: parses only pseudo_ops. For each pseudo_op, the function:
+ - Checks if the dependent extension and instruction exist.
+ - Adds the pseudo_op to the dictionary if the dependent instruction
+ is not already present; otherwise, it is skipped.
+ """
+ opcodes_dir = os.path.dirname(os.path.realpath(__file__))
+ instr_dict = {}
+
+ file_names = [
+ file
+ for fil in file_filter
+ for file in sorted(glob.glob(f"{opcodes_dir}/{fil}"), reverse=True)
+ ]
+
+ logging.debug("Collecting standard instructions")
+ for file_name in file_names:
+ logging.debug(f"Parsing File: {file_name} for standard instructions")
+ lines = read_lines(file_name)
+ process_standard_instructions(lines, instr_dict, file_name)
+
+ logging.debug("Collecting pseudo instructions")
+ for file_name in file_names:
+ logging.debug(f"Parsing File: {file_name} for pseudo instructions")
+ lines = read_lines(file_name)
+ process_pseudo_instructions(
+ lines,
+ instr_dict,
+ file_name,
+ opcodes_dir,
+ include_pseudo,
+ include_pseudo_ops,
+ )
+
+ logging.debug("Collecting imported instructions")
+ for file_name in file_names:
+ logging.debug(f"Parsing File: {file_name} for imported instructions")
+ lines = read_lines(file_name)
+ process_imported_instructions(lines, instr_dict, file_name, opcodes_dir)
+
+ return instr_dict
+
+
+# Extracts the extensions used in an instruction dictionary
+def instr_dict_2_extensions(instr_dict):
+ return list({item["extension"][0] for item in instr_dict.values()})
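+
+# e.g. an instr_dict whose entries came from "rv_i" and "rv64_i" yields
+# ["rv_i", "rv64_i"] (in unspecified order, since a set comprehension is used).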
+
+
+# Returns signed interpretation of a value within a given width
+def signed(value, width):
+ return value if 0 <= value < (1 << (width - 1)) else value - (1 << width)
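+
+# e.g. signed(0x7FF, 12) == 2047, while signed(0x800, 12) == -2048.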
diff --git a/sverilog_utils.py b/sverilog_utils.py
new file mode 100644
index 0000000..1fe2068
--- /dev/null
+++ b/sverilog_utils.py
@@ -0,0 +1,37 @@
+import collections
+import glob
+import logging
+import os
+import pprint
+import re
+import sys
+
+import yaml
+
+# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field
+from shared_utils import *
+
+pp = pprint.PrettyPrinter(indent=2)
+logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s")
+
+
+def make_sverilog(instr_dict):
+ names_str = ""
+ for i in instr_dict:
+ names_str += f" localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n"
+ names_str += " /* CSR Addresses */\n"
+ for num, name in csrs + csrs32:
+ names_str += (
+ f" localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n"
+ )
+
+    with open("inst.sverilog", "w") as sverilog_file:
+        sverilog_file.write(
+            f"""
+/* Automatically generated by parse_opcodes */
+package riscv_instr;
+{names_str}
+endpackage
+"""
+        )
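+
+# Illustrative output: inst.sverilog would contain lines such as
+#   localparam [31:0] ADD = 32'b0000000??????????000?????0110011;
+#   localparam logic [11:0] CSR_MSTATUS = 12'h300;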
diff --git a/test.py b/test.py
index 699b0a1..eb9b678 100644
--- a/test.py
+++ b/test.py
@@ -4,6 +4,7 @@ import logging
import unittest
from parse import *
+from shared_utils import *
class EncodingLineTest(unittest.TestCase):