From bd26319a75b2ae4f52dc397b22c97e19b08d527f Mon Sep 17 00:00:00 2001 From: Neel Gala Date: Fri, 8 Apr 2022 20:51:52 +0530 Subject: updated python script, Makefile and README to process new files - the python file is well commented - the README provides a brief overview of how the python script works and the various artifacts it can generate --- .gitignore | 7 + Makefile | 55 ++- README.md | 203 +++++++++- constants.py | 597 +++++++++++++++++++++++++++ parse.py | 872 ++++++++++++++++++++++++++++++++++++++++ parse_opcodes | 1240 --------------------------------------------------------- 6 files changed, 1700 insertions(+), 1274 deletions(-) create mode 100644 constants.py create mode 100755 parse.py delete mode 100755 parse_opcodes diff --git a/.gitignore b/.gitignore index ccff222..406d882 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,10 @@ inst.chisel inst.go instr-table.tex priv-instr-table.tex +inst.rs + +inst.sverilog + +instr_dict.yaml + +__pycache__/ diff --git a/Makefile b/Makefile index 6354ecc..828916f 100644 --- a/Makefile +++ b/Makefile @@ -1,44 +1,41 @@ -SHELL := /bin/sh - +EXTENSIONS := "rv*" "unratified/rv*" ISASIM_H := ../riscv-isa-sim/riscv/encoding.h PK_H := ../riscv-pk/machine/encoding.h ENV_H := ../riscv-tests/env/encoding.h OPENOCD_H := ../riscv-openocd/src/target/riscv/encoding.h INSTALL_HEADER_FILES := $(ISASIM_H) $(PK_H) $(ENV_H) $(OPENOCD_H) -ALL_REAL_ILEN32_OPCODES := opcodes-rv32i opcodes-rv64i opcodes-rv32m opcodes-rv64m opcodes-rv32a opcodes-rv64a opcodes-rv32h opcodes-rv64h opcodes-rv32f opcodes-rv64f opcodes-rv32d opcodes-rv64d opcodes-rv32q opcodes-rv64q opcodes-rv32xbitmanip opcodes-rv64xbitmanip opcodes-system opcodes-svinval opcodes-rv32zfh opcodes-rv32d-zfh opcodes-rv32q-zfh opcodes-rv64zfh opcodes-rvzk opcodes-rv32zk opcodes-rv64zk opcodes-zicbo opcodes-rv32zba opcodes-rv32zbb opcodes-rv32zbc opcodes-rv32zbkb opcodes-rv32zbkx opcodes-rv32zbs opcodes-rv64zba opcodes-rv64zbb opcodes-rv64zbkb -ALL_REAL_OPCODES := 
$(ALL_REAL_ILEN32_OPCODES) opcodes-rvc opcodes-rv32c opcodes-rv64c opcodes-custom opcodes-rvv opcodes-rvp - -ALL_OPCODES := opcodes-pseudo $(ALL_REAL_OPCODES) opcodes-rvv-pseudo - -install: encoding.out.h inst.chisel instr-table.tex priv-instr-table.tex - set -e; for FILE in $(INSTALL_HEADER_FILES); do cp -f encoding.out.h $$FILE; done +default: everything -encoding.out.h: $(ALL_OPCODES) parse_opcodes encoding.h - echo "/*" > $@ - echo " * This file is auto-generated by running 'make' in" >> $@ - echo " * https://github.com/riscv/riscv-opcodes (`git log -1 --format="format:%h"`)" >> $@ - echo " */" >> $@ - echo >> $@ - cat encoding.h >> $@ - cat $(ALL_OPCODES) | ./parse_opcodes -c >> $@ +.PHONY : everything +everything: + @./parse.py -c -chisel -sverilog -rust -latex $(EXTENSIONS) -inst.chisel: $(ALL_OPCODES) parse_opcodes - cat $(ALL_OPCODES) | ./parse_opcodes -chisel > $@ +.PHONY : c +c: + @./parse.py -c $(EXTENSIONS) -inst.go: $(ALL_REAL_ILEN32_OPCODES) parse_opcodes - cat $(ALL_REAL_ILEN32_OPCODES) | ./parse_opcodes -go > $@ +.PHONY : chisel +chisel: + @./parse.py -chisel $(EXTENSIONS) -inst.rs: $(ALL_OPCODES) parse_opcodes - cat $(ALL_OPCODES) | ./parse_opcodes -rust > $@ +.PHONY : latex +latex: + @./parse.py -latex $(EXTENSIONS) -inst.sverilog: $(ALL_OPCODES) parse_opcodes - cat $(ALL_OPCODES) | ./parse_opcodes -sverilog > $@ +.PHONY : sverilog +sverilog: + @./parse.py -sverilog $(EXTENSIONS) -instr-table.tex: $(ALL_OPCODES) parse_opcodes - cat $(ALL_OPCODES) | ./parse_opcodes -tex > $@ +.PHONY : rust +rust: + @./parse.py -rust $(EXTENSIONS) -priv-instr-table.tex: $(ALL_OPCODES) parse_opcodes - cat $(ALL_OPCODES) | ./parse_opcodes -privtex > $@ +.PHONY : clean +clean: + rm -f inst* priv-instr-table.tex encoding.out.h .PHONY : install +install: c + set -e; for FILE in $(INSTALL_HEADER_FILES); do cp -f encoding.out.h $$FILE; done + diff --git a/README.md b/README.md index bb981ea..26d39fc 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,203 @@ -riscv-opcodes 
-=========================================================================== +# riscv-opcodes This repo enumerates standard RISC-V instruction opcodes and control and status registers. It also contains a script to convert them into several formats (C, Scala, LaTeX). -This repo is not meant to stand alone; it is a subcomponent of -[riscv-tools](https://github.com/riscv/riscv-tools) and assumes that it -is part of that directory structure. +Artifacts (encoding.h, latex-tables, etc) from this repo are used in other +tools and projects like Spike, PK, RISC-V Manual, etc. + +## Project Structure + +```bash +├── constants.py # contains variables, constants and data-structures used in parse.py +├── encoding.h # the template encoding.h file +├── LICENSE # license file +├── Makefile # makefile to generate artifacts +├── parse.py # python file to perform checks on the instructions and generate artifacts +├── README.md # this file +├── rv* # instruction opcode files +└── unratified # contains unratified instruction opcode files +``` + +## File Naming Policy + +This project follows a very specific file structure to define the instruction encodings. All files +containing instruction encodings start with the prefix `rv`. These files can either be present in +the root directory (if the instructions have been ratified) or the `unratified` directory. The exact +file-naming policy and location is as mentioned below: + +1. `rv_x` - contains instructions common within the 32-bit and 64-bit modes of extension X. +2. `rv32_x` - contains instructions present in rv32x only (absent in rv64x, e.g. brev8) +3. `rv64_x` - contains instructions present in rv64x only (absent in rv32x, e.g. addw) +4. `rv_x_y` - contains instructions when both extension X and Y are available/enabled. It is recommended to follow canonical ordering for such file names as specified by the spec. +5.
`unratified` - this directory will also contain files similar to the above policies, but will + correspond to instructions which have not yet been ratified. + +When an instruction is present in multiple extensions and the spec is vague in defining the extension which owns the instruction, the instruction encoding must be placed in the first canonically ordered extension and should be imported (via the `$import` keyword) in the remaining extensions. + +## Encoding Syntax + + +The encoding syntax uses `$` to indicate keywords. As of now 2 keywords have been identified : `$import` and `$pseudo_op` (described below). The syntax also uses `::` as a means to define the relationship between extension and instruction. `..` is used to define bit ranges. We use `#` to define comments in the files. All comments must be in a separate line. In-line comments are not supported. + +Instruction syntaxes used in this project are broadly categorized into three: + +- **regular instructions** :- these are instructions which hold a unique opcode in the encoding space. A very generic syntax guideline + for these instructions is as follows: + ``` + <instruction name> <arguments> <bit-encodings> + ``` + Examples: + ``` + lui rd imm20 6..2=0x0D 1..0=3 + beq bimm12hi rs1 rs2 bimm12lo 14..12=0 6..2=0x18 1..0=3 + ``` + The bit encodings are usually of 2 types: + - *single bit assignment* : here the value of a single bit is assigned using syntax `<bit-position>=<value>`. For example, `6=1` means bit 6 should be 1. Here the value must be 1 or 0. + - *range assignment*: here a range of bits is assigned a value using syntax: `<msb>..<lsb>=<val>`. For example, `31..24=0xab`. The value here can be either unsigned integer, hex (0x) or binary (0b). + +- **pseudo\_instructions** (a.k.a pseudo\_ops) - These are instructions which are aliases of regular instructions. Their encodings force + certain restrictions over the regular instruction.
The syntax for such instructions uses the `$pseudo_op` keyword as follows: + ``` + $pseudo_op <extension>::<base-instruction> <instruction name> <instruction args> <bit-encodings> + ``` + Here the `<extension>` specifies the extension which contains the base instruction. `<base-instruction>` indicates the name of the instruction + this pseudo-instruction is an alias of. The remaining fields are the same as the regular instruction syntax, where all the args and the fields + of the pseudo instruction are specified. + + Example: + ``` + $pseudo_op rv_zicsr::csrrs frflags rd 19..15=0 31..20=0x001 14..12=2 6..2=0x1C 1..0=3 + ``` + + If a ratified instruction is a pseudo\_op of a regular unratified + instruction, it is recommended to maintain this pseudo\_op relationship i.e. + define the new instruction as a pseudo\_op of the unratified regular + instruction, as this avoids existence of overlapping opcodes for users who are + experimenting with unratified extensions as well. + +- **imported\_instructions** - these are instructions which are borrowed from an extension into a new/different extension/sub-extension. Only regular instructions can be imported. Pseudo-op instructions cannot be imported. Example: + ``` + $import rv32_zkne::aes32esmi + ``` +## Flow for parse.py + +The `parse.py` python file is used to perform checks on the current set of instruction encodings and also generates multiple artifacts : latex tables, encoding.h header file, etc. This section will provide a brief overview of the flow within the python file. + +To start with, `parse.py` creates a list of all `rv*` files currently checked into the repo (including those inside the `unratified` directory as well). +It then starts parsing each file line by line. In the first pass, we only capture regular instructions and ignore the imported or pseudo instructions.
+For each regular instruction, the following checks are performed : + + - for range-assignment syntax, the *msb* position must be higher than the *lsb* position + - for range-assignment syntax, the value of the range must be representable in the space identified by *msb* and *lsb* + - values for the same bit positions should not be defined multiple times. + - All bit positions must be accounted for (either as args or constant value fields) + +Once the above checks are passed for a regular instruction, we then create a dictionary for this instruction which contains the following fields: + - encoding : contains a 32-bit string defining the encoding of the instruction. Here `-` is used to represent instruction argument fields + - extension : string indicating which extension/filename this instruction was picked from + - mask : a 32-bit hex value indicating the bits of the encodings that must be checked for legality of that instruction + - match : a 32-bit hex value indicating the values the encoding must take for the bits which are set as 1 in the mask above + - variable_fields : This is the list of args required by the instruction + +The above dictionary elements are added to a main `instr_dict` dictionary under the instruction node. This process continues until all regular +instructions have been processed. In the second pass, we now process the `$pseudo_op` instructions. Here, we first check if the *base-instruction* of +this pseudo instruction exists in the relevant extension/filename or not. If it is present, the remaining part of the syntax undergoes the same +checks as above. Once the checks pass and if the *base-instruction* is not already added to the main `instr_dict` then the pseudo-instruction is added to +the list. In the third, and final, pass we process the imported instructions.
+ +The case where the *base-instruction* for a pseudo-instruction may not be present in the main `instr_dict` after the first pass is when only a subset +of extensions is being processed such that the *base-instruction* is not included. + + +## Artifact Generation and Usage + +The following artifacts can be generated using parse.py: + +- instr\_dict.yaml : This file is always generated by parse.py and contains the + entire main dictionary `instr_dict` in YAML format. Note, in this yaml the + *dots* in an instruction are replaced with *underscores* +- encoding.h : this is the header file that is used by tools like spike, pk, etc +- instr-table.tex : the latex table of instructions used in the riscv-unpriv spec +- priv-instr-table.tex : the latex table of instructions used in the riscv-priv spec +- inst.chisel : chisel code to decode instructions +- inst.sverilog : system verilog code to decode instructions +- inst.rs : rust code containing mask and match variables for all instructions + +To generate all the above artifacts for all instructions currently checked in, simply run `make` from the root-directory. This should print the following log on the command-line: + +``` +Running with args : ['./parse.py', '-c', '-chisel', '-sverilog', '-rust', '-latex', 'rv*', 'unratified/rv*'] +Extensions selected : ['rv*', 'unratified/rv*'] +INFO:: encoding.out.h generated successfully +INFO:: inst.chisel generated successfully +INFO:: inst.sverilog generated successfully +INFO:: inst.rs generated successfully +INFO:: instr-table.tex generated successfully +INFO:: priv-instr-table.tex generated successfully +``` + +By default all extensions are enabled. To select only a subset of extensions you can change the `EXTENSIONS` variable of the makefile to contain only the file names of interest.
+For example if you want only the I and M extensions you can do the following: + +```bash +make EXTENSIONS='rv*_i rv*_m' +``` + +Which will print the following log: + +``` +Running with args : ['./parse.py', '-c', '-chisel', '-sverilog', '-rust', '-latex', 'rv32_i', 'rv64_i', 'rv_i', 'rv64_m', 'rv_m'] +Extensions selected : ['rv32_i', 'rv64_i', 'rv_i', 'rv64_m', 'rv_m'] +INFO:: encoding.out.h generated successfully +INFO:: inst.chisel generated successfully +INFO:: inst.sverilog generated successfully +INFO:: inst.rs generated successfully +INFO:: instr-table.tex generated successfully +INFO:: priv-instr-table.tex generated successfully +``` + +If you only want a specific artifact you can use one or more of the following targets : `c`, `rust`, `chisel`, `sverilog`, `latex` + +You can use the `clean` target to remove all artifacts. + +## Adding a new extension + +To add a new extension of instructions, create an appropriate `rv*` file based on the policy defined in [File Structure](#file-naming-policy). Run `make` from the root directory to ensure that all checks pass and all artifacts are created correctly. A successful run should print the following log on the terminal: + +``` +Running with args : ['./parse.py', '-c', '-chisel', '-sverilog', '-rust', '-latex', 'rv*', 'unratified/rv*'] +Extensions selected : ['rv*', 'unratified/rv*'] +INFO:: encoding.out.h generated successfully +INFO:: inst.chisel generated successfully +INFO:: inst.sverilog generated successfully +INFO:: inst.rs generated successfully +INFO:: instr-table.tex generated successfully +INFO:: priv-instr-table.tex generated successfully +``` + +Create a PR for review. + +## Enabling Debug logs in parse.py + +To enable debug logs in parse.py change `level=logging.INFO` to `level=logging.DEBUG` and run the python command. 
You will now see debug statements on +the terminal like below: +``` +DEBUG:: Collecting standard instructions first +DEBUG:: Parsing File: ./rv_i +DEBUG:: Processing line: lui rd imm20 6..2=0x0D 1..0=3 +DEBUG:: Processing line: auipc rd imm20 6..2=0x05 1..0=3 +DEBUG:: Processing line: jal rd jimm20 6..2=0x1b 1..0=3 +DEBUG:: Processing line: jalr rd rs1 imm12 14..12=0 6..2=0x19 1..0=3 +DEBUG:: Processing line: beq bimm12hi rs1 rs2 bimm12lo 14..12=0 6..2=0x18 1..0=3 +DEBUG:: Processing line: bne bimm12hi rs1 rs2 bimm12lo 14..12=1 6..2=0x18 1..0=3 +``` + +## How do I find where an instruction is defined? + +You can use `grep "^\s*<instr-name>" rv* unratified/rv*` OR run `make` and open +`instr_dict.yaml` and search for the instruction you are looking for. Within that +instruction the `extension` field will indicate which file the instruction was +picked from. + diff --git a/constants.py b/constants.py new file mode 100644 index 0000000..b6a4351 --- /dev/null +++ b/constants.py @@ -0,0 +1,597 @@ +import re + + +isa_regex = \ +re.compile("^RV(32|64|128)[IE]+[ABCDEFGHJKLMNPQSTUVX]*(Zicsr|Zifencei|Zihintpause|Zam|Ztso|Zkne|Zknd|Zknh|Zkse|Zksh|Zkg|Zkb|Zkr|Zks|Zkn|Zba|Zbc|Zbb|Zbp|Zbr|Zbm|Zbs|Zbe|Zbf|Zbt|Zmmul|Zbpbo){,1}(_Zicsr){,1}(_Zifencei){,1}(_Zihintpause){,1}(_Zmmul){,1}(_Zam){,1}(_Zba){,1}(_Zbb){,1}(_Zbc){,1}(_Zbe){,1}(_Zbf){,1}(_Zbm){,1}(_Zbp){,1}(_Zbpbo){,1}(_Zbr){,1}(_Zbs){,1}(_Zbt){,1}(_Zkb){,1}(_Zkg){,1}(_Zkr){,1}(_Zks){,1}(_Zkn){,1}(_Zknd){,1}(_Zkne){,1}(_Zknh){,1}(_Zkse){,1}(_Zksh){,1}(_Ztso){,1}$") + +# regex to find <msb>..<lsb>=<val> patterns in instruction +fixed_ranges = re.compile( + '\s*(?P\d+.?)\.\.(?P\d+.?)\s*=\s*(?P\d[\w]*)[\s$]*', re.M) + +# regex to find <lsb>=<value> patterns in instructions +#single_fixed = re.compile('\s+(?P\d+)=(?P[\w\d]*)[\s$]*', re.M) +single_fixed = re.compile('(?:^|[\s])(?P\d+)=(?P[\w]*)((?=\s|$))', re.M) + +# regex to find the overloading condition variable +var_regex = re.compile('(?P[a-zA-Z][\w\d]*)\s*=\s*.*?[\s$]*', re.M) + +# regex for pseudo op instructions
returns the dependent filename, dependent +# instruction, the pseudo op name and the encoding string +pseudo_regex = re.compile( + '^\$pseudo_op\s+(?Prv[\d]*_[\w].*)::\s*(?P.*?)\s+(?P.*?)\s+(?P.*)$' +, re.M) + +imported_regex = re.compile('^\s*\$import\s*(?P.*)\s*::\s*(?P.*)', re.M) + +# +# Trap cause codes +causes = [ + (0x00, 'misaligned fetch'), + (0x01, 'fetch access'), + (0x02, 'illegal instruction'), + (0x03, 'breakpoint'), + (0x04, 'misaligned load'), + (0x05, 'load access'), + (0x06, 'misaligned store'), + (0x07, 'store access'), + (0x08, 'user_ecall'), + (0x09, 'supervisor_ecall'), + (0x0A, 'virtual_supervisor_ecall'), + (0x0B, 'machine_ecall'), + (0x0C, 'fetch page fault'), + (0x0D, 'load page fault'), + (0x0F, 'store page fault'), + (0x14, 'fetch guest page fault'), + (0x15, 'load guest page fault'), + (0x16, 'virtual instruction'), + (0x17, 'store guest page fault'), +] + +csrs = [ + # Standard User R/W + (0x001, 'fflags'), + (0x002, 'frm'), + (0x003, 'fcsr'), + (0x008, 'vstart'), + (0x009, 'vxsat'), + (0x00A, 'vxrm'), + (0x00F, 'vcsr'), + (0x015, 'seed'), # Zkr + + # Standard User RO + (0xC00, 'cycle'), + (0xC01, 'time'), + (0xC02, 'instret'), + (0xC03, 'hpmcounter3'), + (0xC04, 'hpmcounter4'), + (0xC05, 'hpmcounter5'), + (0xC06, 'hpmcounter6'), + (0xC07, 'hpmcounter7'), + (0xC08, 'hpmcounter8'), + (0xC09, 'hpmcounter9'), + (0xC0A, 'hpmcounter10'), + (0xC0B, 'hpmcounter11'), + (0xC0C, 'hpmcounter12'), + (0xC0D, 'hpmcounter13'), + (0xC0E, 'hpmcounter14'), + (0xC0F, 'hpmcounter15'), + (0xC10, 'hpmcounter16'), + (0xC11, 'hpmcounter17'), + (0xC12, 'hpmcounter18'), + (0xC13, 'hpmcounter19'), + (0xC14, 'hpmcounter20'), + (0xC15, 'hpmcounter21'), + (0xC16, 'hpmcounter22'), + (0xC17, 'hpmcounter23'), + (0xC18, 'hpmcounter24'), + (0xC19, 'hpmcounter25'), + (0xC1A, 'hpmcounter26'), + (0xC1B, 'hpmcounter27'), + (0xC1C, 'hpmcounter28'), + (0xC1D, 'hpmcounter29'), + (0xC1E, 'hpmcounter30'), + (0xC1F, 'hpmcounter31'), + (0xC20, 'vl'), + (0xC21, 'vtype'), + (0xC22, 
'vlenb'), + + # Standard Supervisor R/W + (0x100, 'sstatus'), + (0x102, 'sedeleg'), + (0x103, 'sideleg'), + (0x104, 'sie'), + (0x105, 'stvec'), + (0x106, 'scounteren'), + (0x10A, 'senvcfg'), + (0x140, 'sscratch'), + (0x141, 'sepc'), + (0x142, 'scause'), + (0x143, 'stval'), + (0x144, 'sip'), + (0x180, 'satp'), + (0x5A8, 'scontext'), + + # Standard Hypervisor R/w + (0x200, 'vsstatus'), + (0x204, 'vsie'), + (0x205, 'vstvec'), + (0x240, 'vsscratch'), + (0x241, 'vsepc'), + (0x242, 'vscause'), + (0x243, 'vstval'), + (0x244, 'vsip'), + (0x280, 'vsatp'), + (0x600, 'hstatus'), + (0x602, 'hedeleg'), + (0x603, 'hideleg'), + (0x604, 'hie'), + (0x605, 'htimedelta'), + (0x606, 'hcounteren'), + (0x607, 'hgeie'), + (0x60A, 'henvcfg'), + (0x643, 'htval'), + (0x644, 'hip'), + (0x645, 'hvip'), + (0x64A, 'htinst'), + (0x680, 'hgatp'), + (0x6A8, 'hcontext'), + (0xE12, 'hgeip'), + + # Tentative CSR assignment for CLIC + (0x007, 'utvt'), + (0x045, 'unxti'), + (0x046, 'uintstatus'), + (0x048, 'uscratchcsw'), + (0x049, 'uscratchcswl'), + (0x107, 'stvt'), + (0x145, 'snxti'), + (0x146, 'sintstatus'), + (0x148, 'sscratchcsw'), + (0x149, 'sscratchcswl'), + (0x307, 'mtvt'), + (0x345, 'mnxti'), + (0x346, 'mintstatus'), + (0x348, 'mscratchcsw'), + (0x349, 'mscratchcswl'), + + # Standard Machine R/W + (0x300, 'mstatus'), + (0x301, 'misa'), + (0x302, 'medeleg'), + (0x303, 'mideleg'), + (0x304, 'mie'), + (0x305, 'mtvec'), + (0x306, 'mcounteren'), + (0x30a, 'menvcfg'), + (0x320, 'mcountinhibit'), + (0x340, 'mscratch'), + (0x341, 'mepc'), + (0x342, 'mcause'), + (0x343, 'mtval'), + (0x344, 'mip'), + (0x34a, 'mtinst'), + (0x34b, 'mtval2'), + (0x3a0, 'pmpcfg0'), + (0x3a1, 'pmpcfg1'), + (0x3a2, 'pmpcfg2'), + (0x3a3, 'pmpcfg3'), + (0x3a4, 'pmpcfg4'), + (0x3a5, 'pmpcfg5'), + (0x3a6, 'pmpcfg6'), + (0x3a7, 'pmpcfg7'), + (0x3a8, 'pmpcfg8'), + (0x3a9, 'pmpcfg9'), + (0x3aa, 'pmpcfg10'), + (0x3ab, 'pmpcfg11'), + (0x3ac, 'pmpcfg12'), + (0x3ad, 'pmpcfg13'), + (0x3ae, 'pmpcfg14'), + (0x3af, 'pmpcfg15'), + (0x3b0, 
'pmpaddr0'), + (0x3b1, 'pmpaddr1'), + (0x3b2, 'pmpaddr2'), + (0x3b3, 'pmpaddr3'), + (0x3b4, 'pmpaddr4'), + (0x3b5, 'pmpaddr5'), + (0x3b6, 'pmpaddr6'), + (0x3b7, 'pmpaddr7'), + (0x3b8, 'pmpaddr8'), + (0x3b9, 'pmpaddr9'), + (0x3ba, 'pmpaddr10'), + (0x3bb, 'pmpaddr11'), + (0x3bc, 'pmpaddr12'), + (0x3bd, 'pmpaddr13'), + (0x3be, 'pmpaddr14'), + (0x3bf, 'pmpaddr15'), + (0x3c0, 'pmpaddr16'), + (0x3c1, 'pmpaddr17'), + (0x3c2, 'pmpaddr18'), + (0x3c3, 'pmpaddr19'), + (0x3c4, 'pmpaddr20'), + (0x3c5, 'pmpaddr21'), + (0x3c6, 'pmpaddr22'), + (0x3c7, 'pmpaddr23'), + (0x3c8, 'pmpaddr24'), + (0x3c9, 'pmpaddr25'), + (0x3ca, 'pmpaddr26'), + (0x3cb, 'pmpaddr27'), + (0x3cc, 'pmpaddr28'), + (0x3cd, 'pmpaddr29'), + (0x3ce, 'pmpaddr30'), + (0x3cf, 'pmpaddr31'), + (0x3d0, 'pmpaddr32'), + (0x3d1, 'pmpaddr33'), + (0x3d2, 'pmpaddr34'), + (0x3d3, 'pmpaddr35'), + (0x3d4, 'pmpaddr36'), + (0x3d5, 'pmpaddr37'), + (0x3d6, 'pmpaddr38'), + (0x3d7, 'pmpaddr39'), + (0x3d8, 'pmpaddr40'), + (0x3d9, 'pmpaddr41'), + (0x3da, 'pmpaddr42'), + (0x3db, 'pmpaddr43'), + (0x3dc, 'pmpaddr44'), + (0x3dd, 'pmpaddr45'), + (0x3de, 'pmpaddr46'), + (0x3df, 'pmpaddr47'), + (0x3e0, 'pmpaddr48'), + (0x3e1, 'pmpaddr49'), + (0x3e2, 'pmpaddr50'), + (0x3e3, 'pmpaddr51'), + (0x3e4, 'pmpaddr52'), + (0x3e5, 'pmpaddr53'), + (0x3e6, 'pmpaddr54'), + (0x3e7, 'pmpaddr55'), + (0x3e8, 'pmpaddr56'), + (0x3e9, 'pmpaddr57'), + (0x3ea, 'pmpaddr58'), + (0x3eb, 'pmpaddr59'), + (0x3ec, 'pmpaddr60'), + (0x3ed, 'pmpaddr61'), + (0x3ee, 'pmpaddr62'), + (0x3ef, 'pmpaddr63'), + (0x747, 'mseccfg'), + (0x7a0, 'tselect'), + (0x7a1, 'tdata1'), + (0x7a2, 'tdata2'), + (0x7a3, 'tdata3'), + (0x7a4, 'tinfo'), + (0x7a5, 'tcontrol'), + (0x7a8, 'mcontext'), + (0x7aa, 'mscontext'), + (0x7b0, 'dcsr'), + (0x7b1, 'dpc'), + (0x7b2, 'dscratch0'), + (0x7b3, 'dscratch1'), + (0xB00, 'mcycle'), + (0xB02, 'minstret'), + (0xB03, 'mhpmcounter3'), + (0xB04, 'mhpmcounter4'), + (0xB05, 'mhpmcounter5'), + (0xB06, 'mhpmcounter6'), + (0xB07, 'mhpmcounter7'), + (0xB08, 
'mhpmcounter8'), + (0xB09, 'mhpmcounter9'), + (0xB0A, 'mhpmcounter10'), + (0xB0B, 'mhpmcounter11'), + (0xB0C, 'mhpmcounter12'), + (0xB0D, 'mhpmcounter13'), + (0xB0E, 'mhpmcounter14'), + (0xB0F, 'mhpmcounter15'), + (0xB10, 'mhpmcounter16'), + (0xB11, 'mhpmcounter17'), + (0xB12, 'mhpmcounter18'), + (0xB13, 'mhpmcounter19'), + (0xB14, 'mhpmcounter20'), + (0xB15, 'mhpmcounter21'), + (0xB16, 'mhpmcounter22'), + (0xB17, 'mhpmcounter23'), + (0xB18, 'mhpmcounter24'), + (0xB19, 'mhpmcounter25'), + (0xB1A, 'mhpmcounter26'), + (0xB1B, 'mhpmcounter27'), + (0xB1C, 'mhpmcounter28'), + (0xB1D, 'mhpmcounter29'), + (0xB1E, 'mhpmcounter30'), + (0xB1F, 'mhpmcounter31'), + (0x323, 'mhpmevent3'), + (0x324, 'mhpmevent4'), + (0x325, 'mhpmevent5'), + (0x326, 'mhpmevent6'), + (0x327, 'mhpmevent7'), + (0x328, 'mhpmevent8'), + (0x329, 'mhpmevent9'), + (0x32A, 'mhpmevent10'), + (0x32B, 'mhpmevent11'), + (0x32C, 'mhpmevent12'), + (0x32D, 'mhpmevent13'), + (0x32E, 'mhpmevent14'), + (0x32F, 'mhpmevent15'), + (0x330, 'mhpmevent16'), + (0x331, 'mhpmevent17'), + (0x332, 'mhpmevent18'), + (0x333, 'mhpmevent19'), + (0x334, 'mhpmevent20'), + (0x335, 'mhpmevent21'), + (0x336, 'mhpmevent22'), + (0x337, 'mhpmevent23'), + (0x338, 'mhpmevent24'), + (0x339, 'mhpmevent25'), + (0x33A, 'mhpmevent26'), + (0x33B, 'mhpmevent27'), + (0x33C, 'mhpmevent28'), + (0x33D, 'mhpmevent29'), + (0x33E, 'mhpmevent30'), + (0x33F, 'mhpmevent31'), + + # Standard Machine RO + (0xF11, 'mvendorid'), + (0xF12, 'marchid'), + (0xF13, 'mimpid'), + (0xF14, 'mhartid'), + (0xF15, 'mconfigptr'), +] + +csrs32 = [ + # Standard Hypervisor R/w + (0x615, 'htimedeltah'), + (0x61A, 'henvcfgh'), + + # Standard User RO + (0xC80, 'cycleh'), + (0xC81, 'timeh'), + (0xC82, 'instreth'), + (0xC83, 'hpmcounter3h'), + (0xC84, 'hpmcounter4h'), + (0xC85, 'hpmcounter5h'), + (0xC86, 'hpmcounter6h'), + (0xC87, 'hpmcounter7h'), + (0xC88, 'hpmcounter8h'), + (0xC89, 'hpmcounter9h'), + (0xC8A, 'hpmcounter10h'), + (0xC8B, 'hpmcounter11h'), + (0xC8C, 
'hpmcounter12h'), + (0xC8D, 'hpmcounter13h'), + (0xC8E, 'hpmcounter14h'), + (0xC8F, 'hpmcounter15h'), + (0xC90, 'hpmcounter16h'), + (0xC91, 'hpmcounter17h'), + (0xC92, 'hpmcounter18h'), + (0xC93, 'hpmcounter19h'), + (0xC94, 'hpmcounter20h'), + (0xC95, 'hpmcounter21h'), + (0xC96, 'hpmcounter22h'), + (0xC97, 'hpmcounter23h'), + (0xC98, 'hpmcounter24h'), + (0xC99, 'hpmcounter25h'), + (0xC9A, 'hpmcounter26h'), + (0xC9B, 'hpmcounter27h'), + (0xC9C, 'hpmcounter28h'), + (0xC9D, 'hpmcounter29h'), + (0xC9E, 'hpmcounter30h'), + (0xC9F, 'hpmcounter31h'), + + # Standard Machine RW + (0x310, 'mstatush'), + (0x31A, 'menvcfgh'), + (0x757, 'mseccfgh'), + (0xB80, 'mcycleh'), + (0xB82, 'minstreth'), + (0xB83, 'mhpmcounter3h'), + (0xB84, 'mhpmcounter4h'), + (0xB85, 'mhpmcounter5h'), + (0xB86, 'mhpmcounter6h'), + (0xB87, 'mhpmcounter7h'), + (0xB88, 'mhpmcounter8h'), + (0xB89, 'mhpmcounter9h'), + (0xB8A, 'mhpmcounter10h'), + (0xB8B, 'mhpmcounter11h'), + (0xB8C, 'mhpmcounter12h'), + (0xB8D, 'mhpmcounter13h'), + (0xB8E, 'mhpmcounter14h'), + (0xB8F, 'mhpmcounter15h'), + (0xB90, 'mhpmcounter16h'), + (0xB91, 'mhpmcounter17h'), + (0xB92, 'mhpmcounter18h'), + (0xB93, 'mhpmcounter19h'), + (0xB94, 'mhpmcounter20h'), + (0xB95, 'mhpmcounter21h'), + (0xB96, 'mhpmcounter22h'), + (0xB97, 'mhpmcounter23h'), + (0xB98, 'mhpmcounter24h'), + (0xB99, 'mhpmcounter25h'), + (0xB9A, 'mhpmcounter26h'), + (0xB9B, 'mhpmcounter27h'), + (0xB9C, 'mhpmcounter28h'), + (0xB9D, 'mhpmcounter29h'), + (0xB9E, 'mhpmcounter30h'), + (0xB9F, 'mhpmcounter31h'), +] + +# look up table of position of various arguments that are used by the +# instructions in the encoding files. +arg_lut = {} +arg_lut['rd'] = (11, 7) +arg_lut['rt'] = (19, 15) # source+dest register address. Overlaps rs1. 
+arg_lut['rs1'] = (19, 15) +arg_lut['rs2'] = (24, 20) +arg_lut['rs3'] = (31, 27) +arg_lut['aqrl'] = (26, 25) +arg_lut['aq'] = (26, 26) +arg_lut['rl'] = (25, 25) +arg_lut['fm'] = (31, 28) +arg_lut['pred'] = (27, 24) +arg_lut['succ'] = (23, 20) +arg_lut['rm'] = (14, 12) +arg_lut['funct3'] = (14, 12) +arg_lut['funct2'] = (26, 25) +arg_lut['imm20'] = (31, 12) +arg_lut['jimm20'] = (31, 12) +arg_lut['imm12'] = (31, 20) +arg_lut['csr'] = (31, 20) +arg_lut['imm12hi'] = (31, 25) +arg_lut['bimm12hi'] = (31, 25) +arg_lut['imm12lo'] = (11, 7) +arg_lut['bimm12lo'] = (11, 7) +arg_lut['zimm'] = (19, 15) +arg_lut['shamt'] = (25, 20) +arg_lut['shamtw'] = (24, 20) +arg_lut['shamtw4'] = (23, 20) +arg_lut['bs'] = (31, 30) # byte select for RV32K AES +arg_lut['rnum'] = (23, 20) # round constant for RV64 AES +arg_lut['rc'] = (29, 25) +arg_lut['imm2'] = (21, 20) +arg_lut['imm3'] = (22, 20) +arg_lut['imm4'] = (23, 20) +arg_lut['imm5'] = (24, 20) +arg_lut['imm6'] = (25, 20) +arg_lut['zimm'] = (19, 15) +arg_lut['opcode'] = (6,0) +arg_lut['funct7'] = (31,25) + +# for vectors +arg_lut['vd'] = (11, 7) +arg_lut['vs3'] = (11, 7) +arg_lut['vs1'] = (19, 15) +arg_lut['vs2'] = (24, 20) +arg_lut['vm'] = (25, 25) +arg_lut['wd'] = (26, 26) +arg_lut['amoop'] = (31, 27) +arg_lut['nf'] = (31, 29) +arg_lut['simm5'] = (19, 15) +arg_lut['zimm10'] = (29, 20) +arg_lut['zimm11'] = (30, 20) + + +#compressed immediates and fields +arg_lut['c_nzuimm10'] = (12,5) +arg_lut['c_uimm7lo'] = (6,5) +arg_lut['c_uimm7hi'] = (12,10) +arg_lut['c_uimm8lo'] = (6,5) +arg_lut['c_uimm8hi'] = (12,10) +arg_lut['c_uimm9lo'] = (6,5) +arg_lut['c_uimm9hi'] = (12,10) +arg_lut['c_nzimm6lo'] = (6,2) +arg_lut['c_nzimm6hi'] = (12,12) +arg_lut['c_imm6lo'] = (6,2) +arg_lut['c_imm6hi'] = (12,12) +arg_lut['c_nzimm10hi'] = (12,12) +arg_lut['c_nzimm10lo'] = (6,2) +arg_lut['c_nzimm18hi'] = (12,12) +arg_lut['c_nzimm18lo'] = (6,2) +arg_lut['c_imm12'] = (12,2) +arg_lut['c_bimm9lo'] = (6,2) +arg_lut['c_bimm9hi'] = (12,10) +arg_lut['c_nzuimm5'] = (6,2) 
+arg_lut['c_nzuimm6lo'] = (6,2) +arg_lut['c_nzuimm6hi'] = (12, 12) +arg_lut['c_uimm8splo'] = (6,2) +arg_lut['c_uimm8sphi'] = (12, 12) +arg_lut['c_uimm8sp_s'] = (12,7) +arg_lut['c_uimm10splo'] = (6,2) +arg_lut['c_uimm10sphi'] = (12, 12) +arg_lut['c_uimm9splo'] = (6,2) +arg_lut['c_uimm9sphi'] = (12, 12) +arg_lut['c_uimm10sp_s'] = (12,7) +arg_lut['c_uimm9sp_s'] = (12,7) + +arg_lut['rs1_p'] = (9,7) +arg_lut['rs2_p'] = (4,2) +arg_lut['rd_p'] = (4,2) +arg_lut['rd_rs1_n0'] = (11,7) +arg_lut['rd_rs1_p'] = (9,7) +arg_lut['rd_rs1'] = (11,7) +arg_lut['rd_n2'] = (11,7) +arg_lut['rd_n0'] = (11,7) +arg_lut['rs1_n0'] = (11,7) +arg_lut['c_rs2_n0'] = (6,2) +arg_lut['c_rs1_n0'] = (11,7) +arg_lut['c_rs2'] = (6,2) + +# dictionary containing the mapping of the argument to the what the fields in +# the latex table should be +latex_mapping = {} +latex_mapping['imm12'] = 'imm[11:0]' +latex_mapping['rs1'] = 'rs1' +latex_mapping['rs2'] = 'rs2' +latex_mapping['rd'] = 'rd' +latex_mapping['imm20'] = 'imm[31:12]' +latex_mapping['bimm12hi'] = 'imm[12$\\vert$10:5]' +latex_mapping['bimm12lo'] = 'imm[4:1$\\vert$11]' +latex_mapping['imm12hi'] = 'imm[11:5]' +latex_mapping['imm12lo'] = 'imm[4:0]' +latex_mapping['jimm20'] = 'imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]' +latex_mapping['zimm'] = 'uimm' +latex_mapping['shamtw'] = 'shamt' +latex_mapping['rd_p'] = "rd\\,$'$" +latex_mapping['rs1_p'] = "rs1\\,$'$" +latex_mapping['rs2_p'] = "rs2\\,$'$" +latex_mapping['rd_rs1_n0'] = 'rd/rs$\\neq$0' +latex_mapping['rd_rs1_p'] = "rs1\\,$'$/rs2\\,$'$" +latex_mapping['c_rs2'] = 'rs2' +latex_mapping['c_rs2_n0'] = 'rs2$\\neq$0' +latex_mapping['rd_n0'] = 'rd$\\neq$0' +latex_mapping['rs1_n0'] = 'rs1$\\neq$0' +latex_mapping['c_rs1_n0'] = 'rs1$\\neq$0' +latex_mapping['rd_rs1'] = 'rd/rs1' +latex_mapping['c_nzuimm10'] = "nzuimm[5:4$\\vert$9:6$\\vert$2$\\vert$3]" +latex_mapping['c_uimm7lo'] = 'uimm[2$\\vert$6]' +latex_mapping['c_uimm7hi'] = 'uimm[5:3]' +latex_mapping['c_uimm8lo'] = 'uimm[7:6]' +latex_mapping['c_uimm8hi'] = 
'uimm[5:3]' +latex_mapping['c_uimm9lo'] = 'uimm[7:6]' +latex_mapping['c_uimm9hi'] = 'uimm[5:4$\\vert$8]' +latex_mapping['c_nzimm6lo'] = 'nzimm[4:0]' +latex_mapping['c_nzimm6hi'] = 'nzimm[5]' +latex_mapping['c_imm6lo'] = 'imm[4:0]' +latex_mapping['c_imm6hi'] = 'imm[5]' +latex_mapping['c_nzimm10hi'] = 'nzimm[9]' +latex_mapping['c_nzimm10lo'] = 'nzimm[4$\\vert$6$\\vert$8:7$\\vert$5]' +latex_mapping['c_nzimm18hi'] = 'nzimm[17]' +latex_mapping['c_nzimm18lo'] = 'nzimm[16:12]' +latex_mapping['c_imm12'] = 'imm[11$\\vert$4$\\vert$9:8$\\vert$10$\\vert$6$\\vert$7$\\vert$3:1$\\vert$5]' +latex_mapping['c_bimm9lo'] = 'imm[7:6$\\vert$2:1$\\vert$5]' +latex_mapping['c_bimm9hi'] = 'imm[8$\\vert$4:3]' +latex_mapping['c_nzuimm5'] = 'nzuimm[4:0]' +latex_mapping['c_nzuimm6lo'] = 'nzuimm[4:0]' +latex_mapping['c_nzuimm6hi'] = 'nzuimm[5]' +latex_mapping['c_uimm8splo'] = 'uimm[4:2$\\vert$7:6]' +latex_mapping['c_uimm8sphi'] = 'uimm[5]' +latex_mapping['c_uimm8sp_s'] = 'uimm[5:2$\\vert$7:6]' +latex_mapping['c_uimm10splo'] = 'uimm[4$\\vert$9:6]' +latex_mapping['c_uimm10sphi'] = 'uimm[5]' +latex_mapping['c_uimm9splo'] = 'uimm[4:3$\\vert$8:6]' +latex_mapping['c_uimm9sphi'] = 'uimm[5]' +latex_mapping['c_uimm10sp_s'] = 'uimm[5:4$\\vert$9:6]' +latex_mapping['c_uimm9sp_s'] = 'uimm[5:3$\\vert$8:6]' + +# created a dummy instruction-dictionary like dictionary for all the instruction +# types so that the same logic can be used to create their tables +latex_inst_type = {} +latex_inst_type['R-type'] = {} +latex_inst_type['R-type']['variable_fields'] = ['opcode', 'rd', 'funct3', \ + 'rs1', 'rs2', 'funct7'] +latex_inst_type['R4-type'] = {} +latex_inst_type['R4-type']['variable_fields'] = ['opcode', 'rd', 'funct3', \ + 'rs1', 'rs2', 'funct2', 'rs3'] +latex_inst_type['I-type'] = {} +latex_inst_type['I-type']['variable_fields'] = ['opcode', 'rd', 'funct3', \ + 'rs1', 'imm12'] +latex_inst_type['S-type'] = {} +latex_inst_type['S-type']['variable_fields'] = ['opcode', 'imm12lo', 'funct3', \ + 'rs1', 'rs2', 
'imm12hi'] +latex_inst_type['B-type'] = {} +latex_inst_type['B-type']['variable_fields'] = ['opcode', 'bimm12lo', 'funct3', \ + 'rs1', 'rs2', 'bimm12hi'] +latex_inst_type['U-type'] = {} +latex_inst_type['U-type']['variable_fields'] = ['opcode', 'rd', 'imm20'] +latex_inst_type['J-type'] = {} +latex_inst_type['J-type']['variable_fields'] = ['opcode', 'rd', 'jimm20'] +latex_fixed_fields = [] +latex_fixed_fields.append((31,25)) +latex_fixed_fields.append((24,20)) +latex_fixed_fields.append((19,15)) +latex_fixed_fields.append((14,12)) +latex_fixed_fields.append((11,7)) +latex_fixed_fields.append((6,0)) diff --git a/parse.py b/parse.py new file mode 100755 index 0000000..970c29d --- /dev/null +++ b/parse.py @@ -0,0 +1,872 @@ +#!/usr/bin/env python3 + +from constants import * +import re +import glob +import os +import pprint +import logging +import collections +import yaml +import sys + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') + +def process_enc_line(line, ext): + ''' + This function processes each line of the encoding files (rv*). As part of + the processing, the function ensures that the encoding is legal through the + following checks:: + + - there is no over specification (same bits assigned different values) + - there is no under specification (some bits not assigned values) + - bit ranges are in the format hi..lo=val where hi > lo + - value assigned is representable in the bit range + - also checks that the mapping of arguments of an instruction exists in + arg_lut. 
+ + If the above checks pass, then the function returns a tuple of the name and + a dictionary containing basic information of the instruction which includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + reprsent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representin the bits that need to be masked to extract + the value required for matching. + ''' + single_dict = {} + + # fill all bits with don't care. we use '-' to represent don't care + # TODO: hardcoded for 32-bits. + encoding = ['-'] * 32 + + # get the name of instruction by splitting based on the first space + [name, remaining] = line.split(' ', 1) + + # replace dots with underscores as dot doesn't work with C/Sverilog, etc + name = name.replace('.', '_') + + # remove leading whitespaces + remaining = remaining.lstrip() + + # check each field for it's length and overlapping bits + # ex: 1..0=5 will result in an error --> x overlapping bits + temp_instr = ['-'] * 32 + entries = [ + x[0] for x in re.findall( + r'((\d)+\.\.(\d)+\=((0b\d+)|(0x\d+)|(\d)+))*', + remaining) if x[0] != '' + ] + for temp_entry in entries: + entry = temp_entry.split('=')[0] + f1, f2 = entry.split('..') + for ind in range(int(f1), int(f2)): + + # overlapping bits + if temp_instr[ind] == 'X': + logging.error( + f'{line.split(" ")[0]:<10} has {ind} bit overlapping in it\'s opcodes' + ) + raise SystemExit(1) + temp_instr[ind] = 'X' + + # check x < y + if int(f1) < int(f2): + logging.error( + f'{line.split(" ")[0]:<10} has position {f1} less than position {f2} in it\'s encoding' + ) + raise SystemExit(1) + + # illegal value assigned as per bit 
width + entry_value = temp_entry.split('=')[1] + temp_base = 16 if 'x' in entry_value else 2 if 'b' in entry_value else 10 + if len(str(int(entry_value, + temp_base))[2:]) > (int(f1) - int(f2)): + logging.error( + f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {f1 - f2}' + ) + raise SystemExit(1) + + # extract bit pattern assignments of the form hi..lo=val. fixed_ranges is a + # regex expression present in constants.py. The extracted patterns are + # captured as a list in args where each entry is a tuple (msb, lsb, value) + args = fixed_ranges.sub(' ', remaining) + + # parse through the args and assign constants 1/0 to bits which need to be + # hardcoded for this instruction + for (msb, lsb, value) in fixed_ranges.findall(remaining): + value = int(value, 0) + msb = int(msb, 0) + lsb = int(lsb, 0) + value = f"{value:032b}" + for i in range(0, msb - lsb + 1): + encoding[31 - (i + lsb)] = value[31 - i] + + # do the same as above but for = pattern. 
single_fixed is a regex + # expression present in constants.py + for (lsb, value, drop) in single_fixed.findall(remaining): + lsb = int(lsb, 0) + value = int(value, 0) + encoding[31 - lsb] = str(value) + + # convert the list of encodings into a single string for match and mask + match = "".join(encoding).replace('-','0') + mask = "".join(encoding).replace('0','1').replace('-','0') + + # check if all args of the instruction are present in arg_lut present in + # constants.py + args = single_fixed.sub(' ', args).split() + for a in args: + if a not in arg_lut: + logging.error(f' Found variable {a} in instruction {name} whose mapping in arg_lut does not exist') + raise SystemExit(1) + + # update the fields of the instruction as a dict and return back along with + # the name of the instruction + single_dict['encoding'] = "".join(encoding) + single_dict['variable_fields'] = args + single_dict['extension'] = [ext.split('/')[-1]] + single_dict['match']=hex(int(match,2)) + single_dict['mask']=hex(int(mask,2)) + + return (name, single_dict) + + +def create_inst_dict(file_filter, include_pseudo=False): + ''' + This function return a dictionary containing all instructions associated + with an extension defined by the file_filter input. The file_filter input + needs to be rv* file name with out the 'rv' prefix i.e. '_i', '32_i', etc. + + Each node of the dictionary will correspond to an instruction which again is + a dictionary. 
The dictionary contents of each instruction includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + reprsent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representin the bits that need to be masked to extract + the value required for matching. + + In order to build this dictionary, the function does 2 passes over the same + rv file. The first pass is to extract all standard + instructions. In this pass, all pseudo ops and imported instructions are + skipped. For each selected line of the file, we call process_enc_line + function to create the above mentioned dictionary contents of the + instruction. Checks are performed in this function to ensure that the same + instruction is not added twice to the overall dictionary. + + In the second pass, this function parses only pseudo_ops. For each pseudo_op + this function checks if the dependent extension and instruction, both, exit + before parsing it. The pseudo op is only added to the overall dictionary is + the dependent instruction is not present in the dictionary, else its + skipped. 
+ + + ''' + opcodes_dir = f'./' + instr_dict = {} + + # file_names contains all files to be parsed in the riscv-opcodes directory + file_names = [] + for fil in file_filter: + file_names += glob.glob(f'{opcodes_dir}{fil}') + + # first pass if for standard/regular instructions + logging.debug('Collecting standard instructions first') + for f in file_names: + logging.debug(f'Parsing File: {f}') + with open(f) as fp: + lines = (line.rstrip() + for line in fp) # All lines including the blank ones + lines = list(line for line in lines if line) # Non-blank lines + lines = list( + line for line in lines + if not line.startswith("#")) # remove comment lines + + # go through each line of the file + for line in lines: + # if the an instruction needs to be imported then go to the + # respective file and pick the line that has the instruction. + # The variable 'line' will now point to the new line from the + # imported file + + # ignore all lines starting with $import and $pseudo + if '$import' in line or '$pseudo' in line: + continue + logging.debug(f' Processing line: {line}') + + # call process_enc_line to get the data about the current + # instruction + (name, single_dict) = process_enc_line(line, f) + + # if an instruction has already been added to the filtered + # instruction dictionary throw an error saying the given + # instruction is already imported and raise SystemExit + if name in instr_dict: + var = instr_dict[name]["extension"] + if instr_dict[name]['encoding'] != single_dict['encoding']: + err_msg = f'instruction : {name} from ' + err_msg += f'{f.split("/")[-1]} is already ' + err_msg += f'added from {var} but each have different encodings for the same instruction' + logging.error(err_msg) + raise SystemExit(1) + instr_dict[name]['extension'].append(single_dict['extension']) + + # update the final dict with the instruction + instr_dict[name] = single_dict + + # second pass if for pseudo instructions + logging.debug('Collecting pseudo instructions now') + for f 
in file_names: + logging.debug(f'Parsing File: {f}') + with open(f) as fp: + lines = (line.rstrip() + for line in fp) # All lines including the blank ones + lines = list(line for line in lines if line) # Non-blank lines + lines = list( + line for line in lines + if not line.startswith("#")) # remove comment lines + + # go through each line of the file + for line in lines: + + # ignore all lines not starting with $pseudo + if '$pseudo' not in line: + continue + logging.debug(f' Processing line: {line}') + + # use the regex pseudo_regex from constants.py to find the dependent + # extension, dependent instruction, the pseudo_op in question and + # its encoding + (ext, orig_inst, pseudo_inst, line) = pseudo_regex.findall(line)[0] + + # check if the file of the dependent extension exist. Throw error if + # it doesn't + if not os.path.exists(ext): + ext1 = f'unratified/{ext}' + if not os.path.exists(ext1): + logging.error(f'Pseudo op {pseudo_inst} in {f} depends on {ext} which is not available') + raise SystemExit(1) + else: + ext = ext1 + + # check if the dependent instruction exist in the dependent + # extension. Else throw error. + found = False + for oline in open(ext): + if not re.findall(f'^\s*{orig_inst}',oline): + continue + else: + found = True + break + if not found: + logging.error(f'Orig instruction {orig_inst} not found in {ext}. Required by pseudo_op {pseudo_inst} present in {f}') + raise SystemExit(1) + + + # add the pseudo_op to the dictionary only if the original + # instruction is not already in the dictionary. 
+ if orig_inst.replace('.','_') not in instr_dict or include_pseudo: + (name, single_dict) = process_enc_line(pseudo_inst + ' ' + line, f) + + # update the final dict with the instruction + if name not in instr_dict: + instr_dict[name] = single_dict + else: + logging.debug(f'Skipping pseudo_op {pseudo_inst} since original instruction {orig_inst} already selected in list') + + # third pass if for imported instructions + logging.debug('Collecting imported instructions') + for f in file_names: + logging.debug(f'Parsing File: {f}') + with open(f) as fp: + lines = (line.rstrip() + for line in fp) # All lines including the blank ones + lines = list(line for line in lines if line) # Non-blank lines + lines = list( + line for line in lines + if not line.startswith("#")) # remove comment lines + + # go through each line of the file + for line in lines: + # if the an instruction needs to be imported then go to the + # respective file and pick the line that has the instruction. + # The variable 'line' will now point to the new line from the + # imported file + + # ignore all lines starting with $import and $pseudo + if '$import' not in line : + continue + logging.debug(f' Processing line: {line}') + + (import_ext, reg_instr) = imported_regex.findall(line)[0] + + # check if the file of the dependent extension exist. Throw error if + # it doesn't + if not os.path.exists(import_ext): + ext1 = f'unratified/{import_ext}' + if not os.path.exists(ext1): + logging.error(f'Instruction {reg_instr} in {f} cannot be imported from {import_ext}') + raise SystemExit(1) + else: + ext = ext1 + else: + ext = import_ext + + # check if the dependent instruction exist in the dependent + # extension. Else throw error. + found = False + for oline in open(ext): + if not re.findall(f'^\s*{reg_instr}',oline): + continue + else: + found = True + break + if not found: + logging.error(f'imported instruction {reg_instr} not found in {ext}. 
Required by {line} present in {f}') + logging.error(f'Note: you cannot import pseudo ops.') + raise SystemExit(1) + + # call process_enc_line to get the data about the current + # instruction + (name, single_dict) = process_enc_line(oline, f) + + # if an instruction has already been added to the filtered + # instruction dictionary throw an error saying the given + # instruction is already imported and raise SystemExit + if name in instr_dict: + var = instr_dict[name]["extension"] + if instr_dict[name]['encoding'] != single_dict['encoding']: + err_msg = f'imported instruction : {name} in ' + err_msg += f'{f.split("/")[-1]} is already ' + err_msg += f'added from {var} but each have different encodings for the same instruction' + logging.error(err_msg) + raise SystemExit(1) + instr_dict[name]['extension'].append(single_dict['extension']) + + # update the final dict with the instruction + instr_dict[name] = single_dict + return instr_dict + +def make_priv_latex_table(): + latex_file = open('priv-instr-table.tex','w') + type_list = ['R-type','I-type'] + system_instr = ['_h','_s','_system','_svinval', '64_h'] + dataset_list = [ (system_instr, 'Trap-Return Instructions',['sret','mret'], False) ] + dataset_list.append((system_instr, 'Interrupt-Management Instructions',['wfi'], False)) + dataset_list.append((system_instr, 'Supervisor Memory-Management Instructions',['sfence_vma'], False)) + dataset_list.append((system_instr, 'Hypervisor Memory-Management Instructions',['hfence_vvma', 'hfence_gvma'], False)) + dataset_list.append((system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions', + ['hlv_b','hlv_bu', 'hlv_h','hlv_hu', 'hlv_w', 'hlvx_hu', 'hlvx_wu', 'hsv_b', 'hsv_h','hsv_w'], False)) + dataset_list.append((system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions, RV64 only', ['hlv_wu','hlv_d','hsv_d'], False)) + dataset_list.append((system_instr, 'Svinval Memory-Management Instructions', ['sinval_vma', 'sfence_w_inval','sfence_inval_ir', 
'hinval_vvma','hinval_gvma'], False)) + caption = '\\caption{RISC-V Privileged Instructions}' + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + latex_file.close() + +def make_latex_table(): + ''' + This function is mean to create the instr-table.tex that is meant to be used + by the riscv-isa-manual. This function basically creates a single latext + file of multiple tables with each table limited to a single page. Only the + last table is assigned a latex-caption. + + For each table we assign a type-list which capture the different instruction + types (R, I, B, etc) that will be required for the table. Then we select the + list of extensions ('_i, '32_i', etc) whose instructions are required to + populate the table. For each extension or collection of extension we can + assign Title, such that in the end they appear as subheadings within + the table (note these are inlined headings and not captions of the table). + + All of the above information is collected/created and sent to + make_ext_latex_table function to dump out the latex contents into a file. + + The last table only has to be given a caption - as per the policy of the + riscv-isa-manual. + ''' + # open the file and use it as a pointer for all further dumps + latex_file = open('instr-table.tex','w') + + # create the rv32i table first. Here we set the caption to empty. We use the + # files rv_i and rv32_i to capture instructions relevant for rv32i + # configuration. The dataset is a list of 4-element tuples : + # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions + # is empty then it indicates that all instructions of the all the extensions + # in list_of_extensions need to be dumped. If not empty, then only the + # instructions listed in list_of_instructions will be dumped into latex. 
+ caption = '' + type_list = ['R-type','I-type','S-type','B-type','U-type','J-type'] + dataset_list = [(['_i','32_i'], 'RV32I Base Instruction Set', [], True)] + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ['R-type','I-type','S-type'] + dataset_list = [(['64_i'], 'RV64I Base Instruction Set (in addition to RV32I)', [], False)] + dataset_list.append((['_zifencei'], 'RV32/RV64 Zifencei Standard Extension', [], False)) + dataset_list.append((['_zicsr'], 'RV32/RV64 Zicsr Standard Extension', [], False)) + dataset_list.append((['_m','32_m'], 'RV32M Standard Extension', [], False)) + dataset_list.append((['64_m'],'RV64M Standard Extension (in addition to RV32M)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ['R-type'] + dataset_list = [(['_a'],'RV32A Standard Extension', [], False)] + dataset_list.append((['64_a'],'RV64A Standard Extension (in addition to RV32A)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ['R-type','R4-type','I-type','S-type'] + dataset_list = [(['_f'],'RV32F Standard Extension', [], False)] + dataset_list.append((['64_f'],'RV64F Standard Extension (in addition to RV32F)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ['R-type','R4-type','I-type','S-type'] + dataset_list = [(['_d'],'RV32D Standard Extension', [], False)] + dataset_list.append((['64_d'],'RV64D Standard Extension (in addition to RV32D)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ['R-type','R4-type','I-type','S-type'] + dataset_list = [(['_q'],'RV32Q Standard Extension', [], False)] + dataset_list.append((['64_q'],'RV64Q Standard Extension (in addition to RV32Q)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + caption = '\\caption{Instruction listing for RISC-V}' + type_list = 
['R-type','R4-type','I-type','S-type'] + dataset_list = [(['_zfh', '_d_zfh','_q_zfh'],'RV32Zfh Standard Extension', [], False)] + dataset_list.append((['64_zfh'],'RV64Zfh Standard Extension (in addition to RV32Zfh)', [], False)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + ## The following is demo to show that Compressed instructions can also be + # dumped in the same manner as above + + #type_list = [''] + #dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])] + #dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', [])) + #make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption) + + latex_file.close() + +def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption): + ''' + For a given collection of extensions this function dumps out a complete + latex table which includes the encodings of the instructions. + + The ilen input indicates the length of the instruction for which the table + is created. + + The caption input is used to create the latex-table caption. + + The type_list input is a list of instruction types (R, I, B, etc) that are + treated as header for each table. Each table will have its own requirements + and type_list must include all the instruction-types that the table needs. + Note, all elements of this list must be present in the latex_inst_type + dictionary defined in constants.py + + The latex_file is a file pointer to which the latex-table will dumped into + + The dataset is a list of 3-element tuples containing: + (list_of_extensions, title, list_of_instructions) + The list_of_extensions must contain all the set of extensions whose + instructions must be populated under a given title. If list_of_instructions + is not empty, then only those instructions mentioned in list_of_instructions + present in the extension will be dumped into the latex-table, other + instructions will be ignored. 
+ + Once the above inputs are received then function first creates table entries + for the instruction types. To simplify things, we maintain a dictionary + called latex_inst_type in constants.py which is created in the same way the + instruction dictionary is created. This allows us to re-use the same logic + to create the instruction types table as well + + Once the header is created, we then parse through every entry in the + dataset. For each list dataset entry we use the create_inst_dict function to + create an exhaustive list of instructions associated with the respective + collection of the extension of that dataset. Then we apply the instruction + filter, if any, indicated by the list_of_instructions of that dataset. + Thereon, for each instruction we create a latex table entry. + + Latex table specification for ilen sized instructions: + Each table is created with ilen+1 columns - ilen columns for each bit of the + instruction and one column to hold the name of the instruction. + + For each argument of an instruction we use the arg_lut from constants.py + to identify its position in the encoding, and thus create a multicolumn + entry with the name of the argument as the data. For hardcoded bits, we + do the same where we capture a string of continuous 1s and 0s, identify + the position and assign the same string as the data of the + multicolumn entry in the table. 
+ + ''' + column_size = "".join(['p{0.002in}']*(ilen+1)) + + type_entries = ''' + \\multicolumn{3}{l}{31} & + \\multicolumn{2}{r}{27} & + \\multicolumn{1}{c}{26} & + \\multicolumn{1}{r}{25} & + \\multicolumn{3}{l}{24} & + \\multicolumn{2}{r}{20} & + \\multicolumn{3}{l}{19} & + \\multicolumn{2}{r}{15} & + \\multicolumn{2}{l}{14} & + \\multicolumn{1}{r}{12} & + \\multicolumn{4}{l}{11} & + \\multicolumn{1}{r}{7} & + \\multicolumn{6}{l}{6} & + \\multicolumn{1}{r}{0} \\\\ + \\cline{2-33}\n& \n\n +''' if ilen == 32 else ''' + \\multicolumn{1}{c}{15} & + \\multicolumn{1}{c}{14} & + \\multicolumn{1}{c}{13} & + \\multicolumn{1}{c}{12} & + \\multicolumn{1}{c}{11} & + \\multicolumn{1}{c}{10} & + \\multicolumn{1}{c}{9} & + \\multicolumn{1}{c}{8} & + \\multicolumn{1}{c}{7} & + \\multicolumn{1}{c}{6} & + \\multicolumn{1}{c}{5} & + \\multicolumn{1}{c}{4} & + \\multicolumn{1}{c}{3} & + \\multicolumn{1}{c}{2} & + \\multicolumn{1}{c}{1} & + \\multicolumn{1}{c}{0} \\\\ + \\cline{2-17}\n& \n\n +''' + + # depending on the type_list input we create a subset dictionary of + # latex_inst_type dictionary present in constants.py + type_dict = {key: value for key, value in latex_inst_type.items() if key in type_list} + + # iterate ovr each instruction type and create a table entry + for t in type_dict: + fields = [] + + # first capture all "arguments" of the type (funct3, funct7, rd, etc) + # and capture their positions using arg_lut. + for f in type_dict[t]['variable_fields']: + (msb, lsb) = arg_lut[f] + name = f if f not in latex_mapping else latex_mapping[f] + fields.append((msb, lsb, name)) + + # iterate through the 32 bits, starting from the msb, and assign + # argument names to the relevant portions of the instructions. This + # information is stored as a 3-element tuple containing the msb, lsb + # position of the arugment and the name of the argument. 
+ msb = ilen - 1 + y = '' + for r in range(0,ilen): + if y != '': + fields.append((msb,ilen-1-r+1,y)) + y = '' + msb = ilen-1-r-1 + if r == 31: + if y != '': + fields.append((msb, 0, y)) + y = '' + + # sort the arguments in decreasing order of msb position + fields.sort(key=lambda y: y[0], reverse=True) + + # for each argument/string of 1s or 0s, create a multicolumn latex table + # entry + entry = '' + for r in range(len(fields)): + (msb, lsb, name) = fields[r] + if r == len(fields)-1: + entry += f'\\multicolumn{{ {msb -lsb +1} }}{{|c|}}{{ {name} }} & {t} \\\\ \n' + elif r == 0: + entry += f'\\multicolumn{{ {msb- lsb + 1} }}{{|c|}}{{ {name} }} &\n' + else: + entry += f'\\multicolumn{{ {msb -lsb + 1} }}{{c|}}{{ {name} }} &\n' + entry += f'\\cline{{2-{ilen+1}}}\n&\n\n' + type_entries += entry + + # for each entry in the dataset create a table + content = '' + for (ext_list, title, filter_list, include_pseudo) in dataset: + instr_dict = {} + + # for all extensions list in ext_list, create a dictionary of + # instructions associated with those extensions. + for e in ext_list: + instr_dict.update(create_inst_dict(['rv'+e], include_pseudo)) + + # if filter_list is not empty then use that as the official set of + # instructions that need to be dumped into the latex table + inst_list = list(instr_dict.keys()) if not filter_list else filter_list + + # for each instruction create an latex table entry just like how we did + # above with the instruction-type table. + instr_entries = '' + for inst in inst_list: + if inst not in instr_dict: + logging.error(f'in make_ext_latex_table: Instruction: {inst} not found in instr_dict') + raise SystemExit(1) + fields = [] + + # only if the argument is available in arg_lut we consume it, else + # throw error. 
+ for f in instr_dict[inst]['variable_fields']: + if f not in arg_lut: + logging.error(f'Found variable {f} in instruction {inst} whose mapping is not available') + raise SystemExit(1) + (msb,lsb) = arg_lut[f] + name = f.replace('_','.') if f not in latex_mapping else latex_mapping[f] + fields.append((msb, lsb, name)) + + msb = ilen -1 + y = '' + if ilen == 16: + encoding = instr_dict[inst]['encoding'][16:] + else: + encoding = instr_dict[inst]['encoding'] + for r in range(0,ilen): + x = encoding [r] + if ((msb, ilen-1-r+1)) in latex_fixed_fields: + fields.append((msb,ilen-1-r+1,y)) + msb = ilen-1-r + y = '' + if x == '-': + if y != '': + fields.append((msb,ilen-1-r+1,y)) + y = '' + msb = ilen-1-r-1 + else: + y += str(x) + if r == ilen-1: + if y != '': + fields.append((msb, 0, y)) + y = '' + + fields.sort(key=lambda y: y[0], reverse=True) + entry = '' + for r in range(len(fields)): + (msb, lsb, name) = fields[r] + if r == len(fields)-1: + entry += f'\\multicolumn{{ {msb -lsb +1} }}{{|c|}}{{ {name} }} & {inst.upper().replace("_",".")} \\\\ \n' + elif r == 0: + entry += f'\\multicolumn{{ {msb- lsb + 1} }}{{|c|}}{{ {name} }} &\n' + else: + entry += f'\\multicolumn{{ {msb -lsb + 1} }}{{c|}}{{ {name} }} &\n' + entry += f'\\cline{{2-{ilen+1}}}\n&\n\n' + instr_entries += entry + + # once an entry of the dataset is completed we create the whole table + # with the title of that dataset as sub-heading (sort-of) + if title != '': + content += f''' + +\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\ +\\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\ +\\cline{{2-{ilen+1}}} + + & +{instr_entries} +''' + else: + content += f''' +{instr_entries} +''' + + + header = f''' +\\newpage + +\\begin{{table}}[p] +\\begin{{small}} +\\begin{{center}} + \\begin{{tabular}} {{{column_size}l}} + {" ".join(['&']*ilen)} \\\\ + + & +{type_entries} +''' + endtable=f''' + +\\end{{tabular}} +\\end{{center}} +\\end{{small}} +{caption} +\\end{{table}} +''' + # dump the contents and return + 
latex_file.write(header+content+endtable) + + +def make_chisel(instr_dict): + + chisel_names='' + cause_names_str='' + csr_names_str = '' + for i in instr_dict: + chisel_names += f' def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n' + for num, name in causes: + cause_names_str += f' val {name.lower().replace(" ","_")} = {hex(num)}\n' + cause_names_str += ''' val all = { + val res = collection.mutable.ArrayBuffer[Int]() +''' + for num, name in causes: + cause_names_str += f' res += {name.lower().replace(" ","_")}\n' + cause_names_str += ''' res.toArray + }''' + + for num, name in csrs+csrs32: + csr_names_str += f' val {name} = {hex(num)}\n' + csr_names_str += ''' val all = { + val res = collection.mutable.ArrayBuffer[Int]() +''' + for num, name in csrs: + csr_names_str += f''' res += {name}\n''' + csr_names_str += ''' res.toArray + } + val all32 = { + val res = collection.mutable.ArrayBuffer(all:_*) +''' + for num, name in csrs32: + csr_names_str += f''' res += {name}\n''' + csr_names_str += ''' res.toArray + }''' + + chisel_file = open('inst.chisel','w') + chisel_file.write(f''' +/* Automatically generated by parse_opcodes */ +object Instructions {{ +{chisel_names} +}} +object Causes {{ +{cause_names_str} +}} +object CSRs {{ +{csr_names_str} +}} +''') + chisel_file.close() + +def make_rust(instr_dict): + mask_match_str= '' + for i in instr_dict: + mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n' + mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n' + for num, name in csrs+csrs32: + mask_match_str += f'const CSR_{name.upper()}: u16 = {hex(num)};\n' + for num, name in causes: + mask_match_str += f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n' + rust_file = open('inst.rs','w') + rust_file.write(f''' +/* Automatically generated by parse_opcodes */ +{mask_match_str} +''') + rust_file.close() + +def 
make_sverilog(instr_dict): + names_str = '' + for i in instr_dict: + names_str += f" localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n" + names_str += ' /* CSR Addresses */\n' + for num, name in csrs+csrs32: + names_str += f" localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n" + + sverilog_file = open('inst.sverilog','w') + sverilog_file.write(f''' +/* Automatically generated by parse_opcodes */ +package riscv_instr; +{names_str} +endpackage +''') + sverilog_file.close() +def make_c(instr_dict): + mask_match_str = '' + declare_insn_str = '' + for i in instr_dict: + mask_match_str += f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n' + mask_match_str += f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n' + declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n' + + csr_names_str = '' + declare_csr_str = '' + for num, name in csrs+csrs32: + csr_names_str += f'#define CSR_{name.upper()} {hex(num)}\n' + declare_csr_str += f'DECLARE_CSR({name}, CSR_{name.upper()})\n' + + causes_str= '' + declare_cause_str = '' + for num, name in causes: + causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n" + declare_cause_str += f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n" + + with open('encoding.h', 'r') as file: + enc_header = file.read() + + commit = os.popen('git log -1 --format="format:%h"').read() + enc_file = open('encoding.out.h','w') + enc_file.write(f''' +/* +* This file is auto-generated by running 'make' in +* https://github.com/riscv/riscv-opcodes ({commit}) +*/ +{enc_header} +/* Automatically generated by parse_opcodes. 
*/ +#ifndef RISCV_ENCODING_H +#define RISCV_ENCODING_H +{mask_match_str} +{csr_names_str} +{causes_str} +#endif +#ifdef DECLARE_INSN +{declare_insn_str} +#endif +#ifdef DECLARE_CSR +{declare_csr_str} +#endif +#ifdef DECLARE_CAUSE +{declare_cause_str} +#endif +''') + enc_file.close() + +if __name__ == "__main__": + print(f'Running with args : {sys.argv}') + + extensions = sys.argv[1:] + for i in ['-c','-latex','-chisel','-sverilog','-rust']: + if i in extensions: + extensions.remove(i) + print(f'Extensions selected : {extensions}') + instr_dict = create_inst_dict(extensions) + with open('instr_dict.yaml', 'w') as outfile: + yaml.dump(instr_dict, outfile, default_flow_style=False) + instr_dict = collections.OrderedDict(sorted(instr_dict.items())) + + if '-c' in sys.argv[1:]: + make_c(instr_dict) + logging.info('encoding.out.h generated successfully') + + if '-chisel' in sys.argv[1:]: + make_chisel(instr_dict) + logging.info('inst.chisel generated successfully') + + if '-sverilog' in sys.argv[1:]: + make_sverilog(instr_dict) + logging.info('inst.sverilog generated successfully') + + if '-rust' in sys.argv[1:]: + make_rust(instr_dict) + logging.info('inst.rs generated successfully') + + if '-latex' in sys.argv[1:]: + make_latex_table() + logging.info('instr-table.tex generated successfully') + make_priv_latex_table() + logging.info('priv-instr-table.tex generated successfully') diff --git a/parse_opcodes b/parse_opcodes deleted file mode 100755 index a9445bc..0000000 --- a/parse_opcodes +++ /dev/null @@ -1,1240 +0,0 @@ -#!/usr/bin/env python3 - -import math -import sys -import tokenize -from collections import OrderedDict - -namelist = [] -match = OrderedDict() -mask = OrderedDict() -pseudos = {} -arguments = {} - -arglut = {} -arglut['rd'] = (11,7) -arglut['rt'] = (19,15) # source+dest register address. Overlaps rs1. 
-arglut['rs1'] = (19,15) -arglut['rs2'] = (24,20) -arglut['rs3'] = (31,27) -arglut['aqrl'] = (26,25) -arglut['fm'] = (31,28) -arglut['pred'] = (27,24) -arglut['succ'] = (23,20) -arglut['rm'] = (14,12) -arglut['funct3'] = (14,12) -arglut['imm20'] = (31,12) -arglut['jimm20'] = (31,12) -arglut['imm12'] = (31,20) -arglut['imm12hi'] = (31,25) -arglut['bimm12hi'] = (31,25) -arglut['imm12lo'] = (11,7) -arglut['bimm12lo'] = (11,7) -arglut['zimm'] = (19,15) -arglut['shamt'] = (25,20) -arglut['shamtw'] = (24,20) -arglut['bs'] = (31,30) # byte select for RV32K AES -arglut['rnum'] = (23,20) # round constant for RV64 AES -arglut['rc'] = (29,25) -arglut['imm2'] = (21,20) -arglut['imm3'] = (22,20) -arglut['imm4'] = (23,20) -arglut['imm5'] = (24,20) -arglut['imm6'] = (25,20) - -# for vectors -arglut['vd'] = (11,7) -arglut['vs3'] = (11,7) -arglut['vs1'] = (19,15) -arglut['vs2'] = (24,20) -arglut['vm'] = (25,25) -arglut['wd'] = (26,26) -arglut['amoop'] = (31,27) -arglut['nf'] = (31,29) -arglut['simm5'] = (19,15) -arglut['zimm10'] = (29,20) -arglut['zimm11'] = (30,20) - -# -# Trap cause codes -causes = [ - (0x00, 'misaligned fetch'), - (0x01, 'fetch access'), - (0x02, 'illegal instruction'), - (0x03, 'breakpoint'), - (0x04, 'misaligned load'), - (0x05, 'load access'), - (0x06, 'misaligned store'), - (0x07, 'store access'), - (0x08, 'user_ecall'), - (0x09, 'supervisor_ecall'), - (0x0A, 'virtual_supervisor_ecall'), - (0x0B, 'machine_ecall'), - (0x0C, 'fetch page fault'), - (0x0D, 'load page fault'), - (0x0F, 'store page fault'), - (0x14, 'fetch guest page fault'), - (0x15, 'load guest page fault'), - (0x16, 'virtual instruction'), - (0x17, 'store guest page fault'), -] - -csrs = [ - # Standard User R/W - (0x001, 'fflags'), - (0x002, 'frm'), - (0x003, 'fcsr'), - (0x008, 'vstart'), - (0x009, 'vxsat'), - (0x00A, 'vxrm'), - (0x00F, 'vcsr'), - (0x015, 'seed'), # Zkr - - # Standard User RO - (0xC00, 'cycle'), - (0xC01, 'time'), - (0xC02, 'instret'), - (0xC03, 'hpmcounter3'), - (0xC04, 
'hpmcounter4'), - (0xC05, 'hpmcounter5'), - (0xC06, 'hpmcounter6'), - (0xC07, 'hpmcounter7'), - (0xC08, 'hpmcounter8'), - (0xC09, 'hpmcounter9'), - (0xC0A, 'hpmcounter10'), - (0xC0B, 'hpmcounter11'), - (0xC0C, 'hpmcounter12'), - (0xC0D, 'hpmcounter13'), - (0xC0E, 'hpmcounter14'), - (0xC0F, 'hpmcounter15'), - (0xC10, 'hpmcounter16'), - (0xC11, 'hpmcounter17'), - (0xC12, 'hpmcounter18'), - (0xC13, 'hpmcounter19'), - (0xC14, 'hpmcounter20'), - (0xC15, 'hpmcounter21'), - (0xC16, 'hpmcounter22'), - (0xC17, 'hpmcounter23'), - (0xC18, 'hpmcounter24'), - (0xC19, 'hpmcounter25'), - (0xC1A, 'hpmcounter26'), - (0xC1B, 'hpmcounter27'), - (0xC1C, 'hpmcounter28'), - (0xC1D, 'hpmcounter29'), - (0xC1E, 'hpmcounter30'), - (0xC1F, 'hpmcounter31'), - (0xC20, 'vl'), - (0xC21, 'vtype'), - (0xC22, 'vlenb'), - - # Standard Supervisor R/W - (0x100, 'sstatus'), - (0x102, 'sedeleg'), - (0x103, 'sideleg'), - (0x104, 'sie'), - (0x105, 'stvec'), - (0x106, 'scounteren'), - (0x10A, 'senvcfg'), - (0x140, 'sscratch'), - (0x141, 'sepc'), - (0x142, 'scause'), - (0x143, 'stval'), - (0x144, 'sip'), - (0x180, 'satp'), - (0x5A8, 'scontext'), - - # Standard Hypervisor R/w - (0x200, 'vsstatus'), - (0x204, 'vsie'), - (0x205, 'vstvec'), - (0x240, 'vsscratch'), - (0x241, 'vsepc'), - (0x242, 'vscause'), - (0x243, 'vstval'), - (0x244, 'vsip'), - (0x280, 'vsatp'), - (0x600, 'hstatus'), - (0x602, 'hedeleg'), - (0x603, 'hideleg'), - (0x604, 'hie'), - (0x605, 'htimedelta'), - (0x606, 'hcounteren'), - (0x607, 'hgeie'), - (0x60A, 'henvcfg'), - (0x643, 'htval'), - (0x644, 'hip'), - (0x645, 'hvip'), - (0x64A, 'htinst'), - (0x680, 'hgatp'), - (0x6A8, 'hcontext'), - (0xE12, 'hgeip'), - - # Tentative CSR assignment for CLIC - (0x007, 'utvt'), - (0x045, 'unxti'), - (0x046, 'uintstatus'), - (0x048, 'uscratchcsw'), - (0x049, 'uscratchcswl'), - (0x107, 'stvt'), - (0x145, 'snxti'), - (0x146, 'sintstatus'), - (0x148, 'sscratchcsw'), - (0x149, 'sscratchcswl'), - (0x307, 'mtvt'), - (0x345, 'mnxti'), - (0x346, 'mintstatus'), - 
(0x348, 'mscratchcsw'), - (0x349, 'mscratchcswl'), - - # Standard Machine R/W - (0x300, 'mstatus'), - (0x301, 'misa'), - (0x302, 'medeleg'), - (0x303, 'mideleg'), - (0x304, 'mie'), - (0x305, 'mtvec'), - (0x306, 'mcounteren'), - (0x30a, 'menvcfg'), - (0x320, 'mcountinhibit'), - (0x340, 'mscratch'), - (0x341, 'mepc'), - (0x342, 'mcause'), - (0x343, 'mtval'), - (0x344, 'mip'), - (0x34a, 'mtinst'), - (0x34b, 'mtval2'), - (0x3a0, 'pmpcfg0'), - (0x3a1, 'pmpcfg1'), - (0x3a2, 'pmpcfg2'), - (0x3a3, 'pmpcfg3'), - (0x3a4, 'pmpcfg4'), - (0x3a5, 'pmpcfg5'), - (0x3a6, 'pmpcfg6'), - (0x3a7, 'pmpcfg7'), - (0x3a8, 'pmpcfg8'), - (0x3a9, 'pmpcfg9'), - (0x3aa, 'pmpcfg10'), - (0x3ab, 'pmpcfg11'), - (0x3ac, 'pmpcfg12'), - (0x3ad, 'pmpcfg13'), - (0x3ae, 'pmpcfg14'), - (0x3af, 'pmpcfg15'), - (0x3b0, 'pmpaddr0'), - (0x3b1, 'pmpaddr1'), - (0x3b2, 'pmpaddr2'), - (0x3b3, 'pmpaddr3'), - (0x3b4, 'pmpaddr4'), - (0x3b5, 'pmpaddr5'), - (0x3b6, 'pmpaddr6'), - (0x3b7, 'pmpaddr7'), - (0x3b8, 'pmpaddr8'), - (0x3b9, 'pmpaddr9'), - (0x3ba, 'pmpaddr10'), - (0x3bb, 'pmpaddr11'), - (0x3bc, 'pmpaddr12'), - (0x3bd, 'pmpaddr13'), - (0x3be, 'pmpaddr14'), - (0x3bf, 'pmpaddr15'), - (0x3c0, 'pmpaddr16'), - (0x3c1, 'pmpaddr17'), - (0x3c2, 'pmpaddr18'), - (0x3c3, 'pmpaddr19'), - (0x3c4, 'pmpaddr20'), - (0x3c5, 'pmpaddr21'), - (0x3c6, 'pmpaddr22'), - (0x3c7, 'pmpaddr23'), - (0x3c8, 'pmpaddr24'), - (0x3c9, 'pmpaddr25'), - (0x3ca, 'pmpaddr26'), - (0x3cb, 'pmpaddr27'), - (0x3cc, 'pmpaddr28'), - (0x3cd, 'pmpaddr29'), - (0x3ce, 'pmpaddr30'), - (0x3cf, 'pmpaddr31'), - (0x3d0, 'pmpaddr32'), - (0x3d1, 'pmpaddr33'), - (0x3d2, 'pmpaddr34'), - (0x3d3, 'pmpaddr35'), - (0x3d4, 'pmpaddr36'), - (0x3d5, 'pmpaddr37'), - (0x3d6, 'pmpaddr38'), - (0x3d7, 'pmpaddr39'), - (0x3d8, 'pmpaddr40'), - (0x3d9, 'pmpaddr41'), - (0x3da, 'pmpaddr42'), - (0x3db, 'pmpaddr43'), - (0x3dc, 'pmpaddr44'), - (0x3dd, 'pmpaddr45'), - (0x3de, 'pmpaddr46'), - (0x3df, 'pmpaddr47'), - (0x3e0, 'pmpaddr48'), - (0x3e1, 'pmpaddr49'), - (0x3e2, 'pmpaddr50'), - 
(0x3e3, 'pmpaddr51'), - (0x3e4, 'pmpaddr52'), - (0x3e5, 'pmpaddr53'), - (0x3e6, 'pmpaddr54'), - (0x3e7, 'pmpaddr55'), - (0x3e8, 'pmpaddr56'), - (0x3e9, 'pmpaddr57'), - (0x3ea, 'pmpaddr58'), - (0x3eb, 'pmpaddr59'), - (0x3ec, 'pmpaddr60'), - (0x3ed, 'pmpaddr61'), - (0x3ee, 'pmpaddr62'), - (0x3ef, 'pmpaddr63'), - (0x747, 'mseccfg'), - (0x7a0, 'tselect'), - (0x7a1, 'tdata1'), - (0x7a2, 'tdata2'), - (0x7a3, 'tdata3'), - (0x7a4, 'tinfo'), - (0x7a5, 'tcontrol'), - (0x7a8, 'mcontext'), - (0x7aa, 'mscontext'), - (0x7b0, 'dcsr'), - (0x7b1, 'dpc'), - (0x7b2, 'dscratch0'), - (0x7b3, 'dscratch1'), - (0xB00, 'mcycle'), - (0xB02, 'minstret'), - (0xB03, 'mhpmcounter3'), - (0xB04, 'mhpmcounter4'), - (0xB05, 'mhpmcounter5'), - (0xB06, 'mhpmcounter6'), - (0xB07, 'mhpmcounter7'), - (0xB08, 'mhpmcounter8'), - (0xB09, 'mhpmcounter9'), - (0xB0A, 'mhpmcounter10'), - (0xB0B, 'mhpmcounter11'), - (0xB0C, 'mhpmcounter12'), - (0xB0D, 'mhpmcounter13'), - (0xB0E, 'mhpmcounter14'), - (0xB0F, 'mhpmcounter15'), - (0xB10, 'mhpmcounter16'), - (0xB11, 'mhpmcounter17'), - (0xB12, 'mhpmcounter18'), - (0xB13, 'mhpmcounter19'), - (0xB14, 'mhpmcounter20'), - (0xB15, 'mhpmcounter21'), - (0xB16, 'mhpmcounter22'), - (0xB17, 'mhpmcounter23'), - (0xB18, 'mhpmcounter24'), - (0xB19, 'mhpmcounter25'), - (0xB1A, 'mhpmcounter26'), - (0xB1B, 'mhpmcounter27'), - (0xB1C, 'mhpmcounter28'), - (0xB1D, 'mhpmcounter29'), - (0xB1E, 'mhpmcounter30'), - (0xB1F, 'mhpmcounter31'), - (0x323, 'mhpmevent3'), - (0x324, 'mhpmevent4'), - (0x325, 'mhpmevent5'), - (0x326, 'mhpmevent6'), - (0x327, 'mhpmevent7'), - (0x328, 'mhpmevent8'), - (0x329, 'mhpmevent9'), - (0x32A, 'mhpmevent10'), - (0x32B, 'mhpmevent11'), - (0x32C, 'mhpmevent12'), - (0x32D, 'mhpmevent13'), - (0x32E, 'mhpmevent14'), - (0x32F, 'mhpmevent15'), - (0x330, 'mhpmevent16'), - (0x331, 'mhpmevent17'), - (0x332, 'mhpmevent18'), - (0x333, 'mhpmevent19'), - (0x334, 'mhpmevent20'), - (0x335, 'mhpmevent21'), - (0x336, 'mhpmevent22'), - (0x337, 'mhpmevent23'), - (0x338, 
'mhpmevent24'), - (0x339, 'mhpmevent25'), - (0x33A, 'mhpmevent26'), - (0x33B, 'mhpmevent27'), - (0x33C, 'mhpmevent28'), - (0x33D, 'mhpmevent29'), - (0x33E, 'mhpmevent30'), - (0x33F, 'mhpmevent31'), - - # Standard Machine RO - (0xF11, 'mvendorid'), - (0xF12, 'marchid'), - (0xF13, 'mimpid'), - (0xF14, 'mhartid'), - (0xF15, 'mconfigptr'), -] - -csrs32 = [ - # Standard Hypervisor R/w - (0x615, 'htimedeltah'), - (0x61A, 'henvcfgh'), - - # Standard User RO - (0xC80, 'cycleh'), - (0xC81, 'timeh'), - (0xC82, 'instreth'), - (0xC83, 'hpmcounter3h'), - (0xC84, 'hpmcounter4h'), - (0xC85, 'hpmcounter5h'), - (0xC86, 'hpmcounter6h'), - (0xC87, 'hpmcounter7h'), - (0xC88, 'hpmcounter8h'), - (0xC89, 'hpmcounter9h'), - (0xC8A, 'hpmcounter10h'), - (0xC8B, 'hpmcounter11h'), - (0xC8C, 'hpmcounter12h'), - (0xC8D, 'hpmcounter13h'), - (0xC8E, 'hpmcounter14h'), - (0xC8F, 'hpmcounter15h'), - (0xC90, 'hpmcounter16h'), - (0xC91, 'hpmcounter17h'), - (0xC92, 'hpmcounter18h'), - (0xC93, 'hpmcounter19h'), - (0xC94, 'hpmcounter20h'), - (0xC95, 'hpmcounter21h'), - (0xC96, 'hpmcounter22h'), - (0xC97, 'hpmcounter23h'), - (0xC98, 'hpmcounter24h'), - (0xC99, 'hpmcounter25h'), - (0xC9A, 'hpmcounter26h'), - (0xC9B, 'hpmcounter27h'), - (0xC9C, 'hpmcounter28h'), - (0xC9D, 'hpmcounter29h'), - (0xC9E, 'hpmcounter30h'), - (0xC9F, 'hpmcounter31h'), - - # Standard Machine RW - (0x310, 'mstatush'), - (0x31A, 'menvcfgh'), - (0x757, 'mseccfgh'), - (0xB80, 'mcycleh'), - (0xB82, 'minstreth'), - (0xB83, 'mhpmcounter3h'), - (0xB84, 'mhpmcounter4h'), - (0xB85, 'mhpmcounter5h'), - (0xB86, 'mhpmcounter6h'), - (0xB87, 'mhpmcounter7h'), - (0xB88, 'mhpmcounter8h'), - (0xB89, 'mhpmcounter9h'), - (0xB8A, 'mhpmcounter10h'), - (0xB8B, 'mhpmcounter11h'), - (0xB8C, 'mhpmcounter12h'), - (0xB8D, 'mhpmcounter13h'), - (0xB8E, 'mhpmcounter14h'), - (0xB8F, 'mhpmcounter15h'), - (0xB90, 'mhpmcounter16h'), - (0xB91, 'mhpmcounter17h'), - (0xB92, 'mhpmcounter18h'), - (0xB93, 'mhpmcounter19h'), - (0xB94, 'mhpmcounter20h'), - (0xB95, 
'mhpmcounter21h'), - (0xB96, 'mhpmcounter22h'), - (0xB97, 'mhpmcounter23h'), - (0xB98, 'mhpmcounter24h'), - (0xB99, 'mhpmcounter25h'), - (0xB9A, 'mhpmcounter26h'), - (0xB9B, 'mhpmcounter27h'), - (0xB9C, 'mhpmcounter28h'), - (0xB9D, 'mhpmcounter29h'), - (0xB9E, 'mhpmcounter30h'), - (0xB9F, 'mhpmcounter31h'), -] - -opcode_base = 0 -opcode_size = 7 -funct_base = 12 -funct_size = 3 - -def binary(n, digits=0): - rep = bin(n)[2:] - return rep if digits == 0 else ('0' * (digits - len(rep))) + rep - -def make_c(match,mask): - print('/* Automatically generated by parse_opcodes. */') - print('#ifndef RISCV_ENCODING_H') - print('#define RISCV_ENCODING_H') - for name in namelist: - name2 = name.upper().replace('.','_') - print('#define MATCH_%s %s' % (name2, hex(match[name]))) - print('#define MASK_%s %s' % (name2, hex(mask[name]))) - for num, name in csrs+csrs32: - print('#define CSR_%s %s' % (name.upper(), hex(num))) - for num, name in causes: - print('#define CAUSE_%s %s' % (name.upper().replace(' ', '_'), hex(num))) - print('#endif') - - print('#ifdef DECLARE_INSN') - for name in namelist: - name2 = name.replace('.','_') - print('DECLARE_INSN(%s, MATCH_%s, MASK_%s)' % (name2, name2.upper(), name2.upper())) - print('#endif') # #ifdef DECLARE_INSN - - print('#ifdef DECLARE_CSR') - for num, name in csrs+csrs32: - print('DECLARE_CSR(%s, CSR_%s)' % (name, name.upper())) - print('#endif') - - print('#ifdef DECLARE_CAUSE') - for num, name in causes: - print('DECLARE_CAUSE("%s", CAUSE_%s)' % (name, name.upper().replace(' ', '_'))) - print('#endif') - -def yank(num,start,len): - return (num >> start) & ((1 << len) - 1) - -def str_arg(arg0,name,match,arguments): - if arg0 in arguments: - return name or arg0 - else: - start = arglut[arg0][1] - len = arglut[arg0][0] - arglut[arg0][1] + 1 - return binary(yank(match,start,len),len) - -def str_inst(name,arguments): - return name.replace('.rv32','').upper() - -def print_unimp_type(name,match,arguments): - print(""" -& 
-\\multicolumn{10}{|c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - '0'*32, \ - 'UNIMP' \ - )) - -def print_u_type(name,match,arguments): - print(""" -& -\\multicolumn{8}{|c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - str_arg('imm20','imm[31:12]',match,arguments), \ - str_arg('rd','',match,arguments), \ - binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_uj_type(name,match,arguments): - print(""" -& -\\multicolumn{8}{|c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - str_arg('jimm20','imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]',match,arguments), \ - str_arg('rd','',match,arguments), \ - binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_s_type(name,match,arguments): - print(""" -& -\\multicolumn{4}{|c|}{%s} & -\\multicolumn{2}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - str_arg('imm12hi','imm[11:5]',match,arguments), \ - str_arg('rs2','',match,arguments), \ - str_arg('rs1','',match,arguments), \ - binary(yank(match,funct_base,funct_size),funct_size), \ - str_arg('imm12lo','imm[4:0]',match,arguments), \ - binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_sb_type(name,match,arguments): - print(""" -& -\\multicolumn{4}{|c|}{%s} & -\\multicolumn{2}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - str_arg('bimm12hi','imm[12$\\vert$10:5]',match,arguments), \ - str_arg('rs2','',match,arguments), \ - str_arg('rs1','',match,arguments), \ - binary(yank(match,funct_base,funct_size),funct_size), \ - str_arg('bimm12lo','imm[4:1$\\vert$11]',match,arguments), \ - 
binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_i_type(name,match,arguments): - print(""" -& -\\multicolumn{6}{|c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - str_arg('imm12','imm[11:0]',match,arguments), \ - str_arg('rs1','',match,arguments), \ - binary(yank(match,funct_base,funct_size),funct_size), \ - str_arg('rd','',match,arguments), \ - binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_csr_type(name,match,arguments): - print(""" -& -\\multicolumn{6}{|c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - str_arg('imm12','csr',match,arguments), \ - ('uimm' if name[-1] == 'i' else 'rs1'), \ - binary(yank(match,funct_base,funct_size),funct_size), \ - str_arg('rd','',match,arguments), \ - binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_ish_type(name,match,arguments): - print(""" -& -\\multicolumn{3}{|c|}{%s} & -\\multicolumn{3}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - binary(yank(match,26,6),6), \ - str_arg('shamt','shamt',match,arguments), \ - str_arg('rs1','',match,arguments), \ - binary(yank(match,funct_base,funct_size),funct_size), \ - str_arg('rd','',match,arguments), \ - binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_ishw_type(name,match,arguments): - print(""" -& -\\multicolumn{4}{|c|}{%s} & -\\multicolumn{2}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - 
binary(yank(match,25,7),7), \ - str_arg('shamtw','shamt',match,arguments), \ - str_arg('rs1','',match,arguments), \ - binary(yank(match,funct_base,funct_size),funct_size), \ - str_arg('rd','',match,arguments), \ - binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_r_type(name,match,arguments): - print(""" -& -\\multicolumn{4}{|c|}{%s} & -\\multicolumn{2}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - binary(yank(match,25,7),7), \ - str_arg('rs2','',match,arguments), \ - 'zimm' in arguments and str_arg('zimm','imm[4:0]',match,arguments) or str_arg('rs1','',match,arguments), \ - str_arg('rm','',match,arguments), \ - str_arg('rd','',match,arguments), \ - binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_r4_type(name,match,arguments): - print(""" -& -\\multicolumn{2}{|c|}{%s} & -\\multicolumn{2}{c|}{%s} & -\\multicolumn{2}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - str_arg('rs3','',match,arguments), \ - binary(yank(match,25,2),2), \ - str_arg('rs2','',match,arguments), \ - str_arg('rs1','',match,arguments), \ - str_arg('rm','',match,arguments), \ - str_arg('rd','',match,arguments), \ - binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_amo_type(name,match,arguments): - print(""" -& -\\multicolumn{2}{|c|}{%s} & -\\multicolumn{1}{c|}{aq} & -\\multicolumn{1}{c|}{rl} & -\\multicolumn{2}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - binary(yank(match,27,5),5), \ - str_arg('rs2','',match,arguments), \ - str_arg('rs1','',match,arguments), \ - 
binary(yank(match,funct_base,funct_size),funct_size), \ - str_arg('rd','',match,arguments), \ - binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_fence_type(name,match,arguments): - print(""" -& -\\multicolumn{2}{|c|}{%s} & -\\multicolumn{3}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & -\\multicolumn{1}{c|}{%s} & %s \\\\ -\\cline{2-11} -""" % \ - ( \ - str_arg('fm','fm',match,arguments), \ - str_arg('pred','pred',match,arguments), \ - str_arg('succ','',match,arguments), \ - str_arg('rs1','',match,arguments), \ - binary(yank(match,funct_base,funct_size),funct_size), \ - str_arg('rd','',match,arguments), \ - binary(yank(match,opcode_base,opcode_size),opcode_size), \ - str_inst(name,arguments) \ - )) - -def print_header(*types): - print(""" -\\newpage - -\\begin{table}[p] -\\begin{small} -\\begin{center} -\\begin{tabular}{p{0in}p{0.4in}p{0.05in}p{0.05in}p{0.05in}p{0.05in}p{0.4in}p{0.6in}p{0.4in}p{0.6in}p{0.7in}l} -& & & & & & & & & & \\\\ - & -\\multicolumn{1}{l}{\\instbit{31}} & -\\multicolumn{1}{r}{\\instbit{27}} & -\\instbit{26} & -\\instbit{25} & -\\multicolumn{1}{l}{\\instbit{24}} & -\\multicolumn{1}{r}{\\instbit{20}} & -\\instbitrange{19}{15} & -\\instbitrange{14}{12} & -\\instbitrange{11}{7} & -\\instbitrange{6}{0} \\\\ -\\cline{2-11} -""") - if 'r' in types: - print(""" -& -\\multicolumn{4}{|c|}{funct7} & -\\multicolumn{2}{c|}{rs2} & -\\multicolumn{1}{c|}{rs1} & -\\multicolumn{1}{c|}{funct3} & -\\multicolumn{1}{c|}{rd} & -\\multicolumn{1}{c|}{opcode} & R-type \\\\ -\\cline{2-11} -""") - if 'r4' in types: - print(""" -& -\\multicolumn{2}{|c|}{rs3} & -\\multicolumn{2}{c|}{funct2} & -\\multicolumn{2}{c|}{rs2} & -\\multicolumn{1}{c|}{rs1} & -\\multicolumn{1}{c|}{funct3} & -\\multicolumn{1}{c|}{rd} & -\\multicolumn{1}{c|}{opcode} & R4-type \\\\ -\\cline{2-11} -""") - if 'i' in types: - print(""" -& -\\multicolumn{6}{|c|}{imm[11:0]} & 
-\\multicolumn{1}{c|}{rs1} & -\\multicolumn{1}{c|}{funct3} & -\\multicolumn{1}{c|}{rd} & -\\multicolumn{1}{c|}{opcode} & I-type \\\\ -\\cline{2-11} -""") - if 's' in types: - print(""" -& -\\multicolumn{4}{|c|}{imm[11:5]} & -\\multicolumn{2}{c|}{rs2} & -\\multicolumn{1}{c|}{rs1} & -\\multicolumn{1}{c|}{funct3} & -\\multicolumn{1}{c|}{imm[4:0]} & -\\multicolumn{1}{c|}{opcode} & S-type \\\\ -\\cline{2-11} -""") - if 'sb' in types: - print(""" -& -\\multicolumn{4}{|c|}{imm[12$\\vert$10:5]} & -\\multicolumn{2}{c|}{rs2} & -\\multicolumn{1}{c|}{rs1} & -\\multicolumn{1}{c|}{funct3} & -\\multicolumn{1}{c|}{imm[4:1$\\vert$11]} & -\\multicolumn{1}{c|}{opcode} & B-type \\\\ -\\cline{2-11} -""") - if 'u' in types: - print(""" -& -\\multicolumn{8}{|c|}{imm[31:12]} & -\\multicolumn{1}{c|}{rd} & -\\multicolumn{1}{c|}{opcode} & U-type \\\\ -\\cline{2-11} -""") - if 'uj' in types: - print(""" -& -\\multicolumn{8}{|c|}{imm[20$\\vert$10:1$\\vert$11$\\vert$19:12]} & -\\multicolumn{1}{c|}{rd} & -\\multicolumn{1}{c|}{opcode} & J-type \\\\ -\\cline{2-11} -""") - -def print_subtitle(title): - print(""" -& -\\multicolumn{10}{c}{} & \\\\ -& -\\multicolumn{10}{c}{\\bf %s} & \\\\ -\\cline{2-11} -""" % title) - -def print_footer(caption=''): - print(""" -\\end{tabular} -\\end{center} -\\end{small} -%s -\\end{table} -""" % caption) - -def print_inst(n): - if n == 'fence' or n == 'fence.tso' or n == 'pause': - print_fence_type(n, match[n], arguments[n]) - elif 'aqrl' in arguments[n]: - print_amo_type(n, match[n], arguments[n]) - elif 'shamt' in arguments[n]: - print_ish_type(n, match[n], arguments[n]) - elif 'shamtw' in arguments[n]: - print_ishw_type(n, match[n], arguments[n]) - elif 'imm20' in arguments[n]: - print_u_type(n, match[n], arguments[n]) - elif 'jimm20' in arguments[n]: - print_uj_type(n, match[n], arguments[n]) - elif n[:3] == 'csr': - print_csr_type(n, match[n], arguments[n]) - elif 'imm12' in arguments[n] or n == 'ecall' or n == 'ebreak': - print_i_type(n, match[n], arguments[n]) 
- elif 'imm12hi' in arguments[n]: - print_s_type(n, match[n], arguments[n]) - elif 'bimm12hi' in arguments[n]: - print_sb_type(n, match[n], arguments[n]) - elif 'rs3' in arguments[n]: - print_r4_type(n, match[n], arguments[n]) - else: - print_r_type(n, match[n], arguments[n]) - -def print_insts(*names): - for n in names: - print_inst(n) - -def make_supervisor_latex_table(): - print_header('r', 'i') - print_subtitle('Trap-Return Instructions') - print_insts('sret', 'mret') - print_subtitle('Interrupt-Management Instructions') - print_insts('wfi') - print_subtitle('Supervisor Memory-Management Instructions') - print_insts('sfence.vma') - print_subtitle('Hypervisor Memory-Management Instructions') - print_insts('hfence.vvma', 'hfence.gvma') - print_subtitle('Hypervisor Virtual-Machine Load and Store Instructions') - print_insts('hlv.b', 'hlv.bu') - print_insts('hlv.h', 'hlv.hu') - print_insts('hlv.w') - print_insts('hlvx.hu', 'hlvx.wu') - print_insts('hsv.b', 'hsv.h', 'hsv.w') - print_subtitle('Hypervisor Virtual-Machine Load and Store Instructions, RV64 only') - print_insts('hlv.wu') - print_insts('hlv.d') - print_insts('hsv.d') - print_subtitle('\emph{Svinval} Memory-Management Extension') - print_insts('sinval.vma') - print_insts('sfence.w.inval', 'sfence.inval.ir') - print_insts('hinval.vvma', 'hinval.gvma') - print_footer('\\caption{RISC-V Privileged Instructions}') - -def make_latex_table(): - print_header('r','i','s','sb','u','uj') - print_subtitle('RV32I Base Instruction Set') - print_insts('lui', 'auipc') - print_insts('jal', 'jalr', 'beq', 'bne', 'blt', 'bge', 'bltu', 'bgeu') - print_insts('lb', 'lh', 'lw', 'lbu', 'lhu', 'sb', 'sh', 'sw') - print_insts('addi', 'slti', 'sltiu', 'xori', 'ori', 'andi', 'slli.rv32', 'srli.rv32', 'srai.rv32') - print_insts('add', 'sub', 'sll', 'slt', 'sltu', 'xor', 'srl', 'sra', 'or', 'and') - print_insts('fence', 'fence.tso', 'pause') - print_insts('ecall', 'ebreak') - print_footer() - - print_header('r','a','i','s') - 
print_subtitle('RV64I Base Instruction Set (in addition to RV32I)') - print_insts('lwu', 'ld', 'sd') - print_insts('slli', 'srli', 'srai') - print_insts('addiw', 'slliw', 'srliw', 'sraiw') - print_insts('addw', 'subw', 'sllw', 'srlw', 'sraw') - print_subtitle('RV32/RV64 \emph{Zifencei} Standard Extension') - print_insts('fence.i') - print_subtitle('RV32/RV64 \emph{Zicsr} Standard Extension') - print_insts('csrrw', 'csrrs', 'csrrc') - print_insts('csrrwi', 'csrrsi', 'csrrci') - print_subtitle('RV32M Standard Extension') - print_insts('mul', 'mulh', 'mulhsu', 'mulhu') - print_insts('div', 'divu', 'rem', 'remu') - print_subtitle('RV64M Standard Extension (in addition to RV32M)') - print_insts('mulw', 'divw', 'divuw', 'remw', 'remuw') - print_footer() - - print_header('r') - print_subtitle('RV32A Standard Extension') - print_insts('lr.w', 'sc.w') - print_insts('amoswap.w') - print_insts('amoadd.w', 'amoxor.w', 'amoand.w', 'amoor.w') - print_insts('amomin.w', 'amomax.w', 'amominu.w', 'amomaxu.w') - print_subtitle('RV64A Standard Extension (in addition to RV32A)') - print_insts('lr.d', 'sc.d') - print_insts('amoswap.d') - print_insts('amoadd.d', 'amoxor.d', 'amoand.d', 'amoor.d') - print_insts('amomin.d', 'amomax.d', 'amominu.d', 'amomaxu.d') - print_subtitle('RV32/RV64 \emph{Zicbom} Standard Extension') - print_insts('cbo.clean') - print_insts('cbo.flush') - print_insts('cbo.inval') - print_subtitle('RV32/RV64 \emph{Zicboz} Standard Extension') - print_insts('cbo.zero') - print_subtitle('RV32/RV64 \emph{Zicbop} Standard Extension') - print_insts('prefetch.i', 'prefetch.r', 'prefetch.w') - print_footer() - - print_header('r','r4','i','s') - print_subtitle('RV32F Standard Extension') - print_insts('flw', 'fsw') - print_insts('fmadd.s', 'fmsub.s', 'fnmsub.s', 'fnmadd.s') - print_insts('fadd.s', 'fsub.s', 'fmul.s', 'fdiv.s', 'fsqrt.s') - print_insts('fsgnj.s', 'fsgnjn.s', 'fsgnjx.s', 'fmin.s', 'fmax.s') - print_insts('fcvt.w.s', 'fcvt.wu.s', 'fmv.x.w') - 
print_insts('feq.s', 'flt.s', 'fle.s', 'fclass.s') - print_insts('fcvt.s.w', 'fcvt.s.wu', 'fmv.w.x') - print_subtitle('RV64F Standard Extension (in addition to RV32F)') - print_insts('fcvt.l.s', 'fcvt.lu.s') - print_insts('fcvt.s.l', 'fcvt.s.lu') - print_footer() - - print_header('r','r4','i','s') - print_subtitle('RV32D Standard Extension') - print_insts('fld', 'fsd') - print_insts('fmadd.d', 'fmsub.d', 'fnmsub.d', 'fnmadd.d') - print_insts('fadd.d', 'fsub.d', 'fmul.d', 'fdiv.d', 'fsqrt.d') - print_insts('fsgnj.d', 'fsgnjn.d', 'fsgnjx.d', 'fmin.d', 'fmax.d') - print_insts('fcvt.s.d', 'fcvt.d.s') - print_insts('feq.d', 'flt.d', 'fle.d', 'fclass.d') - print_insts('fcvt.w.d', 'fcvt.wu.d') - print_insts('fcvt.d.w', 'fcvt.d.wu') - print_subtitle('RV64D Standard Extension (in addition to RV32D)') - print_insts('fcvt.l.d', 'fcvt.lu.d', 'fmv.x.d') - print_insts('fcvt.d.l', 'fcvt.d.lu', 'fmv.d.x') - print_footer() - - print_header('r','r4','i','s') - print_subtitle('RV32Q Standard Extension') - print_insts('flq', 'fsq') - print_insts('fmadd.q', 'fmsub.q', 'fnmsub.q', 'fnmadd.q') - print_insts('fadd.q', 'fsub.q', 'fmul.q', 'fdiv.q', 'fsqrt.q') - print_insts('fsgnj.q', 'fsgnjn.q', 'fsgnjx.q', 'fmin.q', 'fmax.q') - print_insts('fcvt.s.q', 'fcvt.q.s') - print_insts('fcvt.d.q', 'fcvt.q.d') - print_insts('feq.q', 'flt.q', 'fle.q', 'fclass.q') - print_insts('fcvt.w.q', 'fcvt.wu.q') - print_insts('fcvt.q.w', 'fcvt.q.wu') - print_subtitle('RV64Q Standard Extension (in addition to RV32Q)') - print_insts('fcvt.l.q', 'fcvt.lu.q') - print_insts('fcvt.q.l', 'fcvt.q.lu') - print_footer() - - print_header('r','r4','i','s') - print_subtitle('RV32Zfh Standard Extension') - print_insts('flh', 'fsh') - print_insts('fmadd.h', 'fmsub.h', 'fnmsub.h', 'fnmadd.h') - print_insts('fadd.h', 'fsub.h', 'fmul.h', 'fdiv.h', 'fsqrt.h') - print_insts('fsgnj.h', 'fsgnjn.h', 'fsgnjx.h', 'fmin.h', 'fmax.h') - print_insts('fcvt.s.h', 'fcvt.h.s') - print_insts('fcvt.d.h', 'fcvt.h.d') - print_insts('fcvt.q.h', 
'fcvt.h.q') - print_insts('feq.h', 'flt.h', 'fle.h', 'fclass.h') - print_insts('fcvt.w.h', 'fcvt.wu.h', 'fmv.x.h') - print_insts('fcvt.h.w', 'fcvt.h.wu', 'fmv.h.x') - print_subtitle('RV64Zfh Standard Extension (in addition to RV32Zfh)') - print_insts('fcvt.l.h', 'fcvt.lu.h') - print_insts('fcvt.h.l', 'fcvt.h.lu') - print_footer('\\caption{Instruction listing for RISC-V}') - -def print_chisel_insn(name): - s = " def %-18s = BitPat(\"b" % name.replace('.', '_').upper() - for i in range(31, -1, -1): - if yank(mask[name], i, 1): - s = '%s%d' % (s, yank(match[name], i, 1)) - else: - s = s + '?' - print(s + "\")") - -def make_chisel(): - print('/* Automatically generated by parse_opcodes */') - print('object Instructions {') - for name in namelist: - print_chisel_insn(name) - print('}') - print('object Causes {') - for num, name in causes: - print(' val %s = %s' % (name.lower().replace(' ', '_'), hex(num))) - print(' val all = {') - print(' val res = collection.mutable.ArrayBuffer[Int]()') - for num, name in causes: - print(' res += %s' % (name.lower().replace(' ', '_'))) - print(' res.toArray') - print(' }') - print('}') - print('object CSRs {') - for num, name in csrs+csrs32: - print(' val %s = %s' % (name, hex(num))) - print(' val all = {') - print(' val res = collection.mutable.ArrayBuffer[Int]()') - for num, name in csrs: - print(' res += %s' % (name)) - print(' res.toArray') - print(' }') - print(' val all32 = {') - print(' val res = collection.mutable.ArrayBuffer(all:_*)') - for num, name in csrs32: - print(' res += %s' % (name)) - print(' res.toArray') - print(' }') - print('}') - -def print_sverilog_insn(name): - s = " localparam [31:0] %-18s = 32'b" % name.replace('.', '_').upper() - for i in range(31, -1, -1): - if yank(mask[name], i, 1): - s = '%s%d' % (s, yank(match[name], i, 1)) - else: - s = s + '?' 
- print(s + ";") - -def make_sverilog(): - print('/* Automatically generated by parse_opcodes */') - print('package riscv_instr;') - for name in namelist: - print_sverilog_insn(name) - print(' /* CSR Addresses */') - for num, name in csrs+csrs32: - print(' localparam logic [11:0] CSR_%s = 12\'h%s;' % (name.upper(), hex(num)[2:])) - print('endpackage') - -def signed(value, width): - if 0 <= value < (1<<(width-1)): - return value - else: - return value - (1<= 2 - - name = tokens[0] - pseudo = name[0] == '@' - if pseudo: - name = name[1:] - mymatch = 0 - mymask = 0 - cover = 0 - - if not name in list(arguments.keys()): - arguments[name] = [] - - for token in tokens[1:]: - if len(token.split('=')) == 2: - tokens = token.split('=') - if len(tokens[0].split('..')) == 2: - tmp = tokens[0].split('..') - hi = int(tmp[0]) - lo = int(tmp[1]) - if hi <= lo: - sys.exit("%s: bad range %d..%d" % (name,hi,lo)) - else: - hi = lo = int(tokens[0]) - - if tokens[1] != 'ignore': - val = int(tokens[1], 0) - if val >= (1 << (hi-lo+1)): - sys.exit("%s: bad value %d for range %d..%d" % (name,val,hi,lo)) - mymatch = mymatch | (val << lo) - mymask = mymask | ((1<<(hi+1))-(1<