aboutsummaryrefslogtreecommitdiff
path: root/parse.py
blob: 83c1d72c5082c253945161e0e9f2808cda31d469 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
#!/usr/bin/env python3

from constants import *
import re
import glob
import os
import pprint
import logging
import collections
import yaml
import sys

# Module-wide logging setup: INFO and above, each message prefixed with its
# level name.
logging.basicConfig(format='%(levelname)s:: %(message)s', level=logging.INFO)

# Shared pretty-printer (2-space indent) for dumping nested structures.
pp = pprint.PrettyPrinter(indent=2)

def process_enc_line(line, ext):
    '''
    This function processes one line of the encoding files (rv*). The line is
    expected to be of the form::

        <name> <arguments and bit assignments>

    Bit assignments are either ranges of the form hi..lo=val (extracted with
    the fixed_ranges regex from constants.py) or single bits of the form
    pos=val (extracted with the single_fixed regex from constants.py). Every
    other whitespace separated token is treated as an argument of the
    instruction.

    As part of the processing, the function ensures that the encoding is legal
    through the following checks. Each failure is logged and SystemExit(1) is
    raised:

        - bit ranges are in the format hi..lo=val where hi >= lo
        - value assigned is representable in the bit range
        - there is no over specification between ranges (the same bit being
          covered by two hi..lo=val assignments)
        - the mapping of every argument of the instruction exists in arg_lut.

    Note: under specification (bits that are neither fixed nor covered by an
    argument) is NOT checked by this function.

    If the above checks pass, then the function returns a tuple of the name
    (with '.' replaced by '_') and a dictionary containing basic information of
    the instruction which includes:
        - variable_fields: list of arguments used by the instruction whose
          mapping exists in the arg_lut dictionary
        - encoding: this contains the 32-bit encoding of the instruction where
          '-' is used to represent position of arguments and 1/0 is used to
          represent the static encoding of the bits
        - extension: single element list holding the rv* filename (last path
          component of ext) from which this instruction was included
        - match: hex value representing the bits that need to match to detect
          this instruction
        - mask: hex value representing the bits that need to be masked to
          extract the value required for matching.
    '''
    single_dict = {}

    # fill all bits with don't care. we use '-' to represent don't care
    # TODO: hardcoded for 32-bits.
    encoding = ['-'] * 32

    # get the name of instruction by splitting based on the first space
    [name, remaining] = line.split(' ', 1)

    # the name exactly as written in the file; only used in error messages
    raw_name = name

    # replace dots with underscores as dot doesn't work with C/Sverilog, etc
    name = name.replace('.', '_')

    # remove leading whitespaces
    remaining = remaining.lstrip()

    # strip the hi..lo=val assignments out of the line; what is left over are
    # the single bit assignments and the arguments of the instruction
    args = fixed_ranges.sub(' ', remaining)

    # parse through the hi..lo=val assignments, validate each of them and
    # assign constants 1/0 to bits which need to be hardcoded for this
    # instruction
    # ex: 1..5=0 will result in an error --> hi < lo
    # ex: 1..0=5 will result in an error --> value does not fit in 2 bits
    # ex: 5..0=0 2..1=2 will result in an error --> overlapping bits
    covered = set()  # bit positions already claimed by an earlier range
    for (msb, lsb, value) in fixed_ranges.findall(remaining):
        value_text = value
        value = int(value, 0)
        msb = int(msb, 0)
        lsb = int(lsb, 0)

        # check hi >= lo
        if msb < lsb:
            logging.error(
                f'{raw_name:<10} has position {msb} less than position {lsb} in it\'s encoding'
            )
            raise SystemExit(1)

        # illegal value assigned as per bit width
        width = msb - lsb + 1
        if value >= (1 << width):
            logging.error(
                f'{raw_name:<10} has an illegal value {value_text} assigned as per the bit width {width}'
            )
            raise SystemExit(1)

        value = f"{value:032b}"
        for i in range(0, width):
            ind = i + lsb

            # overlapping bits
            if ind in covered:
                logging.error(
                    f'{raw_name:<10} has {ind} bit overlapping in it\'s opcodes'
                )
                raise SystemExit(1)
            covered.add(ind)
            encoding[31 - ind] = value[31 - i]

    # do the same as above but for <lsb>=<val> pattern. single_fixed is a regex
    # expression present in constants.py
    for (lsb, value, drop) in single_fixed.findall(remaining):
        lsb = int(lsb, 0)
        value = int(value, 0)
        encoding[31 - lsb] = str(value)

    # convert the list of encodings into a single string for match and mask.
    # match has a 1 only where the encoding fixes a bit to 1; mask has a 1
    # wherever the encoding fixes a bit (to either 0 or 1).
    match = "".join(encoding).replace('-','0')
    mask = "".join(encoding).replace('0','1').replace('-','0')

    # check if all args of the instruction are present in arg_lut present in
    # constants.py
    args = single_fixed.sub(' ', args).split()
    for a in args:
        if a not in arg_lut:
            logging.error(f' Found variable {a} in instruction {name} whose mapping in arg_lut does not exist')
            raise SystemExit(1)

    # update the fields of the instruction as a dict and return back along with
    # the name of the instruction
    single_dict['encoding'] = "".join(encoding)
    single_dict['variable_fields'] = args
    single_dict['extension'] = [ext.split('/')[-1]]
    single_dict['match']=hex(int(match,2))
    single_dict['mask']=hex(int(mask,2))

    return (name, single_dict)


def _read_encoding_lines(path):
    '''
    Return the lines of the file at path with trailing whitespace removed,
    leaving out blank lines and lines starting with '#'.
    '''
    with open(path) as fp:
        stripped = (raw.rstrip() for raw in fp)
        return [entry for entry in stripped
                if entry and not entry.startswith('#')]


def _resolve_extension_file(ext):
    '''
    Return the path of the extension file ext. The current directory is tried
    first and the unratified/ directory second. Returns None if the file is
    found in neither place.
    '''
    if os.path.exists(ext):
        return ext
    fallback = f'unratified/{ext}'
    if os.path.exists(fallback):
        return fallback
    return None


def _find_instr_line(ext_file, instr):
    '''
    Return the first line of ext_file whose first token is exactly instr, or
    None if there is no such line. The name is escaped and must be followed by
    whitespace (or end of line) so that e.g. 'add' does not match 'addi'.
    '''
    pattern = re.compile(rf'^\s*{re.escape(instr)}(\s|$)')
    with open(ext_file) as fp:
        for oline in fp:
            if pattern.match(oline):
                return oline
    return None


def _add_instr(instr_dict, name, single_dict, err_prefix, fname):
    '''
    Add the instruction name to instr_dict. If the instruction is already
    present with the same encoding, the extensions of single_dict are merged
    into the existing entry. If it is present with a different encoding an
    error (starting with err_prefix) is logged and SystemExit(1) is raised.
    '''
    if name not in instr_dict:
        instr_dict[name] = single_dict
        return

    existing = instr_dict[name]
    var = existing["extension"]
    if existing['encoding'] != single_dict['encoding']:
        err_msg = err_prefix
        err_msg += f'{fname.split("/")[-1]} is already '
        err_msg += f'added from {var} but each have different encodings for the same instruction'
        logging.error(err_msg)
        raise SystemExit(1)

    # same instruction reached through another file: remember every file it
    # came from instead of overwriting the entry
    for e in single_dict['extension']:
        if e not in var:
            var.append(e)


def create_inst_dict(file_filter, include_pseudo=False):
    '''
    This function returns a dictionary containing all instructions associated
    with the extensions defined by the file_filter input. The file_filter input
    is a list of glob patterns of rv* file names (i.e. 'rv_i', 'rv32_i', etc)
    which are looked up in the current directory.

    Each node of the dictionary will correspond to an instruction which again is
    a dictionary as returned by process_enc_line. The dictionary contents of
    each instruction includes:
        - variable_fields: list of arguments used by the instruction whose
          mapping exists in the arg_lut dictionary
        - encoding: this contains the 32-bit encoding of the instruction where
          '-' is used to represent position of arguments and 1/0 is used to
          represent the static encoding of the bits
        - extension: list of the rv* filenames from which this instruction was
          included
        - match: hex value representing the bits that need to match to detect
          this instruction
        - mask: hex value representing the bits that need to be masked to
          extract the value required for matching.

    In order to build this dictionary, the function does 3 passes over the
    selected files.

    The first pass extracts all standard instructions. In this pass, all pseudo
    ops and imported instructions are skipped. For each selected line of the
    file, we call process_enc_line to create the above mentioned dictionary
    contents of the instruction. If the same instruction shows up twice with
    different encodings an error is logged and SystemExit(1) is raised; if the
    encodings agree, the extension lists are merged.

    The second pass parses only pseudo_ops ($pseudo lines). For each pseudo_op
    this function checks if the dependent extension and instruction, both,
    exist before parsing it (SystemExit(1) otherwise). The pseudo op is only
    added to the overall dictionary if the dependent instruction is not present
    in the dictionary or if include_pseudo is True, else it is skipped. A
    pseudo op never replaces an entry already present under the same name.

    The third pass parses only imported instructions ($import lines). The
    imported instruction is looked up in the file it is imported from
    (SystemExit(1) if the file or the instruction cannot be found) and is then
    added exactly like a standard instruction of the first pass.
    '''
    opcodes_dir = './'
    instr_dict = {}

    # file_names contains all files to be parsed in the riscv-opcodes directory
    file_names = []
    for fil in file_filter:
        file_names += glob.glob(f'{opcodes_dir}{fil}')

    # every pass walks the same non-blank, non-comment lines, so read each
    # file only once
    file_lines = [(f, _read_encoding_lines(f)) for f in file_names]

    # first pass is for standard/regular instructions
    logging.debug('Collecting standard instructions first')
    for (f, lines) in file_lines:
        logging.debug(f'Parsing File: {f}')

        # go through each line of the file
        for line in lines:
            # ignore all lines containing $import and $pseudo; these are
            # handled by the later passes
            if '$import' in line or '$pseudo' in line:
                continue
            logging.debug(f'     Processing line: {line}')

            # call process_enc_line to get the data about the current
            # instruction
            (name, single_dict) = process_enc_line(line, f)

            # add the instruction to the final dict; a clash with an already
            # added instruction of different encoding raises SystemExit
            _add_instr(instr_dict, name, single_dict,
                       f'instruction : {name} from ', f)

    # second pass is for pseudo instructions
    logging.debug('Collecting pseudo instructions now')
    for (f, lines) in file_lines:
        logging.debug(f'Parsing File: {f}')

        # go through each line of the file
        for line in lines:

            # ignore all lines not containing $pseudo
            if '$pseudo' not in line:
                continue
            logging.debug(f'     Processing line: {line}')

            # use the regex pseudo_regex from constants.py to find the dependent
            # extension, dependent instruction, the pseudo_op in question and
            # its encoding
            (ext, orig_inst, pseudo_inst, line) = pseudo_regex.findall(line)[0]

            # check if the file of the dependent extension exist. Throw error if
            # it doesn't
            ext_file = _resolve_extension_file(ext)
            if ext_file is None:
                logging.error(f'Pseudo op {pseudo_inst} in {f} depends on {ext} which is not available')
                raise SystemExit(1)

            # check if the dependent instruction exist in the dependent
            # extension. Else throw error.
            if _find_instr_line(ext_file, orig_inst) is None:
                logging.error(f'Orig instruction {orig_inst} not found in {ext_file}. Required by pseudo_op {pseudo_inst} present in {f}')
                raise SystemExit(1)

            # add the pseudo_op to the dictionary only if the original
            # instruction is not already in the dictionary.
            if orig_inst.replace('.','_') not in instr_dict or include_pseudo:
                (name, single_dict) = process_enc_line(pseudo_inst + ' ' + line, f)

                # update the final dict with the instruction
                if name not in instr_dict:
                    instr_dict[name] = single_dict
            else:
                logging.debug(f'Skipping pseudo_op {pseudo_inst} since original instruction {orig_inst} already selected in list')

    # third pass is for imported instructions
    logging.debug('Collecting imported instructions')
    for (f, lines) in file_lines:
        logging.debug(f'Parsing File: {f}')

        # go through each line of the file
        for line in lines:
            # if an instruction needs to be imported then go to the respective
            # file and pick the line that has the instruction.

            # ignore all lines not containing $import
            if '$import' not in line :
                continue
            logging.debug(f'     Processing line: {line}')

            (import_ext, reg_instr) = imported_regex.findall(line)[0]

            # check if the file of the dependent extension exist. Throw error if
            # it doesn't
            ext = _resolve_extension_file(import_ext)
            if ext is None:
                logging.error(f'Instruction {reg_instr} in {f} cannot be imported from {import_ext}')
                raise SystemExit(1)

            # check if the dependent instruction exist in the dependent
            # extension. Else throw error.
            oline = _find_instr_line(ext, reg_instr)
            if oline is None:
                logging.error(f'imported instruction {reg_instr} not found in {ext}. Required by {line} present in {f}')
                logging.error(f'Note: you cannot import pseudo ops.')
                raise SystemExit(1)

            # call process_enc_line to get the data about the imported
            # instruction
            (name, single_dict) = process_enc_line(oline, f)

            # add the instruction to the final dict; a clash with an already
            # added instruction of different encoding raises SystemExit
            _add_instr(instr_dict, name, single_dict,
                       f'imported instruction : {name} in ', f)
    return instr_dict

def make_priv_latex_table():
    '''
    This function creates priv-instr-table.tex in the current directory. The
    file holds a single latex table, captioned 'RISC-V Privileged
    Instructions', which is produced by make_ext_latex_table.

    Every dataset entry uses the same list of extension files (system_instr)
    and restricts the table rows to an explicit list of instructions, listed
    under the title of that entry. Pseudo ops are never force-included.
    '''
    type_list = ['R-type','I-type']
    system_instr = ['_h','_s','_system','_svinval', '64_h']
    dataset_list = [ (system_instr, 'Trap-Return Instructions',['sret','mret'], False) ]
    dataset_list.append((system_instr, 'Interrupt-Management Instructions',['wfi'], False))
    dataset_list.append((system_instr, 'Supervisor Memory-Management Instructions',['sfence_vma'], False))
    dataset_list.append((system_instr, 'Hypervisor Memory-Management Instructions',['hfence_vvma', 'hfence_gvma'], False))
    dataset_list.append((system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions',
        ['hlv_b','hlv_bu', 'hlv_h','hlv_hu', 'hlv_w', 'hlvx_hu', 'hlvx_wu', 'hsv_b', 'hsv_h','hsv_w'], False))
    dataset_list.append((system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions, RV64 only', ['hlv_wu','hlv_d','hsv_d'], False))
    dataset_list.append((system_instr, 'Svinval Memory-Management Instructions', ['sinval_vma', 'sfence_w_inval','sfence_inval_ir', 'hinval_vvma','hinval_gvma'], False))
    caption = '\\caption{RISC-V Privileged Instructions}'

    # the with-block guarantees the file is closed even when the table
    # generation bails out with SystemExit
    with open('priv-instr-table.tex','w') as latex_file:
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

def make_latex_table():
    '''
    This function is meant to create the instr-table.tex that is meant to be
    used by the riscv-isa-manual. This function basically creates a single
    latex file of multiple tables with each table limited to a single page.
    Only the last table is assigned a latex-caption.

    For each table we assign a type-list which captures the different
    instruction types (R, I, B, etc) that will be required for the table. Then
    we select the list of extensions ('_i', '32_i', etc) whose instructions are
    required to populate the table. For each extension or collection of
    extensions we can assign a title, such that in the end they appear as
    subheadings within the table (note these are inlined headings and not
    captions of the table).

    All of the above information is collected/created and sent to
    make_ext_latex_table function to dump out the latex contents into a file.

    The last table only has to be given a caption - as per the policy of the
    riscv-isa-manual.
    '''
    # open the file and use it as a pointer for all further dumps. The
    # with-block guarantees the file is closed even when the table generation
    # bails out with SystemExit
    with open('instr-table.tex','w') as latex_file:

        # create the rv32i table first. Here we set the caption to empty. We use the
        # files rv_i and rv32_i to capture instructions relevant for rv32i
        # configuration. The dataset is a list of 4-element tuples :
        # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions
        # is empty then it indicates that all instructions of the all the extensions
        # in list_of_extensions need to be dumped. If not empty, then only the
        # instructions listed in list_of_instructions will be dumped into latex.
        caption = ''
        type_list = ['R-type','I-type','S-type','B-type','U-type','J-type']
        dataset_list = [(['_i','32_i'], 'RV32I Base Instruction Set', [], False)]
        dataset_list.append((['_i'], '', ['fence_tso','pause'], True))
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ['R-type','I-type','S-type']
        dataset_list = [(['64_i'], 'RV64I Base Instruction Set (in addition to RV32I)', [], False)]
        dataset_list.append((['_zifencei'], 'RV32/RV64 Zifencei Standard Extension', [], False))
        dataset_list.append((['_zicsr'], 'RV32/RV64 Zicsr Standard Extension', [], False))
        dataset_list.append((['_m','32_m'], 'RV32M Standard Extension', [], False))
        dataset_list.append((['64_m'],'RV64M Standard Extension (in addition to RV32M)', [], False))
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ['R-type']
        dataset_list = [(['_a'],'RV32A Standard Extension', [], False)]
        dataset_list.append((['64_a'],'RV64A Standard Extension (in addition to RV32A)', [], False))
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ['R-type','R4-type','I-type','S-type']
        dataset_list = [(['_f'],'RV32F Standard Extension', [], False)]
        dataset_list.append((['64_f'],'RV64F Standard Extension (in addition to RV32F)', [], False))
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ['R-type','R4-type','I-type','S-type']
        dataset_list = [(['_d'],'RV32D Standard Extension', [], False)]
        dataset_list.append((['64_d'],'RV64D Standard Extension (in addition to RV32D)', [], False))
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        type_list = ['R-type','R4-type','I-type','S-type']
        dataset_list = [(['_q'],'RV32Q Standard Extension', [], False)]
        dataset_list.append((['64_q'],'RV64Q Standard Extension (in addition to RV32Q)', [], False))
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        # last table of the file: this is the only one carrying a caption
        caption = '\\caption{Instruction listing for RISC-V}'
        type_list = ['R-type','R4-type','I-type','S-type']
        dataset_list = [(['_zfh', '_d_zfh','_q_zfh'],'RV32Zfh Standard Extension', [], False)]
        dataset_list.append((['64_zfh'],'RV64Zfh Standard Extension (in addition to RV32Zfh)', [], False))
        make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption)

        ## The following is demo to show that Compressed instructions can also be
        # dumped in the same manner as above

        #type_list = ['']
        #dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [], False)]
        #dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', [], False))
        #make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption)

def _latex_table_row(fields, label, ilen):
    '''
    Build the latex source of one table row.

    fields is a list of (msb, lsb, text) tuples. The fields are laid out in
    decreasing order of msb, each one as a multicolumn cell spanning
    msb-lsb+1 columns. label is placed in the column after the last field. The
    row is closed by a cline covering columns 2 to ilen+1.
    '''
    ordered = sorted(fields, key=lambda field: field[0], reverse=True)
    last = len(ordered) - 1

    entry = ''
    for idx, (msb, lsb, name) in enumerate(ordered):
        if idx == last:
            # last cell of the row also carries the label and ends the row
            entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {label} \\\\\n'
        elif idx == 0:
            entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n'
        else:
            entry += f'\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n'
    entry += f'\\cline{{2-{ilen+1}}}\n&\n\n'
    return entry


def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption):
    '''
    For a given collection of extensions this function dumps out a complete
    latex table which includes the encodings of the instructions.

    The ilen input indicates the length of the instruction for which the table
    is created. The bit-position header row is only available for ilen 32; any
    other value selects the 16-bit header.

    The caption input is used to create the latex-table caption.

    The type_list input is a list of instruction types (R, I, B, etc) that are
    treated as header for each table. Each table will have its own requirements
    and type_list must include all the instruction-types that the table needs.
    Only the elements of this list which are present in the latex_inst_type
    dictionary defined in constants.py are used.

    The latex_file is a file pointer to which the latex-table will dumped into

    The dataset is a list of 4-element tuples containing:
        (list_of_extensions, title, list_of_instructions, include_pseudo_ops)
    The list_of_extensions must contain all the set of extensions whose
    instructions must be populated under a given title. If list_of_instructions
    is not empty, then only those instructions mentioned in list_of_instructions
    present in the extension will be dumped into the latex-table, other
    instructions will be ignored. include_pseudo_ops is forwarded to
    create_inst_dict as its include_pseudo input.

    Once the above inputs are received then function first creates table entries
    for the instruction types. To simplify things, we maintain a dictionary
    called latex_inst_type in constants.py whose entries provide a
    'variable_fields' list just like the entries of the instruction dictionary.
    Each type row is built only from those fields.

    Once the header is created, we then parse through every entry in the
    dataset. For each list dataset entry we use the create_inst_dict function to
    create an exhaustive list of instructions associated with the respective
    collection of the extension of that dataset. Then we apply the instruction
    filter, if any, indicated by the list_of_instructions of that dataset.
    Thereon, for each instruction we create a latex table entry. SystemExit(1)
    is raised if a requested instruction is not found or if one of its
    arguments has no mapping in arg_lut.

    Latex table specification for ilen sized instructions:
        Each table is created with ilen+1 columns - ilen columns for each bit of the
        instruction and one column to hold the name of the instruction.

        For each argument of an instruction we use the arg_lut from constants.py
        to identify its position in the encoding, and thus create a multicolumn
        entry with the name of the argument as the data. For hardcoded bits, we
        do the same where we capture a string of continuous 1s and 0s, identify
        the position and assign the same string as the data of the
        multicolumn entry in the table.

    '''
    column_size = "".join(['p{0.002in}']*(ilen+1))

    type_entries = '''
    \\multicolumn{3}{l}{31} &
    \\multicolumn{2}{r}{27} &
    \\multicolumn{1}{c}{26} &
    \\multicolumn{1}{r}{25} &
    \\multicolumn{3}{l}{24} &
    \\multicolumn{2}{r}{20} &
    \\multicolumn{3}{l}{19} &
    \\multicolumn{2}{r}{15} &
    \\multicolumn{2}{l}{14} &
    \\multicolumn{1}{r}{12} &
    \\multicolumn{4}{l}{11} &
    \\multicolumn{1}{r}{7} &
    \\multicolumn{6}{l}{6} &
    \\multicolumn{1}{r}{0} \\\\
    \\cline{2-33}\n&\n\n
''' if ilen == 32 else '''
    \\multicolumn{1}{c}{15} &
    \\multicolumn{1}{c}{14} &
    \\multicolumn{1}{c}{13} &
    \\multicolumn{1}{c}{12} &
    \\multicolumn{1}{c}{11} &
    \\multicolumn{1}{c}{10} &
    \\multicolumn{1}{c}{9} &
    \\multicolumn{1}{c}{8} &
    \\multicolumn{1}{c}{7} &
    \\multicolumn{1}{c}{6} &
    \\multicolumn{1}{c}{5} &
    \\multicolumn{1}{c}{4} &
    \\multicolumn{1}{c}{3} &
    \\multicolumn{1}{c}{2} &
    \\multicolumn{1}{c}{1} &
    \\multicolumn{1}{c}{0} \\\\
    \\cline{2-17}\n&\n\n
'''

    # depending on the type_list input we create a subset dictionary of
    # latex_inst_type dictionary present in constants.py
    type_dict = {key: value for key, value in latex_inst_type.items() if key in type_list}

    # iterate over each instruction type and create a table entry
    for t in type_dict:
        fields = []

        # capture all "arguments" of the type (funct3, funct7, rd, etc)
        # and capture their positions using arg_lut. Instruction types carry
        # no hardcoded bits, so the arguments are all there is to the row.
        for f in type_dict[t]['variable_fields']:
            (msb, lsb) = arg_lut[f]
            name = f if f not in latex_mapping else latex_mapping[f]
            fields.append((msb, lsb, name))

        # create a multicolumn latex table entry for each argument, in
        # decreasing order of msb position
        type_entries += _latex_table_row(fields, t, ilen)

    # for each entry in the dataset create a table
    content = ''
    for (ext_list, title, filter_list, include_pseudo) in dataset:
        instr_dict = {}

        # for all extensions list in ext_list, create a dictionary of
        # instructions associated with those extensions.
        for e in ext_list:
            instr_dict.update(create_inst_dict(['rv'+e], include_pseudo))

        # if filter_list is not empty then use that as the official set of
        # instructions that need to be dumped into the latex table
        inst_list = list(instr_dict.keys()) if not filter_list else filter_list

        # for each instruction create an latex table entry just like how we did
        # above with the instruction-type table.
        instr_entries = ''
        for inst in inst_list:
            if inst not in instr_dict:
                logging.error(f'in make_ext_latex_table: Instruction: {inst} not found in instr_dict')
                raise SystemExit(1)
            fields = []

            # only if the argument is available in arg_lut we consume it, else
            # throw error.
            for f in instr_dict[inst]['variable_fields']:
                if f not in arg_lut:
                    logging.error(f'Found variable {f} in instruction {inst} whose mapping is not available')
                    raise SystemExit(1)
                (msb,lsb) = arg_lut[f]
                name = f.replace('_','.') if f not in latex_mapping else latex_mapping[f]
                fields.append((msb, lsb, name))

            # for 16-bit tables only the lower half of the 32 character
            # encoding string is relevant
            if ilen == 16:
                encoding = instr_dict[inst]['encoding'][16:]
            else:
                encoding = instr_dict[inst]['encoding']

            # iterate through the bits, starting from the msb, and collect
            # every run of continuous hardcoded 1s and 0s as a 3-element tuple
            # containing the msb, lsb position of the run and the run itself.
            # A run is cut short wherever (msb, lsb) matches an entry of
            # latex_fixed_fields from constants.py.
            msb = ilen - 1
            run = ''
            for r in range(ilen):
                bit = encoding[r]
                pos = ilen - 1 - r  # bit position held by encoding[r]
                if (msb, pos + 1) in latex_fixed_fields:
                    fields.append((msb, pos + 1, run))
                    msb = pos
                    run = ''
                if bit == '-':
                    # an argument bit ends the current run, if any
                    if run != '':
                        fields.append((msb, pos + 1, run))
                        run = ''
                    msb = pos - 1
                else:
                    run += str(bit)

            # a run that reaches bit 0 is still pending once the loop is done
            if run != '':
                fields.append((msb, 0, run))

            instr_entries += _latex_table_row(
                fields, inst.upper().replace("_","."), ilen)

        # once an entry of the dataset is completed we create the whole table
        # with the title of that dataset as sub-heading (sort-of)
        if title != '':
            content += f'''

\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\
\\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\
\\cline{{2-{ilen+1}}}

            &
{instr_entries}
'''
        else:
            content += f'''
{instr_entries}
'''


    header = f'''
\\newpage

\\begin{{table}}[p]
\\begin{{small}}
\\begin{{center}}
    \\begin{{tabular}} {{{column_size}l}}
    {" ".join(['&']*ilen)} \\\\

            &
{type_entries}
'''
    endtable=f'''

\\end{{tabular}}
\\end{{center}}
\\end{{small}}
{caption}
\\end{{table}}
'''
    # dump the contents and return
    latex_file.write(header+content+endtable)


def make_chisel(instr_dict, spinal_hdl=False):
    """Write the Chisel (or SpinalHDL) instruction/cause/CSR definitions.

    Args:
        instr_dict: mapping of instruction name to a dict whose 'encoding'
            entry is the bit-pattern string, with '-' marking unconstrained
            bits.
        spinal_hdl: when true, write 'inst.spinalhdl' using M"b..." literals
            (the '-' characters are kept as they are); otherwise write
            'inst.chisel' using BitPat("b...") literals with every '-'
            replaced by '?'.

    The module-level 'causes', 'csrs' and 'csrs32' sequences of (num, name)
    pairs supply the contents of the Causes and CSRs objects.
    """
    # Shared opening of every generated `val all = { ... }` collector.
    all_open = '  val all = {\n    val res = collection.mutable.ArrayBuffer[Int]()\n'
    all_close = '    res.toArray\n  }'

    instr_lines = []
    for mnemonic, info in instr_dict.items():
        ident = mnemonic.upper().replace(".", "_")
        encoding = info["encoding"]
        if spinal_hdl:
            instr_lines.append(f'  def {ident:<18s} = M"b{encoding}"\n')
        else:
            wildcarded = encoding.replace("-", "?")
            instr_lines.append(f'  def {ident:<18s} = BitPat("b{wildcarded}")\n')
    chisel_names = ''.join(instr_lines)

    cause_defs = []
    cause_adds = []
    for num, name in causes:
        ident = name.lower().replace(" ", "_")
        cause_defs.append(f'  val {ident} = {hex(num)}\n')
        cause_adds.append(f'    res += {ident}\n')
    cause_names_str = ''.join(cause_defs) + all_open + ''.join(cause_adds) + all_close

    # Every CSR gets a constant, but `all` lists only `csrs`; `all32` starts
    # from `all` and appends the `csrs32` entries.
    csr_defs = ''.join(f'  val {name} = {hex(num)}\n' for num, name in csrs + csrs32)
    csr_adds = ''.join(f'    res += {name}\n' for _, name in csrs)
    csr32_adds = ''.join(f'    res += {name}\n' for _, name in csrs32)
    csr_names_str = (
        csr_defs
        + all_open
        + csr_adds
        + '    res.toArray\n  }\n'
        + '  val all32 = {\n    val res = collection.mutable.ArrayBuffer(all:_*)\n'
        + csr32_adds
        + all_close
    )

    out_path = 'inst.spinalhdl' if spinal_hdl else 'inst.chisel'
    # The context manager closes the file even if the write fails.
    with open(out_path, 'w') as chisel_file:
        chisel_file.write(f'''
/* Automatically generated by parse_opcodes */
object Instructions {{
{chisel_names}
}}
object Causes {{
{cause_names_str}
}}
object CSRs {{
{csr_names_str}
}}
''')

def make_rust(instr_dict):
    """Write 'inst.rs' with Rust constants for instructions, CSRs and causes.

    Args:
        instr_dict: mapping of instruction name to a dict whose 'match' and
            'mask' entries are emitted verbatim as the values of the
            MATCH_<NAME> / MASK_<NAME> u32 constants.

    The module-level 'csrs', 'csrs32' and 'causes' sequences of (num, name)
    pairs become the CSR_<NAME> (u16) and CAUSE_<NAME> (u8) constants.
    """
    const_lines = []
    for mnemonic, info in instr_dict.items():
        ident = mnemonic.upper().replace(".", "_")
        const_lines.append(f'const MATCH_{ident}: u32 = {info["match"]};\n')
        const_lines.append(f'const MASK_{ident}: u32 = {info["mask"]};\n')
    const_lines.extend(
        f'const CSR_{name.upper()}: u16 = {hex(num)};\n'
        for num, name in csrs + csrs32
    )
    const_lines.extend(
        f'const CAUSE_{name.upper().replace(" ", "_")}: u8 = {hex(num)};\n'
        for num, name in causes
    )
    mask_match_str = ''.join(const_lines)

    # The context manager closes the file even if the write fails.
    with open('inst.rs', 'w') as rust_file:
        rust_file.write(f'''
/* Automatically generated by parse_opcodes */
{mask_match_str}
''')

def make_sverilog(instr_dict):
    """Write 'inst.sverilog' containing the `riscv_instr` package.

    Args:
        instr_dict: mapping of instruction name to a dict whose 'encoding'
            entry is the bit-pattern string; every '-' in it is emitted as
            '?' inside a 32'b literal.

    The module-level 'csrs' and 'csrs32' sequences of (num, name) pairs are
    emitted as 12-bit CSR_<NAME> localparams.
    """
    decl_lines = []
    for mnemonic, info in instr_dict.items():
        ident = mnemonic.upper().replace('.', '_')
        bits = info['encoding'].replace('-', '?')
        decl_lines.append(f"  localparam [31:0] {ident:<18s} = 32'b{bits};\n")
    decl_lines.append('  /* CSR Addresses */\n')
    for num, name in csrs + csrs32:
        # hex() yields '0x...'; the slice drops that prefix for the 12'h literal.
        decl_lines.append(
            f"  localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n"
        )
    names_str = ''.join(decl_lines)

    # The context manager closes the file even if the write fails.
    with open('inst.sverilog', 'w') as sverilog_file:
        sverilog_file.write(f'''
/* Automatically generated by parse_opcodes */
package riscv_instr;
{names_str}
endpackage
''')
def make_c(instr_dict):
    """Write 'encoding.out.h', the C header of opcode/CSR/cause definitions.

    Args:
        instr_dict: mapping of instruction name to a dict whose 'match' and
            'mask' entries are emitted verbatim as the MATCH_<NAME> and
            MASK_<NAME> macros.

    The module-level 'csrs', 'csrs32' and 'causes' sequences of (num, name)
    pairs and the 'arg_lut' mapping of field name to a (high bit, low bit)
    pair supply the remaining macros. The contents of 'encoding.h' are copied
    into the output, and the header comment records the output of
    `git log -1 --format="format:%h"`.
    """
    match_mask_lines = []
    declare_insn_lines = []
    for mnemonic, info in instr_dict.items():
        ident = mnemonic.upper().replace(".", "_")
        match_mask_lines.append(f'#define MATCH_{ident} {info["match"]}\n')
        match_mask_lines.append(f'#define MASK_{ident} {info["mask"]}\n')
        declare_insn_lines.append(
            f'DECLARE_INSN({mnemonic.replace(".", "_")}, MATCH_{ident}, MASK_{ident})\n'
        )
    mask_match_str = ''.join(match_mask_lines)
    declare_insn_str = ''.join(declare_insn_lines)

    csr_define_lines = []
    declare_csr_lines = []
    for num, name in csrs + csrs32:
        csr_define_lines.append(f'#define CSR_{name.upper()} {hex(num)}\n')
        declare_csr_lines.append(f'DECLARE_CSR({name}, CSR_{name.upper()})\n')
    csr_names_str = ''.join(csr_define_lines)
    declare_csr_str = ''.join(declare_csr_lines)

    cause_define_lines = []
    declare_cause_lines = []
    for num, name in causes:
        ident = name.upper().replace(' ', '_')
        cause_define_lines.append(f'#define CAUSE_{ident} {hex(num)}\n')
        declare_cause_lines.append(f'DECLARE_CAUSE("{name}", CAUSE_{ident})\n')
    causes_str = ''.join(cause_define_lines)
    declare_cause_str = ''.join(declare_cause_lines)

    field_lines = []
    for name, rng in arg_lut.items():
        # arg_lut stores (high bit, low bit); build a mask covering that span.
        end, begin = rng[0], rng[1]
        mask = ((1 << (end - begin + 1)) - 1) << begin
        field_lines.append(
            f"#define INSN_FIELD_{name.upper().replace(' ', '_')} {hex(mask)}\n"
        )
    arg_str = ''.join(field_lines)

    with open('encoding.h', 'r') as header_file:
        enc_header = header_file.read()

    # Close the pipe explicitly instead of leaving it to garbage collection.
    with os.popen('git log -1 --format="format:%h"') as git_pipe:
        commit = git_pipe.read()

    # The context manager closes the output even if the write fails.
    with open('encoding.out.h', 'w') as enc_file:
        enc_file.write(f'''
/*
* This file is auto-generated by running 'make' in
* https://github.com/riscv/riscv-opcodes ({commit})
*/
{enc_header}
/* Automatically generated by parse_opcodes. */
#ifndef RISCV_ENCODING_H
#define RISCV_ENCODING_H
{mask_match_str}
{csr_names_str}
{causes_str}
{arg_str}#endif
#ifdef DECLARE_INSN
{declare_insn_str}#endif
#ifdef DECLARE_CSR
{declare_csr_str}#endif
#ifdef DECLARE_CAUSE
{declare_cause_str}#endif
''')

def make_go(instr_dict):
    """Write 'inst.go', a Go switch returning the encoding of each opcode.

    For every entry of instr_dict the 'match' string is parsed as an integer
    and split into the opcode, funct3, rs2, csr and funct7 bit fields. The
    csr field is passed through signed(..., 12); the others are printed in
    hexadecimal.
    """
    def bit_field(word, low_bit, width):
        # Extract `width` bits of `word`, starting at bit `low_bit`.
        return (word >> low_bit) & ((1 << width) - 1)

    go_header = '''// Code generated by parse_opcodes -go; DO NOT EDIT.

package riscv

import "cmd/internal/obj"

type inst struct {
	opcode uint32
	funct3 uint32
	rs2    uint32
	csr    int64
	funct7 uint32
}

func encode(a obj.As) *inst {
	switch a {
'''
    go_footer = '''  }
	return nil
}
'''

    case_blocks = []
    for mnemonic, info in instr_dict.items():
        match_bits = int(info['match'], 0)
        opcode = bit_field(match_bits, 0, 7)
        funct3 = bit_field(match_bits, 12, 3)
        rs2 = bit_field(match_bits, 20, 5)
        csr = bit_field(match_bits, 20, 12)
        funct7 = bit_field(match_bits, 25, 7)
        go_name = mnemonic.upper().replace("_", "")
        case_blocks.append(
            f'  case A{go_name}:\n'
            f'    return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }}\n'
        )

    with open('inst.go', 'w') as go_file:
        go_file.write(go_header)
        go_file.write(''.join(case_blocks))
        go_file.write(go_footer)

def signed(value, width):
    """Map *value* onto the signed range of a *width*-bit field.

    Values in [0, 2**(width-1)) are returned unchanged; any other value has
    2**width subtracted from it.
    """
    if value < 0 or value >= (1 << (width - 1)):
        return value - (1 << width)
    return value


if __name__ == "__main__":
    print(f'Running with args : {sys.argv}')

    # Everything on the command line that is not an output-format switch is
    # treated as an extension to parse.
    cli_args = sys.argv[1:]
    extensions = list(cli_args)
    for switch in ('-c', '-latex', '-chisel', '-sverilog', '-rust', '-go', '-spinalhdl'):
        try:
            extensions.remove(switch)
        except ValueError:
            pass
    print(f'Extensions selected : {extensions}')

    instr_dict = create_inst_dict(extensions)
    with open('instr_dict.yaml', 'w') as outfile:
        yaml.dump(instr_dict, outfile, default_flow_style=False)
    instr_dict = collections.OrderedDict(sorted(instr_dict.items()))

    # (switch, generator, log message) in the order the outputs are produced.
    # '-latex' appears twice because it produces two tables.
    generators = (
        ('-c', lambda: make_c(instr_dict),
         'encoding.out.h generated successfully'),
        ('-chisel', lambda: make_chisel(instr_dict),
         'inst.chisel generated successfully'),
        ('-spinalhdl', lambda: make_chisel(instr_dict, True),
         'inst.spinalhdl generated successfully'),
        ('-sverilog', lambda: make_sverilog(instr_dict),
         'inst.sverilog generated successfully'),
        ('-rust', lambda: make_rust(instr_dict),
         'inst.rs generated successfully'),
        ('-go', lambda: make_go(instr_dict),
         'inst.go generated successfully'),
        ('-latex', lambda: make_latex_table(),
         'instr-table.tex generated successfully'),
        ('-latex', lambda: make_priv_latex_table(),
         'priv-instr-table.tex generated successfully'),
    )
    for switch, generate, message in generators:
        if switch in cli_args:
            generate()
            logging.info(message)