# target/hexagon/gen_decodetree.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198

#!/usr/bin/env python3

##
##  Copyright (c) 2024 Taylor Simpson <ltaylorsimpson@gmail.com>
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, see <http://www.gnu.org/licenses/>.
##

import io
import re

import sys
import textwrap
import iset
import hex_common

# Encoding string of every tag that has one, with spaces removed and the
# characters in reverse order.  Tags marked "MISSING ENCODING" are left out.
encs = {
    tag: iset.iset[tag]["enc"].replace(" ", "")[::-1]
    for tag in iset.tags
    if iset.iset[tag]["enc"] != "MISSING ENCODING"
}


# Register operand inside an instruction syntax string.  Capture groups:
#   1. register type letter (skipped when directly preceded by "DUP")
#   2. one or more operand id letters
#   3. optional "." and/or one of L, l, H, h
#   4. size digits, optionally followed by "S"
regre = re.compile(r"((?<!DUP)[MNORCPQXSGVZA])([stuvwxyzdefg]+)([.]?[LlHh]?)(\d+S?)")
# Immediate operand inside an instruction syntax string.  Capture groups:
#   1. immediate type letter (r, R, s, S, u, U or m)
#   2. width digits
#   3. optional digits following ":" (empty string when absent)
immre = re.compile(r"[#]([rRsSuUm])(\d+)(?:[:](\d+))?")


def ordered_unique(l):
    """Return the distinct items of l as a list, in first-occurrence order.

    Args:
        l: iterable of hashable items.

    Returns:
        A new list holding each distinct item once, ordered by where the
        item first appears in l.
    """
    # dict keys keep insertion order and the first occurrence of a key wins,
    # so this is a single O(n) pass instead of one l.index() scan per item.
    return list(dict.fromkeys(l))

# Per register type letter, the number of choices a register field is
# expected to offer.  gen_decodetree_file attaches a decode_mapped_reg_*
# function to any field of these types that offers a different number.
num_registers = dict(R=32, V=32)

# Letters that mark operand bits in an encoding string; each one is turned
# into "." when the format and pattern lines are written.
operand_letters = set("PiIrstuvwxyzdefg")

#
# These instructions have unused operand letters in their encoding
# They don't correspond to actual operands in the instruction semantics
# We will mark them as ignored in QEMU decodetree
#
# Tags whose "t" encoding letters are rewritten to "-" (ignored).
tags_with_unused_t_encoding = {
    "R6_release_at_vi",
    "R6_release_st_vi",
}

# Tags whose "d" encoding letters are rewritten to "-" (ignored).  Every tag
# in the "t" set above is also in this set.
tags_with_unused_d_encoding = {
    *tags_with_unused_t_encoding,
    "S2_stored_rl_at_vi",
    "S2_storew_rl_at_vi",
    "S2_storew_rl_st_vi",
    "S4_stored_rl_at_vi",
    "S4_stored_rl_st_vi",
}

def skip_tag(tag, class_to_decode):
    """Return True when tag should be left out of the generated file.

    A tag is skipped when its iset "enc_class" entry differs from
    class_to_decode.
    """
    return iset.iset[tag]["enc_class"] != class_to_decode


##
## Generate the QEMU decodetree file for each instruction in class_to_decode
##     For A2_add: Rd32=add(Rs32,Rt32)
##     We produce:
##     %A2_add_Rd   0:5
##     %A2_add_Rs   16:5
##     %A2_add_Rt   8:5
##     @A2_add  11110011000.......-.....---..... Rd=%A2_add_Rd Rs=%A2_add_Rs Rt=%A2_add_Rt %PP
##     A2_add   ..................-.....---..... @A2_add
##
def gen_decodetree_file(f, class_to_decode):
    """Write the decodetree description of one encoding class to f.

    For every tag in encs that skip_tag() does not reject, this writes a
    comment banner, one %field line per register and immediate operand
    found in the tag's syntax string, an @format line and the instruction
    pattern line.

    Args:
        f: writable text stream that receives the generated file.
        class_to_decode: encoding class to emit.  For names starting with
            "SUBINSN_" no %PP field is written and each encoding is
            prefixed with three ignored ("-") bits.

    Raises:
        Exception: a register operand has no field in the encoding, its
            field is split into several runs, or the field width does not
            match the size given in the operand name.
    """
    is_subinsn = class_to_decode.startswith("SUBINSN_")

    # Loop-invariant translation tables: one pass over the string instead
    # of one str.replace() call per letter.
    operands_to_dots = str.maketrans(dict.fromkeys(operand_letters, "."))
    fixed_bits_to_dots = str.maketrans("01", "..")

    f.write(f"## DO NOT MODIFY - This file is generated by {sys.argv[0]}\n\n")
    if not is_subinsn:
        f.write("%PP\t14:2\n\n")

    for tag in sorted(encs.keys(), key=iset.tags.index):
        if skip_tag(tag, class_to_decode):
            continue

        # encs stores each encoding reversed; field positions are taken
        # from enc, while enc_str is the encoding in its original order.
        enc = encs[tag]
        enc_str = "".join(reversed(enc))
        f.write("#" * 80 + "\n")
        f.write(f"## {tag}:\t{enc_str}\n##\n")

        # The subinstructions come with a 13-bit encoding, but
        # decodetree.py needs 16 bits
        if is_subinsn:
            enc_str = "---" + enc_str

        syntax = iset.iset[tag]["syntax"]
        regs = ordered_unique(regre.findall(syntax))
        imms = ordered_unique(immre.findall(syntax))

        # Field definitions: registers first, then immediates
        for reg in regs:
            f.write(_reg_field_def(tag, enc, reg))
        for imm in imms:
            f.write(_imm_field_def(tag, enc, imm))

        # Unused encoding letters become ignored bits.  This must happen
        # before the operand letters are turned into ".".
        if tag in tags_with_unused_d_encoding:
            enc_str = enc_str.replace("d", "-")
        if tag in tags_with_unused_t_encoding:
            enc_str = enc_str.replace("t", "-")
        enc_str = enc_str.translate(operands_to_dots)

        # Instruction format: fixed bits plus one NAME=%field per operand
        fmt_args = [
            f" {reg_type}{reg_id}=%{tag}_{reg_type}{reg_id}"
            for reg_type, reg_id, _, _ in regs
        ]
        for imm_type, _, _ in imms:
            imm_name = imm_type + _imm_letter(imm_type)
            fmt_args.append(f" {imm_name}=%{tag}_{imm_name}")
        if not is_subinsn:
            fmt_args.append(" %PP")
        f.write(f"@{tag}\t{enc_str}" + "".join(fmt_args) + "\n")

        # Instruction pattern: the fixed 0/1 bits are also replaced with "."
        pattern = enc_str.translate(fixed_bits_to_dots)
        f.write(f"{tag}\t{pattern} @{tag}\n")


def _imm_letter(imm_type):
    """Return "i" for a lowercase immediate type letter, otherwise "I"."""
    return "i" if imm_type.islower() else "I"


def _reg_field_def(tag, enc, reg):
    """Return the %field line (with newline) for one register operand."""
    reg_type, reg_id, _, reg_enc_size = reg
    reg_num_choices = int(reg_enc_size.rstrip("S"))
    # Runs of the operand's first id letter in the reversed encoding
    reg_enc_fields = re.findall(reg_id[0] + "+", enc)

    # Check for some errors
    if not reg_enc_fields:
        raise Exception(f"{tag} missing register field!")
    if len(reg_enc_fields) > 1:
        raise Exception(f"{tag} has split register field!")
    reg_enc_field = reg_enc_fields[0]
    if 2 ** len(reg_enc_field) != reg_num_choices:
        raise Exception(f"{tag} has incorrect register field width!")

    line = (f"%{tag}_{reg_type}{reg_id}\t"
            f"{enc.index(reg_enc_field)}:{len(reg_enc_field)}")
    if (reg_type in num_registers and
            reg_num_choices != num_registers[reg_type]):
        # The field offers a different number of choices than
        # num_registers lists for this type, so name a mapping function.
        reg_mapping = reg_type + "_" * len(reg_id) + reg_enc_size
        line += f"\t!function=decode_mapped_reg_{reg_mapping}"
    return line + "\n"


def _imm_field_def(tag, enc, imm):
    """Return the %field line (with newline) for one immediate operand."""
    imm_type = imm[0]
    imm_letter = _imm_letter(imm_type)
    # Runs are listed from the last one in enc to the first; only the
    # first listed piece carries the sign marker, and only for s/r types.
    sign_mark = "s" if imm_type.lower() in "sr" else ""
    fields = []
    for m in reversed(list(re.finditer(imm_letter + "+", enc))):
        fields.append(f"{m.start()}:{sign_mark}{m.end() - m.start()}")
        sign_mark = ""
    field_str = " ".join(fields)
    return f"%{tag}_{imm_type}{imm_letter}\t{field_str}\n"


if __name__ == "__main__":
    # Command line arguments:
    #   argv[1]  file passed to hex_common.read_semantics_file
    #   argv[2]  encoding class to generate (see gen_decodetree_file)
    #   argv[3]  path of the decodetree file to write
    hex_common.read_semantics_file(sys.argv[1])
    class_to_decode = sys.argv[2]
    # Mode "w" creates the output file or truncates an existing one.
    with open(sys.argv[3], "w") as f:
        gen_decodetree_file(f, class_to_decode)