1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
|
/* Copyright 2013-2015 IBM Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __NPU_H
#define __NPU_H
#include <io.h>
/* Number of PEs supported */
#define NPU_NUM_OF_PES			4

/*
 * A brick has at most two MMIO BARs:
 *   BAR0 - always maps the 128KB of TL/DL registers.
 *   BAR1 - maps either the PL or the AT registers, neither of
 *          which is exposed to the OS.
 */
#define NPU_BRICK_NUM_OF_BARS		2
#define NPU_BRICK_TL_BAR_SIZE		0x20000		/* 128KB */
#define NPU_BRICK_PL_BAR_SIZE		0x200000	/* 2MB */
/*
 * The config space of an NPU device is emulated. Different bits
 * are used to represent the properties of a config register:
 * read-only, write-one-to-clear.
 */
#define NPU_DEV_CFG_NORMAL		0
#define NPU_DEV_CFG_RDONLY		1
#define NPU_DEV_CFG_W1CLR		2
#define NPU_DEV_CFG_MAX			3

/*
 * Size in bytes of the emulated NPU PCI device config space. The
 * emulated device is a PCI Express device, not a legacy one.
 */
#define NPU_DEV_CFG_SIZE		0x100
/* Interrupt mapping
 *
 * NPU PHB doesn't support MSI interrupts. It only supports
 * 8 LSI interrupts: [0, 3] for bricks' DL blocks. [4, 5]
 * for reporting errors from DL blocks. [6, 7] for reporting
 * errors from TL blocks, NPCQs and AT.
 */
#define NPU_LSI_IRQ_COUNT	8
#define NPU_LSI_INT_DL0		0
#define NPU_LSI_INT_DL1		1
#define NPU_LSI_INT_DL2		2
#define NPU_LSI_INT_DL3		3

/* First and last LSI numbers, relative to the PHB interrupt base */
#define NPU_LSI_IRQ_MIN		0x7F0
#define NPU_LSI_IRQ_MAX		(NPU_LSI_IRQ_MIN + NPU_LSI_IRQ_COUNT - 1)

/* Interrupt number of the first LSI of the given chip/PHB */
#define NPU_LSI_IRQ_BASE(chip, phb)	(P8_CHIP_IRQ_PHB_BASE(chip, phb) | NPU_LSI_IRQ_MIN)

/*
 * Keep only the low 11 bits of an interrupt number. The argument is
 * parenthesized so that an expression such as (a | b) is masked as a
 * whole rather than being split up by operator precedence.
 */
#define NPU_IRQ_NUM(irq)	((irq) & 0x7FF)
/*
 * NPU device capability descriptor. All PCI capabilities are
 * organized as a linked list. Each PCI capability has a specific
 * hook used to populate it when the NPU device is initialized.
 */
struct npu_dev;

struct npu_dev_cap {
	uint16_t		id;
	/* NOTE(review): start/end presumably bound the capability
	 * within the emulated config space - confirm with users. */
	uint16_t		start;
	uint16_t		end;
	/* Device this capability belongs to */
	struct npu_dev		*dev;
	/* Per-capability populate hook */
	void			(*populate)(struct npu_dev_cap *cap);
	struct list_node	link;
};
/*
 * Config space access trap: a pair of read/write hooks attached to
 * an NPU device.
 */
struct npu_dev_trap {
	struct npu_dev		*dev;
	/* NOTE(review): start/end presumably delimit the trapped
	 * config space range - confirm with the trap users. */
	uint32_t		start;
	uint32_t		end;
	/* Opaque pointer carried along with the trap */
	void			*data;
	/* Read hook; the value read is handed back through *data */
	int64_t			(*read)(struct npu_dev_trap *trap,
					uint32_t offset,
					uint32_t size,
					uint32_t *data);
	/* Write hook; data is the value being written */
	int64_t			(*write)(struct npu_dev_trap *trap,
					 uint32_t offset,
					 uint32_t size,
					 uint32_t data);
	struct list_node	link;
};
/*
 * Description of one NPU device BAR.
 * NOTE(review): field meanings below are inferred from their names;
 * confirm against the code that fills this structure in.
 */
struct npu_dev_bar {
	uint32_t	flags;
	uint32_t	xscom;		/* presumably the BAR's XSCOM address */
	uint64_t	base;
	uint64_t	size;
	uint32_t	bar_sz;
	bool		trapped;
};
/*
 * Each device contains 2 links. The device is exposed as a standard
 * PCIe device whose config space is emulated by skiboot.
 */
struct npu_dev {
	uint32_t		flags;
	uint32_t		index;
	uint64_t		xscom;
	void			*pl_base;
	uint64_t		pl_xscom_base;
	struct npu_dev_bar	bar;
	struct phb		*phb;

	/* The link@x node */
	struct dt_node		*dt_node;

	/* PCI virtual device and the associated GPU device */
	struct pci_virt_device	*pvd;
	struct pci_device	*pd;

	/* Owning NPU */
	struct npu		*npu;
	struct list_head	capabilities;

	/* PHY lanes this device is associated with */
	uint16_t		lane_mask;

	/* Number of the procedure currently running on this device */
	uint16_t		procedure_number;

	/* Step we are up to within the running procedure */
	uint16_t		procedure_step;

	/* Arbitrary data each procedure uses to track its status */
	uint64_t		procedure_data;

	/* Used to time out long-running procedures */
	unsigned long		procedure_tb;

	uint32_t		procedure_status;
	uint64_t		pe_number;

	/* Used to associate the NPU device with GPU PCI devices */
	const char		*slot_label;
};
/* NPU PHB descriptor */
struct npu {
	uint32_t	flags;
	uint32_t	index;
	uint32_t	chip_id;
	uint64_t	xscom_base;
	uint64_t	at_xscom;
	/* Register block that npu_ioda_sel() writes through */
	void		*at_regs;
	uint32_t	base_lsi;
	uint64_t	mm_base;
	uint64_t	mm_size;
	uint32_t	total_devices;
	struct npu_dev	*devices;

	/* IODA cache */
	uint64_t	lxive_cache[8];
	uint64_t	pce_cache[6];
	uint64_t	tve_cache[NPU_NUM_OF_PES];

	bool		tx_zcal_complete[2];
	bool		fenced;

	/* Embedded PHB; phb_to_npu() maps it back to this structure */
	struct phb	phb;
};
/*
 * Map a PHB pointer back to the struct npu that embeds it as its
 * "phb" member, using container_of().
 */
static inline struct npu *phb_to_npu(struct phb *phb)
{
	struct npu *owner = container_of(phb, struct npu, phb);

	return owner;
}
/*
 * Program the NPU_IODA_ADDR register found in the p->at_regs block.
 *
 * @p:       NPU whose at_regs block is written
 * @table:   value placed in the NPU_IODA_AD_TSEL field
 * @addr:    value placed in the NPU_IODA_AD_TADR field
 * @autoinc: when true, NPU_IODA_AD_AUTOINC is set as well
 */
static inline void npu_ioda_sel(struct npu *p, uint32_t table,
				uint32_t addr, bool autoinc)
{
	uint64_t sel;

	sel = SETFIELD(NPU_IODA_AD_TSEL, 0ul, table) |
	      SETFIELD(NPU_IODA_AD_TADR, 0ul, addr);
	if (autoinc)
		sel |= NPU_IODA_AD_AUTOINC;

	out_be64(p->at_regs + NPU_IODA_ADDR, sel);
}
/*
 * Entry points implemented outside this header.
 * NOTE(review): the descriptions below are inferred from the names
 * and signatures only - confirm against the implementations.
 */

/* Presumably performs the SCOM-side initialisation of one device. */
void npu_scom_init(struct npu_dev *dev);

/*
 * Presumably a config register filter callback: "write" selects the
 * access direction and "data" carries the value read or written.
 */
int64_t npu_dev_procedure(void *dev,
			  struct pci_cfg_reg_filter *pcrf,
			  uint32_t offset,
			  uint32_t len,
			  uint32_t *data,
			  bool write);

/* Presumably sets or clears the fenced state of the NPU. */
void npu_set_fence_state(struct npu *p, bool fence);
/*
 * Logging helpers. NPUDBG/NPUINF take a struct npu pointer and
 * prefix the message with "NPU<opal_id>: "; NPUDEVDBG/NPUDEVINF take
 * a struct npu_dev pointer and log through its ->npu.
 */
#define NPUDBG(p, fmt, args...)					\
	prlog(PR_DEBUG, "NPU%d: " fmt, (p)->phb.opal_id, ##args)
#define NPUINF(p, fmt, args...)					\
	prlog(PR_INFO, "NPU%d: " fmt, (p)->phb.opal_id, ##args)

#define NPUDEVDBG(p, fmt, args...)	NPUDBG((p)->npu, fmt, ##args)
#define NPUDEVINF(p, fmt, args...)	NPUINF((p)->npu, fmt, ##args)
#endif /* __NPU_H */
|