diff options
-rw-r--r-- | core/Makefile.inc | 4 | ||||
-rw-r--r-- | core/pci-iov.c | 257 | ||||
-rw-r--r-- | core/pci.c | 2 | ||||
-rw-r--r-- | include/pci-cfg.h | 32 | ||||
-rw-r--r-- | include/pci-iov.h | 37 |
5 files changed, 330 insertions, 2 deletions
diff --git a/core/Makefile.inc b/core/Makefile.inc index 2167044..ae3c297 100644 --- a/core/Makefile.inc +++ b/core/Makefile.inc @@ -2,8 +2,8 @@ SUBDIRS += core CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o -CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o -CORE_OBJS += timebase.o opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o +CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o +CORE_OBJS += opal-msg.o pci.o pci-iov.o pci-virt.o pci-slot.o pcie-slot.o CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o diff --git a/core/pci-iov.c b/core/pci-iov.c new file mode 100644 index 0000000..14c810b --- /dev/null +++ b/core/pci-iov.c @@ -0,0 +1,257 @@ +/* Copyright 2013-2016 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <skiboot.h> +#include <pci.h> +#include <pci-cfg.h> +#include <pci-slot.h> +#include <pci-iov.h> + +/* + * Handle the VF's MPS field in the PCIe capability, which is read-only. + * On write, this filter caches the MPS value; on read, it substitutes + * the cached MPS into the returned data. 
+ */ +static int64_t pci_iov_vf_devctl(void *dev, struct pci_cfg_reg_filter *pcrf, + uint32_t offset, uint32_t len, + uint32_t *data, bool write) +{ + struct pci_device *vf = (struct pci_device *)dev; + uint32_t pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false); + uint8_t *pcache; + + if (offset != pcrf->start || + offset != (pos + PCICAP_EXP_DEVCTL)) + return OPAL_SUCCESS; + + pcache = &pcrf->data[0]; + if (write) { + *pcache = ((uint8_t)(*data >> (8 * (4 - len)))) & + PCICAP_EXP_DEVCTL_MPS; + } else { + *data &= ~(PCICAP_EXP_DEVCTL_MPS << (8 * (4 - len))); + *data |= (((uint32_t)(*pcache & PCICAP_EXP_DEVCTL_MPS)) + << (8 * (4 - len))); + } + + return OPAL_SUCCESS; +} + +static void pci_iov_vf_quirk(struct phb *phb, struct pci_device *vf) +{ + struct pci_cfg_reg_filter *pcrf; + uint32_t pos; + + if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false)) + return; + + /* + * On Mellanox MT27500 Family [ConnectX-3], the VF's MPS field in + * the corresponding config register is read-only. The MPS values + * of the PF and VF usually differ. This quirk makes them look the + * same to avoid confusion. + */ + if (vf->vdid != 0x100315b3) + return; + + pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false); + pcrf = pci_add_cfg_reg_filter(vf, pos + PCICAP_EXP_DEVCTL, 4, + PCI_REG_FLAG_MASK, pci_iov_vf_devctl); + if (!pcrf) + prlog(PR_WARNING, "%s: Missed DEVCTL filter on %04x:%02x:%02x.%01x\n", + __func__, phb->opal_id, (vf->bdfn >> 8), + ((vf->bdfn >> 3) & 0x1f), (vf->bdfn & 0x7)); +} + +/* + * Refresh the cached SRIOV parameters (initial/number of VFs, offset, + * stride). Returns true only if the VF Enable state has changed. 
+ */ +static bool pci_iov_update_parameters(struct pci_iov *iov) +{ + struct phb *phb = iov->phb; + uint16_t bdfn = iov->pd->bdfn; + uint32_t pos = iov->pos; + uint16_t val; + bool enabled; + + pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_CTRL, &val); + enabled = !!(val & PCIECAP_SRIOV_CTRL_VFE); + if (iov->enabled == enabled) + return false; + + if (enabled) { + pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_INITIAL_VF, + &iov->init_VFs); + pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_NUM_VF, + &iov->num_VFs); + pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_OFFSET, + &iov->offset); + pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_STRIDE, + &iov->stride); + } else { + iov->init_VFs = 0; + iov->num_VFs = 0; + iov->offset = 0; + iov->stride = 0; + } + + iov->enabled = enabled; + return true; +} + +static int64_t pci_iov_change(void *dev __unused, + struct pci_cfg_reg_filter *pcrf, + uint32_t offset __unused, + uint32_t len __unused, + uint32_t *data __unused, + bool write __unused) +{ + struct pci_iov *iov = (struct pci_iov *)pcrf->data; + struct phb *phb = iov->phb; + struct pci_device *pd = iov->pd; + struct pci_device *vf, *tmp; + uint32_t i; + bool changed; + + /* Refresh SRIOV parameters; nothing to do if VF Enable is unchanged */ + changed = pci_iov_update_parameters(iov); + if (!changed) + return OPAL_SUCCESS; + + /* SRIOV was disabled: detach every child (VF) from the parent */ + if (!iov->enabled) { + list_for_each_safe(&pd->children, vf, tmp, link) + list_del(&vf->link); + return OPAL_SUCCESS; + } + + /* Init and attach VFs. NOTE(review): num_VFs unchecked vs total_VFs */ + for (changed = false, i = 0; i < iov->num_VFs; i++) { + vf = &iov->VFs[i]; + vf->bdfn = pd->bdfn + iov->offset + iov->stride * i; + list_add_tail(&pd->children, &vf->link); + + /* + * To save time, the capabilities are not populated again if + * they already exist. Before the first VF is probed, wait + * 100ms for the VFs' config space to become ready. 
+ */ + if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false)) { + if (!changed) { + changed = !changed; + time_wait_ms(100); + } + + pci_init_capabilities(phb, vf); + pci_iov_vf_quirk(phb, vf); + } + + /* Call PHB hook. NOTE(review): gets pd, not vf - confirm */ + if (phb->ops->device_init) + phb->ops->device_init(phb, pd, NULL); + } + + return OPAL_SUCCESS; +} + +/* + * This function is called while the SRIOV capability is disabled, so the + * VF's config address isn't final and its config space isn't accessible. + */ +static void pci_iov_init_VF(struct pci_device *pd, struct pci_device *vf) +{ + vf->is_bridge = false; + vf->is_multifunction = false; + vf->dev_type = PCIE_TYPE_ENDPOINT; + vf->scan_map = -1; + vf->vdid = pd->vdid; + vf->sub_vdid = pd->sub_vdid; + vf->class = pd->class; + vf->dn = NULL; + vf->slot = NULL; + vf->parent = pd; + list_head_init(&vf->pcrf); + list_head_init(&vf->children); +} + +void pci_init_iov_cap(struct phb *phb, struct pci_device *pd) +{ + int64_t pos; + struct pci_iov *iov; + struct pci_cfg_reg_filter *pcrf; + uint32_t i; + + /* Search for the SRIOV extended capability (PCIe devices only) */ + if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) + return; + + pos = pci_find_ecap(phb, pd->bdfn, PCIECAP_ID_SRIOV, NULL); + if (pos <= 0) + return; + + /* Allocate the IOV state for this PF */ + iov = zalloc(sizeof(*iov)); + if (!iov) { + prlog(PR_ERR, "%s: Cannot alloc IOV for %04x:%02x:%02x.%01x\n", + __func__, phb->opal_id, (pd->bdfn >> 8), + ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7)); + return; + } + + /* Allocate one pci_device per VF, sized by TotalVFs */ + pci_cfg_read16(phb, pd->bdfn, pos + PCIECAP_SRIOV_TOTAL_VF, + &iov->total_VFs); + iov->VFs = zalloc(sizeof(*iov->VFs) * iov->total_VFs); + if (!iov->VFs) { + prlog(PR_ERR, "%s: Cannot alloc %d VFs for %04x:%02x:%02x.%01x\n", + __func__, iov->total_VFs, phb->opal_id, + (pd->bdfn >> 8), ((pd->bdfn >> 3) & 0x1f), + (pd->bdfn & 0x7)); + free(iov); + return; + } + + /* Initialize VFs */ + for (i = 0; i < iov->total_VFs; i++) + pci_iov_init_VF(pd, &iov->VFs[i]); + + /* Register a write filter on SRIOV control to track VF enable/disable 
*/ + pcrf = pci_add_cfg_reg_filter(pd, pos + PCIECAP_SRIOV_CTRL, 2, + PCI_REG_FLAG_WRITE, pci_iov_change); + if (!pcrf) { + prlog(PR_ERR, "%s: Cannot set filter on %04x:%02x:%02x.%01x\n", + __func__, phb->opal_id, (pd->bdfn >> 8), + ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7)); + free(iov->VFs); + free(iov); + return; + } + + /* Let pci_iov_change() find the IOV state through the filter */ + pcrf->data = (void *)iov; + + /* + * Retrieve the number of VFs and other information if applicable, + * then register the SRIOV capability. + */ + iov->phb = phb; + iov->pd = pd; + iov->pos = pos; + iov->enabled = false; + pci_iov_update_parameters(iov); + pci_set_cap(pd, PCIECAP_ID_SRIOV, pos, iov, true); +} @@ -18,6 +18,7 @@ #include <cpu.h> #include <pci.h> #include <pci-cfg.h> +#include <pci-iov.h> #include <pci-slot.h> #include <timebase.h> #include <device.h> @@ -204,6 +205,7 @@ void pci_init_capabilities(struct phb *phb, struct pci_device *pd) { pci_init_pcie_cap(phb, pd); pci_init_aer_cap(phb, pd); + pci_init_iov_cap(phb, pd); } static struct pci_device *pci_scan_one(struct phb *phb, struct pci_device *parent, diff --git a/include/pci-cfg.h b/include/pci-cfg.h index 27c0f74..530f0a8 100644 --- a/include/pci-cfg.h +++ b/include/pci-cfg.h @@ -486,6 +486,38 @@ #define PCIECAP_AER_TLP_PFX_LOG2 0x40 #define PCIECAP_AER_TLP_PFX_LOG3 0x44 +/* SRIOV extended capability: ID, register offsets and field masks */ +#define PCIECAP_ID_SRIOV 0x10 +#define PCIECAP_SRIOV_CAP 0x04 +#define PCIECAP_SRIOV_CAP_VFM 0x01 +#define PCIECAP_SRIOV_CAP_INTR(x) ((x) >> 21) +#define PCIECAP_SRIOV_CTRL 0x08 +#define PCIECAP_SRIOV_CTRL_VFE 0x01 +#define PCIECAP_SRIOV_CTRL_VFM 0x02 +#define PCIECAP_SRIOV_CTRL_INTR 0x04 +#define PCIECAP_SRIOV_CTRL_MSE 0x08 +#define PCIECAP_SRIOV_CTRL_ARI 0x10 +#define PCIECAP_SRIOV_STATUS 0x0a +#define PCIECAP_SRIOV_STATUS_VFM 0x01 +#define PCIECAP_SRIOV_INITIAL_VF 0x0c +#define PCIECAP_SRIOV_TOTAL_VF 0x0e +#define PCIECAP_SRIOV_NUM_VF 0x10 +#define PCIECAP_SRIOV_FUNC_LINK 0x12 +#define PCIECAP_SRIOV_VF_OFFSET 0x14 +#define 
PCIECAP_SRIOV_VF_STRIDE 0x16 +#define PCIECAP_SRIOV_VF_DID 0x1a +#define PCIECAP_SRIOV_SUP_PGSIZE 0x1c +#define PCIECAP_SRIOV_SYS_PGSIZE 0x20 +#define PCIECAP_SRIOV_BAR 0x24 +#define PCIECAP_SRIOV_NUM_BARS 6 +#define PCIECAP_SRIOV_VFM 0x3c +#define PCIECAP_SRIOV_VFM_BIR(x) ((x) & 7) +#define PCIECAP_SRIOV_VFM_OFFSET(x) ((x) & ~7) +#define PCIECAP_SRIOV_VFM_UA 0x0 +#define PCIECAP_SRIOV_VFM_MI 0x1 +#define PCIECAP_SRIOV_VFM_MO 0x2 +#define PCIECAP_SRIOV_VFM_AV 0x3 + /* Vendor specific extend capability */ #define PCIECAP_ID_VNDR 0x0b #define PCIECAP_VNDR_HDR 0x04 diff --git a/include/pci-iov.h b/include/pci-iov.h new file mode 100644 index 0000000..787b2cd --- /dev/null +++ b/include/pci-iov.h @@ -0,0 +1,37 @@ +/* Copyright 2013-2016 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __PCI_IOV_H +#define __PCI_IOV_H + +struct pci_iov { + struct phb *phb; + struct pci_device *pd; + struct pci_device *VFs; + uint32_t pos; + bool enabled; + struct pci_cfg_reg_filter pcrf; + + uint16_t init_VFs; + uint16_t total_VFs; + uint16_t num_VFs; + uint16_t offset; + uint16_t stride; +}; + +extern void pci_init_iov_cap(struct phb *phb, struct pci_device *pd); + +#endif |