aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/Makefile.inc4
-rw-r--r--core/pci-iov.c257
-rw-r--r--core/pci.c2
-rw-r--r--include/pci-cfg.h32
-rw-r--r--include/pci-iov.h37
5 files changed, 330 insertions, 2 deletions
diff --git a/core/Makefile.inc b/core/Makefile.inc
index 2167044..ae3c297 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -2,8 +2,8 @@
SUBDIRS += core
CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
-CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o
-CORE_OBJS += timebase.o opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o
+CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o
+CORE_OBJS += opal-msg.o pci.o pci-iov.o pci-virt.o pci-slot.o pcie-slot.o
CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
CORE_OBJS += vpd.o hostservices.o platform.o nvram.o nvram-format.o hmi.o
CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
diff --git a/core/pci-iov.c b/core/pci-iov.c
new file mode 100644
index 0000000..14c810b
--- /dev/null
+++ b/core/pci-iov.c
@@ -0,0 +1,257 @@
+/* Copyright 2013-2016 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <pci.h>
+#include <pci-cfg.h>
+#include <pci-slot.h>
+#include <pci-iov.h>
+
+/*
+ * Tackle the VF's MPS in PCIe capability. The field is read only.
+ * This function caches what is written and returns the cached
+ * MPS on read.
+ */
+static int64_t pci_iov_vf_devctl(void *dev, struct pci_cfg_reg_filter *pcrf,
+ uint32_t offset, uint32_t len,
+ uint32_t *data, bool write)
+{
+ struct pci_device *vf = (struct pci_device *)dev;
+ uint32_t pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false);
+ uint8_t *pcache;
+
+ if (offset != pcrf->start ||
+ offset != (pos + PCICAP_EXP_DEVCTL))
+ return OPAL_SUCCESS;
+
+ pcache = &pcrf->data[0];
+ if (write) {
+ *pcache = ((uint8_t)(*data >> (8 * (4 - len)))) &
+ PCICAP_EXP_DEVCTL_MPS;
+ } else {
+ *data &= ~(PCICAP_EXP_DEVCTL_MPS << (8 * (4 - len)));
+ *data |= (((uint32_t)(*pcache & PCICAP_EXP_DEVCTL_MPS))
+ << (8 * (4 - len)));
+ }
+
+ return OPAL_SUCCESS;
+}
+
+static void pci_iov_vf_quirk(struct phb *phb, struct pci_device *vf)
+{
+ struct pci_cfg_reg_filter *pcrf;
+ uint32_t pos;
+
+ if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false))
+ return;
+
+ /*
+ * On Mellanox MT27500 Family [ConnectX-3], its VF's MPS field in
+ * the corresponding config register is readonly. The MPS for PF/VF
+ * are usually different. We are introducing a quirk to make them
+ * look same to avoid confusion.
+ */
+ if (vf->vdid != 0x100315b3)
+ return;
+
+ pos = pci_cap(vf, PCI_CFG_CAP_ID_EXP, false);
+ pcrf = pci_add_cfg_reg_filter(vf, pos + PCICAP_EXP_DEVCTL, 4,
+ PCI_REG_FLAG_MASK, pci_iov_vf_devctl);
+ if (!pcrf)
+ prlog(PR_WARNING, "%s: Missed DEVCTL filter on %04x:%02x:%02x.%01x\n",
+ __func__, phb->opal_id, (vf->bdfn >> 8),
+ ((vf->bdfn >> 3) & 0x1f), (vf->bdfn & 0x7));
+}
+
+/*
+ * Update the SRIOV parameters that change when the number of
+ * VFs is configured.
+ */
+static bool pci_iov_update_parameters(struct pci_iov *iov)
+{
+ struct phb *phb = iov->phb;
+ uint16_t bdfn = iov->pd->bdfn;
+ uint32_t pos = iov->pos;
+ uint16_t val;
+ bool enabled;
+
+ pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_CTRL, &val);
+ enabled = !!(val & PCIECAP_SRIOV_CTRL_VFE);
+ if (iov->enabled == enabled)
+ return false;
+
+ if (enabled) {
+ pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_INITIAL_VF,
+ &iov->init_VFs);
+ pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_NUM_VF,
+ &iov->num_VFs);
+ pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_OFFSET,
+ &iov->offset);
+ pci_cfg_read16(phb, bdfn, pos + PCIECAP_SRIOV_VF_STRIDE,
+ &iov->stride);
+ } else {
+ iov->init_VFs = 0;
+ iov->num_VFs = 0;
+ iov->offset = 0;
+ iov->stride = 0;
+ }
+
+ iov->enabled = enabled;
+ return true;
+}
+
+static int64_t pci_iov_change(void *dev __unused,
+ struct pci_cfg_reg_filter *pcrf,
+ uint32_t offset __unused,
+ uint32_t len __unused,
+ uint32_t *data __unused,
+ bool write __unused)
+{
+ struct pci_iov *iov = (struct pci_iov *)pcrf->data;
+ struct phb *phb = iov->phb;
+ struct pci_device *pd = iov->pd;
+ struct pci_device *vf, *tmp;
+ uint32_t i;
+ bool changed;
+
+ /* Update SRIOV variable parameters */
+ changed = pci_iov_update_parameters(iov);
+ if (!changed)
+ return OPAL_SUCCESS;
+
+ /* Remove all VFs that have been attached to the parent */
+ if (!iov->enabled) {
+ list_for_each_safe(&pd->children, vf, tmp, link)
+ list_del(&vf->link);
+ return OPAL_SUCCESS;
+ }
+
+ /* Initialize the VFs and attach them to parent */
+ for (changed = false, i = 0; i < iov->num_VFs; i++) {
+ vf = &iov->VFs[i];
+ vf->bdfn = pd->bdfn + iov->offset + iov->stride * i;
+ list_add_tail(&pd->children, &vf->link);
+
+ /*
+ * We don't populate the capabilities again if they have
+ * been existing, to save time. Also, we need delay for
+ * 100ms before the VF's config space becomes ready.
+ */
+ if (!pci_has_cap(vf, PCI_CFG_CAP_ID_EXP, false)) {
+ if (!changed) {
+ changed = !changed;
+ time_wait_ms(100);
+ }
+
+ pci_init_capabilities(phb, vf);
+ pci_iov_vf_quirk(phb, vf);
+ }
+
+ /* Call PHB hook */
+ if (phb->ops->device_init)
+ phb->ops->device_init(phb, pd, NULL);
+ }
+
+ return OPAL_SUCCESS;
+}
+
+/*
+ * This function is called with disabled SRIOV capability. So the VF's
+ * config address isn't finalized and its config space isn't accessible.
+ */
+static void pci_iov_init_VF(struct pci_device *pd, struct pci_device *vf)
+{
+ vf->is_bridge = false;
+ vf->is_multifunction = false;
+ vf->dev_type = PCIE_TYPE_ENDPOINT;
+ vf->scan_map = -1;
+ vf->vdid = pd->vdid;
+ vf->sub_vdid = pd->sub_vdid;
+ vf->class = pd->class;
+ vf->dn = NULL;
+ vf->slot = NULL;
+ vf->parent = pd;
+ list_head_init(&vf->pcrf);
+ list_head_init(&vf->children);
+}
+
+void pci_init_iov_cap(struct phb *phb, struct pci_device *pd)
+{
+ int64_t pos;
+ struct pci_iov *iov;
+ struct pci_cfg_reg_filter *pcrf;
+ uint32_t i;
+
+ /* Search for SRIOV capability */
+ if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false))
+ return;
+
+ pos = pci_find_ecap(phb, pd->bdfn, PCIECAP_ID_SRIOV, NULL);
+ if (pos <= 0)
+ return;
+
+ /* Allocate IOV */
+ iov = zalloc(sizeof(*iov));
+ if (!iov) {
+ prlog(PR_ERR, "%s: Cannot alloc IOV for %04x:%02x:%02x.%01x\n",
+ __func__, phb->opal_id, (pd->bdfn >> 8),
+ ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7));
+ return;
+ }
+
+ /* Allocate VFs */
+ pci_cfg_read16(phb, pd->bdfn, pos + PCIECAP_SRIOV_TOTAL_VF,
+ &iov->total_VFs);
+ iov->VFs = zalloc(sizeof(*iov->VFs) * iov->total_VFs);
+ if (!iov->VFs) {
+ prlog(PR_ERR, "%s: Cannot alloc %d VFs for %04x:%02x:%02x.%01x\n",
+ __func__, iov->total_VFs, phb->opal_id,
+ (pd->bdfn >> 8), ((pd->bdfn >> 3) & 0x1f),
+ (pd->bdfn & 0x7));
+ free(iov);
+ return;
+ }
+
+ /* Initialize VFs */
+ for (i = 0; i < iov->total_VFs; i++)
+ pci_iov_init_VF(pd, &iov->VFs[i]);
+
+ /* Register filter for enabling or disabling SRIOV capability */
+ pcrf = pci_add_cfg_reg_filter(pd, pos + PCIECAP_SRIOV_CTRL, 2,
+ PCI_REG_FLAG_WRITE, pci_iov_change);
+ if (!pcrf) {
+ prlog(PR_ERR, "%s: Cannot set filter on %04x:%02x:%02x.%01x\n",
+ __func__, phb->opal_id, (pd->bdfn >> 8),
+ ((pd->bdfn >> 3) & 0x1f), (pd->bdfn & 0x7));
+ free(iov->VFs);
+ free(iov);
+ return;
+ }
+
+ /* Associate filter and IOV capability */
+ pcrf->data = (void *)iov;
+
+ /*
+ * Retrieve the number of VFs and other information if applicable.
+ * Register the SRIOV capability in the mean while.
+ */
+ iov->phb = phb;
+ iov->pd = pd;
+ iov->pos = pos;
+ iov->enabled = false;
+ pci_iov_update_parameters(iov);
+ pci_set_cap(pd, PCIECAP_ID_SRIOV, pos, iov, true);
+}
diff --git a/core/pci.c b/core/pci.c
index 142a1a1..9889dbf 100644
--- a/core/pci.c
+++ b/core/pci.c
@@ -18,6 +18,7 @@
#include <cpu.h>
#include <pci.h>
#include <pci-cfg.h>
+#include <pci-iov.h>
#include <pci-slot.h>
#include <timebase.h>
#include <device.h>
@@ -204,6 +205,7 @@ void pci_init_capabilities(struct phb *phb, struct pci_device *pd)
{
pci_init_pcie_cap(phb, pd);
pci_init_aer_cap(phb, pd);
+ pci_init_iov_cap(phb, pd);
}
static struct pci_device *pci_scan_one(struct phb *phb, struct pci_device *parent,
diff --git a/include/pci-cfg.h b/include/pci-cfg.h
index 27c0f74..530f0a8 100644
--- a/include/pci-cfg.h
+++ b/include/pci-cfg.h
@@ -486,6 +486,38 @@
#define PCIECAP_AER_TLP_PFX_LOG2 0x40
#define PCIECAP_AER_TLP_PFX_LOG3 0x44
+/* SRIOV capability */
+#define PCIECAP_ID_SRIOV 0x10
+#define PCIECAP_SRIOV_CAP 0x04
+#define PCIECAP_SRIOV_CAP_VFM 0x01
+#define PCIECAP_SRIOV_CAP_INTR(x) ((x) >> 21)
+#define PCIECAP_SRIOV_CTRL 0x08
+#define PCIECAP_SRIOV_CTRL_VFE 0x01
+#define PCIECAP_SRIOV_CTRL_VFM 0x02
+#define PCIECAP_SRIOV_CTRL_INTR 0x04
+#define PCIECAP_SRIOV_CTRL_MSE 0x08
+#define PCIECAP_SRIOV_CTRL_ARI 0x10
+#define PCIECAP_SRIOV_STATUS 0x0a
+#define PCIECAP_SRIOV_STATUS_VFM 0x01
+#define PCIECAP_SRIOV_INITIAL_VF 0x0c
+#define PCIECAP_SRIOV_TOTAL_VF 0x0e
+#define PCIECAP_SRIOV_NUM_VF 0x10
+#define PCIECAP_SRIOV_FUNC_LINK 0x12
+#define PCIECAP_SRIOV_VF_OFFSET 0x14
+#define PCIECAP_SRIOV_VF_STRIDE 0x16
+#define PCIECAP_SRIOV_VF_DID 0x1a
+#define PCIECAP_SRIOV_SUP_PGSIZE 0x1c
+#define PCIECAP_SRIOV_SYS_PGSIZE 0x20
+#define PCIECAP_SRIOV_BAR 0x24
+#define PCIECAP_SRIOV_NUM_BARS 6
+#define PCIECAP_SRIOV_VFM 0x3c
+#define PCIECAP_SRIOV_VFM_BIR(x) ((x) & 7)
+#define PCIECAP_SRIOV_VFM_OFFSET(x) ((x) & ~7)
+#define PCIECAP_SRIOV_VFM_UA 0x0
+#define PCIECAP_SRIOV_VFM_MI 0x1
+#define PCIECAP_SRIOV_VFM_MO 0x2
+#define PCIECAP_SRIOV_VFM_AV 0x3
+
/* Vendor specific extend capability */
#define PCIECAP_ID_VNDR 0x0b
#define PCIECAP_VNDR_HDR 0x04
diff --git a/include/pci-iov.h b/include/pci-iov.h
new file mode 100644
index 0000000..787b2cd
--- /dev/null
+++ b/include/pci-iov.h
@@ -0,0 +1,37 @@
+/* Copyright 2013-2016 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PCI_IOV_H
+#define __PCI_IOV_H
+
+struct pci_iov {
+ struct phb *phb;
+ struct pci_device *pd;
+ struct pci_device *VFs;
+ uint32_t pos;
+ bool enabled;
+ struct pci_cfg_reg_filter pcrf;
+
+ uint16_t init_VFs;
+ uint16_t total_VFs;
+ uint16_t num_VFs;
+ uint16_t offset;
+ uint16_t stride;
+};
+
+extern void pci_init_iov_cap(struct phb *phb, struct pci_device *pd);
+
+#endif