aboutsummaryrefslogtreecommitdiff
path: root/platforms
diff options
context:
space:
mode:
authorFrederic Barrat <fbarrat@linux.ibm.com>2020-03-25 11:41:17 +0100
committerOliver O'Halloran <oohall@gmail.com>2020-03-30 18:18:10 +1100
commite876514b3773dcecc0b39317ca341d27db96d81c (patch)
treeffc0ce4a55d76e0bb07c2897d797f534dc6e5319 /platforms
parent3794167200e7169afc240516254ed0e601e76e72 (diff)
downloadskiboot-e876514b3773dcecc0b39317ca341d27db96d81c.zip
skiboot-e876514b3773dcecc0b39317ca341d27db96d81c.tar.gz
skiboot-e876514b3773dcecc0b39317ca341d27db96d81c.tar.bz2
hw/phb4: Tune GPU direct performance on witherspoon in PCI mode
Good GPU direct performance on witherspoon, with a Mellanox adapter on the shared slot, requires reallocating some dma engines within PEC2, "stealing" some from PHB4&5 and giving extras to PHB3. This is currently done when using CAPI mode, but the same is needed if the adapter stays in PCI mode.

In preparation for upcoming versions of MOFED, which may not use CAPI mode, this patch reallocates dma engines even in PCI mode for a series of Mellanox adapters that can be used with GPU direct, on witherspoon and on the shared slot only.

The loss of dma engines for PHB4&5 on witherspoon has not shown problems in testing, nor in current deployments where CAPI mode is used.

Here is a comparison of the bandwidth numbers seen with the PHB in PCI mode (no CAPI) with and without this patch. Variations on smaller packet sizes can be attributed to jitter and are not that meaningful.

# OSU MPI-CUDA Bi-Directional Bandwidth Test v5.6.1
# Send Buffer on DEVICE (D) and Receive Buffer on DEVICE (D)
# Size      Bandwidth (MB/s)    Bandwidth (MB/s)
#           with patch          without patch
1           1.29                1.48
2           2.66                3.04
4           5.34                5.93
8           10.68               11.86
16          21.39               23.71
32          42.78               49.15
64          85.43               97.67
128         170.82              196.64
256         385.47              383.02
512         774.68              755.54
1024        1535.14             1495.30
2048        2599.31             2561.60
4096        5192.31             5092.47
8192        9930.30             9566.90
16384       18189.81            16803.42
32768       24671.48            21383.57
65536       28977.71            24104.50
131072      31110.55            25858.95
262144      32180.64            26470.61
524288      32842.23            26961.93
1048576     33184.87            27217.38
2097152     33342.67            27338.08

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Cc: skiboot-stable@lists.ozlabs.org # skiboot-op940.x
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Diffstat (limited to 'platforms')
-rw-r--r--platforms/astbmc/witherspoon.c47
1 files changed, 47 insertions, 0 deletions
diff --git a/platforms/astbmc/witherspoon.c b/platforms/astbmc/witherspoon.c
index 6387af4..39c3f16 100644
--- a/platforms/astbmc/witherspoon.c
+++ b/platforms/astbmc/witherspoon.c
@@ -192,6 +192,52 @@ static void witherspoon_shared_slot_fixup(void)
}
}
+/*
+ * Device-walk callback (passed to pci_walk_dev()) that recognises the
+ * Mellanox adapters which can be used with GPU direct.
+ *
+ * @phb:      unused
+ * @dev:      device being visited; only dev->vdid is read
+ * @userdata: unused
+ *
+ * Returns 1 when dev is one of the listed Mellanox cards, 0 otherwise.
+ */
+static int check_mlx_cards(struct phb *phb __unused, struct pci_device *dev,
+			   void *userdata __unused)
+{
+	/*
+	 * PCI device IDs of the supported adapters. The table is static
+	 * const so it is not rebuilt on the stack for every device the
+	 * walk visits.
+	 */
+	static const uint16_t mlx_cards[] = {
+		0x1017, /* ConnectX-5 */
+		0x1019, /* ConnectX-5 Ex */
+		0x101b, /* ConnectX-6 */
+		0x101d, /* ConnectX-6 Dx */
+		0x101f, /* ConnectX-6 Lx */
+		0x1021, /* ConnectX-7 */
+	};
+
+	/* Anything that is not from Mellanox (vendor 0x15b3) cannot match */
+	if (PCI_VENDOR_ID(dev->vdid) != 0x15b3)
+		return 0;
+
+	/* Unsigned index: ARRAY_SIZE() is unsigned, avoid a mixed-sign compare */
+	for (unsigned int i = 0; i < ARRAY_SIZE(mlx_cards); i++) {
+		if (mlx_cards[i] == PCI_DEVICE_ID(dev->vdid))
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Platform hook installed as .pci_probe_complete.
+ *
+ * If a Mellanox card is found on the shared slot, reallocate the dma
+ * engines between the stacks in PEC2, as this is required to get good
+ * GPU direct performance.
+ */
+static void witherspoon_pci_probe_complete(void)
+{
+	struct phb *phb;
+
+	for_each_phb(phb) {
+		struct phb4 *p;
+
+		/* Virtual PHBs are not of interest here */
+		if (phb->phb_type != phb_type_pcie_v4)
+			continue;
+
+		/* Of the real PHBs, only the first one on PEC2 matters */
+		p = phb_to_phb4(phb);
+		if (p->index != 3)
+			continue;
+
+		/* A non-NULL result means the walk hit a matching card */
+		if (pci_walk_dev(phb, NULL, check_mlx_cards, NULL))
+			phb4_pec2_dma_engine_realloc(p);
+	}
+}
+
static void set_link_details(struct npu2 *npu, uint32_t link_index,
uint32_t brick_index, enum npu2_dev_type type)
{
@@ -533,6 +579,7 @@ DECLARE_PLATFORM(witherspoon) = {
.probe = witherspoon_probe,
.init = astbmc_init,
.pre_pci_fixup = witherspoon_shared_slot_fixup,
+ .pci_probe_complete = witherspoon_pci_probe_complete,
.start_preload_resource = flash_start_preload_resource,
.resource_loaded = flash_resource_loaded,
.bmc = &bmc_plat_ast2500_openbmc,