path: root/src/drivers
author      Michael Brown <mcb30@ipxe.org>	2024-09-17 13:11:43 +0100
committer   Michael Brown <mcb30@ipxe.org>	2024-09-17 13:37:20 +0100
commit      59d123658bfe25402c4e89bbaf6eea83140d3c1f (patch)
tree        b7ea72b0150564168a8e732dba379f2f9964cd42 /src/drivers
parent      9bb20686369abf5166161804c16554d491f53a62 (diff)
[gve] Allocate all possible event counters (HEAD, master)
The admin queue API requires us to tell the device how many event counters we have provided via the "configure device resources" admin queue command. There is, of course, absolutely no documentation indicating how many event counters actually need to be provided.

We require only two event counters: one for the transmit queue, one for the receive queue. (The receive queue doesn't seem to actually make any use of its event counter, but the "create receive queue" admin queue command will fail if it doesn't have an available event counter to choose.)

In the absence of any documentation, we currently make the assumption that allocating and configuring 16 counters (i.e. one whole cacheline) will be sufficient to allow for the use of two counters.

This assumption turns out to be incorrect. On larger instance types (observed with a c3d-standard-16 instance in europe-west4-a), we find that creating the transmit or receive queues will each fail with a probability of around 50% with the "failed precondition" error code. Experimentation suggests that even though the device has accepted our "configure device resources" command indicating that we are providing only 16 event counters, it will attempt to choose any of its potential 32 event counters (and will then fail since the event counter that it unilaterally chose is outside of the agreed range).

Work around this firmware bug by always allocating the maximum number of event counters supported by the device. (This requires deferring the allocation of the event counters until after issuing the "describe device" command.)

Signed-off-by: Michael Brown <mcb30@ipxe.org>
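For context, the shape of the fix can be read as a few lines of C. This is an illustrative sketch only: the function name gve_alloc_events_sketch and the abbreviated error handling are not part of the commit, and the real change is the gve_alloc_shared()/gve_free_shared() pair in the diff below. The sketch assumes the driver's own types and helpers (struct gve_nic, dma_alloc(), GVE_ALIGN) and that gve_describe() has already stored the device-reported counter count in gve->events.count.

    /* Sketch: allocate one event counter for every counter the device
     * claims to support, so that whichever counter the firmware
     * unilaterally picks lies inside the range we configured.
     */
    static int gve_alloc_events_sketch ( struct gve_nic *gve ) {
            struct gve_events *events = &gve->events;
            size_t len;

            /* Size the array from the device's own "counters" value,
             * as reported by "describe device", with no artificial cap.
             */
            len = ( gve->events.count * sizeof ( events->event[0] ) );
            events->event = dma_alloc ( gve->dma, &events->map, len,
                                        GVE_ALIGN );
            if ( ! events->event )
                    return -ENOMEM;

            return 0;
    }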
Diffstat (limited to 'src/drivers')
-rw-r--r--	src/drivers/net/gve.c	115
-rw-r--r--	src/drivers/net/gve.h	25
2 files changed, 76 insertions(+), 64 deletions(-)
diff --git a/src/drivers/net/gve.c b/src/drivers/net/gve.c
index 0193e76..df10a94 100644
--- a/src/drivers/net/gve.c
+++ b/src/drivers/net/gve.c
@@ -191,12 +191,8 @@ static int gve_reset ( struct gve_nic *gve ) {
static int gve_admin_alloc ( struct gve_nic *gve ) {
struct dma_device *dma = gve->dma;
struct gve_admin *admin = &gve->admin;
- struct gve_irqs *irqs = &gve->irqs;
- struct gve_events *events = &gve->events;
struct gve_scratch *scratch = &gve->scratch;
size_t admin_len = ( GVE_ADMIN_COUNT * sizeof ( admin->cmd[0] ) );
- size_t irqs_len = ( GVE_IRQ_COUNT * sizeof ( irqs->irq[0] ) );
- size_t events_len = ( GVE_EVENT_MAX * sizeof ( events->event[0] ) );
size_t scratch_len = sizeof ( *scratch->buf );
int rc;
@@ -207,20 +203,6 @@ static int gve_admin_alloc ( struct gve_nic *gve ) {
goto err_admin;
}
- /* Allocate interrupt channels */
- irqs->irq = dma_alloc ( dma, &irqs->map, irqs_len, GVE_ALIGN );
- if ( ! irqs->irq ) {
- rc = -ENOMEM;
- goto err_irqs;
- }
-
- /* Allocate event counters */
- events->event = dma_alloc ( dma, &events->map, events_len, GVE_ALIGN );
- if ( ! events->event ) {
- rc = -ENOMEM;
- goto err_events;
- }
-
/* Allocate scratch buffer */
scratch->buf = dma_alloc ( dma, &scratch->map, scratch_len, GVE_ALIGN );
if ( ! scratch->buf ) {
@@ -228,17 +210,15 @@ static int gve_admin_alloc ( struct gve_nic *gve ) {
goto err_scratch;
}
- DBGC ( gve, "GVE %p AQ at [%08lx,%08lx)\n",
+ DBGC ( gve, "GVE %p AQ at [%08lx,%08lx) scratch [%08lx,%08lx)\n",
gve, virt_to_phys ( admin->cmd ),
- ( virt_to_phys ( admin->cmd ) + admin_len ) );
+ ( virt_to_phys ( admin->cmd ) + admin_len ),
+ virt_to_phys ( scratch->buf ),
+ ( virt_to_phys ( scratch->buf ) + scratch_len ) );
return 0;
dma_free ( &scratch->map, scratch->buf, scratch_len );
err_scratch:
- dma_free ( &events->map, events->event, events_len );
- err_events:
- dma_free ( &irqs->map, irqs->irq, irqs_len );
- err_irqs:
dma_free ( &admin->map, admin->cmd, admin_len );
err_admin:
return rc;
@@ -251,23 +231,13 @@ static int gve_admin_alloc ( struct gve_nic *gve ) {
*/
static void gve_admin_free ( struct gve_nic *gve ) {
struct gve_admin *admin = &gve->admin;
- struct gve_irqs *irqs = &gve->irqs;
- struct gve_events *events = &gve->events;
struct gve_scratch *scratch = &gve->scratch;
size_t admin_len = ( GVE_ADMIN_COUNT * sizeof ( admin->cmd[0] ) );
- size_t irqs_len = ( GVE_IRQ_COUNT * sizeof ( irqs->irq[0] ) );
- size_t events_len = ( GVE_EVENT_MAX * sizeof ( events->event[0] ) );
size_t scratch_len = sizeof ( *scratch->buf );
/* Free scratch buffer */
dma_free ( &scratch->map, scratch->buf, scratch_len );
- /* Free event counter */
- dma_free ( &events->map, events->event, events_len );
-
- /* Free interrupt channels */
- dma_free ( &irqs->map, irqs->irq, irqs_len );
-
/* Free admin queue */
dma_free ( &admin->map, admin->cmd, admin_len );
}
@@ -467,13 +437,10 @@ static int gve_describe ( struct gve_nic *gve ) {
/* Extract queue parameters */
gve->events.count = be16_to_cpu ( desc->counters );
- if ( gve->events.count > GVE_EVENT_MAX )
- gve->events.count = GVE_EVENT_MAX;
gve->tx.count = be16_to_cpu ( desc->tx_count );
gve->rx.count = be16_to_cpu ( desc->rx_count );
- DBGC ( gve, "GVE %p using %d TX, %d RX, %d/%d events\n",
- gve, gve->tx.count, gve->rx.count, gve->events.count,
- be16_to_cpu ( desc->counters ) );
+ DBGC ( gve, "GVE %p using %d TX, %d RX, %d events\n",
+ gve, gve->tx.count, gve->rx.count, gve->events.count );
/* Extract network parameters */
build_assert ( sizeof ( desc->mac ) == ETH_ALEN );
@@ -705,6 +672,67 @@ static int gve_destroy_queue ( struct gve_nic *gve, struct gve_queue *queue ) {
*/
/**
+ * Allocate shared queue resources
+ *
+ * @v gve GVE device
+ * @ret rc Return status code
+ */
+static int gve_alloc_shared ( struct gve_nic *gve ) {
+ struct dma_device *dma = gve->dma;
+ struct gve_irqs *irqs = &gve->irqs;
+ struct gve_events *events = &gve->events;
+ size_t irqs_len = ( GVE_IRQ_COUNT * sizeof ( irqs->irq[0] ) );
+ size_t events_len = ( gve->events.count * sizeof ( events->event[0] ) );
+ int rc;
+
+ /* Allocate interrupt channels */
+ irqs->irq = dma_alloc ( dma, &irqs->map, irqs_len, GVE_ALIGN );
+ if ( ! irqs->irq ) {
+ rc = -ENOMEM;
+ goto err_irqs;
+ }
+ DBGC ( gve, "GVE %p IRQs at [%08lx,%08lx)\n",
+ gve, virt_to_phys ( irqs->irq ),
+ ( virt_to_phys ( irqs->irq ) + irqs_len ) );
+
+ /* Allocate event counters */
+ events->event = dma_alloc ( dma, &events->map, events_len, GVE_ALIGN );
+ if ( ! events->event ) {
+ rc = -ENOMEM;
+ goto err_events;
+ }
+ DBGC ( gve, "GVE %p events at [%08lx,%08lx)\n",
+ gve, virt_to_phys ( events->event ),
+ ( virt_to_phys ( events->event ) + events_len ) );
+
+ return 0;
+
+ dma_free ( &events->map, events->event, events_len );
+ err_events:
+ dma_free ( &irqs->map, irqs->irq, irqs_len );
+ err_irqs:
+ return rc;
+}
+
+/**
+ * Free shared queue resources
+ *
+ * @v gve GVE device
+ */
+static void gve_free_shared ( struct gve_nic *gve ) {
+ struct gve_irqs *irqs = &gve->irqs;
+ struct gve_events *events = &gve->events;
+ size_t irqs_len = ( GVE_IRQ_COUNT * sizeof ( irqs->irq[0] ) );
+ size_t events_len = ( gve->events.count * sizeof ( events->event[0] ) );
+
+ /* Free event counters */
+ dma_free ( &events->map, events->event, events_len );
+
+ /* Free interrupt channels */
+ dma_free ( &irqs->map, irqs->irq, irqs_len );
+}
+
+/**
* Allocate queue page list
*
* @v gve GVE device
@@ -1117,6 +1145,10 @@ static int gve_open ( struct net_device *netdev ) {
struct gve_queue *rx = &gve->rx;
int rc;
+ /* Allocate shared queue resources */
+ if ( ( rc = gve_alloc_shared ( gve ) ) != 0 )
+ goto err_alloc_shared;
+
/* Allocate and prepopulate transmit queue */
if ( ( rc = gve_alloc_queue ( gve, tx ) ) != 0 )
goto err_alloc_tx;
@@ -1137,6 +1169,8 @@ static int gve_open ( struct net_device *netdev ) {
err_alloc_rx:
gve_free_queue ( gve, tx );
err_alloc_tx:
+ gve_free_shared ( gve );
+ err_alloc_shared:
return rc;
}
@@ -1163,6 +1197,9 @@ static void gve_close ( struct net_device *netdev ) {
/* Free queues */
gve_free_queue ( gve, rx );
gve_free_queue ( gve, tx );
+
+ /* Free shared queue resources */
+ gve_free_shared ( gve );
}
/**
diff --git a/src/drivers/net/gve.h b/src/drivers/net/gve.h
index 247d6e6..43517cc 100644
--- a/src/drivers/net/gve.h
+++ b/src/drivers/net/gve.h
@@ -50,15 +50,6 @@ struct google_mac {
*/
#define GVE_ALIGN GVE_PAGE_SIZE
-/**
- * Length alignment
- *
- * All DMA data structure lengths seem to need to be aligned to a
- * multiple of 64 bytes. (This is not documented anywhere, but is
- * inferred from existing source code and experimentation.)
- */
-#define GVE_LEN_ALIGN 64
-
/** Configuration BAR */
#define GVE_CFG_BAR PCI_BASE_ADDRESS_0
@@ -350,22 +341,6 @@ struct gve_event {
volatile uint32_t count;
} __attribute__ (( packed ));
-/**
- * Maximum number of event counters
- *
- * We tell the device how many event counters we have provided via the
- * "configure device resources" admin queue command. The device will
- * accept being given only a single counter, but will subsequently
- * fail to create a receive queue.
- *
- * There is, of course, no documentation indicating how may event
- * counters actually need to be provided. In the absence of evidence
- * to the contrary, assume that 16 counters (i.e. the smallest number
- * we can allocate, given the length alignment constraint on
- * allocations) will be sufficient.
- */
-#define GVE_EVENT_MAX ( GVE_LEN_ALIGN / sizeof ( struct gve_event ) )
-
/** Event counter array */
struct gve_events {
/** Event counters */