From 2ab0ec31215e68f7af6b21b62e49141eb7c083e2 Mon Sep 17 00:00:00 2001 From: Andrew Melnychenko Date: Tue, 1 Aug 2023 01:31:45 +0300 Subject: tap: Add USO support to tap device. Passing additional parameters (USOv4 and USOv6 offloads) when setting TAP offloads Signed-off-by: Yuri Benditovich Signed-off-by: Andrew Melnychenko Signed-off-by: Jason Wang --- hw/net/e1000e_core.c | 2 +- hw/net/igb_core.c | 2 +- hw/net/virtio-net.c | 4 +++- hw/net/vmxnet3.c | 2 ++ 4 files changed, 7 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index f8aeafa..d405595 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -2852,7 +2852,7 @@ e1000e_update_rx_offloads(E1000ECore *core) if (core->has_vnet) { qemu_set_offload(qemu_get_queue(core->owner_nic)->peer, - cso_state, 0, 0, 0, 0); + cso_state, 0, 0, 0, 0, 0, 0); } } diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index 8b6b75c..389eef1 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -2753,7 +2753,7 @@ igb_update_rx_offloads(IGBCore *core) if (core->has_vnet) { qemu_set_offload(qemu_get_queue(core->owner_nic)->peer, - cso_state, 0, 0, 0, 0); + cso_state, 0, 0, 0, 0, 0, 0); } } diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 7102ec4..d2311e7 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -859,7 +859,9 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n) !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), - !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO))); + !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)), + !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)), + !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6))); } static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 3fb1087..226c077 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -1341,6 +1341,8 @@ static void vmxnet3_update_features(VMXNET3State *s) s->lro_supported, s->lro_supported, 0, + 0, + 0, 0); } } -- cgit v1.1 From 9da1684954d10b4d22277b93b4c03e89d38976a1 Mon Sep 17 00:00:00 2001 From: Andrew Melnychenko Date: Tue, 1 Aug 2023 01:31:47 +0300 Subject: virtio-net: Add USO flags to vhost support. New features are subject to check with vhost-user and vdpa. Signed-off-by: Yuri Benditovich Signed-off-by: Andrew Melnychenko Signed-off-by: Jason Wang --- hw/net/vhost_net.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'hw') diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 6b958d6..57427a3 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -78,6 +78,9 @@ static const int user_feature_bits[] = { VIRTIO_F_RING_RESET, VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, + VIRTIO_NET_F_GUEST_USO4, + VIRTIO_NET_F_GUEST_USO6, + VIRTIO_NET_F_HOST_USO, /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, -- cgit v1.1 From 53da8b5a992deab25d4f53233891b1aeed7104a6 Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Tue, 1 Aug 2023 01:31:48 +0300 Subject: virtio-net: Add support for USO features USO features of virtio-net device depend on kernel ability to support them, for backward compatibility by default the features are disabled on 8.0 and earlier. Signed-off-by: Yuri Benditovich Signed-off-by: Andrew Melnychecnko Signed-off-by: Jason Wang --- hw/core/machine.c | 4 ++++ hw/net/virtio-net.c | 31 +++++++++++++++++++++++++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/core/machine.c b/hw/core/machine.c index da699cf..230aab8 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -38,6 +38,7 @@ #include "exec/confidential-guest-support.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-net.h" GlobalProperty hw_compat_8_1[] = {}; const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1); @@ -45,6 +46,9 @@ const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1); GlobalProperty hw_compat_8_0[] = { { "migration", "multifd-flush-after-each-section", "on"}, { TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" }, + { TYPE_VIRTIO_NET, "host_uso", "off"}, + { TYPE_VIRTIO_NET, "guest_uso4", "off"}, + { TYPE_VIRTIO_NET, "guest_uso6", "off"}, }; const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0); diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index d2311e7..bd0ead9 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -659,6 +659,15 @@ static int peer_has_ufo(VirtIONet *n) return n->has_ufo; } +static int peer_has_uso(VirtIONet *n) +{ + if (!peer_has_vnet_hdr(n)) { + return 0; + } + + return qemu_has_uso(qemu_get_queue(n->nic)->peer); +} + static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, int version_1, int hash_report) { @@ -796,6 +805,10 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); } @@ -804,6 +817,12 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); } + if (!peer_has_uso(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); + } + if (!get_vhost_net(nc->peer)) { return features; } @@ -864,14 +883,16 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n) !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6))); } -static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) +static uint64_t virtio_net_guest_offloads_by_features(uint64_t features) { static const uint64_t guest_offloads_mask = (1ULL << VIRTIO_NET_F_GUEST_CSUM) | (1ULL << VIRTIO_NET_F_GUEST_TSO4) | (1ULL << VIRTIO_NET_F_GUEST_TSO6) | (1ULL << VIRTIO_NET_F_GUEST_ECN) | - (1ULL << VIRTIO_NET_F_GUEST_UFO); + (1ULL << VIRTIO_NET_F_GUEST_UFO) | + (1ULL << VIRTIO_NET_F_GUEST_USO4) | + (1ULL << VIRTIO_NET_F_GUEST_USO6); return guest_offloads_mask & features; } @@ -3924,6 +3945,12 @@ static Property virtio_net_properties[] = { DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN), DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str), DEFINE_PROP_BOOL("failover", VirtIONet, failover, false), + DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features, + VIRTIO_NET_F_GUEST_USO4, true), + DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features, + VIRTIO_NET_F_GUEST_USO6, true), + DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features, + VIRTIO_NET_F_HOST_USO, true), DEFINE_PROP_END_OF_LIST(), }; -- cgit v1.1 From 2959c51dded9f5d6f35713655340f219adab5e07 Mon Sep 17 00:00:00 2001 From: Tomasz Dzieciol Date: Mon, 29 May 2023 16:01:47 +0200 Subject: igb: remove TCP ACK detection TCP ACK detection is no longer present in igb. Signed-off-by: Tomasz Dzieciol Reviewed-by: Akihiko Odaki Tested-by: Akihiko Odaki Signed-off-by: Jason Wang --- hw/net/igb_core.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'hw') diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index 389eef1..a83e4aa 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -1327,11 +1327,6 @@ igb_build_rx_metadata(IGBCore *core, trace_e1000e_rx_metadata_ip_id(*ip_id); } - if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) { - *status_flags |= E1000_RXD_STAT_ACK; - trace_e1000e_rx_metadata_ack(); - } - if (pkt_info) { *pkt_info = rss_info->enabled ? rss_info->type : 0; -- cgit v1.1 From a86aee7e953da69c444759a02f2686cab216cb93 Mon Sep 17 00:00:00 2001 From: Tomasz Dzieciol Date: Mon, 29 May 2023 16:01:48 +0200 Subject: igb: rename E1000E_RingInfo_st Rename E1000E_RingInfo_st and E1000E_RingInfo according to qemu typdefs guide. Signed-off-by: Tomasz Dzieciol Reviewed-by: Akihiko Odaki Tested-by: Akihiko Odaki Signed-off-by: Jason Wang --- hw/net/e1000e_core.c | 34 +++++++++++++++++----------------- hw/net/igb_core.c | 42 +++++++++++++++++++++--------------------- 2 files changed, 38 insertions(+), 38 deletions(-) (limited to 'hw') diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index d405595..91aae37 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -810,24 +810,24 @@ e1000e_txdesc_writeback(E1000ECore *core, dma_addr_t base, return e1000e_tx_wb_interrupt_cause(core, queue_idx); } -typedef struct E1000E_RingInfo_st { +typedef struct E1000ERingInfo { int dbah; int dbal; int dlen; int dh; int dt; int idx; -} E1000E_RingInfo; +} E1000ERingInfo; static inline bool -e1000e_ring_empty(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_empty(E1000ECore *core, const E1000ERingInfo *r) { return core->mac[r->dh] == core->mac[r->dt] || core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN; } static inline uint64_t -e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_base(E1000ECore *core, const E1000ERingInfo *r) { uint64_t bah = core->mac[r->dbah]; uint64_t bal = core->mac[r->dbal]; @@ -836,13 +836,13 @@ e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r) } static inline uint64_t -e1000e_ring_head_descr(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_head_descr(E1000ECore *core, const E1000ERingInfo *r) { return e1000e_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh]; } static inline void -e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count) +e1000e_ring_advance(E1000ECore *core, const E1000ERingInfo *r, uint32_t count) { core->mac[r->dh] += count; @@ -852,7 +852,7 @@ e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count) } static inline uint32_t -e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_free_descr_num(E1000ECore *core, const E1000ERingInfo *r) { trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen], core->mac[r->dh], core->mac[r->dt]); @@ -871,19 +871,19 @@ e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r) } static inline bool -e1000e_ring_enabled(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_enabled(E1000ECore *core, const E1000ERingInfo *r) { return core->mac[r->dlen] > 0; } static inline uint32_t -e1000e_ring_len(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_len(E1000ECore *core, const E1000ERingInfo *r) { return core->mac[r->dlen]; } typedef struct E1000E_TxRing_st { - const E1000E_RingInfo *i; + const E1000ERingInfo *i; struct e1000e_tx *tx; } E1000E_TxRing; @@ -896,7 +896,7 @@ e1000e_mq_queue_idx(int base_reg_idx, int reg_idx) static inline void e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx) { - static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = { + static const E1000ERingInfo i[E1000E_NUM_QUEUES] = { { TDBAH, TDBAL, TDLEN, TDH, TDT, 0 }, { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 } }; @@ -908,13 +908,13 @@ e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx) } typedef struct E1000E_RxRing_st { - const E1000E_RingInfo *i; + const E1000ERingInfo *i; } E1000E_RxRing; static inline void e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx) { - static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = { + static const E1000ERingInfo i[E1000E_NUM_QUEUES] = { { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 }, { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 } }; @@ -930,7 +930,7 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr) dma_addr_t base; struct e1000_tx_desc desc; bool ide = false; - const E1000E_RingInfo *txi = txr->i; + const E1000ERingInfo *txi = txr->i; uint32_t cause = E1000_ICS_TXQE; if (!(core->mac[TCTL] & E1000_TCTL_EN)) { @@ -960,7 +960,7 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr) } static bool -e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r, +e1000e_has_rxbufs(E1000ECore *core, const E1000ERingInfo *r, size_t total_size) { uint32_t bufs = e1000e_ring_free_descr_num(core, r); @@ -1460,7 +1460,7 @@ e1000e_update_rx_stats(E1000ECore *core, size_t pkt_size, size_t pkt_fcs_size) } static inline bool -e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000E_RingInfo *rxi) +e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000ERingInfo *rxi) { return e1000e_ring_free_descr_num(core, rxi) == e1000e_ring_len(core, rxi) >> core->rxbuf_min_shift; @@ -1521,7 +1521,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, struct iovec *iov = net_rx_pkt_get_iovec(pkt); size_t size = net_rx_pkt_get_total_len(pkt); size_t total_size = size + e1000x_fcs_len(core->mac); - const E1000E_RingInfo *rxi; + const E1000ERingInfo *rxi; size_t ps_hdr_len = 0; bool do_ps = e1000e_do_ps(core, pkt, &ps_hdr_len); bool is_first = true; diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index a83e4aa..d50e6b1 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -694,24 +694,24 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx) return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0; } -typedef struct E1000E_RingInfo_st { +typedef struct E1000ERingInfo { int dbah; int dbal; int dlen; int dh; int dt; int idx; -} E1000E_RingInfo; +} E1000ERingInfo; static inline bool -igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r) +igb_ring_empty(IGBCore *core, const E1000ERingInfo *r) { return core->mac[r->dh] == core->mac[r->dt] || core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN; } static inline uint64_t -igb_ring_base(IGBCore *core, const E1000E_RingInfo *r) +igb_ring_base(IGBCore *core, const E1000ERingInfo *r) { uint64_t bah = core->mac[r->dbah]; uint64_t bal = core->mac[r->dbal]; @@ -720,13 +720,13 @@ igb_ring_base(IGBCore *core, const E1000E_RingInfo *r) } static inline uint64_t -igb_ring_head_descr(IGBCore *core, const E1000E_RingInfo *r) +igb_ring_head_descr(IGBCore *core, const E1000ERingInfo *r) { return igb_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh]; } static inline void -igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count) +igb_ring_advance(IGBCore *core, const E1000ERingInfo *r, uint32_t count) { core->mac[r->dh] += count; @@ -736,7 +736,7 @@ igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count) } static inline uint32_t -igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r) +igb_ring_free_descr_num(IGBCore *core, const E1000ERingInfo *r) { trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen], core->mac[r->dh], core->mac[r->dt]); @@ -755,13 +755,13 @@ igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r) } static inline bool -igb_ring_enabled(IGBCore *core, const E1000E_RingInfo *r) +igb_ring_enabled(IGBCore *core, const E1000ERingInfo *r) { return core->mac[r->dlen] > 0; } typedef struct IGB_TxRing_st { - const E1000E_RingInfo *i; + const E1000ERingInfo *i; struct igb_tx *tx; } IGB_TxRing; @@ -774,7 +774,7 @@ igb_mq_queue_idx(int base_reg_idx, int reg_idx) static inline void igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx) { - static const E1000E_RingInfo i[IGB_NUM_QUEUES] = { + static const E1000ERingInfo i[IGB_NUM_QUEUES] = { { TDBAH0, TDBAL0, TDLEN0, TDH0, TDT0, 0 }, { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 }, { TDBAH2, TDBAL2, TDLEN2, TDH2, TDT2, 2 }, @@ -800,13 +800,13 @@ igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx) } typedef struct E1000E_RxRing_st { - const E1000E_RingInfo *i; + const E1000ERingInfo *i; } E1000E_RxRing; static inline void igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx) { - static const E1000E_RingInfo i[IGB_NUM_QUEUES] = { + static const E1000ERingInfo i[IGB_NUM_QUEUES] = { { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 }, { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 }, { RDBAH2, RDBAL2, RDLEN2, RDH2, RDT2, 2 }, @@ -833,7 +833,7 @@ igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx) static uint32_t igb_txdesc_writeback(IGBCore *core, dma_addr_t base, union e1000_adv_tx_desc *tx_desc, - const E1000E_RingInfo *txi) + const E1000ERingInfo *txi) { PCIDevice *d; uint32_t cmd_type_len = le32_to_cpu(tx_desc->read.cmd_type_len); @@ -866,7 +866,7 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base, } static inline bool -igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi) +igb_tx_enabled(IGBCore *core, const E1000ERingInfo *txi) { bool vmdq = core->mac[MRQC] & 1; uint16_t qn = txi->idx; @@ -883,7 +883,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr) PCIDevice *d; dma_addr_t base; union e1000_adv_tx_desc desc; - const E1000E_RingInfo *txi = txr->i; + const E1000ERingInfo *txi = txr->i; uint32_t eic = 0; if (!igb_tx_enabled(core, txi)) { @@ -918,7 +918,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr) } static uint32_t -igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r) +igb_rxbufsize(IGBCore *core, const E1000ERingInfo *r) { uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; uint32_t bsizepkt = srrctl & E1000_SRRCTL_BSIZEPKT_MASK; @@ -930,7 +930,7 @@ igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r) } static bool -igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size) +igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size) { uint32_t bufs = igb_ring_free_descr_num(core, r); uint32_t bufsize = igb_rxbufsize(core, r); @@ -1522,7 +1522,7 @@ igb_write_to_rx_buffers(IGBCore *core, } static void -igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, +igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi, size_t pkt_size, size_t pkt_fcs_size) { eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt); @@ -1540,7 +1540,7 @@ igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, } static inline bool -igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi) +igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi) { return igb_ring_free_descr_num(core, rxi) == ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; @@ -1562,7 +1562,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, struct iovec *iov = net_rx_pkt_get_iovec(pkt); size_t size = net_rx_pkt_get_total_len(pkt); size_t total_size = size + e1000x_fcs_len(core->mac); - const E1000E_RingInfo *rxi = rxr->i; + const E1000ERingInfo *rxi = rxr->i; size_t bufsize = igb_rxbufsize(core, rxi); d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); @@ -1643,7 +1643,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, } static bool -igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi) +igb_rx_strip_vlan(IGBCore *core, const E1000ERingInfo *rxi) { if (core->mac[MRQC] & 1) { uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS; -- cgit v1.1 From ec82ad7c4d61061d78907436ceb181859ab4a8d5 Mon Sep 17 00:00:00 2001 From: Tomasz Dzieciol Date: Mon, 29 May 2023 16:01:49 +0200 Subject: igb: RX descriptors guest writting refactoring Refactoring is done in preparation for support of multiple advanced descriptors RX modes, especially packet-split modes. Signed-off-by: Tomasz Dzieciol Reviewed-by: Akihiko Odaki Tested-by: Akihiko Odaki Signed-off-by: Jason Wang --- hw/net/igb_core.c | 170 +++++++++++++++++++++++++++------------------------- hw/net/igb_regs.h | 10 ++-- hw/net/trace-events | 4 +- 3 files changed, 96 insertions(+), 88 deletions(-) (limited to 'hw') diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index d50e6b1..e140358 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -1281,15 +1281,11 @@ igb_verify_csum_in_sw(IGBCore *core, } static void -igb_build_rx_metadata(IGBCore *core, - struct NetRxPkt *pkt, - bool is_eop, - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, - uint16_t *pkt_info, uint16_t *hdr_info, - uint32_t *rss, - uint32_t *status_flags, - uint16_t *ip_id, - uint16_t *vlan_tag) +igb_build_rx_metadata_common(IGBCore *core, + struct NetRxPkt *pkt, + bool is_eop, + uint32_t *status_flags, + uint16_t *vlan_tag) { struct virtio_net_hdr *vhdr; bool hasip4, hasip6, csum_valid; @@ -1298,7 +1294,6 @@ igb_build_rx_metadata(IGBCore *core, *status_flags = E1000_RXD_STAT_DD; /* No additional metadata needed for non-EOP descriptors */ - /* TODO: EOP apply only to status so don't skip whole function. */ if (!is_eop) { goto func_exit; } @@ -1315,59 +1310,6 @@ igb_build_rx_metadata(IGBCore *core, trace_e1000e_rx_metadata_vlan(*vlan_tag); } - /* Packet parsing results */ - if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { - if (rss_info->enabled) { - *rss = cpu_to_le32(rss_info->hash); - trace_igb_rx_metadata_rss(*rss); - } - } else if (hasip4) { - *status_flags |= E1000_RXD_STAT_IPIDV; - *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); - trace_e1000e_rx_metadata_ip_id(*ip_id); - } - - if (pkt_info) { - *pkt_info = rss_info->enabled ? rss_info->type : 0; - - if (etqf < 8) { - *pkt_info |= (BIT(11) | etqf) << 4; - } else { - if (hasip4) { - *pkt_info |= E1000_ADVRXD_PKT_IP4; - } - - if (hasip6) { - *pkt_info |= E1000_ADVRXD_PKT_IP6; - } - - switch (l4hdr_proto) { - case ETH_L4_HDR_PROTO_TCP: - *pkt_info |= E1000_ADVRXD_PKT_TCP; - break; - - case ETH_L4_HDR_PROTO_UDP: - *pkt_info |= E1000_ADVRXD_PKT_UDP; - break; - - case ETH_L4_HDR_PROTO_SCTP: - *pkt_info |= E1000_ADVRXD_PKT_SCTP; - break; - - default: - break; - } - } - } - - if (hdr_info) { - *hdr_info = 0; - } - - if (ts) { - *status_flags |= BIT(16); - } - /* RX CSO information */ if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { trace_e1000e_rx_metadata_ipv6_sum_disabled(); @@ -1423,43 +1365,108 @@ func_exit: static inline void igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, struct NetRxPkt *pkt, - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, + const E1000E_RSSInfo *rss_info, uint16_t length) { - uint32_t status_flags, rss; - uint16_t ip_id; + uint32_t status_flags; assert(!rss_info->enabled); + + memset(desc, 0, sizeof(*desc)); desc->length = cpu_to_le16(length); - desc->csum = 0; + igb_build_rx_metadata_common(core, pkt, pkt != NULL, + &status_flags, + &desc->special); - igb_build_rx_metadata(core, pkt, pkt != NULL, - rss_info, etqf, ts, - NULL, NULL, &rss, - &status_flags, &ip_id, - &desc->special); desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); desc->status = (uint8_t) le32_to_cpu(status_flags); } +static uint16_t +igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf) +{ + uint16_t pkt_type; + bool hasip4, hasip6; + EthL4HdrProto l4hdr_proto; + + if (etqf < 8) { + pkt_type = BIT(11) | etqf; + return pkt_type; + } + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { + pkt_type = E1000_ADVRXD_PKT_IP6; + } else if (hasip4) { + pkt_type = E1000_ADVRXD_PKT_IP4; + } else { + pkt_type = 0; + } + + switch (l4hdr_proto) { + case ETH_L4_HDR_PROTO_TCP: + pkt_type |= E1000_ADVRXD_PKT_TCP; + break; + case ETH_L4_HDR_PROTO_UDP: + pkt_type |= E1000_ADVRXD_PKT_UDP; + break; + case ETH_L4_HDR_PROTO_SCTP: + pkt_type |= E1000_ADVRXD_PKT_SCTP; + break; + default: + break; + } + + return pkt_type; +} + static inline void igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, uint16_t length) { + bool hasip4, hasip6; + EthL4HdrProto l4hdr_proto; + uint16_t rss_type = 0, pkt_type; + bool eop = (pkt != NULL); + uint32_t adv_desc_status_error = 0; memset(&desc->wb, 0, sizeof(desc->wb)); desc->wb.upper.length = cpu_to_le16(length); + igb_build_rx_metadata_common(core, pkt, eop, + &desc->wb.upper.status_error, + &desc->wb.upper.vlan); + + if (!eop) { + return; + } + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { + if (rss_info->enabled) { + desc->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash); + rss_type = rss_info->type; + trace_igb_rx_metadata_rss(desc->wb.lower.hi_dword.rss, rss_type); + } + } else if (hasip4) { + adv_desc_status_error |= E1000_RXD_STAT_IPIDV; + desc->wb.lower.hi_dword.csum_ip.ip_id = + cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); + trace_e1000e_rx_metadata_ip_id( + desc->wb.lower.hi_dword.csum_ip.ip_id); + } + + if (ts) { + adv_desc_status_error |= BIT(16); + } - igb_build_rx_metadata(core, pkt, pkt != NULL, - rss_info, etqf, ts, - &desc->wb.lower.lo_dword.pkt_info, - &desc->wb.lower.lo_dword.hdr_info, - &desc->wb.lower.hi_dword.rss, - &desc->wb.upper.status_error, - &desc->wb.lower.hi_dword.csum_ip.ip_id, - &desc->wb.upper.vlan); + pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf); + trace_e1000e_rx_metadata_pkt_type(pkt_type); + desc->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4)); + desc->wb.upper.status_error |= cpu_to_le32(adv_desc_status_error); } static inline void @@ -1468,8 +1475,7 @@ igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, uint16_t etqf, bool ts, uint16_t length) { if (igb_rx_use_legacy_descriptor(core)) { - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, - etqf, ts, length); + igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, length); } else { igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, etqf, ts, length); diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index 82ff195..71a8833 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -692,11 +692,11 @@ union e1000_adv_rx_desc { #define E1000_STATUS_NUM_VFS_SHIFT 14 -#define E1000_ADVRXD_PKT_IP4 BIT(4) -#define E1000_ADVRXD_PKT_IP6 BIT(6) -#define E1000_ADVRXD_PKT_TCP BIT(8) -#define E1000_ADVRXD_PKT_UDP BIT(9) -#define E1000_ADVRXD_PKT_SCTP BIT(10) +#define E1000_ADVRXD_PKT_IP4 BIT(0) +#define E1000_ADVRXD_PKT_IP6 BIT(2) +#define E1000_ADVRXD_PKT_TCP BIT(4) +#define E1000_ADVRXD_PKT_UDP BIT(5) +#define E1000_ADVRXD_PKT_SCTP BIT(6) static inline uint8_t igb_ivar_entry_rx(uint8_t i) { diff --git a/hw/net/trace-events b/hw/net/trace-events index 6b5ba66..b8305c0 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -280,7 +280,7 @@ igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) " igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" -igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X" +igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X" igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled" igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x" @@ -295,6 +295,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x" igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x" igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x" +igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor type: %d" + # igbvf.c igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64 -- cgit v1.1 From 17ccd01647960e760f9b3daa41bcdc6aabd2ddc9 Mon Sep 17 00:00:00 2001 From: Tomasz Dzieciol Date: Mon, 29 May 2023 16:01:50 +0200 Subject: igb: RX payload guest writting refactoring Refactoring is done in preparation for support of multiple advanced descriptors RX modes, especially packet-split modes. Signed-off-by: Tomasz Dzieciol Reviewed-by: Akihiko Odaki Tested-by: Akihiko Odaki Signed-off-by: Jason Wang --- hw/net/e1000e_core.c | 18 +++-- hw/net/igb_core.c | 213 ++++++++++++++++++++++++++++++++------------------- 2 files changed, 145 insertions(+), 86 deletions(-) (limited to 'hw') diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index 91aae37..cc243b7 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core, } static void -e1000e_write_to_rx_buffers(E1000ECore *core, - hwaddr ba[MAX_PS_BUFFERS], - e1000e_ba_state *bastate, - const char *data, - dma_addr_t data_len) +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, + hwaddr ba[MAX_PS_BUFFERS], + e1000e_ba_state *bastate, + const char *data, + dma_addr_t data_len) { while (data_len > 0) { uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, while (copy_size) { iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); - e1000e_write_to_rx_buffers(core, ba, &bastate, - iov->iov_base + iov_ofs, iov_copy); + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, + iov->iov_base + + iov_ofs, + iov_copy); copy_size -= iov_copy; iov_ofs += iov_copy; @@ -1607,7 +1609,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, if (desc_offset + desc_size >= total_size) { /* Simulate FCS checksum presence in the last descriptor */ - e1000e_write_to_rx_buffers(core, ba, &bastate, + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); } } diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index e140358..6d2712e 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -941,6 +941,14 @@ igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size) bufsize; } +static uint32_t +igb_rxhdrbufsize(IGBCore *core, const E1000ERingInfo *r) +{ + uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; + return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >> + E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; +} + void igb_start_recv(IGBCore *core) { @@ -1231,6 +1239,21 @@ igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, *buff_addr = le64_to_cpu(desc->read.pkt_addr); } +typedef struct IGBPacketRxDMAState { + size_t size; + size_t total_size; + size_t ps_hdr_len; + size_t desc_size; + size_t desc_offset; + uint32_t rx_desc_packet_buf_size; + uint32_t rx_desc_header_buf_size; + struct iovec *iov; + size_t iov_ofs; + bool is_first; + uint16_t written; + hwaddr ba; +} IGBPacketRxDMAState; + static inline void igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, hwaddr *buff_addr) @@ -1515,19 +1538,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr, } static void -igb_write_to_rx_buffers(IGBCore *core, - PCIDevice *d, - hwaddr ba, - uint16_t *written, - const char *data, - dma_addr_t data_len) -{ - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); - pci_dma_write(d, ba + *written, data, data_len); - *written += data_len; -} - -static void igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi, size_t pkt_size, size_t pkt_fcs_size) { @@ -1553,6 +1563,93 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi) } static void +igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size) +{ + if (*size > pdma_st->rx_desc_packet_buf_size) { + *size = pdma_st->rx_desc_packet_buf_size; + } +} + +static void +igb_write_payload_frag_to_rx_buffers(IGBCore *core, + PCIDevice *d, + hwaddr ba, + uint16_t *written, + uint32_t cur_buf_len, + const char *data, + dma_addr_t data_len) +{ + trace_igb_rx_desc_buff_write(ba, *written, data, data_len); + pci_dma_write(d, ba + *written, data, data_len); + *written += data_len; +} + +static void +igb_write_payload_to_rx_buffers(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + IGBPacketRxDMAState *pdma_st, + size_t *copy_size) +{ + static const uint32_t fcs_pad; + size_t iov_copy; + + /* Copy packet payload */ + while (*copy_size) { + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); + igb_write_payload_frag_to_rx_buffers(core, d, + pdma_st->ba, + &pdma_st->written, + pdma_st->rx_desc_packet_buf_size, + pdma_st->iov->iov_base + + pdma_st->iov_ofs, + iov_copy); + + *copy_size -= iov_copy; + pdma_st->iov_ofs += iov_copy; + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { + pdma_st->iov++; + pdma_st->iov_ofs = 0; + } + } + + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { + /* Simulate FCS checksum presence in the last descriptor */ + igb_write_payload_frag_to_rx_buffers(core, d, + pdma_st->ba, + &pdma_st->written, + pdma_st->rx_desc_packet_buf_size, + (const char *) &fcs_pad, + e1000x_fcs_len(core->mac)); + } +} + +static void +igb_write_to_rx_buffers(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + IGBPacketRxDMAState *pdma_st) +{ + size_t copy_size; + + if (!pdma_st->ba) { + /* as per intel docs; skip descriptors with null buf addr */ + trace_e1000e_rx_null_descriptor(); + return; + } + + if (pdma_st->desc_offset >= pdma_st->size) { + return; + } + + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset; + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); + copy_size = pdma_st->size - pdma_st->desc_offset; + igb_truncate_to_descriptor_size(pdma_st, ©_size); + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, ©_size); +} + +static void igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, const E1000E_RxRing *rxr, const E1000E_RSSInfo *rss_info, @@ -1561,91 +1658,51 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, PCIDevice *d; dma_addr_t base; union e1000_rx_desc_union desc; - size_t desc_size; - size_t desc_offset = 0; - size_t iov_ofs = 0; - - struct iovec *iov = net_rx_pkt_get_iovec(pkt); - size_t size = net_rx_pkt_get_total_len(pkt); - size_t total_size = size + e1000x_fcs_len(core->mac); - const E1000ERingInfo *rxi = rxr->i; - size_t bufsize = igb_rxbufsize(core, rxi); - + const E1000ERingInfo *rxi; + size_t rx_desc_len; + + IGBPacketRxDMAState pdma_st = {0}; + pdma_st.is_first = true; + pdma_st.size = net_rx_pkt_get_total_len(pkt); + pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac); + + rxi = rxr->i; + rx_desc_len = core->rx_desc_len; + pdma_st.rx_desc_packet_buf_size = igb_rxbufsize(core, rxi); + pdma_st.rx_desc_header_buf_size = igb_rxhdrbufsize(core, rxi); + pdma_st.iov = net_rx_pkt_get_iovec(pkt); d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); if (!d) { d = core->owner; } do { - hwaddr ba; - uint16_t written = 0; + pdma_st.written = 0; bool is_last = false; - desc_size = total_size - desc_offset; - - if (desc_size > bufsize) { - desc_size = bufsize; - } - if (igb_ring_empty(core, rxi)) { return; } base = igb_ring_head_descr(core, rxi); + pci_dma_read(d, base, &desc, rx_desc_len); + trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); - pci_dma_read(d, base, &desc, core->rx_desc_len); - - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); - - igb_read_rx_descr(core, &desc, &ba); - - if (ba) { - if (desc_offset < size) { - static const uint32_t fcs_pad; - size_t iov_copy; - size_t copy_size = size - desc_offset; - if (copy_size > bufsize) { - copy_size = bufsize; - } - - /* Copy packet payload */ - while (copy_size) { - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); - - igb_write_to_rx_buffers(core, d, ba, &written, - iov->iov_base + iov_ofs, iov_copy); + igb_read_rx_descr(core, &desc, &pdma_st.ba); - copy_size -= iov_copy; - iov_ofs += iov_copy; - if (iov_ofs == iov->iov_len) { - iov++; - iov_ofs = 0; - } - } - - if (desc_offset + desc_size >= total_size) { - /* Simulate FCS checksum presence in the last descriptor */ - igb_write_to_rx_buffers(core, d, ba, &written, - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); - } - } - } else { /* as per intel docs; skip descriptors with null buf addr */ - trace_e1000e_rx_null_descriptor(); - } - desc_offset += desc_size; - if (desc_offset >= total_size) { + igb_write_to_rx_buffers(core, pkt, d, &pdma_st); + pdma_st.desc_offset += pdma_st.desc_size; + if (pdma_st.desc_offset >= pdma_st.total_size) { is_last = true; } igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, - rss_info, etqf, ts, written); - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); - - igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN); - - } while (desc_offset < total_size); + rss_info, etqf, ts, pdma_st.written); + igb_pci_dma_write_rx_desc(core, d, base, &desc, rx_desc_len); + igb_ring_advance(core, rxi, rx_desc_len / E1000_MIN_RX_DESC_LEN); + } while (pdma_st.desc_offset < pdma_st.total_size); - igb_update_rx_stats(core, rxi, size, total_size); + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size); } static bool -- cgit v1.1 From 1c4e67a5be1fd89bd2c919799b2eb1478142848a Mon Sep 17 00:00:00 2001 From: Tomasz Dzieciol Date: Mon, 29 May 2023 16:01:51 +0200 Subject: igb: add IPv6 extended headers traffic detection Signed-off-by: Tomasz Dzieciol Reviewed-by: Akihiko Odaki Tested-by: Akihiko Odaki Signed-off-by: Jason Wang --- hw/net/igb_core.c | 4 +++- hw/net/igb_regs.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index 6d2712e..9f43fe5 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -1420,7 +1420,9 @@ igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf) net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { - pkt_type = E1000_ADVRXD_PKT_IP6; + eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt); + pkt_type = ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E : + E1000_ADVRXD_PKT_IP6; } else if (hasip4) { pkt_type = E1000_ADVRXD_PKT_IP4; } else { diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index 71a8833..36763f2 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -694,6 +694,7 @@ union e1000_adv_rx_desc { #define E1000_ADVRXD_PKT_IP4 BIT(0) #define E1000_ADVRXD_PKT_IP6 BIT(2) +#define E1000_ADVRXD_PKT_IP6E BIT(3) #define E1000_ADVRXD_PKT_TCP BIT(4) #define E1000_ADVRXD_PKT_UDP BIT(5) #define E1000_ADVRXD_PKT_SCTP BIT(6) -- cgit v1.1 From 560cf339b2a50bfb6bbeb53801065119e03118f3 Mon Sep 17 00:00:00 2001 From: Tomasz Dzieciol Date: Mon, 29 May 2023 16:01:52 +0200 Subject: igb: packet-split descriptors support Packet-split descriptors are used by Linux VF driver for MTU values from 2048 Signed-off-by: Tomasz Dzieciol Reviewed-by: Akihiko Odaki Tested-by: Akihiko Odaki Signed-off-by: Jason Wang --- hw/net/igb_core.c | 348 +++++++++++++++++++++++++++++++++++++++++++++------- hw/net/igb_regs.h | 9 ++ hw/net/trace-events | 2 +- 3 files changed, 316 insertions(+), 43 deletions(-) (limited to 'hw') diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index 9f43fe5..f6a5e23 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -267,6 +267,29 @@ igb_rx_use_legacy_descriptor(IGBCore *core) return false; } +typedef struct E1000ERingInfo { + int dbah; + int dbal; + int dlen; + int dh; + int dt; + int idx; +} E1000ERingInfo; + +static uint32_t +igb_rx_queue_desctyp_get(IGBCore *core, const E1000ERingInfo *r) +{ + return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK; +} + +static bool +igb_rx_use_ps_descriptor(IGBCore *core, const E1000ERingInfo *r) +{ + uint32_t desctyp = igb_rx_queue_desctyp_get(core, r); + return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT || + desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; +} + static inline bool igb_rss_enabled(IGBCore *core) { @@ -694,15 +717,6 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx) return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0; } -typedef struct E1000ERingInfo { - int dbah; - int dbal; - int dlen; - int dh; - int dt; - int idx; -} E1000ERingInfo; - static inline bool igb_ring_empty(IGBCore *core, const E1000ERingInfo *r) { @@ -1233,12 +1247,31 @@ igb_read_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, } static inline void -igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, - hwaddr *buff_addr) +igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc, + hwaddr *buff_addr) { *buff_addr = le64_to_cpu(desc->read.pkt_addr); } +static inline void +igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc, + hwaddr *buff_addr) +{ + buff_addr[0] = le64_to_cpu(desc->read.hdr_addr); + buff_addr[1] = le64_to_cpu(desc->read.pkt_addr); +} + +typedef struct IGBBAState { + uint16_t written[IGB_MAX_PS_BUFFERS]; + uint8_t cur_idx; +} IGBBAState; + +typedef struct IGBSplitDescriptorData { + bool sph; + bool hbo; + size_t hdr_len; +} IGBSplitDescriptorData; + typedef struct IGBPacketRxDMAState { size_t size; size_t total_size; @@ -1249,20 +1282,42 @@ typedef struct IGBPacketRxDMAState { uint32_t rx_desc_header_buf_size; struct iovec *iov; size_t iov_ofs; + bool do_ps; bool is_first; - uint16_t written; - hwaddr ba; + IGBBAState bastate; + hwaddr ba[IGB_MAX_PS_BUFFERS]; + IGBSplitDescriptorData ps_desc_data; } IGBPacketRxDMAState; static inline void -igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, - hwaddr *buff_addr) +igb_read_rx_descr(IGBCore *core, + union e1000_rx_desc_union *desc, + IGBPacketRxDMAState *pdma_st, + const E1000ERingInfo *r) { + uint32_t desc_type; + if (igb_rx_use_legacy_descriptor(core)) { - igb_read_lgcy_rx_descr(core, &desc->legacy, buff_addr); - } else { - igb_read_adv_rx_descr(core, &desc->adv, buff_addr); + igb_read_lgcy_rx_descr(core, &desc->legacy, &pdma_st->ba[1]); + pdma_st->ba[0] = 0; + return; + } + + /* advanced header split descriptor */ + if (igb_rx_use_ps_descriptor(core, r)) { + igb_read_adv_rx_split_buf_descr(core, &desc->adv, &pdma_st->ba[0]); + return; + } + + /* descriptor replication modes not supported */ + desc_type = igb_rx_queue_desctyp_get(core, r); + if (desc_type != E1000_SRRCTL_DESCTYPE_ADV_ONEBUF) { + trace_igb_wrn_rx_desc_modes_not_supp(desc_type); } + + /* advanced single buffer descriptor */ + igb_read_adv_rx_single_buf_descr(core, &desc->adv, &pdma_st->ba[1]); + pdma_st->ba[0] = 0; } static void @@ -1405,6 +1460,13 @@ igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, desc->status = (uint8_t) le32_to_cpu(status_flags); } +static bool +igb_rx_ps_descriptor_split_always(IGBCore *core, const E1000ERingInfo *r) +{ + uint32_t desctyp = igb_rx_queue_desctyp_get(core, r); + return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; +} + static uint16_t igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf) { @@ -1495,15 +1557,54 @@ igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, } static inline void -igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, - struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, - uint16_t etqf, bool ts, uint16_t length) +igb_write_adv_ps_rx_descr(IGBCore *core, + union e1000_adv_rx_desc *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + const E1000ERingInfo *r, + uint16_t etqf, + bool ts, + IGBPacketRxDMAState *pdma_st) +{ + size_t pkt_len; + uint16_t hdr_info = 0; + + if (pdma_st->do_ps) { + pkt_len = pdma_st->bastate.written[1]; + } else { + pkt_len = pdma_st->bastate.written[0] + pdma_st->bastate.written[1]; + } + + igb_write_adv_rx_descr(core, desc, pkt, rss_info, etqf, ts, pkt_len); + + hdr_info = (pdma_st->ps_desc_data.hdr_len << E1000_ADVRXD_HDR_LEN_OFFSET) & + E1000_ADVRXD_ADV_HDR_LEN_MASK; + hdr_info |= pdma_st->ps_desc_data.sph ? E1000_ADVRXD_HDR_SPH : 0; + desc->wb.lower.lo_dword.hdr_info = cpu_to_le16(hdr_info); + + desc->wb.upper.status_error |= cpu_to_le32( + pdma_st->ps_desc_data.hbo ? E1000_ADVRXD_ST_ERR_HBO_OFFSET : 0); +} + +static inline void +igb_write_rx_descr(IGBCore *core, + union e1000_rx_desc_union *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + uint16_t etqf, + bool ts, + IGBPacketRxDMAState *pdma_st, + const E1000ERingInfo *r) { if (igb_rx_use_legacy_descriptor(core)) { - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, length); + igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, + pdma_st->bastate.written[1]); + } else if (igb_rx_use_ps_descriptor(core, r)) { + igb_write_adv_ps_rx_descr(core, &desc->adv, pkt, rss_info, r, etqf, ts, + pdma_st); } else { igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, - etqf, ts, length); + etqf, ts, pdma_st->bastate.written[1]); } } @@ -1564,26 +1665,179 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi) ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; } +static bool +igb_do_ps(IGBCore *core, + const E1000ERingInfo *r, + struct NetRxPkt *pkt, + IGBPacketRxDMAState *pdma_st) +{ + bool hasip4, hasip6; + EthL4HdrProto l4hdr_proto; + bool fragment; + bool split_always; + size_t bheader_size; + size_t total_pkt_len; + + if (!igb_rx_use_ps_descriptor(core, r)) { + return false; + } + + total_pkt_len = net_rx_pkt_get_total_len(pkt); + bheader_size = igb_rxhdrbufsize(core, r); + split_always = igb_rx_ps_descriptor_split_always(core, r); + if (split_always && total_pkt_len <= bheader_size) { + pdma_st->ps_hdr_len = total_pkt_len; + pdma_st->ps_desc_data.hdr_len = total_pkt_len; + return true; + } + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if (hasip4) { + fragment = net_rx_pkt_get_ip4_info(pkt)->fragment; + } else if (hasip6) { + fragment = net_rx_pkt_get_ip6_info(pkt)->fragment; + } else { + pdma_st->ps_desc_data.hdr_len = bheader_size; + goto header_not_handled; + } + + if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) { + pdma_st->ps_desc_data.hdr_len = bheader_size; + goto header_not_handled; + } + + /* no header splitting for SCTP */ + if (!fragment && (l4hdr_proto == ETH_L4_HDR_PROTO_UDP || + l4hdr_proto == ETH_L4_HDR_PROTO_TCP)) { + pdma_st->ps_hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt); + } else { + pdma_st->ps_hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt); + } + + pdma_st->ps_desc_data.sph = true; + pdma_st->ps_desc_data.hdr_len = pdma_st->ps_hdr_len; + + if (pdma_st->ps_hdr_len > bheader_size) { + pdma_st->ps_desc_data.hbo = true; + goto header_not_handled; + } + + return true; + +header_not_handled: + if (split_always) { + pdma_st->ps_hdr_len = bheader_size; + return true; + } + + return false; +} + static void igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size) { - if (*size > pdma_st->rx_desc_packet_buf_size) { - *size = pdma_st->rx_desc_packet_buf_size; + if (pdma_st->do_ps && pdma_st->is_first) { + if (*size > pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len) { + *size = pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len; + } + } else { + if (*size > pdma_st->rx_desc_packet_buf_size) { + *size = pdma_st->rx_desc_packet_buf_size; + } + } +} + +static inline void +igb_write_hdr_frag_to_rx_buffers(IGBCore *core, + PCIDevice *d, + IGBPacketRxDMAState *pdma_st, + const char *data, + dma_addr_t data_len) +{ + assert(data_len <= pdma_st->rx_desc_header_buf_size - + pdma_st->bastate.written[0]); + pci_dma_write(d, + pdma_st->ba[0] + pdma_st->bastate.written[0], + data, data_len); + pdma_st->bastate.written[0] += data_len; + pdma_st->bastate.cur_idx = 1; +} + +static void +igb_write_header_to_rx_buffers(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + IGBPacketRxDMAState *pdma_st, + size_t *copy_size) +{ + size_t iov_copy; + size_t ps_hdr_copied = 0; + + if (!pdma_st->is_first) { + /* Leave buffer 0 of each descriptor except first */ + /* empty */ + pdma_st->bastate.cur_idx = 1; + return; } + + do { + iov_copy = MIN(pdma_st->ps_hdr_len - ps_hdr_copied, + pdma_st->iov->iov_len - pdma_st->iov_ofs); + + igb_write_hdr_frag_to_rx_buffers(core, d, pdma_st, + pdma_st->iov->iov_base, + iov_copy); + + *copy_size -= iov_copy; + ps_hdr_copied += iov_copy; + + pdma_st->iov_ofs += iov_copy; + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { + pdma_st->iov++; + pdma_st->iov_ofs = 0; + } + } while (ps_hdr_copied < pdma_st->ps_hdr_len); + + pdma_st->is_first = false; } static void igb_write_payload_frag_to_rx_buffers(IGBCore *core, PCIDevice *d, - hwaddr ba, - uint16_t *written, - uint32_t cur_buf_len, + IGBPacketRxDMAState *pdma_st, const char *data, dma_addr_t data_len) { - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); - pci_dma_write(d, ba + *written, data, data_len); - *written += data_len; + while (data_len > 0) { + assert(pdma_st->bastate.cur_idx < IGB_MAX_PS_BUFFERS); + + uint32_t cur_buf_bytes_left = + pdma_st->rx_desc_packet_buf_size - + pdma_st->bastate.written[pdma_st->bastate.cur_idx]; + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); + + trace_igb_rx_desc_buff_write( + pdma_st->bastate.cur_idx, + pdma_st->ba[pdma_st->bastate.cur_idx], + pdma_st->bastate.written[pdma_st->bastate.cur_idx], + data, + bytes_to_write); + + pci_dma_write(d, + pdma_st->ba[pdma_st->bastate.cur_idx] + + pdma_st->bastate.written[pdma_st->bastate.cur_idx], + data, bytes_to_write); + + pdma_st->bastate.written[pdma_st->bastate.cur_idx] += bytes_to_write; + data += bytes_to_write; + data_len -= bytes_to_write; + + if (pdma_st->bastate.written[pdma_st->bastate.cur_idx] == + pdma_st->rx_desc_packet_buf_size) { + pdma_st->bastate.cur_idx++; + } + } } static void @@ -1600,9 +1854,7 @@ igb_write_payload_to_rx_buffers(IGBCore *core, while (*copy_size) { iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); igb_write_payload_frag_to_rx_buffers(core, d, - pdma_st->ba, - &pdma_st->written, - pdma_st->rx_desc_packet_buf_size, + pdma_st, pdma_st->iov->iov_base + pdma_st->iov_ofs, iov_copy); @@ -1618,9 +1870,7 @@ igb_write_payload_to_rx_buffers(IGBCore *core, if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { /* Simulate FCS checksum presence in the last descriptor */ igb_write_payload_frag_to_rx_buffers(core, d, - pdma_st->ba, - &pdma_st->written, - pdma_st->rx_desc_packet_buf_size, + pdma_st, (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); } @@ -1634,7 +1884,7 @@ igb_write_to_rx_buffers(IGBCore *core, { size_t copy_size; - if (!pdma_st->ba) { + if (!(pdma_st->ba)[1] || (pdma_st->do_ps && !(pdma_st->ba[0]))) { /* as per intel docs; skip descriptors with null buf addr */ trace_e1000e_rx_null_descriptor(); return; @@ -1648,6 +1898,14 @@ igb_write_to_rx_buffers(IGBCore *core, igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); copy_size = pdma_st->size - pdma_st->desc_offset; igb_truncate_to_descriptor_size(pdma_st, ©_size); + + /* For PS mode copy the packet header first */ + if (pdma_st->do_ps) { + igb_write_header_to_rx_buffers(core, pkt, d, pdma_st, ©_size); + } else { + pdma_st->bastate.cur_idx = 1; + } + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, ©_size); } @@ -1678,8 +1936,10 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, d = core->owner; } + pdma_st.do_ps = igb_do_ps(core, rxi, pkt, &pdma_st); + do { - pdma_st.written = 0; + memset(&pdma_st.bastate, 0, sizeof(IGBBAState)); bool is_last = false; if (igb_ring_empty(core, rxi)) { @@ -1690,7 +1950,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, pci_dma_read(d, base, &desc, rx_desc_len); trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); - igb_read_rx_descr(core, &desc, &pdma_st.ba); + igb_read_rx_descr(core, &desc, &pdma_st, rxi); igb_write_to_rx_buffers(core, pkt, d, &pdma_st); pdma_st.desc_offset += pdma_st.desc_size; @@ -1698,8 +1958,12 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, is_last = true; } - igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, - rss_info, etqf, ts, pdma_st.written); + igb_write_rx_descr(core, &desc, + is_last ? pkt : NULL, + rss_info, + etqf, ts, + &pdma_st, + rxi); igb_pci_dma_write_rx_desc(core, d, base, &desc, rx_desc_len); igb_ring_advance(core, rxi, rx_desc_len / E1000_MIN_RX_DESC_LEN); } while (pdma_st.desc_offset < pdma_st.total_size); diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index 36763f2..ed7427b 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -452,6 +452,7 @@ union e1000_adv_rx_desc { #define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 #define E1000_SRRCTL_DROP_EN 0x80000000 @@ -699,6 +700,14 @@ union e1000_adv_rx_desc { #define E1000_ADVRXD_PKT_UDP BIT(5) #define E1000_ADVRXD_PKT_SCTP BIT(6) +#define IGB_MAX_PS_BUFFERS 2 + +#define E1000_ADVRXD_HDR_LEN_OFFSET (21 - 16) +#define E1000_ADVRXD_ADV_HDR_LEN_MASK ((BIT(10) - 1) << \ + E1000_ADVRXD_HDR_LEN_OFFSET) +#define E1000_ADVRXD_HDR_SPH BIT(15) +#define E1000_ADVRXD_ST_ERR_HBO_OFFSET BIT(3 + 20) + static inline uint8_t igb_ivar_entry_rx(uint8_t i) { return i < 8 ? i * 4 : (i - 8) * 4 + 2; diff --git a/hw/net/trace-events b/hw/net/trace-events index b8305c0..3abfd65 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -278,7 +278,7 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED" igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d" igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" -igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" +igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer %u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X" -- cgit v1.1 From e710f9c47008287086b73cb747c1b1d2a4e17bfb Mon Sep 17 00:00:00 2001 From: Tomasz Dzieciol Date: Mon, 29 May 2023 16:01:53 +0200 Subject: e1000e: rename e1000e_ba_state and e1000e_write_hdr_to_rx_buffers Rename e1000e_ba_state according and e1000e_write_hdr_to_rx_buffers for consistency with IGB. Signed-off-by: Tomasz Dzieciol Reviewed-by: Akihiko Odaki Tested-by: Akihiko Odaki Signed-off-by: Jason Wang --- hw/net/e1000e_core.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'hw') diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index cc243b7..e324c02 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -1397,17 +1397,17 @@ e1000e_pci_dma_write_rx_desc(E1000ECore *core, dma_addr_t addr, } } -typedef struct e1000e_ba_state_st { +typedef struct E1000EBAState { uint16_t written[MAX_PS_BUFFERS]; uint8_t cur_idx; -} e1000e_ba_state; +} E1000EBAState; static inline void -e1000e_write_hdr_to_rx_buffers(E1000ECore *core, - hwaddr ba[MAX_PS_BUFFERS], - e1000e_ba_state *bastate, - const char *data, - dma_addr_t data_len) +e1000e_write_hdr_frag_to_rx_buffers(E1000ECore *core, + hwaddr ba[MAX_PS_BUFFERS], + E1000EBAState *bastate, + const char *data, + dma_addr_t data_len) { assert(data_len <= core->rxbuf_sizes[0] - bastate->written[0]); @@ -1420,7 +1420,7 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core, static void e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, hwaddr ba[MAX_PS_BUFFERS], - e1000e_ba_state *bastate, + E1000EBAState *bastate, const char *data, dma_addr_t data_len) { @@ -1530,7 +1530,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, do { hwaddr ba[MAX_PS_BUFFERS]; - e1000e_ba_state bastate = { { 0 } }; + E1000EBAState bastate = { { 0 } }; bool is_last = false; desc_size = total_size - desc_offset; @@ -1568,8 +1568,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, iov_copy = MIN(ps_hdr_len - ps_hdr_copied, iov->iov_len - iov_ofs); - e1000e_write_hdr_to_rx_buffers(core, ba, &bastate, - iov->iov_base, iov_copy); + e1000e_write_hdr_frag_to_rx_buffers(core, ba, + &bastate, + iov->iov_base, + iov_copy); copy_size -= iov_copy; ps_hdr_copied += iov_copy; @@ -1585,8 +1587,8 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, } else { /* Leave buffer 0 of each descriptor except first */ /* empty as per spec 7.1.5.1 */ - e1000e_write_hdr_to_rx_buffers(core, ba, &bastate, - NULL, 0); + e1000e_write_hdr_frag_to_rx_buffers(core, ba, &bastate, + NULL, 0); } } -- cgit v1.1 From 2a6cb383e2ec7224d8631c3c0a324bff469c9c64 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 24 Aug 2023 16:32:21 +0100 Subject: hw/net/fsl_etsec/rings.c: Avoid variable length array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In fill_rx_bd() we create a variable length array of size etsec->rx_padding. In fact we know that this will never be larger than 64 bytes, because rx_padding is set in rx_init_frame() in a way that ensures it is only that large. Use a fixed sized array and assert that it is big enough. Since padd[] is now potentially rather larger than the actual padding required, adjust the memset() we do on it to match the size that we write with cpu_physical_memory_write(), rather than clearing the entire array. The codebase has very few VLAs, and if we can get rid of them all we can make the compiler error on new additions. This is a defensive measure against security bugs where an on-stack dynamic allocation isn't correctly size-checked (e.g. CVE-2021-3527). Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Jason Wang --- hw/net/fsl_etsec/rings.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c index 788463f..2f2f359 100644 --- a/hw/net/fsl_etsec/rings.c +++ b/hw/net/fsl_etsec/rings.c @@ -372,6 +372,12 @@ void etsec_walk_tx_ring(eTSEC *etsec, int ring_nbr) etsec->regs[TSTAT].value |= 1 << (31 - ring_nbr); } +/* + * rx_init_frame() ensures we never do more padding than this + * (checksum plus minimum data packet size) + */ +#define MAX_RX_PADDING 64 + static void fill_rx_bd(eTSEC *etsec, eTSEC_rxtx_bd *bd, const uint8_t **buf, @@ -380,9 +386,11 @@ static void fill_rx_bd(eTSEC *etsec, uint16_t to_write; hwaddr bufptr = bd->bufptr + ((hwaddr)(etsec->regs[TBDBPH].value & 0xF) << 32); - uint8_t padd[etsec->rx_padding]; + uint8_t padd[MAX_RX_PADDING]; uint8_t rem; + assert(etsec->rx_padding <= MAX_RX_PADDING); + RING_DEBUG("eTSEC fill Rx buffer @ 0x%016" HWADDR_PRIx " size:%zu(padding + crc:%u) + fcb:%u\n", bufptr, *size, etsec->rx_padding, etsec->rx_fcb_size); @@ -426,7 +434,7 @@ static void fill_rx_bd(eTSEC *etsec, rem = MIN(etsec->regs[MRBLR].value - bd->length, etsec->rx_padding); if (rem > 0) { - memset(padd, 0x0, sizeof(padd)); + memset(padd, 0x0, rem); etsec->rx_padding -= rem; *size -= rem; bd->length += rem; -- cgit v1.1 From 1257065783df26a83934195a5cd70f951ef9c51d Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 24 Aug 2023 16:32:22 +0100 Subject: hw/net/rocker: Avoid variable length array Replace an on-stack variable length array in of_dpa_ig() with a g_autofree heap allocation. The codebase has very few VLAs, and if we can get rid of them all we can make the compiler error on new additions. This is a defensive measure against security bugs where an on-stack dynamic allocation isn't correctly size-checked (e.g. CVE-2021-3527). Signed-off-by: Peter Maydell Reviewed-by: Francisco Iglesias Signed-off-by: Jason Wang --- hw/net/rocker/rocker_of_dpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c index dfe4754..5e16056 100644 --- a/hw/net/rocker/rocker_of_dpa.c +++ b/hw/net/rocker/rocker_of_dpa.c @@ -1043,7 +1043,7 @@ static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id) static ssize_t of_dpa_ig(World *world, uint32_t pport, const struct iovec *iov, int iovcnt) { - struct iovec iov_copy[iovcnt + 2]; + g_autofree struct iovec *iov_copy = g_new(struct iovec, iovcnt + 2); OfDpaFlowContext fc = { .of_dpa = world_private(world), .in_pport = pport, -- cgit v1.1