author     Peter Maydell <peter.maydell@linaro.org>  2023-03-28 13:26:49 +0100
committer  Peter Maydell <peter.maydell@linaro.org>  2023-03-28 13:26:49 +0100
commit     52dd5f6f70276518dd21cd3a72cb95495df33bc9 (patch)
tree       754c8efa8dcda39407b818407f9a8e423aff823f /hw
parent     ecaefc8f50ea5058e478c088de7cf70f56338d5b (diff)
parent     fba7c3b788dfcb99a3f9253f7d99cc0d217d6d3c (diff)
Merge tag 'net-pull-request' of https://github.com/jasowang/qemu into staging
# -----BEGIN PGP SIGNATURE-----
# Version: GnuPG v1
#
# iQEcBAABAgAGBQJkIncaAAoJEO8Ells5jWIR6bgH/icQAN1a0PBHh2lujmob8KvE
# IZ0KYuJMDcA8HFdhnzxPPKj7/77AM1DgmrmVwqLKTr6j64CZYr2Uc0yeyPa0f/0Y
# TtePW70bjoUkRm/dDdFe32xViO4O33pGQia6olR6QwmfdDbBBJjAucmlep8NClrh
# EooQ2WfXPBqrC6ckKZ7vEvgKV2sDl2XxYIr3kU3MiB4j4b1lrGHE+XSv7cXOC+at
# G2vYqbimipZstHZCJYeB5tRz+FXbAI3ZNCGtYpxeVyabrlHS+n+X+gttaswcvVIF
# ln6yidVGg/Ot3bi0qiV1WihpqNCWO0ghhf7wIEubAIIJlnE5hXULF4uFVfz+rRE=
# =HjJv
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 28 Mar 2023 06:11:54 BST
# gpg: using RSA key EF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg: It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211
* tag 'net-pull-request' of https://github.com/jasowang/qemu:
igb: respect VMVIR and VMOLR for VLAN
igb: implement VF Tx and Rx stats
igb: respect E1000_VMOLR_RSSE
igb: check oversized packets for VMDq
igb: implement VFRE and VFTE registers
igb: add ICR_RXDW
igb: handle PF/VF reset properly
MAINTAINERS: Add Sriram Yagnaraman as an igb reviewer
hw/net/net_tx_pkt: Align l3_hdr
hw/net/net_tx_pkt: Ignore ECN bit
igb: Fix DMA requester specification for Tx packet
igb: Save more Tx states
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw')
-rw-r--r--  hw/net/e1000e_core.c |   6
-rw-r--r--  hw/net/e1000x_regs.h |   4
-rw-r--r--  hw/net/igb.c         |  26
-rw-r--r--  hw/net/igb_core.c    | 256
-rw-r--r--  hw/net/igb_core.h    |   9
-rw-r--r--  hw/net/igb_regs.h    |   6
-rw-r--r--  hw/net/net_tx_pkt.c  |  30
-rw-r--r--  hw/net/net_tx_pkt.h  |   3
-rw-r--r--  hw/net/trace-events  |   2
-rw-r--r--  hw/net/vmxnet3.c     |   4
10 files changed, 253 insertions, 93 deletions
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index 4d9679c..c0c09b6 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -765,7 +765,7 @@ e1000e_process_tx_desc(E1000ECore *core,
     }
 
     tx->skip_cp = false;
-    net_tx_pkt_reset(tx->tx_pkt);
+    net_tx_pkt_reset(tx->tx_pkt, core->owner);
 
     tx->sum_needed = 0;
     tx->cptse = 0;
@@ -3447,7 +3447,7 @@ e1000e_core_pci_uninit(E1000ECore *core)
     qemu_del_vm_change_state_handler(core->vmstate);
 
     for (i = 0; i < E1000E_NUM_QUEUES; i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
         net_tx_pkt_uninit(core->tx[i].tx_pkt);
     }
 
@@ -3572,7 +3572,7 @@ static void e1000e_reset(E1000ECore *core, bool sw)
     e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
 
    for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
         memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
         core->tx[i].skip_cp = false;
     }
diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
index c0832fa..6d3c4c6 100644
--- a/hw/net/e1000x_regs.h
+++ b/hw/net/e1000x_regs.h
@@ -335,6 +335,7 @@
 #define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */
 #define E1000_ICR_RXO 0x00000040 /* rx overrun */
 #define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */
+#define E1000_ICR_RXDW 0x00000080 /* rx desc written back */
 #define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */
 #define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */
 #define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */
@@ -378,6 +379,7 @@
 #define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
 #define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */
 #define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_ICS_RXDW E1000_ICR_RXDW /* rx desc written back */
 #define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */
 #define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
@@ -407,6 +409,7 @@
 #define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
 #define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */
 #define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_IMS_RXDW E1000_ICR_RXDW /* rx desc written back */
 #define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */
 #define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
@@ -441,6 +444,7 @@
 #define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
 #define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */
 #define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_IMC_RXDW E1000_ICR_RXDW /* rx desc written back */
 #define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */
 #define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
diff --git a/hw/net/igb.c b/hw/net/igb.c
index c6d753d..51a7e91 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -502,16 +502,28 @@ static int igb_post_load(void *opaque, int version_id)
     return igb_core_post_load(&s->core);
 }
 
-static const VMStateDescription igb_vmstate_tx = {
-    .name = "igb-tx",
+static const VMStateDescription igb_vmstate_tx_ctx = {
+    .name = "igb-tx-ctx",
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT16(vlan, struct igb_tx),
-        VMSTATE_UINT16(mss, struct igb_tx),
-        VMSTATE_BOOL(tse, struct igb_tx),
-        VMSTATE_BOOL(ixsm, struct igb_tx),
-        VMSTATE_BOOL(txsm, struct igb_tx),
+        VMSTATE_UINT32(vlan_macip_lens, struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(seqnum_seed, struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(type_tucmd_mlhl, struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(mss_l4len_idx, struct e1000_adv_tx_context_desc),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription igb_vmstate_tx = {
+    .name = "igb-tx",
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT_ARRAY(ctx, struct igb_tx, 2, 0, igb_vmstate_tx_ctx,
+                             struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(first_cmd_type_len, struct igb_tx),
+        VMSTATE_UINT32(first_olinfo_status, struct igb_tx),
         VMSTATE_BOOL(first, struct igb_tx),
         VMSTATE_BOOL(skip_cp, struct igb_tx),
         VMSTATE_END_OF_LIST()
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index a7c7bfd..d733fed 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -386,11 +386,35 @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx,
     info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash);
 }
 
+static void
+igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
+                   uint16_t vlan, bool insert_vlan)
+{
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+        if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) {
+            /* always insert default VLAN */
+            insert_vlan = true;
+            vlan = core->mac[VMVIR0 + pool] & 0xffff;
+        } else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) {
+            insert_vlan = false;
+        }
+    }
+
+    if (insert_vlan && e1000x_vlan_enabled(core->mac)) {
+        net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
+                                        core->mac[VET] & 0xffff);
+    }
+}
+
 static bool
 igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
 {
-    if (tx->tse) {
-        if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss)) {
+    if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
+        uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
+        uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16;
+        if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) {
             return false;
         }
 
@@ -399,13 +423,13 @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
         return true;
     }
 
-    if (tx->txsm) {
+    if (tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) {
         if (!net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0)) {
             return false;
         }
     }
 
-    if (tx->ixsm) {
+    if (tx->first_olinfo_status & E1000_ADVTXD_POTS_IXSM) {
         net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt);
     }
 
@@ -490,7 +514,7 @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int queue_index)
 }
 
 static void
-igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
+igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
 {
     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
                                     PTC1023, PTC1522 };
@@ -517,17 +541,25 @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
     core->mac[GPTC] = core->mac[TPT];
     core->mac[GOTCL] = core->mac[TOTL];
     core->mac[GOTCH] = core->mac[TOTH];
+
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+        core->mac[PVFGOTC0 + (pool * 64)] += tot_len;
+        core->mac[PVFGPTC0 + (pool * 64)]++;
+    }
 }
 
 static void
 igb_process_tx_desc(IGBCore *core,
+                    PCIDevice *dev,
                     struct igb_tx *tx,
                     union e1000_adv_tx_desc *tx_desc,
                     int queue_index)
 {
     struct e1000_adv_tx_context_desc *tx_ctx_desc;
     uint32_t cmd_type_len;
-    uint32_t olinfo_status;
+    uint32_t idx;
     uint64_t buffer_addr;
     uint16_t length;
 
@@ -538,20 +570,19 @@ igb_process_tx_desc(IGBCore *core,
         E1000_ADVTXD_DTYP_DATA) {
         /* advanced transmit data descriptor */
         if (tx->first) {
-            olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
-
-            tx->tse = !!(cmd_type_len & E1000_ADVTXD_DCMD_TSE);
-            tx->ixsm = !!(olinfo_status & E1000_ADVTXD_POTS_IXSM);
-            tx->txsm = !!(olinfo_status & E1000_ADVTXD_POTS_TXSM);
-
+            tx->first_cmd_type_len = cmd_type_len;
+            tx->first_olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
             tx->first = false;
         }
     } else if ((cmd_type_len & E1000_ADVTXD_DTYP_CTXT) ==
               E1000_ADVTXD_DTYP_CTXT) {
         /* advanced transmit context descriptor */
         tx_ctx_desc = (struct e1000_adv_tx_context_desc *)tx_desc;
-        tx->vlan = le32_to_cpu(tx_ctx_desc->vlan_macip_lens) >> 16;
-        tx->mss = le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 16;
+        idx = (le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 4) & 1;
+        tx->ctx[idx].vlan_macip_lens = le32_to_cpu(tx_ctx_desc->vlan_macip_lens);
+        tx->ctx[idx].seqnum_seed = le32_to_cpu(tx_ctx_desc->seqnum_seed);
+        tx->ctx[idx].type_tucmd_mlhl = le32_to_cpu(tx_ctx_desc->type_tucmd_mlhl);
+        tx->ctx[idx].mss_l4len_idx = le32_to_cpu(tx_ctx_desc->mss_l4len_idx);
         return;
     } else {
         /* unknown descriptor type */
@@ -574,18 +605,19 @@ igb_process_tx_desc(IGBCore *core,
 
     if (cmd_type_len & E1000_TXD_CMD_EOP) {
         if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
-            if (cmd_type_len & E1000_TXD_CMD_VLE) {
-                net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan,
-                    core->mac[VET] & 0xffff);
-            }
+            idx = (tx->first_olinfo_status >> 4) & 1;
+            igb_tx_insert_vlan(core, queue_index, tx,
+                               tx->ctx[idx].vlan_macip_lens >> 16,
+                               !!(cmd_type_len & E1000_TXD_CMD_VLE));
+
             if (igb_tx_pkt_send(core, tx, queue_index)) {
-                igb_on_tx_done_update_stats(core, tx->tx_pkt);
+                igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
             }
         }
 
         tx->first = true;
         tx->skip_cp = false;
-        net_tx_pkt_reset(tx->tx_pkt);
+        net_tx_pkt_reset(tx->tx_pkt, dev);
     }
 }
 
@@ -780,6 +812,18 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
     return igb_tx_wb_eic(core, txi->idx);
 }
 
+static inline bool
+igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+{
+    bool vmdq = core->mac[MRQC] & 1;
+    uint16_t qn = txi->idx;
+    uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+    return (core->mac[TCTL] & E1000_TCTL_EN) &&
+        (!vmdq || core->mac[VFTE] & BIT(pool)) &&
+        (core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE);
+}
+
 static void
 igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 {
@@ -789,8 +833,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
     const E1000E_RingInfo *txi = txr->i;
     uint32_t eic = 0;
 
-    /* TODO: check if the queue itself is enabled too. */
-    if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
+    if (!igb_tx_enabled(core, txi)) {
         trace_e1000e_tx_disabled();
         return;
     }
@@ -800,6 +843,8 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
         d = core->owner;
     }
 
+    net_tx_pkt_reset(txr->tx->tx_pkt, d);
+
     while (!igb_ring_empty(core, txi)) {
         base = igb_ring_head_descr(core, txi);
 
@@ -808,7 +853,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
         trace_e1000e_tx_descr((void *)(intptr_t)desc.read.buffer_addr,
                               desc.read.cmd_type_len, desc.wb.status);
 
-        igb_process_tx_desc(core, txr->tx, &desc, txi->idx);
+        igb_process_tx_desc(core, d, txr->tx, &desc, txi->idx);
         igb_ring_advance(core, txi, 1);
         eic |= igb_txdesc_writeback(core, base, &desc, txi);
     }
@@ -866,6 +911,9 @@ igb_can_receive(IGBCore *core)
     for (i = 0; i < IGB_NUM_QUEUES; i++) {
         E1000E_RxRing rxr;
+        if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
+            continue;
+        }
 
         igb_rx_ring_init(core, &rxr, i);
         if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) {
@@ -901,12 +949,26 @@ igb_rx_l4_cso_enabled(IGBCore *core)
     return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
 }
 
+static bool
+igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
+{
+    uint16_t pool = qn % IGB_NUM_VM_POOLS;
+    bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
+    int max_ethernet_lpe_size =
+        core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
+    int max_ethernet_vlan_size = 1522;
+
+    return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
+}
+
 static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
-                                   E1000E_RSSInfo *rss_info, bool *external_tx)
+                                   size_t size, E1000E_RSSInfo *rss_info,
+                                   bool *external_tx)
 {
     static const int ta_shift[] = { 4, 3, 2, 0 };
     uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
     uint16_t queues = 0;
+    uint16_t oversized = 0;
     uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
     bool accepted = false;
     int i;
@@ -932,7 +994,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
 
     if (core->mac[MRQC] & 1) {
         if (is_broadcast_ether_addr(ehdr->h_dest)) {
-            for (i = 0; i < 8; i++) {
+            for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
                 if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) {
                     queues |= BIT(i);
                 }
@@ -966,7 +1028,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
             f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
             f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
             if (macp[f >> 5] & (1 << (f & 0x1f))) {
-                for (i = 0; i < 8; i++) {
+                for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
                     if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) {
                         queues |= BIT(i);
                     }
@@ -989,7 +1051,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
                 }
             }
         } else {
-            for (i = 0; i < 8; i++) {
+            for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
                 if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) {
                     mask |= BIT(i);
                 }
@@ -1005,9 +1067,34 @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
             queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT);
         }
 
-        igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info);
-        if (rss_info->queue & 1) {
-            queues <<= 8;
+        queues &= core->mac[VFRE];
+        if (queues) {
+            for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+                if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
+                    oversized |= BIT(i);
+                }
+            }
+            /* 8.19.37 increment ROC if packet is oversized for all queues */
+            if (oversized == queues) {
+                trace_e1000x_rx_oversized(size);
+                e1000x_inc_reg_if_not_full(core->mac, ROC);
+            }
+            queues &= ~oversized;
+        }
+
+        if (queues) {
+            igb_rss_parse_packet(core, core->rx_pkt,
+                                 external_tx != NULL, rss_info);
+            /* Sec 8.26.1: PQn = VFn + VQn*8 */
+            if (rss_info->queue & 1) {
+                for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+                    if ((queues & BIT(i)) &&
+                        (core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) {
+                        queues |= BIT(i + IGB_NUM_VM_POOLS);
+                        queues &= ~BIT(i);
+                    }
+                }
+            }
         }
     } else {
         switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
@@ -1350,7 +1437,8 @@ igb_write_to_rx_buffers(IGBCore *core,
 }
 
 static void
-igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
+igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+                    size_t data_size, size_t data_fcs_size)
 {
     e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
 
@@ -1366,6 +1454,16 @@ igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
     default:
         break;
     }
+
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+
+        core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
+        core->mac[PVFGPRC0 + (pool * 64)]++;
+        if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
+            core->mac[PVFMPRC0 + (pool * 64)]++;
+        }
+    }
 }
 
 static inline bool
@@ -1467,7 +1565,21 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
 
     } while (desc_offset < total_size);
 
-    igb_update_rx_stats(core, size, total_size);
+    igb_update_rx_stats(core, rxi, size, total_size);
+}
+
+static bool
+igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+{
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+        /* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */
+        return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ?
+                core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN :
+                core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN;
+    }
+
+    return e1000x_vlan_enabled(core->mac);
 }
 
 static inline void
@@ -1550,34 +1662,36 @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
     ehdr = PKT_GET_ETH_HDR(filter_buf);
     net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
+    net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
 
-    net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
-                               e1000x_vlan_enabled(core->mac),
-                               core->mac[VET] & 0xffff);
-
-    queues = igb_receive_assign(core, ehdr, &rss_info, external_tx);
+    queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
     if (!queues) {
         trace_e1000e_rx_flt_dropped();
         return orig_size;
     }
 
-    total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
-        e1000x_fcs_len(core->mac);
-
     for (i = 0; i < IGB_NUM_QUEUES; i++) {
-        if (!(queues & BIT(i))) {
+        if (!(queues & BIT(i)) ||
+            !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
             continue;
        }
 
         igb_rx_ring_init(core, &rxr, i);
 
+        net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
+                                   igb_rx_strip_vlan(core, rxr.i),
+                                   core->mac[VET] & 0xffff);
+
+        total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
+            e1000x_fcs_len(core->mac);
+
         if (!igb_has_rxbufs(core, rxr.i, total_size)) {
             n |= E1000_ICS_RXO;
             trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
             continue;
         }
 
-        n |= E1000_ICR_RXT0;
+        n |= E1000_ICR_RXDW;
 
         igb_rx_fix_l4_csum(core, core->rx_pkt);
         igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info);
@@ -1892,14 +2006,6 @@ static void igb_set_eims(IGBCore *core, int index, uint32_t val)
     igb_update_interrupt_state(core);
 }
 
-static void igb_vf_reset(IGBCore *core, uint16_t vfn)
-{
-    /* TODO: Reset of the queue enable and the interrupt registers of the VF. */
-
-    core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
-    core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD;
-}
-
 static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
 {
     uint32_t ent = core->mac[VTIVAR_MISC + vfn];
@@ -1977,6 +2083,24 @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)
     }
 }
 
+static void igb_vf_reset(IGBCore *core, uint16_t vfn)
+{
+    uint16_t qn0 = vfn;
+    uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
+
+    /* disable Rx and Tx for the VF*/
+    core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+    core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+    core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
+    core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
+    core->mac[VFRE] &= ~BIT(vfn);
+    core->mac[VFTE] &= ~BIT(vfn);
+    /* indicate VF reset to PF */
+    core->mac[VFLRE] |= BIT(vfn);
+    /* VFLRE and mailbox use the same interrupt cause */
+    mailbox_interrupt_to_pf(core);
+}
+
 static void igb_w1c(IGBCore *core, int index, uint32_t val)
 {
     core->mac[index] &= ~val;
@@ -2231,14 +2355,20 @@ igb_set_status(IGBCore *core, int index, uint32_t val)
 static void
 igb_set_ctrlext(IGBCore *core, int index, uint32_t val)
 {
-    trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
-                                     !!(val & E1000_CTRL_EXT_SPD_BYPS));
-
-    /* TODO: PFRSTD */
+    trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
+                                  !!(val & E1000_CTRL_EXT_SPD_BYPS),
+                                  !!(val & E1000_CTRL_EXT_PFRSTD));
 
     /* Zero self-clearing bits */
     val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
     core->mac[CTRL_EXT] = val;
+
+    if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PFRSTD) {
+        for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+            core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
+            core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD;
+        }
+    }
 }
 
 static void
@@ -3825,7 +3955,7 @@ igb_core_pci_realize(IGBCore *core,
     core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core);
 
     for (i = 0; i < IGB_NUM_QUEUES; i++) {
-        net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, E1000E_MAX_TX_FRAGS);
+        net_tx_pkt_init(&core->tx[i].tx_pkt, NULL, E1000E_MAX_TX_FRAGS);
     }
 
     net_rx_pkt_init(&core->rx_pkt);
@@ -3850,7 +3980,7 @@ igb_core_pci_uninit(IGBCore *core)
     qemu_del_vm_change_state_handler(core->vmstate);
 
     for (i = 0; i < IGB_NUM_QUEUES; i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, NULL);
         net_tx_pkt_uninit(core->tx[i].tx_pkt);
     }
 
@@ -3899,6 +4029,7 @@ igb_phy_reg_init[] = {
 static const uint32_t igb_mac_reg_init[] = {
     [LEDCTL] = 2 | (3 << 8) | BIT(15) | (6 << 16) | (7 << 24),
     [EEMNGCTL] = BIT(31),
+    [TXDCTL0] = E1000_TXDCTL_QUEUE_ENABLE,
     [RXDCTL0] = E1000_RXDCTL_QUEUE_ENABLE | (1 << 16),
     [RXDCTL1] = 1 << 16,
     [RXDCTL2] = 1 << 16,
@@ -4021,14 +4152,15 @@ static void igb_reset(IGBCore *core, bool sw)
 
     e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
 
+    for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+        /* Set RSTI, so VF can identify a PF reset is in progress */
+        core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTI;
+    }
+
     for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
         tx = &core->tx[i];
-        net_tx_pkt_reset(tx->tx_pkt);
-        tx->vlan = 0;
-        tx->mss = 0;
-        tx->tse = false;
-        tx->ixsm = false;
-        tx->txsm = false;
+        net_tx_pkt_reset(tx->tx_pkt, NULL);
+        memset(tx->ctx, 0, sizeof(tx->ctx));
         tx->first = true;
         tx->skip_cp = false;
     }
diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h
index 814c1e2..9cbbfd5 100644
--- a/hw/net/igb_core.h
+++ b/hw/net/igb_core.h
@@ -47,6 +47,7 @@
 #define IGB_MSIX_VEC_NUM (10)
 #define IGBVF_MSIX_VEC_NUM (3)
 #define IGB_NUM_QUEUES (16)
+#define IGB_NUM_VM_POOLS (8)
 
 typedef struct IGBCore IGBCore;
 
@@ -72,11 +73,9 @@ struct IGBCore {
     QEMUTimer *autoneg_timer;
 
     struct igb_tx {
-        uint16_t vlan; /* VLAN Tag */
-        uint16_t mss;  /* Maximum Segment Size */
-        bool tse;      /* TCP/UDP Segmentation Enable */
-        bool ixsm;     /* Insert IP Checksum */
-        bool txsm;     /* Insert TCP/UDP Checksum */
+        struct e1000_adv_tx_context_desc ctx[2];
+        uint32_t first_cmd_type_len;
+        uint32_t first_olinfo_status;
 
         bool first;
         bool skip_cp;
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index 00934d4..c5c5b3c 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -160,6 +160,9 @@ union e1000_adv_rx_desc {
 #define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
 #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000
 
+/* Additional Transmit Descriptor Control definitions */
+#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */
+
 /* Additional Receive Descriptor Control definitions */
 #define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Rx Queue */
 
@@ -240,6 +243,9 @@ union e1000_adv_rx_desc {
 
 /* from igb/e1000_defines.h */
 
+/* Physical Func Reset Done Indication */
+#define E1000_CTRL_EXT_PFRSTD 0x00004000
+
 #define E1000_IVAR_VALID 0x80
 #define E1000_GPIE_NSICR 0x00000001
 #define E1000_GPIE_MSIX_MODE 0x00000010
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index 986a3ad..8dc8568 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -43,7 +43,11 @@ struct NetTxPkt {
     struct iovec *vec;
 
     uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];
-    uint8_t l3_hdr[ETH_MAX_IP_DGRAM_LEN];
+    union {
+        struct ip_header ip;
+        struct ip6_header ip6;
+        uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
+    } l3_hdr;
 
     uint32_t payload_len;
 
@@ -89,16 +93,14 @@ void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
 {
     uint16_t csum;
     assert(pkt);
-    struct ip_header *ip_hdr;
-    ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
 
-    ip_hdr->ip_len = cpu_to_be16(pkt->payload_len +
+    pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
         pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
 
-    ip_hdr->ip_sum = 0;
-    csum = net_raw_checksum((uint8_t *)ip_hdr,
+    pkt->l3_hdr.ip.ip_sum = 0;
+    csum = net_raw_checksum(pkt->l3_hdr.octets,
         pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
-    ip_hdr->ip_sum = cpu_to_be16(csum);
+    pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
 }
 
 void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
@@ -443,7 +445,7 @@ void net_tx_pkt_dump(struct NetTxPkt *pkt)
 #endif
 }
 
-void net_tx_pkt_reset(struct NetTxPkt *pkt)
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
 {
     int i;
 
@@ -467,6 +469,7 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt)
                              pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0);
         }
     }
+    pkt->pci_dev = pci_dev;
     pkt->raw_frags = 0;
 
     pkt->hdr_len = 0;
@@ -795,11 +798,13 @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
 {
     assert(pkt);
 
+    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
+
     /*
      * Since underlying infrastructure does not support IP datagrams longer
      * than 64K we should drop such packets and don't even try to send
     */
-    if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) {
+    if (VIRTIO_NET_HDR_GSO_NONE != gso_type) {
        if (pkt->payload_len >
             ETH_MAX_IP_DGRAM_LEN -
             pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
@@ -807,7 +812,7 @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
         }
     }
 
-    if (offload || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
+    if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
         if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
             net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                                   pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
@@ -829,15 +834,14 @@ void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
 {
     struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
     if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
-        struct ip6_header *ip6 = (struct ip6_header *) pkt->l3_hdr;
         /*
          * TODO: if qemu would support >64K packets - add jumbo option check
          * something like that:
          * 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
          */
-        if (ip6->ip6_plen == 0) {
+        if (pkt->l3_hdr.ip6.ip6_plen == 0) {
             if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
-                ip6->ip6_plen = htons(pkt->payload_len);
+                pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
             }
             /*
              * TODO: if qemu would support >64K packets
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
index f57b4e0..e5ce6f2 100644
--- a/hw/net/net_tx_pkt.h
+++ b/hw/net/net_tx_pkt.h
@@ -148,9 +148,10 @@ void net_tx_pkt_dump(struct NetTxPkt *pkt);
  * reset tx packet private context (needed to be called between packets)
  *
  * @pkt: packet
+ * @dev: PCI device processing the next packet
  *
  */
-void net_tx_pkt_reset(struct NetTxPkt *pkt);
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev);
 
 /**
  * Send packet to qemu. handles sw offloads if vhdr is not supported.
diff --git a/hw/net/trace-events b/hw/net/trace-events
index 6575341..d35554f 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -280,6 +280,8 @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: PHY[%u] UNHANDLED"
 igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
 igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
 
+igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
+
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
 igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 1068b80..f7b874c 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -678,7 +678,7 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
             vmxnet3_complete_packet(s, qidx, txd_idx);
             s->tx_sop = true;
             s->skip_current_tx_pkt = false;
-            net_tx_pkt_reset(s->tx_pkt);
+            net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
         }
     }
 }
@@ -1159,7 +1159,7 @@ static void vmxnet3_deactivate_device(VMXNET3State *s)
 {
     if (s->device_active) {
         VMW_CBPRN("Deactivating vmxnet3...");
-        net_tx_pkt_reset(s->tx_pkt);
+        net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
         net_tx_pkt_uninit(s->tx_pkt);
         net_rx_pkt_uninit(s->rx_pkt);
         s->device_active = false;