author    Peter Maydell <peter.maydell@linaro.org>  2023-03-28 13:26:49 +0100
committer Peter Maydell <peter.maydell@linaro.org>  2023-03-28 13:26:49 +0100
commit    52dd5f6f70276518dd21cd3a72cb95495df33bc9
tree      754c8efa8dcda39407b818407f9a8e423aff823f
parent    ecaefc8f50ea5058e478c088de7cf70f56338d5b
parent    fba7c3b788dfcb99a3f9253f7d99cc0d217d6d3c
Merge tag 'net-pull-request' of https://github.com/jasowang/qemu into staging
# -----BEGIN PGP SIGNATURE-----
# Version: GnuPG v1
#
# iQEcBAABAgAGBQJkIncaAAoJEO8Ells5jWIR6bgH/icQAN1a0PBHh2lujmob8KvE
# IZ0KYuJMDcA8HFdhnzxPPKj7/77AM1DgmrmVwqLKTr6j64CZYr2Uc0yeyPa0f/0Y
# TtePW70bjoUkRm/dDdFe32xViO4O33pGQia6olR6QwmfdDbBBJjAucmlep8NClrh
# EooQ2WfXPBqrC6ckKZ7vEvgKV2sDl2XxYIr3kU3MiB4j4b1lrGHE+XSv7cXOC+at
# G2vYqbimipZstHZCJYeB5tRz+FXbAI3ZNCGtYpxeVyabrlHS+n+X+gttaswcvVIF
# ln6yidVGg/Ot3bi0qiV1WihpqNCWO0ghhf7wIEubAIIJlnE5hXULF4uFVfz+rRE=
# =HjJv
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 28 Mar 2023 06:11:54 BST
# gpg: using RSA key EF04965B398D6211
# gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg: It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211

* tag 'net-pull-request' of https://github.com/jasowang/qemu:
  igb: respect VMVIR and VMOLR for VLAN
  igb: implement VF Tx and Rx stats
  igb: respect E1000_VMOLR_RSSE
  igb: check oversized packets for VMDq
  igb: implement VFRE and VFTE registers
  igb: add ICR_RXDW
  igb: handle PF/VF reset properly
  MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer
  hw/net/net_tx_pkt: Align l3_hdr
  hw/net/net_tx_pkt: Ignore ECN bit
  igb: Fix DMA requester specification for Tx packet
  igb: Save more Tx states

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--  MAINTAINERS           |   1
-rw-r--r--  hw/net/e1000e_core.c  |   6
-rw-r--r--  hw/net/e1000x_regs.h  |   4
-rw-r--r--  hw/net/igb.c          |  26
-rw-r--r--  hw/net/igb_core.c     | 256
-rw-r--r--  hw/net/igb_core.h     |   9
-rw-r--r--  hw/net/igb_regs.h     |   6
-rw-r--r--  hw/net/net_tx_pkt.c   |  30
-rw-r--r--  hw/net/net_tx_pkt.h   |   3
-rw-r--r--  hw/net/trace-events   |   2
-rw-r--r--  hw/net/vmxnet3.c      |   4
11 files changed, 254 insertions, 93 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 34b50b2..ef45b5e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2252,6 +2252,7 @@ F: tests/qtest/libqos/e1000e.*
igb
M: Akihiko Odaki <akihiko.odaki@daynix.com>
+R: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
S: Maintained
F: docs/system/devices/igb.rst
F: hw/net/igb*
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index 4d9679c..c0c09b6 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -765,7 +765,7 @@ e1000e_process_tx_desc(E1000ECore *core,
}
tx->skip_cp = false;
- net_tx_pkt_reset(tx->tx_pkt);
+ net_tx_pkt_reset(tx->tx_pkt, core->owner);
tx->sum_needed = 0;
tx->cptse = 0;
@@ -3447,7 +3447,7 @@ e1000e_core_pci_uninit(E1000ECore *core)
qemu_del_vm_change_state_handler(core->vmstate);
for (i = 0; i < E1000E_NUM_QUEUES; i++) {
- net_tx_pkt_reset(core->tx[i].tx_pkt);
+ net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
net_tx_pkt_uninit(core->tx[i].tx_pkt);
}
@@ -3572,7 +3572,7 @@ static void e1000e_reset(E1000ECore *core, bool sw)
e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
- net_tx_pkt_reset(core->tx[i].tx_pkt);
+ net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
core->tx[i].skip_cp = false;
}
diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
index c0832fa..6d3c4c6 100644
--- a/hw/net/e1000x_regs.h
+++ b/hw/net/e1000x_regs.h
@@ -335,6 +335,7 @@
#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */
#define E1000_ICR_RXO 0x00000040 /* rx overrun */
#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */
+#define E1000_ICR_RXDW 0x00000080 /* rx desc written back */
#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */
#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */
#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */
@@ -378,6 +379,7 @@
#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */
#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_ICS_RXDW E1000_ICR_RXDW /* rx desc written back */
#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */
#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
@@ -407,6 +409,7 @@
#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */
#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_IMS_RXDW E1000_ICR_RXDW /* rx desc written back */
#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */
#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
@@ -441,6 +444,7 @@
#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */
#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */
+#define E1000_IMC_RXDW E1000_ICR_RXDW /* rx desc written back */
#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */
#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
diff --git a/hw/net/igb.c b/hw/net/igb.c
index c6d753d..51a7e91 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -502,16 +502,28 @@ static int igb_post_load(void *opaque, int version_id)
return igb_core_post_load(&s->core);
}
-static const VMStateDescription igb_vmstate_tx = {
- .name = "igb-tx",
+static const VMStateDescription igb_vmstate_tx_ctx = {
+ .name = "igb-tx-ctx",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
- VMSTATE_UINT16(vlan, struct igb_tx),
- VMSTATE_UINT16(mss, struct igb_tx),
- VMSTATE_BOOL(tse, struct igb_tx),
- VMSTATE_BOOL(ixsm, struct igb_tx),
- VMSTATE_BOOL(txsm, struct igb_tx),
+ VMSTATE_UINT32(vlan_macip_lens, struct e1000_adv_tx_context_desc),
+ VMSTATE_UINT32(seqnum_seed, struct e1000_adv_tx_context_desc),
+ VMSTATE_UINT32(type_tucmd_mlhl, struct e1000_adv_tx_context_desc),
+ VMSTATE_UINT32(mss_l4len_idx, struct e1000_adv_tx_context_desc),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription igb_vmstate_tx = {
+ .name = "igb-tx",
+ .version_id = 2,
+ .minimum_version_id = 2,
+ .fields = (VMStateField[]) {
+ VMSTATE_STRUCT_ARRAY(ctx, struct igb_tx, 2, 0, igb_vmstate_tx_ctx,
+ struct e1000_adv_tx_context_desc),
+ VMSTATE_UINT32(first_cmd_type_len, struct igb_tx),
+ VMSTATE_UINT32(first_olinfo_status, struct igb_tx),
VMSTATE_BOOL(first, struct igb_tx),
VMSTATE_BOOL(skip_cp, struct igb_tx),
VMSTATE_END_OF_LIST()
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index a7c7bfd..d733fed 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -386,11 +386,35 @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx,
info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash);
}
+static void
+igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
+ uint16_t vlan, bool insert_vlan)
+{
+ if (core->mac[MRQC] & 1) {
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+ if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) {
+ /* always insert default VLAN */
+ insert_vlan = true;
+ vlan = core->mac[VMVIR0 + pool] & 0xffff;
+ } else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) {
+ insert_vlan = false;
+ }
+ }
+
+ if (insert_vlan && e1000x_vlan_enabled(core->mac)) {
+ net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
+ core->mac[VET] & 0xffff);
+ }
+}
+
static bool
igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
{
- if (tx->tse) {
- if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss)) {
+ if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
+ uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
+ uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16;
+ if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) {
return false;
}
@@ -399,13 +423,13 @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
return true;
}
- if (tx->txsm) {
+ if (tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) {
if (!net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0)) {
return false;
}
}
- if (tx->ixsm) {
+ if (tx->first_olinfo_status & E1000_ADVTXD_POTS_IXSM) {
net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt);
}
@@ -490,7 +514,7 @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int queue_index)
}
static void
-igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
+igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
{
static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
PTC1023, PTC1522 };
@@ -517,17 +541,25 @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
core->mac[GPTC] = core->mac[TPT];
core->mac[GOTCL] = core->mac[TOTL];
core->mac[GOTCH] = core->mac[TOTH];
+
+ if (core->mac[MRQC] & 1) {
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+ core->mac[PVFGOTC0 + (pool * 64)] += tot_len;
+ core->mac[PVFGPTC0 + (pool * 64)]++;
+ }
}
static void
igb_process_tx_desc(IGBCore *core,
+ PCIDevice *dev,
struct igb_tx *tx,
union e1000_adv_tx_desc *tx_desc,
int queue_index)
{
struct e1000_adv_tx_context_desc *tx_ctx_desc;
uint32_t cmd_type_len;
- uint32_t olinfo_status;
+ uint32_t idx;
uint64_t buffer_addr;
uint16_t length;
@@ -538,20 +570,19 @@ igb_process_tx_desc(IGBCore *core,
E1000_ADVTXD_DTYP_DATA) {
/* advanced transmit data descriptor */
if (tx->first) {
- olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
-
- tx->tse = !!(cmd_type_len & E1000_ADVTXD_DCMD_TSE);
- tx->ixsm = !!(olinfo_status & E1000_ADVTXD_POTS_IXSM);
- tx->txsm = !!(olinfo_status & E1000_ADVTXD_POTS_TXSM);
-
+ tx->first_cmd_type_len = cmd_type_len;
+ tx->first_olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
tx->first = false;
}
} else if ((cmd_type_len & E1000_ADVTXD_DTYP_CTXT) ==
E1000_ADVTXD_DTYP_CTXT) {
/* advanced transmit context descriptor */
tx_ctx_desc = (struct e1000_adv_tx_context_desc *)tx_desc;
- tx->vlan = le32_to_cpu(tx_ctx_desc->vlan_macip_lens) >> 16;
- tx->mss = le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 16;
+ idx = (le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 4) & 1;
+ tx->ctx[idx].vlan_macip_lens = le32_to_cpu(tx_ctx_desc->vlan_macip_lens);
+ tx->ctx[idx].seqnum_seed = le32_to_cpu(tx_ctx_desc->seqnum_seed);
+ tx->ctx[idx].type_tucmd_mlhl = le32_to_cpu(tx_ctx_desc->type_tucmd_mlhl);
+ tx->ctx[idx].mss_l4len_idx = le32_to_cpu(tx_ctx_desc->mss_l4len_idx);
return;
} else {
/* unknown descriptor type */
@@ -574,18 +605,19 @@ igb_process_tx_desc(IGBCore *core,
if (cmd_type_len & E1000_TXD_CMD_EOP) {
if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
- if (cmd_type_len & E1000_TXD_CMD_VLE) {
- net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan,
- core->mac[VET] & 0xffff);
- }
+ idx = (tx->first_olinfo_status >> 4) & 1;
+ igb_tx_insert_vlan(core, queue_index, tx,
+ tx->ctx[idx].vlan_macip_lens >> 16,
+ !!(cmd_type_len & E1000_TXD_CMD_VLE));
+
if (igb_tx_pkt_send(core, tx, queue_index)) {
- igb_on_tx_done_update_stats(core, tx->tx_pkt);
+ igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
}
}
tx->first = true;
tx->skip_cp = false;
- net_tx_pkt_reset(tx->tx_pkt);
+ net_tx_pkt_reset(tx->tx_pkt, dev);
}
}
@@ -780,6 +812,18 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
return igb_tx_wb_eic(core, txi->idx);
}
+static inline bool
+igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+{
+ bool vmdq = core->mac[MRQC] & 1;
+ uint16_t qn = txi->idx;
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+ return (core->mac[TCTL] & E1000_TCTL_EN) &&
+ (!vmdq || core->mac[VFTE] & BIT(pool)) &&
+ (core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE);
+}
+
static void
igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
{
@@ -789,8 +833,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
const E1000E_RingInfo *txi = txr->i;
uint32_t eic = 0;
- /* TODO: check if the queue itself is enabled too. */
- if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
+ if (!igb_tx_enabled(core, txi)) {
trace_e1000e_tx_disabled();
return;
}
@@ -800,6 +843,8 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
d = core->owner;
}
+ net_tx_pkt_reset(txr->tx->tx_pkt, d);
+
while (!igb_ring_empty(core, txi)) {
base = igb_ring_head_descr(core, txi);
@@ -808,7 +853,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
trace_e1000e_tx_descr((void *)(intptr_t)desc.read.buffer_addr,
desc.read.cmd_type_len, desc.wb.status);
- igb_process_tx_desc(core, txr->tx, &desc, txi->idx);
+ igb_process_tx_desc(core, d, txr->tx, &desc, txi->idx);
igb_ring_advance(core, txi, 1);
eic |= igb_txdesc_writeback(core, base, &desc, txi);
}
@@ -866,6 +911,9 @@ igb_can_receive(IGBCore *core)
for (i = 0; i < IGB_NUM_QUEUES; i++) {
E1000E_RxRing rxr;
+ if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
+ continue;
+ }
igb_rx_ring_init(core, &rxr, i);
if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) {
@@ -901,12 +949,26 @@ igb_rx_l4_cso_enabled(IGBCore *core)
return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
}
+static bool
+igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
+{
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
+ bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
+ int max_ethernet_lpe_size =
+ core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
+ int max_ethernet_vlan_size = 1522;
+
+ return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
+}
+
static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
- E1000E_RSSInfo *rss_info, bool *external_tx)
+ size_t size, E1000E_RSSInfo *rss_info,
+ bool *external_tx)
{
static const int ta_shift[] = { 4, 3, 2, 0 };
uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
uint16_t queues = 0;
+ uint16_t oversized = 0;
uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
bool accepted = false;
int i;
@@ -932,7 +994,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
if (core->mac[MRQC] & 1) {
if (is_broadcast_ether_addr(ehdr->h_dest)) {
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) {
queues |= BIT(i);
}
@@ -966,7 +1028,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
if (macp[f >> 5] & (1 << (f & 0x1f))) {
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) {
queues |= BIT(i);
}
@@ -989,7 +1051,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
}
}
} else {
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) {
mask |= BIT(i);
}
@@ -1005,9 +1067,34 @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT);
}
- igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info);
- if (rss_info->queue & 1) {
- queues <<= 8;
+ queues &= core->mac[VFRE];
+ if (queues) {
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+ if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
+ oversized |= BIT(i);
+ }
+ }
+ /* 8.19.37 increment ROC if packet is oversized for all queues */
+ if (oversized == queues) {
+ trace_e1000x_rx_oversized(size);
+ e1000x_inc_reg_if_not_full(core->mac, ROC);
+ }
+ queues &= ~oversized;
+ }
+
+ if (queues) {
+ igb_rss_parse_packet(core, core->rx_pkt,
+ external_tx != NULL, rss_info);
+ /* Sec 8.26.1: PQn = VFn + VQn*8 */
+ if (rss_info->queue & 1) {
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+ if ((queues & BIT(i)) &&
+ (core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) {
+ queues |= BIT(i + IGB_NUM_VM_POOLS);
+ queues &= ~BIT(i);
+ }
+ }
+ }
}
} else {
switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
@@ -1350,7 +1437,8 @@ igb_write_to_rx_buffers(IGBCore *core,
}
static void
-igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
+igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+ size_t data_size, size_t data_fcs_size)
{
e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
@@ -1366,6 +1454,16 @@ igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
default:
break;
}
+
+ if (core->mac[MRQC] & 1) {
+ uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+
+ core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
+ core->mac[PVFGPRC0 + (pool * 64)]++;
+ if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
+ core->mac[PVFMPRC0 + (pool * 64)]++;
+ }
+ }
}
static inline bool
@@ -1467,7 +1565,21 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
} while (desc_offset < total_size);
- igb_update_rx_stats(core, size, total_size);
+ igb_update_rx_stats(core, rxi, size, total_size);
+}
+
+static bool
+igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+{
+ if (core->mac[MRQC] & 1) {
+ uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+ /* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */
+ return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ?
+ core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN :
+ core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN;
+ }
+
+ return e1000x_vlan_enabled(core->mac);
}
static inline void
@@ -1550,34 +1662,36 @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
ehdr = PKT_GET_ETH_HDR(filter_buf);
net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
+ net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
- net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
- e1000x_vlan_enabled(core->mac),
- core->mac[VET] & 0xffff);
-
- queues = igb_receive_assign(core, ehdr, &rss_info, external_tx);
+ queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
if (!queues) {
trace_e1000e_rx_flt_dropped();
return orig_size;
}
- total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
- e1000x_fcs_len(core->mac);
-
for (i = 0; i < IGB_NUM_QUEUES; i++) {
- if (!(queues & BIT(i))) {
+ if (!(queues & BIT(i)) ||
+ !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
continue;
}
igb_rx_ring_init(core, &rxr, i);
+ net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
+ igb_rx_strip_vlan(core, rxr.i),
+ core->mac[VET] & 0xffff);
+
+ total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
+ e1000x_fcs_len(core->mac);
+
if (!igb_has_rxbufs(core, rxr.i, total_size)) {
n |= E1000_ICS_RXO;
trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
continue;
}
- n |= E1000_ICR_RXT0;
+ n |= E1000_ICR_RXDW;
igb_rx_fix_l4_csum(core, core->rx_pkt);
igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info);
@@ -1892,14 +2006,6 @@ static void igb_set_eims(IGBCore *core, int index, uint32_t val)
igb_update_interrupt_state(core);
}
-static void igb_vf_reset(IGBCore *core, uint16_t vfn)
-{
- /* TODO: Reset of the queue enable and the interrupt registers of the VF. */
-
- core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
- core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD;
-}
-
static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
{
uint32_t ent = core->mac[VTIVAR_MISC + vfn];
@@ -1977,6 +2083,24 @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)
}
}
+static void igb_vf_reset(IGBCore *core, uint16_t vfn)
+{
+ uint16_t qn0 = vfn;
+ uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
+
+ /* disable Rx and Tx for the VF*/
+ core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+ core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+ core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
+ core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
+ core->mac[VFRE] &= ~BIT(vfn);
+ core->mac[VFTE] &= ~BIT(vfn);
+ /* indicate VF reset to PF */
+ core->mac[VFLRE] |= BIT(vfn);
+ /* VFLRE and mailbox use the same interrupt cause */
+ mailbox_interrupt_to_pf(core);
+}
+
static void igb_w1c(IGBCore *core, int index, uint32_t val)
{
core->mac[index] &= ~val;
@@ -2231,14 +2355,20 @@ igb_set_status(IGBCore *core, int index, uint32_t val)
static void
igb_set_ctrlext(IGBCore *core, int index, uint32_t val)
{
- trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
- !!(val & E1000_CTRL_EXT_SPD_BYPS));
-
- /* TODO: PFRSTD */
+ trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
+ !!(val & E1000_CTRL_EXT_SPD_BYPS),
+ !!(val & E1000_CTRL_EXT_PFRSTD));
/* Zero self-clearing bits */
val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
core->mac[CTRL_EXT] = val;
+
+ if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PFRSTD) {
+ for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+ core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
+ core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD;
+ }
+ }
}
static void
@@ -3825,7 +3955,7 @@ igb_core_pci_realize(IGBCore *core,
core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core);
for (i = 0; i < IGB_NUM_QUEUES; i++) {
- net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, E1000E_MAX_TX_FRAGS);
+ net_tx_pkt_init(&core->tx[i].tx_pkt, NULL, E1000E_MAX_TX_FRAGS);
}
net_rx_pkt_init(&core->rx_pkt);
@@ -3850,7 +3980,7 @@ igb_core_pci_uninit(IGBCore *core)
qemu_del_vm_change_state_handler(core->vmstate);
for (i = 0; i < IGB_NUM_QUEUES; i++) {
- net_tx_pkt_reset(core->tx[i].tx_pkt);
+ net_tx_pkt_reset(core->tx[i].tx_pkt, NULL);
net_tx_pkt_uninit(core->tx[i].tx_pkt);
}
@@ -3899,6 +4029,7 @@ igb_phy_reg_init[] = {
static const uint32_t igb_mac_reg_init[] = {
[LEDCTL] = 2 | (3 << 8) | BIT(15) | (6 << 16) | (7 << 24),
[EEMNGCTL] = BIT(31),
+ [TXDCTL0] = E1000_TXDCTL_QUEUE_ENABLE,
[RXDCTL0] = E1000_RXDCTL_QUEUE_ENABLE | (1 << 16),
[RXDCTL1] = 1 << 16,
[RXDCTL2] = 1 << 16,
@@ -4021,14 +4152,15 @@ static void igb_reset(IGBCore *core, bool sw)
e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
+ for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+ /* Set RSTI, so VF can identify a PF reset is in progress */
+ core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTI;
+ }
+
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
tx = &core->tx[i];
- net_tx_pkt_reset(tx->tx_pkt);
- tx->vlan = 0;
- tx->mss = 0;
- tx->tse = false;
- tx->ixsm = false;
- tx->txsm = false;
+ net_tx_pkt_reset(tx->tx_pkt, NULL);
+ memset(tx->ctx, 0, sizeof(tx->ctx));
tx->first = true;
tx->skip_cp = false;
}
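
The igb_core.c changes above hinge on the VMDq queue/pool layout: pool n owns physical queues n and n + 8 (Sec 8.26.1, PQn = VFn + VQn*8), which is why the diff derives the pool as qn % IGB_NUM_VM_POOLS and addresses the per-VF statistics registers with a pool * 64 stride. A minimal stand-alone sketch of that index arithmetic (illustrative names, not QEMU code):

#include <stdint.h>
#include <stdio.h>

#define NUM_VM_POOLS 8          /* stand-in for IGB_NUM_VM_POOLS in the diff */

/* Pool that owns a physical queue: queues n and n + 8 belong to pool n. */
static uint16_t pool_of_queue(uint16_t qn)
{
    return qn % NUM_VM_POOLS;
}

/* Physical queue number per Sec 8.26.1: PQn = VFn + VQn * 8. */
static uint16_t phys_queue(uint16_t vfn, uint16_t vqn)
{
    return vfn + vqn * NUM_VM_POOLS;
}

int main(void)
{
    /* VF 3's second queue (VQn = 1) is physical queue 11, owned by pool 3. */
    printf("PQ = %d, pool = %d\n", phys_queue(3, 1), pool_of_queue(11));
    return 0;
}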
diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h
index 814c1e2..9cbbfd5 100644
--- a/hw/net/igb_core.h
+++ b/hw/net/igb_core.h
@@ -47,6 +47,7 @@
#define IGB_MSIX_VEC_NUM (10)
#define IGBVF_MSIX_VEC_NUM (3)
#define IGB_NUM_QUEUES (16)
+#define IGB_NUM_VM_POOLS (8)
typedef struct IGBCore IGBCore;
@@ -72,11 +73,9 @@ struct IGBCore {
QEMUTimer *autoneg_timer;
struct igb_tx {
- uint16_t vlan; /* VLAN Tag */
- uint16_t mss; /* Maximum Segment Size */
- bool tse; /* TCP/UDP Segmentation Enable */
- bool ixsm; /* Insert IP Checksum */
- bool txsm; /* Insert TCP/UDP Checksum */
+ struct e1000_adv_tx_context_desc ctx[2];
+ uint32_t first_cmd_type_len;
+ uint32_t first_olinfo_status;
bool first;
bool skip_cp;
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index 00934d4..c5c5b3c 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -160,6 +160,9 @@ union e1000_adv_rx_desc {
#define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000
+/* Additional Transmit Descriptor Control definitions */
+#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */
+
/* Additional Receive Descriptor Control definitions */
#define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Rx Queue */
@@ -240,6 +243,9 @@ union e1000_adv_rx_desc {
/* from igb/e1000_defines.h */
+/* Physical Func Reset Done Indication */
+#define E1000_CTRL_EXT_PFRSTD 0x00004000
+
#define E1000_IVAR_VALID 0x80
#define E1000_GPIE_NSICR 0x00000001
#define E1000_GPIE_MSIX_MODE 0x00000010
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index 986a3ad..8dc8568 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -43,7 +43,11 @@ struct NetTxPkt {
struct iovec *vec;
uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];
- uint8_t l3_hdr[ETH_MAX_IP_DGRAM_LEN];
+ union {
+ struct ip_header ip;
+ struct ip6_header ip6;
+ uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
+ } l3_hdr;
uint32_t payload_len;
@@ -89,16 +93,14 @@ void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
{
uint16_t csum;
assert(pkt);
- struct ip_header *ip_hdr;
- ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
- ip_hdr->ip_len = cpu_to_be16(pkt->payload_len +
+ pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
- ip_hdr->ip_sum = 0;
- csum = net_raw_checksum((uint8_t *)ip_hdr,
+ pkt->l3_hdr.ip.ip_sum = 0;
+ csum = net_raw_checksum(pkt->l3_hdr.octets,
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
- ip_hdr->ip_sum = cpu_to_be16(csum);
+ pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
}
void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
@@ -443,7 +445,7 @@ void net_tx_pkt_dump(struct NetTxPkt *pkt)
#endif
}
-void net_tx_pkt_reset(struct NetTxPkt *pkt)
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
{
int i;
@@ -467,6 +469,7 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt)
pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0);
}
}
+ pkt->pci_dev = pci_dev;
pkt->raw_frags = 0;
pkt->hdr_len = 0;
@@ -795,11 +798,13 @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
{
assert(pkt);
+ uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
+
/*
* Since underlying infrastructure does not support IP datagrams longer
* than 64K we should drop such packets and don't even try to send
*/
- if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) {
+ if (VIRTIO_NET_HDR_GSO_NONE != gso_type) {
if (pkt->payload_len >
ETH_MAX_IP_DGRAM_LEN -
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
@@ -807,7 +812,7 @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
}
}
- if (offload || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
+ if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
@@ -829,15 +834,14 @@ void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
{
struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
- struct ip6_header *ip6 = (struct ip6_header *) pkt->l3_hdr;
/*
* TODO: if qemu would support >64K packets - add jumbo option check
* something like that:
* 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
*/
- if (ip6->ip6_plen == 0) {
+ if (pkt->l3_hdr.ip6.ip6_plen == 0) {
if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
- ip6->ip6_plen = htons(pkt->payload_len);
+ pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
}
/*
* TODO: if qemu would support >64K packets
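
The l3_hdr hunk above (from "hw/net/net_tx_pkt: Align l3_hdr") replaces a bare byte array with a union so the IPv4 and IPv6 header views are naturally aligned instead of being reached through casts of an unaligned buffer. A self-contained sketch of the same idiom, using simplified header structs rather than QEMU's ip_header/ip6_header:

#include <stdint.h>
#include <stdio.h>
#include <stdalign.h>

/* Simplified stand-ins for QEMU's ip_header / ip6_header (illustrative only). */
struct ip4_hdr { uint8_t ver_ihl, tos; uint16_t len; uint32_t src, dst; };
struct ip6_hdr { uint32_t ver_tc_fl; uint16_t plen; uint8_t nxt, hlim;
                 uint8_t src[16], dst[16]; };

#define MAX_L3_LEN 512          /* illustrative; the diff keeps ETH_MAX_IP_DGRAM_LEN */

/*
 * The union is aligned for its strictest member, so the .ip4 and .ip6 views
 * can be dereferenced directly, while .octets still allows byte-wise
 * checksumming; this mirrors the shape the diff gives pkt->l3_hdr.
 */
union l3_hdr {
    struct ip4_hdr ip4;
    struct ip6_hdr ip6;
    uint8_t octets[MAX_L3_LEN];
};

int main(void)
{
    printf("union alignment: %zu, plain byte buffer alignment: %zu\n",
           alignof(union l3_hdr), alignof(uint8_t[MAX_L3_LEN]));
    return 0;
}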
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
index f57b4e0..e5ce6f2 100644
--- a/hw/net/net_tx_pkt.h
+++ b/hw/net/net_tx_pkt.h
@@ -148,9 +148,10 @@ void net_tx_pkt_dump(struct NetTxPkt *pkt);
* reset tx packet private context (needed to be called between packets)
*
* @pkt: packet
+ * @dev: PCI device processing the next packet
*
*/
-void net_tx_pkt_reset(struct NetTxPkt *pkt);
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev);
/**
* Send packet to qemu. handles sw offloads if vhdr is not supported.
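
With the signature change above, callers of net_tx_pkt_reset() name the PCI function on whose behalf the next packet will be processed, so previously mapped raw fragments are unmapped against the device that mapped them (the PF or a VF). A hedged, stand-alone sketch of that hand-off pattern, using made-up stand-in types rather than the QEMU structures:

#include <stddef.h>
#include <stdio.h>

/* Stand-ins for the QEMU types (illustrative only). */
typedef struct PCIDevice { const char *name; } PCIDevice;

typedef struct TxPkt {
    PCIDevice *pci_dev;   /* device used to DMA-map raw fragments */
    size_t raw_frags;     /* fragments currently mapped */
} TxPkt;

/*
 * Sketch of the new contract: unmap any fragments that were mapped through
 * the old device, then record the device that will process the next packet
 * (PF or VF), mirroring net_tx_pkt_reset(pkt, dev) in the diff.
 */
static void tx_pkt_reset(TxPkt *pkt, PCIDevice *dev)
{
    if (pkt->raw_frags) {
        printf("unmapping %zu fragments via %s\n",
               pkt->raw_frags, pkt->pci_dev->name);
        pkt->raw_frags = 0;
    }
    pkt->pci_dev = dev;   /* next packet is built on behalf of this device */
}

int main(void)
{
    PCIDevice pf = { "igb PF" }, vf0 = { "igb VF0" };
    TxPkt pkt = { &pf, 2 };

    tx_pkt_reset(&pkt, &vf0);   /* old fragments released via the PF mapping */
    return 0;
}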
diff --git a/hw/net/trace-events b/hw/net/trace-events
index 6575341..d35554f 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -280,6 +280,8 @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: PHY[%u] UNHANDLED"
igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
+igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
+
igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 1068b80..f7b874c 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -678,7 +678,7 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
vmxnet3_complete_packet(s, qidx, txd_idx);
s->tx_sop = true;
s->skip_current_tx_pkt = false;
- net_tx_pkt_reset(s->tx_pkt);
+ net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
}
}
}
@@ -1159,7 +1159,7 @@ static void vmxnet3_deactivate_device(VMXNET3State *s)
{
if (s->device_active) {
VMW_CBPRN("Deactivating vmxnet3...");
- net_tx_pkt_reset(s->tx_pkt);
+ net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
net_tx_pkt_uninit(s->tx_pkt);
net_rx_pkt_uninit(s->rx_pkt);
s->device_active = false;