aboutsummaryrefslogtreecommitdiff
path: root/hw
diff options
context:
space:
mode:
authorStefan Hajnoczi <stefanha@redhat.com>2023-09-19 13:21:49 -0400
committerStefan Hajnoczi <stefanha@redhat.com>2023-09-19 13:21:49 -0400
commitdd0c84983dd5c3fefaa29f15ed1b4c5c7be9775d (patch)
tree2de4d420f503fd0c2cbb061fd966d399365b0279 /hw
parentd7754940d78a7d5bfb13531afa9a67f8c57e987e (diff)
parent6d7a53e9f16d2b18d94f9fce1e4eea34570286ef (diff)
downloadqemu-dd0c84983dd5c3fefaa29f15ed1b4c5c7be9775d.zip
qemu-dd0c84983dd5c3fefaa29f15ed1b4c5c7be9775d.tar.gz
qemu-dd0c84983dd5c3fefaa29f15ed1b4c5c7be9775d.tar.bz2
Merge tag 'net-pull-request' of https://github.com/jasowang/qemu into staging
# -----BEGIN PGP SIGNATURE----- # Version: GnuPG v1 # # iQEcBAABAgAGBQJlB/SLAAoJEO8Ells5jWIR7EQH/1kAbxHcSGJXDOgQAXJ/rOZi # UKn3ugJzD0Hxd4Xz8cvdVLM+9/JoEEOK1uB+NIG7Ask/gA5D7eUYzaLtp1OJ8VNO # mamfKmn3EIBWJoLSHH19TKzfW2tGMJHQ0Nj+sbDQRkK5f2c7hwLTRXa1EmlJd4dB # VoVzX4OiJtrQyv4OVmpP/PSETXJDvYYX/DNcRl9/3ccKtQW/wVDI3YzrMzXrsgyc # w9ItJi8k+19mVH6RgQwciqRvTbVMdzkOxqvU//LY0TxnjsHfbyHr+KlNAa2WTY2N # QgpAlMZhHqUG6/XXAs0o2VEtA66zmw932Xfy/CZUEcdGWfkG/9CEVfbuT4CKGY4= # =tF7K # -----END PGP SIGNATURE----- # gpg: Signature made Mon 18 Sep 2023 02:56:11 EDT # gpg: using RSA key EF04965B398D6211 # gpg: Good signature from "Jason Wang (Jason Wang on RedHat) <jasowang@redhat.com>" [full] # Primary key fingerprint: 215D 46F4 8246 689E C77F 3562 EF04 965B 398D 6211 * tag 'net-pull-request' of https://github.com/jasowang/qemu: net/tap: Avoid variable-length array net/dump: Avoid variable length array hw/net/rocker: Avoid variable length array hw/net/fsl_etsec/rings.c: Avoid variable length array net: add initial support for AF_XDP network backend tests: bump libvirt-ci for libasan and libxdp e1000e: rename e1000e_ba_state and e1000e_write_hdr_to_rx_buffers igb: packet-split descriptors support igb: add IPv6 extended headers traffic detection igb: RX payload guest writting refactoring igb: RX descriptors guest writting refactoring igb: rename E1000E_RingInfo_st igb: remove TCP ACK detection virtio-net: Add support for USO features virtio-net: Add USO flags to vhost support. tap: Add check for USO features tap: Add USO support to tap device. Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'hw')
-rw-r--r--hw/core/machine.c4
-rw-r--r--hw/net/e1000e_core.c80
-rw-r--r--hw/net/fsl_etsec/rings.c12
-rw-r--r--hw/net/igb_core.c732
-rw-r--r--hw/net/igb_regs.h20
-rw-r--r--hw/net/rocker/rocker_of_dpa.c2
-rw-r--r--hw/net/trace-events6
-rw-r--r--hw/net/vhost_net.c3
-rw-r--r--hw/net/virtio-net.c35
-rw-r--r--hw/net/vmxnet3.c2
10 files changed, 641 insertions, 255 deletions
diff --git a/hw/core/machine.c b/hw/core/machine.c
index da699cf..230aab8 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -38,6 +38,7 @@
#include "exec/confidential-guest-support.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-net.h"
GlobalProperty hw_compat_8_1[] = {};
const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
@@ -45,6 +46,9 @@ const size_t hw_compat_8_1_len = G_N_ELEMENTS(hw_compat_8_1);
GlobalProperty hw_compat_8_0[] = {
{ "migration", "multifd-flush-after-each-section", "on"},
{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
+ { TYPE_VIRTIO_NET, "host_uso", "off"},
+ { TYPE_VIRTIO_NET, "guest_uso4", "off"},
+ { TYPE_VIRTIO_NET, "guest_uso6", "off"},
};
const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index f8aeafa..e324c02 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -810,24 +810,24 @@ e1000e_txdesc_writeback(E1000ECore *core, dma_addr_t base,
return e1000e_tx_wb_interrupt_cause(core, queue_idx);
}
-typedef struct E1000E_RingInfo_st {
+typedef struct E1000ERingInfo {
int dbah;
int dbal;
int dlen;
int dh;
int dt;
int idx;
-} E1000E_RingInfo;
+} E1000ERingInfo;
static inline bool
-e1000e_ring_empty(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_empty(E1000ECore *core, const E1000ERingInfo *r)
{
return core->mac[r->dh] == core->mac[r->dt] ||
core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN;
}
static inline uint64_t
-e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_base(E1000ECore *core, const E1000ERingInfo *r)
{
uint64_t bah = core->mac[r->dbah];
uint64_t bal = core->mac[r->dbal];
@@ -836,13 +836,13 @@ e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r)
}
static inline uint64_t
-e1000e_ring_head_descr(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_head_descr(E1000ECore *core, const E1000ERingInfo *r)
{
return e1000e_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh];
}
static inline void
-e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count)
+e1000e_ring_advance(E1000ECore *core, const E1000ERingInfo *r, uint32_t count)
{
core->mac[r->dh] += count;
@@ -852,7 +852,7 @@ e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count)
}
static inline uint32_t
-e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_free_descr_num(E1000ECore *core, const E1000ERingInfo *r)
{
trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
core->mac[r->dh], core->mac[r->dt]);
@@ -871,19 +871,19 @@ e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r)
}
static inline bool
-e1000e_ring_enabled(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_enabled(E1000ECore *core, const E1000ERingInfo *r)
{
return core->mac[r->dlen] > 0;
}
static inline uint32_t
-e1000e_ring_len(E1000ECore *core, const E1000E_RingInfo *r)
+e1000e_ring_len(E1000ECore *core, const E1000ERingInfo *r)
{
return core->mac[r->dlen];
}
typedef struct E1000E_TxRing_st {
- const E1000E_RingInfo *i;
+ const E1000ERingInfo *i;
struct e1000e_tx *tx;
} E1000E_TxRing;
@@ -896,7 +896,7 @@ e1000e_mq_queue_idx(int base_reg_idx, int reg_idx)
static inline void
e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx)
{
- static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = {
+ static const E1000ERingInfo i[E1000E_NUM_QUEUES] = {
{ TDBAH, TDBAL, TDLEN, TDH, TDT, 0 },
{ TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 }
};
@@ -908,13 +908,13 @@ e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx)
}
typedef struct E1000E_RxRing_st {
- const E1000E_RingInfo *i;
+ const E1000ERingInfo *i;
} E1000E_RxRing;
static inline void
e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx)
{
- static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = {
+ static const E1000ERingInfo i[E1000E_NUM_QUEUES] = {
{ RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
{ RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 }
};
@@ -930,7 +930,7 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
dma_addr_t base;
struct e1000_tx_desc desc;
bool ide = false;
- const E1000E_RingInfo *txi = txr->i;
+ const E1000ERingInfo *txi = txr->i;
uint32_t cause = E1000_ICS_TXQE;
if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
@@ -960,7 +960,7 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
}
static bool
-e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r,
+e1000e_has_rxbufs(E1000ECore *core, const E1000ERingInfo *r,
size_t total_size)
{
uint32_t bufs = e1000e_ring_free_descr_num(core, r);
@@ -1397,17 +1397,17 @@ e1000e_pci_dma_write_rx_desc(E1000ECore *core, dma_addr_t addr,
}
}
-typedef struct e1000e_ba_state_st {
+typedef struct E1000EBAState {
uint16_t written[MAX_PS_BUFFERS];
uint8_t cur_idx;
-} e1000e_ba_state;
+} E1000EBAState;
static inline void
-e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
- hwaddr ba[MAX_PS_BUFFERS],
- e1000e_ba_state *bastate,
- const char *data,
- dma_addr_t data_len)
+e1000e_write_hdr_frag_to_rx_buffers(E1000ECore *core,
+ hwaddr ba[MAX_PS_BUFFERS],
+ E1000EBAState *bastate,
+ const char *data,
+ dma_addr_t data_len)
{
assert(data_len <= core->rxbuf_sizes[0] - bastate->written[0]);
@@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
}
static void
-e1000e_write_to_rx_buffers(E1000ECore *core,
- hwaddr ba[MAX_PS_BUFFERS],
- e1000e_ba_state *bastate,
- const char *data,
- dma_addr_t data_len)
+e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core,
+ hwaddr ba[MAX_PS_BUFFERS],
+ E1000EBAState *bastate,
+ const char *data,
+ dma_addr_t data_len)
{
while (data_len > 0) {
uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx];
@@ -1460,7 +1460,7 @@ e1000e_update_rx_stats(E1000ECore *core, size_t pkt_size, size_t pkt_fcs_size)
}
static inline bool
-e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000E_RingInfo *rxi)
+e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000ERingInfo *rxi)
{
return e1000e_ring_free_descr_num(core, rxi) ==
e1000e_ring_len(core, rxi) >> core->rxbuf_min_shift;
@@ -1521,7 +1521,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
struct iovec *iov = net_rx_pkt_get_iovec(pkt);
size_t size = net_rx_pkt_get_total_len(pkt);
size_t total_size = size + e1000x_fcs_len(core->mac);
- const E1000E_RingInfo *rxi;
+ const E1000ERingInfo *rxi;
size_t ps_hdr_len = 0;
bool do_ps = e1000e_do_ps(core, pkt, &ps_hdr_len);
bool is_first = true;
@@ -1530,7 +1530,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
do {
hwaddr ba[MAX_PS_BUFFERS];
- e1000e_ba_state bastate = { { 0 } };
+ E1000EBAState bastate = { { 0 } };
bool is_last = false;
desc_size = total_size - desc_offset;
@@ -1568,8 +1568,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
iov_copy = MIN(ps_hdr_len - ps_hdr_copied,
iov->iov_len - iov_ofs);
- e1000e_write_hdr_to_rx_buffers(core, ba, &bastate,
- iov->iov_base, iov_copy);
+ e1000e_write_hdr_frag_to_rx_buffers(core, ba,
+ &bastate,
+ iov->iov_base,
+ iov_copy);
copy_size -= iov_copy;
ps_hdr_copied += iov_copy;
@@ -1585,8 +1587,8 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
} else {
/* Leave buffer 0 of each descriptor except first */
/* empty as per spec 7.1.5.1 */
- e1000e_write_hdr_to_rx_buffers(core, ba, &bastate,
- NULL, 0);
+ e1000e_write_hdr_frag_to_rx_buffers(core, ba, &bastate,
+ NULL, 0);
}
}
@@ -1594,8 +1596,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
while (copy_size) {
iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
- e1000e_write_to_rx_buffers(core, ba, &bastate,
- iov->iov_base + iov_ofs, iov_copy);
+ e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
+ iov->iov_base +
+ iov_ofs,
+ iov_copy);
copy_size -= iov_copy;
iov_ofs += iov_copy;
@@ -1607,7 +1611,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
if (desc_offset + desc_size >= total_size) {
/* Simulate FCS checksum presence in the last descriptor */
- e1000e_write_to_rx_buffers(core, ba, &bastate,
+ e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
(const char *) &fcs_pad, e1000x_fcs_len(core->mac));
}
}
@@ -2852,7 +2856,7 @@ e1000e_update_rx_offloads(E1000ECore *core)
if (core->has_vnet) {
qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
- cso_state, 0, 0, 0, 0);
+ cso_state, 0, 0, 0, 0, 0, 0);
}
}
diff --git a/hw/net/fsl_etsec/rings.c b/hw/net/fsl_etsec/rings.c
index 788463f..2f2f359 100644
--- a/hw/net/fsl_etsec/rings.c
+++ b/hw/net/fsl_etsec/rings.c
@@ -372,6 +372,12 @@ void etsec_walk_tx_ring(eTSEC *etsec, int ring_nbr)
etsec->regs[TSTAT].value |= 1 << (31 - ring_nbr);
}
+/*
+ * rx_init_frame() ensures we never do more padding than this
+ * (checksum plus minimum data packet size)
+ */
+#define MAX_RX_PADDING 64
+
static void fill_rx_bd(eTSEC *etsec,
eTSEC_rxtx_bd *bd,
const uint8_t **buf,
@@ -380,9 +386,11 @@ static void fill_rx_bd(eTSEC *etsec,
uint16_t to_write;
hwaddr bufptr = bd->bufptr +
((hwaddr)(etsec->regs[TBDBPH].value & 0xF) << 32);
- uint8_t padd[etsec->rx_padding];
+ uint8_t padd[MAX_RX_PADDING];
uint8_t rem;
+ assert(etsec->rx_padding <= MAX_RX_PADDING);
+
RING_DEBUG("eTSEC fill Rx buffer @ 0x%016" HWADDR_PRIx
" size:%zu(padding + crc:%u) + fcb:%u\n",
bufptr, *size, etsec->rx_padding, etsec->rx_fcb_size);
@@ -426,7 +434,7 @@ static void fill_rx_bd(eTSEC *etsec,
rem = MIN(etsec->regs[MRBLR].value - bd->length, etsec->rx_padding);
if (rem > 0) {
- memset(padd, 0x0, sizeof(padd));
+ memset(padd, 0x0, rem);
etsec->rx_padding -= rem;
*size -= rem;
bd->length += rem;
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 8b6b75c..f6a5e23 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -267,6 +267,29 @@ igb_rx_use_legacy_descriptor(IGBCore *core)
return false;
}
+typedef struct E1000ERingInfo {
+ int dbah;
+ int dbal;
+ int dlen;
+ int dh;
+ int dt;
+ int idx;
+} E1000ERingInfo;
+
+static uint32_t
+igb_rx_queue_desctyp_get(IGBCore *core, const E1000ERingInfo *r)
+{
+ return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK;
+}
+
+static bool
+igb_rx_use_ps_descriptor(IGBCore *core, const E1000ERingInfo *r)
+{
+ uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+ return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT ||
+ desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
static inline bool
igb_rss_enabled(IGBCore *core)
{
@@ -694,24 +717,15 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx)
return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0;
}
-typedef struct E1000E_RingInfo_st {
- int dbah;
- int dbal;
- int dlen;
- int dh;
- int dt;
- int idx;
-} E1000E_RingInfo;
-
static inline bool
-igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_empty(IGBCore *core, const E1000ERingInfo *r)
{
return core->mac[r->dh] == core->mac[r->dt] ||
core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN;
}
static inline uint64_t
-igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_base(IGBCore *core, const E1000ERingInfo *r)
{
uint64_t bah = core->mac[r->dbah];
uint64_t bal = core->mac[r->dbal];
@@ -720,13 +734,13 @@ igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
}
static inline uint64_t
-igb_ring_head_descr(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_head_descr(IGBCore *core, const E1000ERingInfo *r)
{
return igb_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh];
}
static inline void
-igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
+igb_ring_advance(IGBCore *core, const E1000ERingInfo *r, uint32_t count)
{
core->mac[r->dh] += count;
@@ -736,7 +750,7 @@ igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
}
static inline uint32_t
-igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_free_descr_num(IGBCore *core, const E1000ERingInfo *r)
{
trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
core->mac[r->dh], core->mac[r->dt]);
@@ -755,13 +769,13 @@ igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
}
static inline bool
-igb_ring_enabled(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_enabled(IGBCore *core, const E1000ERingInfo *r)
{
return core->mac[r->dlen] > 0;
}
typedef struct IGB_TxRing_st {
- const E1000E_RingInfo *i;
+ const E1000ERingInfo *i;
struct igb_tx *tx;
} IGB_TxRing;
@@ -774,7 +788,7 @@ igb_mq_queue_idx(int base_reg_idx, int reg_idx)
static inline void
igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
{
- static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
+ static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
{ TDBAH0, TDBAL0, TDLEN0, TDH0, TDT0, 0 },
{ TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 },
{ TDBAH2, TDBAL2, TDLEN2, TDH2, TDT2, 2 },
@@ -800,13 +814,13 @@ igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
}
typedef struct E1000E_RxRing_st {
- const E1000E_RingInfo *i;
+ const E1000ERingInfo *i;
} E1000E_RxRing;
static inline void
igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
{
- static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
+ static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
{ RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
{ RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 },
{ RDBAH2, RDBAL2, RDLEN2, RDH2, RDT2, 2 },
@@ -833,7 +847,7 @@ igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
static uint32_t
igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
union e1000_adv_tx_desc *tx_desc,
- const E1000E_RingInfo *txi)
+ const E1000ERingInfo *txi)
{
PCIDevice *d;
uint32_t cmd_type_len = le32_to_cpu(tx_desc->read.cmd_type_len);
@@ -866,7 +880,7 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
}
static inline bool
-igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+igb_tx_enabled(IGBCore *core, const E1000ERingInfo *txi)
{
bool vmdq = core->mac[MRQC] & 1;
uint16_t qn = txi->idx;
@@ -883,7 +897,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
PCIDevice *d;
dma_addr_t base;
union e1000_adv_tx_desc desc;
- const E1000E_RingInfo *txi = txr->i;
+ const E1000ERingInfo *txi = txr->i;
uint32_t eic = 0;
if (!igb_tx_enabled(core, txi)) {
@@ -918,7 +932,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
}
static uint32_t
-igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
+igb_rxbufsize(IGBCore *core, const E1000ERingInfo *r)
{
uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
uint32_t bsizepkt = srrctl & E1000_SRRCTL_BSIZEPKT_MASK;
@@ -930,7 +944,7 @@ igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
}
static bool
-igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size)
+igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size)
{
uint32_t bufs = igb_ring_free_descr_num(core, r);
uint32_t bufsize = igb_rxbufsize(core, r);
@@ -941,6 +955,14 @@ igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size)
bufsize;
}
+static uint32_t
+igb_rxhdrbufsize(IGBCore *core, const E1000ERingInfo *r)
+{
+ uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
+ return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >>
+ E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+}
+
void
igb_start_recv(IGBCore *core)
{
@@ -1225,21 +1247,77 @@ igb_read_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
}
static inline void
-igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
- hwaddr *buff_addr)
+igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+ hwaddr *buff_addr)
{
*buff_addr = le64_to_cpu(desc->read.pkt_addr);
}
static inline void
-igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
- hwaddr *buff_addr)
+igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+ hwaddr *buff_addr)
{
+ buff_addr[0] = le64_to_cpu(desc->read.hdr_addr);
+ buff_addr[1] = le64_to_cpu(desc->read.pkt_addr);
+}
+
+typedef struct IGBBAState {
+ uint16_t written[IGB_MAX_PS_BUFFERS];
+ uint8_t cur_idx;
+} IGBBAState;
+
+typedef struct IGBSplitDescriptorData {
+ bool sph;
+ bool hbo;
+ size_t hdr_len;
+} IGBSplitDescriptorData;
+
+typedef struct IGBPacketRxDMAState {
+ size_t size;
+ size_t total_size;
+ size_t ps_hdr_len;
+ size_t desc_size;
+ size_t desc_offset;
+ uint32_t rx_desc_packet_buf_size;
+ uint32_t rx_desc_header_buf_size;
+ struct iovec *iov;
+ size_t iov_ofs;
+ bool do_ps;
+ bool is_first;
+ IGBBAState bastate;
+ hwaddr ba[IGB_MAX_PS_BUFFERS];
+ IGBSplitDescriptorData ps_desc_data;
+} IGBPacketRxDMAState;
+
+static inline void
+igb_read_rx_descr(IGBCore *core,
+ union e1000_rx_desc_union *desc,
+ IGBPacketRxDMAState *pdma_st,
+ const E1000ERingInfo *r)
+{
+ uint32_t desc_type;
+
if (igb_rx_use_legacy_descriptor(core)) {
- igb_read_lgcy_rx_descr(core, &desc->legacy, buff_addr);
- } else {
- igb_read_adv_rx_descr(core, &desc->adv, buff_addr);
+ igb_read_lgcy_rx_descr(core, &desc->legacy, &pdma_st->ba[1]);
+ pdma_st->ba[0] = 0;
+ return;
}
+
+ /* advanced header split descriptor */
+ if (igb_rx_use_ps_descriptor(core, r)) {
+ igb_read_adv_rx_split_buf_descr(core, &desc->adv, &pdma_st->ba[0]);
+ return;
+ }
+
+ /* descriptor replication modes not supported */
+ desc_type = igb_rx_queue_desctyp_get(core, r);
+ if (desc_type != E1000_SRRCTL_DESCTYPE_ADV_ONEBUF) {
+ trace_igb_wrn_rx_desc_modes_not_supp(desc_type);
+ }
+
+ /* advanced single buffer descriptor */
+ igb_read_adv_rx_single_buf_descr(core, &desc->adv, &pdma_st->ba[1]);
+ pdma_st->ba[0] = 0;
}
static void
@@ -1281,15 +1359,11 @@ igb_verify_csum_in_sw(IGBCore *core,
}
static void
-igb_build_rx_metadata(IGBCore *core,
- struct NetRxPkt *pkt,
- bool is_eop,
- const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
- uint16_t *pkt_info, uint16_t *hdr_info,
- uint32_t *rss,
- uint32_t *status_flags,
- uint16_t *ip_id,
- uint16_t *vlan_tag)
+igb_build_rx_metadata_common(IGBCore *core,
+ struct NetRxPkt *pkt,
+ bool is_eop,
+ uint32_t *status_flags,
+ uint16_t *vlan_tag)
{
struct virtio_net_hdr *vhdr;
bool hasip4, hasip6, csum_valid;
@@ -1298,7 +1372,6 @@ igb_build_rx_metadata(IGBCore *core,
*status_flags = E1000_RXD_STAT_DD;
/* No additional metadata needed for non-EOP descriptors */
- /* TODO: EOP apply only to status so don't skip whole function. */
if (!is_eop) {
goto func_exit;
}
@@ -1315,64 +1388,6 @@ igb_build_rx_metadata(IGBCore *core,
trace_e1000e_rx_metadata_vlan(*vlan_tag);
}
- /* Packet parsing results */
- if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
- if (rss_info->enabled) {
- *rss = cpu_to_le32(rss_info->hash);
- trace_igb_rx_metadata_rss(*rss);
- }
- } else if (hasip4) {
- *status_flags |= E1000_RXD_STAT_IPIDV;
- *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
- trace_e1000e_rx_metadata_ip_id(*ip_id);
- }
-
- if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) {
- *status_flags |= E1000_RXD_STAT_ACK;
- trace_e1000e_rx_metadata_ack();
- }
-
- if (pkt_info) {
- *pkt_info = rss_info->enabled ? rss_info->type : 0;
-
- if (etqf < 8) {
- *pkt_info |= (BIT(11) | etqf) << 4;
- } else {
- if (hasip4) {
- *pkt_info |= E1000_ADVRXD_PKT_IP4;
- }
-
- if (hasip6) {
- *pkt_info |= E1000_ADVRXD_PKT_IP6;
- }
-
- switch (l4hdr_proto) {
- case ETH_L4_HDR_PROTO_TCP:
- *pkt_info |= E1000_ADVRXD_PKT_TCP;
- break;
-
- case ETH_L4_HDR_PROTO_UDP:
- *pkt_info |= E1000_ADVRXD_PKT_UDP;
- break;
-
- case ETH_L4_HDR_PROTO_SCTP:
- *pkt_info |= E1000_ADVRXD_PKT_SCTP;
- break;
-
- default:
- break;
- }
- }
- }
-
- if (hdr_info) {
- *hdr_info = 0;
- }
-
- if (ts) {
- *status_flags |= BIT(16);
- }
-
/* RX CSO information */
if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) {
trace_e1000e_rx_metadata_ipv6_sum_disabled();
@@ -1428,56 +1443,168 @@ func_exit:
static inline void
igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
struct NetRxPkt *pkt,
- const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
+ const E1000E_RSSInfo *rss_info,
uint16_t length)
{
- uint32_t status_flags, rss;
- uint16_t ip_id;
+ uint32_t status_flags;
assert(!rss_info->enabled);
+
+ memset(desc, 0, sizeof(*desc));
desc->length = cpu_to_le16(length);
- desc->csum = 0;
+ igb_build_rx_metadata_common(core, pkt, pkt != NULL,
+ &status_flags,
+ &desc->special);
- igb_build_rx_metadata(core, pkt, pkt != NULL,
- rss_info, etqf, ts,
- NULL, NULL, &rss,
- &status_flags, &ip_id,
- &desc->special);
desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24);
desc->status = (uint8_t) le32_to_cpu(status_flags);
}
+static bool
+igb_rx_ps_descriptor_split_always(IGBCore *core, const E1000ERingInfo *r)
+{
+ uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+ return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
+static uint16_t
+igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf)
+{
+ uint16_t pkt_type;
+ bool hasip4, hasip6;
+ EthL4HdrProto l4hdr_proto;
+
+ if (etqf < 8) {
+ pkt_type = BIT(11) | etqf;
+ return pkt_type;
+ }
+
+ net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+ if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) {
+ eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt);
+ pkt_type = ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E :
+ E1000_ADVRXD_PKT_IP6;
+ } else if (hasip4) {
+ pkt_type = E1000_ADVRXD_PKT_IP4;
+ } else {
+ pkt_type = 0;
+ }
+
+ switch (l4hdr_proto) {
+ case ETH_L4_HDR_PROTO_TCP:
+ pkt_type |= E1000_ADVRXD_PKT_TCP;
+ break;
+ case ETH_L4_HDR_PROTO_UDP:
+ pkt_type |= E1000_ADVRXD_PKT_UDP;
+ break;
+ case ETH_L4_HDR_PROTO_SCTP:
+ pkt_type |= E1000_ADVRXD_PKT_SCTP;
+ break;
+ default:
+ break;
+ }
+
+ return pkt_type;
+}
+
static inline void
igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
struct NetRxPkt *pkt,
const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
uint16_t length)
{
+ bool hasip4, hasip6;
+ EthL4HdrProto l4hdr_proto;
+ uint16_t rss_type = 0, pkt_type;
+ bool eop = (pkt != NULL);
+ uint32_t adv_desc_status_error = 0;
memset(&desc->wb, 0, sizeof(desc->wb));
desc->wb.upper.length = cpu_to_le16(length);
+ igb_build_rx_metadata_common(core, pkt, eop,
+ &desc->wb.upper.status_error,
+ &desc->wb.upper.vlan);
+
+ if (!eop) {
+ return;
+ }
+
+ net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+ if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
+ if (rss_info->enabled) {
+ desc->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash);
+ rss_type = rss_info->type;
+ trace_igb_rx_metadata_rss(desc->wb.lower.hi_dword.rss, rss_type);
+ }
+ } else if (hasip4) {
+ adv_desc_status_error |= E1000_RXD_STAT_IPIDV;
+ desc->wb.lower.hi_dword.csum_ip.ip_id =
+ cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
+ trace_e1000e_rx_metadata_ip_id(
+ desc->wb.lower.hi_dword.csum_ip.ip_id);
+ }
+
+ if (ts) {
+ adv_desc_status_error |= BIT(16);
+ }
+
+ pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf);
+ trace_e1000e_rx_metadata_pkt_type(pkt_type);
+ desc->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4));
+ desc->wb.upper.status_error |= cpu_to_le32(adv_desc_status_error);
+}
+
+static inline void
+igb_write_adv_ps_rx_descr(IGBCore *core,
+ union e1000_adv_rx_desc *desc,
+ struct NetRxPkt *pkt,
+ const E1000E_RSSInfo *rss_info,
+ const E1000ERingInfo *r,
+ uint16_t etqf,
+ bool ts,
+ IGBPacketRxDMAState *pdma_st)
+{
+ size_t pkt_len;
+ uint16_t hdr_info = 0;
- igb_build_rx_metadata(core, pkt, pkt != NULL,
- rss_info, etqf, ts,
- &desc->wb.lower.lo_dword.pkt_info,
- &desc->wb.lower.lo_dword.hdr_info,
- &desc->wb.lower.hi_dword.rss,
- &desc->wb.upper.status_error,
- &desc->wb.lower.hi_dword.csum_ip.ip_id,
- &desc->wb.upper.vlan);
+ if (pdma_st->do_ps) {
+ pkt_len = pdma_st->bastate.written[1];
+ } else {
+ pkt_len = pdma_st->bastate.written[0] + pdma_st->bastate.written[1];
+ }
+
+ igb_write_adv_rx_descr(core, desc, pkt, rss_info, etqf, ts, pkt_len);
+
+ hdr_info = (pdma_st->ps_desc_data.hdr_len << E1000_ADVRXD_HDR_LEN_OFFSET) &
+ E1000_ADVRXD_ADV_HDR_LEN_MASK;
+ hdr_info |= pdma_st->ps_desc_data.sph ? E1000_ADVRXD_HDR_SPH : 0;
+ desc->wb.lower.lo_dword.hdr_info = cpu_to_le16(hdr_info);
+
+ desc->wb.upper.status_error |= cpu_to_le32(
+ pdma_st->ps_desc_data.hbo ? E1000_ADVRXD_ST_ERR_HBO_OFFSET : 0);
}
static inline void
-igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
- struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info,
- uint16_t etqf, bool ts, uint16_t length)
+igb_write_rx_descr(IGBCore *core,
+ union e1000_rx_desc_union *desc,
+ struct NetRxPkt *pkt,
+ const E1000E_RSSInfo *rss_info,
+ uint16_t etqf,
+ bool ts,
+ IGBPacketRxDMAState *pdma_st,
+ const E1000ERingInfo *r)
{
if (igb_rx_use_legacy_descriptor(core)) {
igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info,
- etqf, ts, length);
+ pdma_st->bastate.written[1]);
+ } else if (igb_rx_use_ps_descriptor(core, r)) {
+ igb_write_adv_ps_rx_descr(core, &desc->adv, pkt, rss_info, r, etqf, ts,
+ pdma_st);
} else {
igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info,
- etqf, ts, length);
+ etqf, ts, pdma_st->bastate.written[1]);
}
}
@@ -1514,20 +1641,7 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr,
}
static void
-igb_write_to_rx_buffers(IGBCore *core,
- PCIDevice *d,
- hwaddr ba,
- uint16_t *written,
- const char *data,
- dma_addr_t data_len)
-{
- trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
- pci_dma_write(d, ba + *written, data, data_len);
- *written += data_len;
-}
-
-static void
-igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi,
size_t pkt_size, size_t pkt_fcs_size)
{
eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt);
@@ -1545,12 +1659,256 @@ igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
}
static inline bool
-igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi)
+igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi)
{
return igb_ring_free_descr_num(core, rxi) ==
((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16;
}
+static bool
+igb_do_ps(IGBCore *core,
+ const E1000ERingInfo *r,
+ struct NetRxPkt *pkt,
+ IGBPacketRxDMAState *pdma_st)
+{
+ bool hasip4, hasip6;
+ EthL4HdrProto l4hdr_proto;
+ bool fragment;
+ bool split_always;
+ size_t bheader_size;
+ size_t total_pkt_len;
+
+ if (!igb_rx_use_ps_descriptor(core, r)) {
+ return false;
+ }
+
+ total_pkt_len = net_rx_pkt_get_total_len(pkt);
+ bheader_size = igb_rxhdrbufsize(core, r);
+ split_always = igb_rx_ps_descriptor_split_always(core, r);
+ if (split_always && total_pkt_len <= bheader_size) {
+ pdma_st->ps_hdr_len = total_pkt_len;
+ pdma_st->ps_desc_data.hdr_len = total_pkt_len;
+ return true;
+ }
+
+ net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+ if (hasip4) {
+ fragment = net_rx_pkt_get_ip4_info(pkt)->fragment;
+ } else if (hasip6) {
+ fragment = net_rx_pkt_get_ip6_info(pkt)->fragment;
+ } else {
+ pdma_st->ps_desc_data.hdr_len = bheader_size;
+ goto header_not_handled;
+ }
+
+ if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) {
+ pdma_st->ps_desc_data.hdr_len = bheader_size;
+ goto header_not_handled;
+ }
+
+ /* no header splitting for SCTP */
+ if (!fragment && (l4hdr_proto == ETH_L4_HDR_PROTO_UDP ||
+ l4hdr_proto == ETH_L4_HDR_PROTO_TCP)) {
+ pdma_st->ps_hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt);
+ } else {
+ pdma_st->ps_hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt);
+ }
+
+ pdma_st->ps_desc_data.sph = true;
+ pdma_st->ps_desc_data.hdr_len = pdma_st->ps_hdr_len;
+
+ if (pdma_st->ps_hdr_len > bheader_size) {
+ pdma_st->ps_desc_data.hbo = true;
+ goto header_not_handled;
+ }
+
+ return true;
+
+header_not_handled:
+ if (split_always) {
+ pdma_st->ps_hdr_len = bheader_size;
+ return true;
+ }
+
+ return false;
+}
+
+static void
+igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size)
+{
+ if (pdma_st->do_ps && pdma_st->is_first) {
+ if (*size > pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len) {
+ *size = pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len;
+ }
+ } else {
+ if (*size > pdma_st->rx_desc_packet_buf_size) {
+ *size = pdma_st->rx_desc_packet_buf_size;
+ }
+ }
+}
+
+static inline void
+igb_write_hdr_frag_to_rx_buffers(IGBCore *core,
+ PCIDevice *d,
+ IGBPacketRxDMAState *pdma_st,
+ const char *data,
+ dma_addr_t data_len)
+{
+ assert(data_len <= pdma_st->rx_desc_header_buf_size -
+ pdma_st->bastate.written[0]);
+ pci_dma_write(d,
+ pdma_st->ba[0] + pdma_st->bastate.written[0],
+ data, data_len);
+ pdma_st->bastate.written[0] += data_len;
+ pdma_st->bastate.cur_idx = 1;
+}
+
+static void
+igb_write_header_to_rx_buffers(IGBCore *core,
+ struct NetRxPkt *pkt,
+ PCIDevice *d,
+ IGBPacketRxDMAState *pdma_st,
+ size_t *copy_size)
+{
+ size_t iov_copy;
+ size_t ps_hdr_copied = 0;
+
+ if (!pdma_st->is_first) {
+ /* Leave buffer 0 of each descriptor except first */
+ /* empty */
+ pdma_st->bastate.cur_idx = 1;
+ return;
+ }
+
+ do {
+ iov_copy = MIN(pdma_st->ps_hdr_len - ps_hdr_copied,
+ pdma_st->iov->iov_len - pdma_st->iov_ofs);
+
+ igb_write_hdr_frag_to_rx_buffers(core, d, pdma_st,
+ pdma_st->iov->iov_base,
+ iov_copy);
+
+ *copy_size -= iov_copy;
+ ps_hdr_copied += iov_copy;
+
+ pdma_st->iov_ofs += iov_copy;
+ if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+ pdma_st->iov++;
+ pdma_st->iov_ofs = 0;
+ }
+ } while (ps_hdr_copied < pdma_st->ps_hdr_len);
+
+ pdma_st->is_first = false;
+}
+
+static void
+igb_write_payload_frag_to_rx_buffers(IGBCore *core,
+ PCIDevice *d,
+ IGBPacketRxDMAState *pdma_st,
+ const char *data,
+ dma_addr_t data_len)
+{
+ while (data_len > 0) {
+ assert(pdma_st->bastate.cur_idx < IGB_MAX_PS_BUFFERS);
+
+ uint32_t cur_buf_bytes_left =
+ pdma_st->rx_desc_packet_buf_size -
+ pdma_st->bastate.written[pdma_st->bastate.cur_idx];
+ uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left);
+
+ trace_igb_rx_desc_buff_write(
+ pdma_st->bastate.cur_idx,
+ pdma_st->ba[pdma_st->bastate.cur_idx],
+ pdma_st->bastate.written[pdma_st->bastate.cur_idx],
+ data,
+ bytes_to_write);
+
+ pci_dma_write(d,
+ pdma_st->ba[pdma_st->bastate.cur_idx] +
+ pdma_st->bastate.written[pdma_st->bastate.cur_idx],
+ data, bytes_to_write);
+
+ pdma_st->bastate.written[pdma_st->bastate.cur_idx] += bytes_to_write;
+ data += bytes_to_write;
+ data_len -= bytes_to_write;
+
+ if (pdma_st->bastate.written[pdma_st->bastate.cur_idx] ==
+ pdma_st->rx_desc_packet_buf_size) {
+ pdma_st->bastate.cur_idx++;
+ }
+ }
+}
+
+static void
+igb_write_payload_to_rx_buffers(IGBCore *core,
+ struct NetRxPkt *pkt,
+ PCIDevice *d,
+ IGBPacketRxDMAState *pdma_st,
+ size_t *copy_size)
+{
+ static const uint32_t fcs_pad;
+ size_t iov_copy;
+
+ /* Copy packet payload */
+ while (*copy_size) {
+ iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs);
+ igb_write_payload_frag_to_rx_buffers(core, d,
+ pdma_st,
+ pdma_st->iov->iov_base +
+ pdma_st->iov_ofs,
+ iov_copy);
+
+ *copy_size -= iov_copy;
+ pdma_st->iov_ofs += iov_copy;
+ if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+ pdma_st->iov++;
+ pdma_st->iov_ofs = 0;
+ }
+ }
+
+ if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) {
+ /* Simulate FCS checksum presence in the last descriptor */
+ igb_write_payload_frag_to_rx_buffers(core, d,
+ pdma_st,
+ (const char *) &fcs_pad,
+ e1000x_fcs_len(core->mac));
+ }
+}
+
+static void
+igb_write_to_rx_buffers(IGBCore *core,
+ struct NetRxPkt *pkt,
+ PCIDevice *d,
+ IGBPacketRxDMAState *pdma_st)
+{
+ size_t copy_size;
+
+ if (!(pdma_st->ba)[1] || (pdma_st->do_ps && !(pdma_st->ba[0]))) {
+ /* as per intel docs; skip descriptors with null buf addr */
+ trace_e1000e_rx_null_descriptor();
+ return;
+ }
+
+ if (pdma_st->desc_offset >= pdma_st->size) {
+ return;
+ }
+
+ pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset;
+ igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size);
+ copy_size = pdma_st->size - pdma_st->desc_offset;
+ igb_truncate_to_descriptor_size(pdma_st, &copy_size);
+
+ /* For PS mode copy the packet header first */
+ if (pdma_st->do_ps) {
+ igb_write_header_to_rx_buffers(core, pkt, d, pdma_st, &copy_size);
+ } else {
+ pdma_st->bastate.cur_idx = 1;
+ }
+
+ igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, &copy_size);
+}
+
static void
igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
const E1000E_RxRing *rxr,
@@ -1560,95 +1918,61 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
PCIDevice *d;
dma_addr_t base;
union e1000_rx_desc_union desc;
- size_t desc_size;
- size_t desc_offset = 0;
- size_t iov_ofs = 0;
-
- struct iovec *iov = net_rx_pkt_get_iovec(pkt);
- size_t size = net_rx_pkt_get_total_len(pkt);
- size_t total_size = size + e1000x_fcs_len(core->mac);
- const E1000E_RingInfo *rxi = rxr->i;
- size_t bufsize = igb_rxbufsize(core, rxi);
-
+ const E1000ERingInfo *rxi;
+ size_t rx_desc_len;
+
+ IGBPacketRxDMAState pdma_st = {0};
+ pdma_st.is_first = true;
+ pdma_st.size = net_rx_pkt_get_total_len(pkt);
+ pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac);
+
+ rxi = rxr->i;
+ rx_desc_len = core->rx_desc_len;
+ pdma_st.rx_desc_packet_buf_size = igb_rxbufsize(core, rxi);
+ pdma_st.rx_desc_header_buf_size = igb_rxhdrbufsize(core, rxi);
+ pdma_st.iov = net_rx_pkt_get_iovec(pkt);
d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8);
if (!d) {
d = core->owner;
}
+ pdma_st.do_ps = igb_do_ps(core, rxi, pkt, &pdma_st);
+
do {
- hwaddr ba;
- uint16_t written = 0;
+ memset(&pdma_st.bastate, 0, sizeof(IGBBAState));
bool is_last = false;
- desc_size = total_size - desc_offset;
-
- if (desc_size > bufsize) {
- desc_size = bufsize;
- }
-
if (igb_ring_empty(core, rxi)) {
return;
}
base = igb_ring_head_descr(core, rxi);
+ pci_dma_read(d, base, &desc, rx_desc_len);
+ trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len);
- pci_dma_read(d, base, &desc, core->rx_desc_len);
-
- trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len);
-
- igb_read_rx_descr(core, &desc, &ba);
-
- if (ba) {
- if (desc_offset < size) {
- static const uint32_t fcs_pad;
- size_t iov_copy;
- size_t copy_size = size - desc_offset;
- if (copy_size > bufsize) {
- copy_size = bufsize;
- }
-
- /* Copy packet payload */
- while (copy_size) {
- iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
+ igb_read_rx_descr(core, &desc, &pdma_st, rxi);
- igb_write_to_rx_buffers(core, d, ba, &written,
- iov->iov_base + iov_ofs, iov_copy);
-
- copy_size -= iov_copy;
- iov_ofs += iov_copy;
- if (iov_ofs == iov->iov_len) {
- iov++;
- iov_ofs = 0;
- }
- }
-
- if (desc_offset + desc_size >= total_size) {
- /* Simulate FCS checksum presence in the last descriptor */
- igb_write_to_rx_buffers(core, d, ba, &written,
- (const char *) &fcs_pad, e1000x_fcs_len(core->mac));
- }
- }
- } else { /* as per intel docs; skip descriptors with null buf addr */
- trace_e1000e_rx_null_descriptor();
- }
- desc_offset += desc_size;
- if (desc_offset >= total_size) {
+ igb_write_to_rx_buffers(core, pkt, d, &pdma_st);
+ pdma_st.desc_offset += pdma_st.desc_size;
+ if (pdma_st.desc_offset >= pdma_st.total_size) {
is_last = true;
}
- igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL,
- rss_info, etqf, ts, written);
- igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len);
-
- igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN);
-
- } while (desc_offset < total_size);
+ igb_write_rx_descr(core, &desc,
+ is_last ? pkt : NULL,
+ rss_info,
+ etqf, ts,
+ &pdma_st,
+ rxi);
+ igb_pci_dma_write_rx_desc(core, d, base, &desc, rx_desc_len);
+ igb_ring_advance(core, rxi, rx_desc_len / E1000_MIN_RX_DESC_LEN);
+ } while (pdma_st.desc_offset < pdma_st.total_size);
- igb_update_rx_stats(core, rxi, size, total_size);
+ igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size);
}
static bool
-igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+igb_rx_strip_vlan(IGBCore *core, const E1000ERingInfo *rxi)
{
if (core->mac[MRQC] & 1) {
uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
@@ -2753,7 +3077,7 @@ igb_update_rx_offloads(IGBCore *core)
if (core->has_vnet) {
qemu_set_offload(qemu_get_queue(core->owner_nic)->peer,
- cso_state, 0, 0, 0, 0);
+ cso_state, 0, 0, 0, 0, 0, 0);
}
}
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index 82ff195..ed7427b 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -452,6 +452,7 @@ union e1000_adv_rx_desc {
#define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00
#define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */
#define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000
+#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000
#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
#define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000
#define E1000_SRRCTL_DROP_EN 0x80000000
@@ -692,11 +693,20 @@ union e1000_adv_rx_desc {
#define E1000_STATUS_NUM_VFS_SHIFT 14
-#define E1000_ADVRXD_PKT_IP4 BIT(4)
-#define E1000_ADVRXD_PKT_IP6 BIT(6)
-#define E1000_ADVRXD_PKT_TCP BIT(8)
-#define E1000_ADVRXD_PKT_UDP BIT(9)
-#define E1000_ADVRXD_PKT_SCTP BIT(10)
+#define E1000_ADVRXD_PKT_IP4 BIT(0)
+#define E1000_ADVRXD_PKT_IP6 BIT(2)
+#define E1000_ADVRXD_PKT_IP6E BIT(3)
+#define E1000_ADVRXD_PKT_TCP BIT(4)
+#define E1000_ADVRXD_PKT_UDP BIT(5)
+#define E1000_ADVRXD_PKT_SCTP BIT(6)
+
+#define IGB_MAX_PS_BUFFERS 2
+
+#define E1000_ADVRXD_HDR_LEN_OFFSET (21 - 16)
+#define E1000_ADVRXD_ADV_HDR_LEN_MASK ((BIT(10) - 1) << \
+ E1000_ADVRXD_HDR_LEN_OFFSET)
+#define E1000_ADVRXD_HDR_SPH BIT(15)
+#define E1000_ADVRXD_ST_ERR_HBO_OFFSET BIT(3 + 20)
static inline uint8_t igb_ivar_entry_rx(uint8_t i)
{
diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
index dfe4754..5e16056 100644
--- a/hw/net/rocker/rocker_of_dpa.c
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -1043,7 +1043,7 @@ static void of_dpa_flow_ig_tbl(OfDpaFlowContext *fc, uint32_t tbl_id)
static ssize_t of_dpa_ig(World *world, uint32_t pport,
const struct iovec *iov, int iovcnt)
{
- struct iovec iov_copy[iovcnt + 2];
+ g_autofree struct iovec *iov_copy = g_new(struct iovec, iovcnt + 2);
OfDpaFlowContext fc = {
.of_dpa = world_private(world),
.in_pport = pport,
diff --git a/hw/net/trace-events b/hw/net/trace-events
index 6b5ba66..3abfd65 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -278,9 +278,9 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
-igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
+igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer %u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
-igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X"
+igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X"
igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled"
igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x"
@@ -295,6 +295,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x"
igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x"
igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x"
+igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor type: %d"
+
# igbvf.c
igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 6b958d6..57427a3 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -78,6 +78,9 @@ static const int user_feature_bits[] = {
VIRTIO_F_RING_RESET,
VIRTIO_NET_F_RSS,
VIRTIO_NET_F_HASH_REPORT,
+ VIRTIO_NET_F_GUEST_USO4,
+ VIRTIO_NET_F_GUEST_USO6,
+ VIRTIO_NET_F_HOST_USO,
/* This bit implies RARP isn't sent by QEMU out of band */
VIRTIO_NET_F_GUEST_ANNOUNCE,
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7102ec4..bd0ead9 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -659,6 +659,15 @@ static int peer_has_ufo(VirtIONet *n)
return n->has_ufo;
}
+static int peer_has_uso(VirtIONet *n)
+{
+ if (!peer_has_vnet_hdr(n)) {
+ return 0;
+ }
+
+ return qemu_has_uso(qemu_get_queue(n->nic)->peer);
+}
+
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
int version_1, int hash_report)
{
@@ -796,6 +805,10 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
+ virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+ virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+ virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+
virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT);
}
@@ -804,6 +817,12 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
}
+ if (!peer_has_uso(n)) {
+ virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO);
+ virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4);
+ virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6);
+ }
+
if (!get_vhost_net(nc->peer)) {
return features;
}
@@ -859,17 +878,21 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n)
!!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
!!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
!!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
+ !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)),
+ !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)),
+ !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)));
}
-static uint64_t virtio_net_guest_offloads_by_features(uint32_t features)
+static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
static const uint64_t guest_offloads_mask =
(1ULL << VIRTIO_NET_F_GUEST_CSUM) |
(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
(1ULL << VIRTIO_NET_F_GUEST_TSO6) |
(1ULL << VIRTIO_NET_F_GUEST_ECN) |
- (1ULL << VIRTIO_NET_F_GUEST_UFO);
+ (1ULL << VIRTIO_NET_F_GUEST_UFO) |
+ (1ULL << VIRTIO_NET_F_GUEST_USO4) |
+ (1ULL << VIRTIO_NET_F_GUEST_USO6);
return guest_offloads_mask & features;
}
@@ -3922,6 +3945,12 @@ static Property virtio_net_properties[] = {
DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
+ DEFINE_PROP_BIT64("guest_uso4", VirtIONet, host_features,
+ VIRTIO_NET_F_GUEST_USO4, true),
+ DEFINE_PROP_BIT64("guest_uso6", VirtIONet, host_features,
+ VIRTIO_NET_F_GUEST_USO6, true),
+ DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features,
+ VIRTIO_NET_F_HOST_USO, true),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 3fb1087..226c077 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1341,6 +1341,8 @@ static void vmxnet3_update_features(VMXNET3State *s)
s->lro_supported,
s->lro_supported,
0,
+ 0,
+ 0,
0);
}
}