From 3af9187fc6caaf415ab9c0c6d92c9678f65cb17f Mon Sep 17 00:00:00 2001 From: Prasad J Pandit Date: Thu, 7 Apr 2016 15:56:02 +0530 Subject: net: mipsnet: check packet length against buffer When receiving packets over MIPSnet network device, it uses receive buffer of size 1514 bytes. In case the controller accepts large(MTU) packets, it could lead to memory corruption. Add check to avoid it. Reported by: Oleksandr Bazhaniuk Signed-off-by: Prasad J Pandit Signed-off-by: Jason Wang --- hw/net/mipsnet.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'hw') diff --git a/hw/net/mipsnet.c b/hw/net/mipsnet.c index 740cd98..cf8b823 100644 --- a/hw/net/mipsnet.c +++ b/hw/net/mipsnet.c @@ -83,6 +83,9 @@ static ssize_t mipsnet_receive(NetClientState *nc, const uint8_t *buf, size_t si if (!mipsnet_can_receive(nc)) return 0; + if (size >= sizeof(s->rx_buffer)) { + return 0; + } s->busy = 1; /* Just accept everything. */ -- cgit v1.1 From 3bdfaabbcf0e91d16e90691cf8bd4a836c67279a Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:31 +0300 Subject: msix: make msix_clr_pending() visible for clients This function will be used by e1000e device code. Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/pci/msix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/pci/msix.c b/hw/pci/msix.c index b75f0e9..0ec1cb1 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -72,7 +72,7 @@ void msix_set_pending(PCIDevice *dev, unsigned int vector) *msix_pending_byte(dev, vector) |= msix_pending_mask(vector); } -static void msix_clr_pending(PCIDevice *dev, int vector) +void msix_clr_pending(PCIDevice *dev, int vector) { *msix_pending_byte(dev, vector) &= ~msix_pending_mask(vector); } -- cgit v1.1 From 6383292ac884f01be609f69d888f54c099af622e Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:33 +0300 Subject: pcie: Add support for PCIe CAP v1 Added support for PCIe CAP v1, while reusing some of the existing v2 infrastructure. Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/pci/pcie.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 18 deletions(-) (limited to 'hw') diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 728386a..24cfc3b 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -43,26 +43,15 @@ /*************************************************************************** * pci express capability helper functions */ -int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) -{ - int pos; - uint8_t *exp_cap; - - assert(pci_is_express(dev)); - - pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset, - PCI_EXP_VER2_SIZEOF); - if (pos < 0) { - return pos; - } - dev->exp.exp_cap = pos; - exp_cap = dev->config + pos; +static void +pcie_cap_v1_fill(uint8_t *exp_cap, uint8_t port, uint8_t type, uint8_t version) +{ /* capability register - interrupt message number defaults to 0 */ + interrupt message number defaults to 0 */ pci_set_word(exp_cap + PCI_EXP_FLAGS, ((type << PCI_EXP_FLAGS_TYPE_SHIFT) & PCI_EXP_FLAGS_TYPE) | - PCI_EXP_FLAGS_VER2); + version); /* device capability register * table 7-12: @@ -81,7 +70,27 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) pci_set_word(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25 |PCI_EXP_LNKSTA_DLLLA); +} + +int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) +{ + /* PCIe cap v2 init */ + int pos; + uint8_t *exp_cap; + + assert(pci_is_express(dev)); + + pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset, PCI_EXP_VER2_SIZEOF); + if (pos < 0) { + return pos; + } + dev->exp.exp_cap = pos; + exp_cap = dev->config + pos; + + /* Filling values common with v1 */ + pcie_cap_v1_fill(exp_cap, port, type, PCI_EXP_FLAGS_VER2); + /* Filling v2 specific values */ pci_set_long(exp_cap + PCI_EXP_DEVCAP2, PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP); @@ -89,7 +98,29 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset, uint8_t type, uint8_t port) return pos; } -int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset) +int pcie_cap_v1_init(PCIDevice *dev, uint8_t offset, uint8_t type, + uint8_t port) +{ + /* PCIe cap v1 init */ + int pos; + uint8_t *exp_cap; + + assert(pci_is_express(dev)); + + pos = pci_add_capability(dev, PCI_CAP_ID_EXP, offset, PCI_EXP_VER1_SIZEOF); + if (pos < 0) { + return pos; + } + dev->exp.exp_cap = pos; + exp_cap = dev->config + pos; + + pcie_cap_v1_fill(exp_cap, port, type, PCI_EXP_FLAGS_VER1); + + return pos; +} + +static int +pcie_endpoint_cap_common_init(PCIDevice *dev, uint8_t offset, uint8_t cap_size) { uint8_t type = PCI_EXP_TYPE_ENDPOINT; @@ -102,7 +133,19 @@ int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset) type = PCI_EXP_TYPE_RC_END; } - return pcie_cap_init(dev, offset, type, 0); + return (cap_size == PCI_EXP_VER1_SIZEOF) + ? pcie_cap_v1_init(dev, offset, type, 0) + : pcie_cap_init(dev, offset, type, 0); +} + +int pcie_endpoint_cap_init(PCIDevice *dev, uint8_t offset) +{ + return pcie_endpoint_cap_common_init(dev, offset, PCI_EXP_VER2_SIZEOF); +} + +int pcie_endpoint_cap_v1_init(PCIDevice *dev, uint8_t offset) +{ + return pcie_endpoint_cap_common_init(dev, offset, PCI_EXP_VER1_SIZEOF); } void pcie_cap_exit(PCIDevice *dev) @@ -110,6 +153,11 @@ void pcie_cap_exit(PCIDevice *dev) pci_del_capability(dev, PCI_CAP_ID_EXP, PCI_EXP_VER2_SIZEOF); } +void pcie_cap_v1_exit(PCIDevice *dev) +{ + pci_del_capability(dev, PCI_CAP_ID_EXP, PCI_EXP_VER1_SIZEOF); +} + uint8_t pcie_cap_get_type(const PCIDevice *dev) { uint32_t pos = dev->exp.exp_cap; -- cgit v1.1 From b56b9285e4b58a0b8fe8b011d48dbf7e2afba785 Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:34 +0300 Subject: pcie: Introduce function for DSN capability creation Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/pci/pcie.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'hw') diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 24cfc3b..9599fde 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -695,3 +695,13 @@ void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn) offset, PCI_ARI_SIZEOF); pci_set_long(dev->config + offset + PCI_ARI_CAP, (nextfn & 0xff) << 8); } + +void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num) +{ + static const int pci_dsn_ver = 1; + static const int pci_dsn_cap = 4; + + pcie_add_capability(dev, PCI_EXT_CAP_ID_DSN, pci_dsn_ver, offset, + PCI_EXT_CAP_DSN_SIZEOF); + pci_set_quad(dev->config + offset + pci_dsn_cap, ser_num); +} -- cgit v1.1 From a4b387e623f4e17bd4784cf76e3f8c15800365dc Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:35 +0300 Subject: vmxnet3: Use generic function for DSN capability definition Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/vmxnet3.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'hw') diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 20f26b7..586e915 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -2255,9 +2255,9 @@ static const MemoryRegionOps b1_ops = { }, }; -static uint8_t *vmxnet3_device_serial_num(VMXNET3State *s) +static uint64_t vmxnet3_device_serial_num(VMXNET3State *s) { - static uint64_t dsn_payload; + uint64_t dsn_payload; uint8_t *dsnp = (uint8_t *)&dsn_payload; dsnp[0] = 0xfe; @@ -2268,7 +2268,7 @@ static uint8_t *vmxnet3_device_serial_num(VMXNET3State *s) dsnp[5] = s->conf.macaddr.a[1]; dsnp[6] = s->conf.macaddr.a[2]; dsnp[7] = 0xff; - return dsnp; + return dsn_payload; } static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) @@ -2313,10 +2313,8 @@ static void vmxnet3_pci_realize(PCIDevice *pci_dev, Error **errp) pcie_endpoint_cap_init(pci_dev, VMXNET3_EXP_EP_OFFSET); } - pcie_add_capability(pci_dev, PCI_EXT_CAP_ID_DSN, 0x1, - VMXNET3_DSN_OFFSET, PCI_EXT_CAP_DSN_SIZEOF); - memcpy(pci_dev->config + VMXNET3_DSN_OFFSET + 4, - vmxnet3_device_serial_num(s), sizeof(uint64_t)); + pcie_dev_ser_num_init(pci_dev, VMXNET3_DSN_OFFSET, + vmxnet3_device_serial_num(s)); } register_savevm(dev, "vmxnet3-msix", -1, 1, -- cgit v1.1 From ab647872011205a18241ec1f8ce7dd57b7da9989 Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:38 +0300 Subject: vmxnet3: Use common MAC address tracing macros Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/vmxnet3.c | 8 ++++---- hw/net/vmxnet_debug.h | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'hw') diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 586e915..200d2ea 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -474,7 +474,7 @@ static void vmxnet3_set_variable_mac(VMXNET3State *s, uint32_t h, uint32_t l) s->conf.macaddr.a[4] = VMXNET3_GET_BYTE(h, 0); s->conf.macaddr.a[5] = VMXNET3_GET_BYTE(h, 1); - VMW_CFPRN("Variable MAC: " VMXNET_MF, VMXNET_MA(s->conf.macaddr.a)); + VMW_CFPRN("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a)); qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); } @@ -1219,7 +1219,7 @@ static void vmxnet3_reset_interrupt_states(VMXNET3State *s) static void vmxnet3_reset_mac(VMXNET3State *s) { memcpy(&s->conf.macaddr.a, &s->perm_mac.a, sizeof(s->perm_mac.a)); - VMW_CFPRN("MAC address set to: " VMXNET_MF, VMXNET_MA(s->conf.macaddr.a)); + VMW_CFPRN("MAC address set to: " MAC_FMT, MAC_ARG(s->conf.macaddr.a)); } static void vmxnet3_deactivate_device(VMXNET3State *s) @@ -1301,7 +1301,7 @@ static void vmxnet3_update_mcast_filters(VMXNET3State *s) cpu_physical_memory_read(mcast_list_pa, s->mcast_list, list_bytes); VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len); for (i = 0; i < s->mcast_list_len; i++) { - VMW_CFPRN("\t" VMXNET_MF, VMXNET_MA(s->mcast_list[i].a)); + VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a)); } } } @@ -2102,7 +2102,7 @@ static void vmxnet3_net_init(VMXNET3State *s) s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP; - VMW_CFPRN("Permanent MAC: " VMXNET_MF, VMXNET_MA(s->perm_mac.a)); + VMW_CFPRN("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a)); s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf, object_get_typename(OBJECT(s)), diff --git a/hw/net/vmxnet_debug.h b/hw/net/vmxnet_debug.h index 96495db..5aab00b 100644 --- a/hw/net/vmxnet_debug.h +++ b/hw/net/vmxnet_debug.h @@ -142,7 +142,4 @@ } \ } while (0) -#define VMXNET_MF "%02X:%02X:%02X:%02X:%02X:%02X" -#define VMXNET_MA(a) (a)[0], (a)[1], (a)[2], (a)[3], (a)[4], (a)[5] - #endif /* _QEMU_VMXNET3_DEBUG_H */ -- cgit v1.1 From 605d52e62fc94ec28408890fc682d4bdd7d8a36c Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:39 +0300 Subject: net_pkt: Name vmxnet3 packet abstractions more generic This patch drops "vmx" prefix from packet abstractions names to emphasize the fact they are generic and not tied to any specific network device. These abstractions will be reused by e1000e emulation implementation introduced by following patches so their names need generalization. This patch (except renamed files, adjusted comments and changes in MAINTAINTERS) was produced by: git grep -lz 'vmxnet_tx_pkt' | xargs -0 perl -i'' -pE "s/vmxnet_tx_pkt/net_tx_pkt/g" git grep -lz 'vmxnet_rx_pkt' | xargs -0 perl -i'' -pE "s/vmxnet_rx_pkt/net_rx_pkt/g" git grep -lz 'VmxnetTxPkt' | xargs -0 perl -i'' -pE "s/VmxnetTxPkt/NetTxPkt/g" git grep -lz 'VMXNET_TX_PKT' | xargs -0 perl -i'' -pE "s/VMXNET_TX_PKT/NET_TX_PKT/g" git grep -lz 'VmxnetRxPkt' | xargs -0 perl -i'' -pE "s/VmxnetRxPkt/NetRxPkt/g" git grep -lz 'VMXNET_RX_PKT' | xargs -0 perl -i'' -pE "s/VMXNET_RX_PKT/NET_RX_PKT/g" sed -ie 's/VMXNET_/NET_/g' hw/net/vmxnet_rx_pkt.c sed -ie 's/VMXNET_/NET_/g' hw/net/vmxnet_tx_pkt.c Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/Makefile.objs | 2 +- hw/net/net_rx_pkt.c | 187 ++++++++++++++++ hw/net/net_rx_pkt.h | 174 +++++++++++++++ hw/net/net_tx_pkt.c | 581 +++++++++++++++++++++++++++++++++++++++++++++++++ hw/net/net_tx_pkt.h | 146 +++++++++++++ hw/net/vmxnet3.c | 88 ++++---- hw/net/vmxnet_rx_pkt.c | 187 ---------------- hw/net/vmxnet_rx_pkt.h | 174 --------------- hw/net/vmxnet_tx_pkt.c | 581 ------------------------------------------------- hw/net/vmxnet_tx_pkt.h | 146 ------------- 10 files changed, 1133 insertions(+), 1133 deletions(-) create mode 100644 hw/net/net_rx_pkt.c create mode 100644 hw/net/net_rx_pkt.h create mode 100644 hw/net/net_tx_pkt.c create mode 100644 hw/net/net_tx_pkt.h delete mode 100644 hw/net/vmxnet_rx_pkt.c delete mode 100644 hw/net/vmxnet_rx_pkt.h delete mode 100644 hw/net/vmxnet_tx_pkt.c delete mode 100644 hw/net/vmxnet_tx_pkt.h (limited to 'hw') diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs index 64d0449..527d264 100644 --- a/hw/net/Makefile.objs +++ b/hw/net/Makefile.objs @@ -8,7 +8,7 @@ common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o common-obj-$(CONFIG_E1000_PCI) += e1000.o common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o -common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet_tx_pkt.o vmxnet_rx_pkt.o +common-obj-$(CONFIG_VMXNET3_PCI) += net_tx_pkt.o net_rx_pkt.o common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o common-obj-$(CONFIG_SMC91C111) += smc91c111.o diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c new file mode 100644 index 0000000..8a4f29f --- /dev/null +++ b/hw/net/net_rx_pkt.c @@ -0,0 +1,187 @@ +/* + * QEMU RX packets abstractions + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman + * Tamir Shomer + * Yan Vugenfirer + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "net_rx_pkt.h" +#include "net/eth.h" +#include "qemu-common.h" +#include "qemu/iov.h" +#include "net/checksum.h" +#include "net/tap.h" + +/* + * RX packet may contain up to 2 fragments - rebuilt eth header + * in case of VLAN tag stripping + * and payload received from QEMU - in any case + */ +#define NET_MAX_RX_PACKET_FRAGMENTS (2) + +struct NetRxPkt { + struct virtio_net_hdr virt_hdr; + uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN]; + struct iovec vec[NET_MAX_RX_PACKET_FRAGMENTS]; + uint16_t vec_len; + uint32_t tot_len; + uint16_t tci; + bool vlan_stripped; + bool has_virt_hdr; + eth_pkt_types_e packet_type; + + /* Analysis results */ + bool isip4; + bool isip6; + bool isudp; + bool istcp; +}; + +void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr) +{ + struct NetRxPkt *p = g_malloc0(sizeof *p); + p->has_virt_hdr = has_virt_hdr; + *pkt = p; +} + +void net_rx_pkt_uninit(struct NetRxPkt *pkt) +{ + g_free(pkt); +} + +struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt) +{ + assert(pkt); + return &pkt->virt_hdr; +} + +void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, + size_t len, bool strip_vlan) +{ + uint16_t tci = 0; + uint16_t ploff; + assert(pkt); + pkt->vlan_stripped = false; + + if (strip_vlan) { + pkt->vlan_stripped = eth_strip_vlan(data, pkt->ehdr_buf, &ploff, &tci); + } + + if (pkt->vlan_stripped) { + pkt->vec[0].iov_base = pkt->ehdr_buf; + pkt->vec[0].iov_len = ploff - sizeof(struct vlan_header); + pkt->vec[1].iov_base = (uint8_t *) data + ploff; + pkt->vec[1].iov_len = len - ploff; + pkt->vec_len = 2; + pkt->tot_len = len - ploff + sizeof(struct eth_header); + } else { + pkt->vec[0].iov_base = (void *)data; + pkt->vec[0].iov_len = len; + pkt->vec_len = 1; + pkt->tot_len = len; + } + + pkt->tci = tci; +} + +void net_rx_pkt_dump(struct NetRxPkt *pkt) +{ +#ifdef NET_RX_PKT_DEBUG + NetRxPkt *pkt = (NetRxPkt *)pkt; + assert(pkt); + + printf("RX PKT: tot_len: %d, vlan_stripped: %d, vlan_tag: %d\n", + pkt->tot_len, pkt->vlan_stripped, pkt->tci); +#endif +} + +void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, + eth_pkt_types_e packet_type) +{ + assert(pkt); + + pkt->packet_type = packet_type; + +} + +eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->packet_type; +} + +size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->tot_len; +} + +void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, + size_t len) +{ + assert(pkt); + + eth_get_protocols(data, len, &pkt->isip4, &pkt->isip6, + &pkt->isudp, &pkt->istcp); +} + +void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, + bool *isip4, bool *isip6, + bool *isudp, bool *istcp) +{ + assert(pkt); + + *isip4 = pkt->isip4; + *isip6 = pkt->isip6; + *isudp = pkt->isudp; + *istcp = pkt->istcp; +} + +struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->vec; +} + +void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, + struct virtio_net_hdr *vhdr) +{ + assert(pkt); + + memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); +} + +bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->vlan_stripped; +} + +bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->has_virt_hdr; +} + +uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->tci; +} diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h new file mode 100644 index 0000000..897330a --- /dev/null +++ b/hw/net/net_rx_pkt.h @@ -0,0 +1,174 @@ +/* + * QEMU RX packets abstraction + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman + * Tamir Shomer + * Yan Vugenfirer + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef NET_RX_PKT_H +#define NET_RX_PKT_H + +#include "net/eth.h" + +/* defines to enable packet dump functions */ +/*#define NET_RX_PKT_DEBUG*/ + +struct NetRxPkt; + +/** + * Clean all rx packet resources + * + * @pkt: packet + * + */ +void net_rx_pkt_uninit(struct NetRxPkt *pkt); + +/** + * Init function for rx packet functionality + * + * @pkt: packet pointer + * @has_virt_hdr: device uses virtio header + * + */ +void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr); + +/** + * returns total length of data attached to rx context + * + * @pkt: packet + * + * Return: nothing + * + */ +size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt); + +/** + * parse and set packet analysis results + * + * @pkt: packet + * @data: pointer to the data buffer to be parsed + * @len: data length + * + */ +void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, + size_t len); + +/** + * fetches packet analysis results + * + * @pkt: packet + * @isip4: whether the packet given is IPv4 + * @isip6: whether the packet given is IPv6 + * @isudp: whether the packet given is UDP + * @istcp: whether the packet given is TCP + * + */ +void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, + bool *isip4, bool *isip6, + bool *isudp, bool *istcp); + +/** + * returns virtio header stored in rx context + * + * @pkt: packet + * @ret: virtio header + * + */ +struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt); + +/** + * returns packet type + * + * @pkt: packet + * @ret: packet type + * + */ +eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt); + +/** + * returns vlan tag + * + * @pkt: packet + * @ret: VLAN tag + * + */ +uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt); + +/** + * tells whether vlan was stripped from the packet + * + * @pkt: packet + * @ret: VLAN stripped sign + * + */ +bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt); + +/** + * notifies caller if the packet has virtio header + * + * @pkt: packet + * @ret: true if packet has virtio header, false otherwize + * + */ +bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt); + +/** + * attach data to rx packet + * + * @pkt: packet + * @data: pointer to the data buffer + * @len: data length + * @strip_vlan: should the module strip vlan from data + * + */ +void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, + size_t len, bool strip_vlan); + +/** + * returns io vector that holds the attached data + * + * @pkt: packet + * @ret: pointer to IOVec + * + */ +struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt); + +/** + * prints rx packet data if debug is enabled + * + * @pkt: packet + * + */ +void net_rx_pkt_dump(struct NetRxPkt *pkt); + +/** + * copy passed vhdr data to packet context + * + * @pkt: packet + * @vhdr: VHDR buffer + * + */ +void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, + struct virtio_net_hdr *vhdr); + +/** + * save packet type in packet context + * + * @pkt: packet + * @packet_type: the packet type + * + */ +void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, + eth_pkt_types_e packet_type); + +#endif diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c new file mode 100644 index 0000000..94c7e3d --- /dev/null +++ b/hw/net/net_tx_pkt.c @@ -0,0 +1,581 @@ +/* + * QEMU TX packets abstractions + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman + * Tamir Shomer + * Yan Vugenfirer + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "net_tx_pkt.h" +#include "net/eth.h" +#include "qemu-common.h" +#include "qemu/iov.h" +#include "net/checksum.h" +#include "net/tap.h" +#include "net/net.h" + +enum { + NET_TX_PKT_VHDR_FRAG = 0, + NET_TX_PKT_L2HDR_FRAG, + NET_TX_PKT_L3HDR_FRAG, + NET_TX_PKT_PL_START_FRAG +}; + +/* TX packet private context */ +struct NetTxPkt { + struct virtio_net_hdr virt_hdr; + bool has_virt_hdr; + + struct iovec *raw; + uint32_t raw_frags; + uint32_t max_raw_frags; + + struct iovec *vec; + + uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN]; + + uint32_t payload_len; + + uint32_t payload_frags; + uint32_t max_payload_frags; + + uint16_t hdr_len; + eth_pkt_types_e packet_type; + uint8_t l4proto; +}; + +void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, + bool has_virt_hdr) +{ + struct NetTxPkt *p = g_malloc0(sizeof *p); + + p->vec = g_malloc((sizeof *p->vec) * + (max_frags + NET_TX_PKT_PL_START_FRAG)); + + p->raw = g_malloc((sizeof *p->raw) * max_frags); + + p->max_payload_frags = max_frags; + p->max_raw_frags = max_frags; + p->has_virt_hdr = has_virt_hdr; + p->vec[NET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr; + p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = + p->has_virt_hdr ? sizeof p->virt_hdr : 0; + p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr; + p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL; + p->vec[NET_TX_PKT_L3HDR_FRAG].iov_len = 0; + + *pkt = p; +} + +void net_tx_pkt_uninit(struct NetTxPkt *pkt) +{ + if (pkt) { + g_free(pkt->vec); + g_free(pkt->raw); + g_free(pkt); + } +} + +void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt) +{ + uint16_t csum; + uint32_t ph_raw_csum; + assert(pkt); + uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; + struct ip_header *ip_hdr; + + if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type && + VIRTIO_NET_HDR_GSO_UDP != gso_type) { + return; + } + + ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; + + if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len > + ETH_MAX_IP_DGRAM_LEN) { + return; + } + + ip_hdr->ip_len = cpu_to_be16(pkt->payload_len + + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); + + /* Calculate IP header checksum */ + ip_hdr->ip_sum = 0; + csum = net_raw_checksum((uint8_t *)ip_hdr, + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); + ip_hdr->ip_sum = cpu_to_be16(csum); + + /* Calculate IP pseudo header checksum */ + ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len); + csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum)); + iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, + pkt->virt_hdr.csum_offset, &csum, sizeof(csum)); +} + +static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt) +{ + pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len + + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len; +} + +static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) +{ + struct iovec *l2_hdr, *l3_hdr; + size_t bytes_read; + size_t full_ip6hdr_len; + uint16_t l3_proto; + + assert(pkt); + + l2_hdr = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; + l3_hdr = &pkt->vec[NET_TX_PKT_L3HDR_FRAG]; + + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base, + ETH_MAX_L2_HDR_LEN); + if (bytes_read < sizeof(struct eth_header)) { + l2_hdr->iov_len = 0; + return false; + } + + l2_hdr->iov_len = sizeof(struct eth_header); + switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) { + case ETH_P_VLAN: + l2_hdr->iov_len += sizeof(struct vlan_header); + break; + case ETH_P_DVLAN: + l2_hdr->iov_len += 2 * sizeof(struct vlan_header); + break; + } + + if (bytes_read < l2_hdr->iov_len) { + l2_hdr->iov_len = 0; + return false; + } + + l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len); + + switch (l3_proto) { + case ETH_P_IP: + l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN); + + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, + l3_hdr->iov_base, sizeof(struct ip_header)); + + if (bytes_read < sizeof(struct ip_header)) { + l3_hdr->iov_len = 0; + return false; + } + + l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base); + pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; + + /* copy optional IPv4 header data */ + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, + l2_hdr->iov_len + sizeof(struct ip_header), + l3_hdr->iov_base + sizeof(struct ip_header), + l3_hdr->iov_len - sizeof(struct ip_header)); + if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { + l3_hdr->iov_len = 0; + return false; + } + break; + + case ETH_P_IPV6: + if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, + &pkt->l4proto, &full_ip6hdr_len)) { + l3_hdr->iov_len = 0; + return false; + } + + l3_hdr->iov_base = g_malloc(full_ip6hdr_len); + + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, + l3_hdr->iov_base, full_ip6hdr_len); + + if (bytes_read < full_ip6hdr_len) { + l3_hdr->iov_len = 0; + return false; + } else { + l3_hdr->iov_len = full_ip6hdr_len; + } + break; + + default: + l3_hdr->iov_len = 0; + break; + } + + net_tx_pkt_calculate_hdr_len(pkt); + pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); + return true; +} + +static bool net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt) +{ + size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; + + pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG], + pkt->max_payload_frags, + pkt->raw, pkt->raw_frags, + pkt->hdr_len, payload_len); + + if (pkt->payload_frags != (uint32_t) -1) { + pkt->payload_len = payload_len; + return true; + } else { + return false; + } +} + +bool net_tx_pkt_parse(struct NetTxPkt *pkt) +{ + return net_tx_pkt_parse_headers(pkt) && + net_tx_pkt_rebuild_payload(pkt); +} + +struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt) +{ + assert(pkt); + return &pkt->virt_hdr; +} + +static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt, + bool tso_enable) +{ + uint8_t rc = VIRTIO_NET_HDR_GSO_NONE; + uint16_t l3_proto; + + l3_proto = eth_get_l3_proto(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, + pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len); + + if (!tso_enable) { + goto func_exit; + } + + rc = eth_get_gso_type(l3_proto, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base, + pkt->l4proto); + +func_exit: + return rc; +} + +void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, + bool csum_enable, uint32_t gso_size) +{ + struct tcp_hdr l4hdr; + assert(pkt); + + /* csum has to be enabled if tso is. */ + assert(csum_enable || !tso_enable); + + pkt->virt_hdr.gso_type = net_tx_pkt_get_gso_type(pkt, tso_enable); + + switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_NONE: + pkt->virt_hdr.hdr_len = 0; + pkt->virt_hdr.gso_size = 0; + break; + + case VIRTIO_NET_HDR_GSO_UDP: + pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header); + break; + + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: + iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, + 0, &l4hdr, sizeof(l4hdr)); + pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t); + pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + break; + + default: + g_assert_not_reached(); + } + + if (csum_enable) { + switch (pkt->l4proto) { + case IP_PROTO_TCP: + pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + pkt->virt_hdr.csum_start = pkt->hdr_len; + pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum); + break; + case IP_PROTO_UDP: + pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + pkt->virt_hdr.csum_start = pkt->hdr_len; + pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum); + break; + default: + break; + } + } +} + +void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan) +{ + bool is_new; + assert(pkt); + + eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, + vlan, &is_new); + + /* update l2hdrlen */ + if (is_new) { + pkt->hdr_len += sizeof(struct vlan_header); + pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len += + sizeof(struct vlan_header); + } +} + +bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, + size_t len) +{ + hwaddr mapped_len = 0; + struct iovec *ventry; + assert(pkt); + assert(pkt->max_raw_frags > pkt->raw_frags); + + if (!len) { + return true; + } + + ventry = &pkt->raw[pkt->raw_frags]; + mapped_len = len; + + ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false); + ventry->iov_len = mapped_len; + pkt->raw_frags += !!ventry->iov_base; + + if ((ventry->iov_base == NULL) || (len != mapped_len)) { + return false; + } + + return true; +} + +eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt) +{ + assert(pkt); + + return pkt->packet_type; +} + +size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt) +{ + assert(pkt); + + return pkt->hdr_len + pkt->payload_len; +} + +void net_tx_pkt_dump(struct NetTxPkt *pkt) +{ +#ifdef NET_TX_PKT_DEBUG + assert(pkt); + + printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %lu, " + "l3hdr_len: %lu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type, + pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len, + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len); +#endif +} + +void net_tx_pkt_reset(struct NetTxPkt *pkt) +{ + int i; + + /* no assert, as reset can be called before tx_pkt_init */ + if (!pkt) { + return; + } + + memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr)); + + g_free(pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base); + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL; + + assert(pkt->vec); + for (i = NET_TX_PKT_L2HDR_FRAG; + i < pkt->payload_frags + NET_TX_PKT_PL_START_FRAG; i++) { + pkt->vec[i].iov_len = 0; + } + pkt->payload_len = 0; + pkt->payload_frags = 0; + + assert(pkt->raw); + for (i = 0; i < pkt->raw_frags; i++) { + assert(pkt->raw[i].iov_base); + cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len, + false, pkt->raw[i].iov_len); + pkt->raw[i].iov_len = 0; + } + pkt->raw_frags = 0; + + pkt->hdr_len = 0; + pkt->packet_type = 0; + pkt->l4proto = 0; +} + +static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) +{ + struct iovec *iov = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; + uint32_t csum_cntr; + uint16_t csum = 0; + /* num of iovec without vhdr */ + uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1; + uint16_t csl; + struct ip_header *iphdr; + size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset; + + /* Put zero to checksum field */ + iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum); + + /* Calculate L4 TCP/UDP checksum */ + csl = pkt->payload_len; + + /* data checksum */ + csum_cntr = + net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl); + /* add pseudo header to csum */ + iphdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; + csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl); + + /* Put the checksum obtained into the packet */ + csum = cpu_to_be16(net_checksum_finish(csum_cntr)); + iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum); +} + +enum { + NET_TX_PKT_FRAGMENT_L2_HDR_POS = 0, + NET_TX_PKT_FRAGMENT_L3_HDR_POS, + NET_TX_PKT_FRAGMENT_HEADER_NUM +}; + +#define NET_MAX_FRAG_SG_LIST (64) + +static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, + int *src_idx, size_t *src_offset, struct iovec *dst, int *dst_idx) +{ + size_t fetched = 0; + struct iovec *src = pkt->vec; + + *dst_idx = NET_TX_PKT_FRAGMENT_HEADER_NUM; + + while (fetched < pkt->virt_hdr.gso_size) { + + /* no more place in fragment iov */ + if (*dst_idx == NET_MAX_FRAG_SG_LIST) { + break; + } + + /* no more data in iovec */ + if (*src_idx == (pkt->payload_frags + NET_TX_PKT_PL_START_FRAG)) { + break; + } + + + dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset; + dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset, + pkt->virt_hdr.gso_size - fetched); + + *src_offset += dst[*dst_idx].iov_len; + fetched += dst[*dst_idx].iov_len; + + if (*src_offset == src[*src_idx].iov_len) { + *src_offset = 0; + (*src_idx)++; + } + + (*dst_idx)++; + } + + return fetched; +} + +static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt, + NetClientState *nc) +{ + struct iovec fragment[NET_MAX_FRAG_SG_LIST]; + size_t fragment_len = 0; + bool more_frags = false; + + /* some pointers for shorter code */ + void *l2_iov_base, *l3_iov_base; + size_t l2_iov_len, l3_iov_len; + int src_idx = NET_TX_PKT_PL_START_FRAG, dst_idx; + size_t src_offset = 0; + size_t fragment_offset = 0; + + l2_iov_base = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base; + l2_iov_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len; + l3_iov_base = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; + l3_iov_len = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len; + + /* Copy headers */ + fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base; + fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len; + fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base; + fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len; + + + /* Put as much data as possible and send */ + do { + fragment_len = net_tx_pkt_fetch_fragment(pkt, &src_idx, &src_offset, + fragment, &dst_idx); + + more_frags = (fragment_offset + fragment_len < pkt->payload_len); + + eth_setup_ip4_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base, + l3_iov_len, fragment_len, fragment_offset, more_frags); + + eth_fix_ip4_checksum(l3_iov_base, l3_iov_len); + + qemu_sendv_packet(nc, fragment, dst_idx); + + fragment_offset += fragment_len; + + } while (more_frags); + + return true; +} + +bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc) +{ + assert(pkt); + + if (!pkt->has_virt_hdr && + pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + net_tx_pkt_do_sw_csum(pkt); + } + + /* + * Since underlying infrastructure does not support IP datagrams longer + * than 64K we should drop such packets and don't even try to send + */ + if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) { + if (pkt->payload_len > + ETH_MAX_IP_DGRAM_LEN - + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) { + return false; + } + } + + if (pkt->has_virt_hdr || + pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) { + qemu_sendv_packet(nc, pkt->vec, + pkt->payload_frags + NET_TX_PKT_PL_START_FRAG); + return true; + } + + return net_tx_pkt_do_sw_fragmentation(pkt, nc); +} diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h new file mode 100644 index 0000000..be2e117 --- /dev/null +++ b/hw/net/net_tx_pkt.h @@ -0,0 +1,146 @@ +/* + * QEMU TX packets abstraction + * + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) + * + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Dmitry Fleytman + * Tamir Shomer + * Yan Vugenfirer + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef NET_TX_PKT_H +#define NET_TX_PKT_H + +#include "net/eth.h" +#include "exec/hwaddr.h" + +/* define to enable packet dump functions */ +/*#define NET_TX_PKT_DEBUG*/ + +struct NetTxPkt; + +/** + * Init function for tx packet functionality + * + * @pkt: packet pointer + * @max_frags: max tx ip fragments + * @has_virt_hdr: device uses virtio header. + */ +void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, + bool has_virt_hdr); + +/** + * Clean all tx packet resources. + * + * @pkt: packet. + */ +void net_tx_pkt_uninit(struct NetTxPkt *pkt); + +/** + * get virtio header + * + * @pkt: packet + * @ret: virtio header + */ +struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt); + +/** + * build virtio header (will be stored in module context) + * + * @pkt: packet + * @tso_enable: TSO enabled + * @csum_enable: CSO enabled + * @gso_size: MSS size for TSO + * + */ +void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, + bool csum_enable, uint32_t gso_size); + +/** + * updates vlan tag, and adds vlan header in case it is missing + * + * @pkt: packet + * @vlan: VLAN tag + * + */ +void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan); + +/** + * populate data fragment into pkt context. + * + * @pkt: packet + * @pa: physical address of fragment + * @len: length of fragment + * + */ +bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, + size_t len); + +/** + * fix ip header fields and calculate checksums needed. + * + * @pkt: packet + * + */ +void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt); + +/** + * get length of all populated data. + * + * @pkt: packet + * @ret: total data length + * + */ +size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt); + +/** + * get packet type + * + * @pkt: packet + * @ret: packet type + * + */ +eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt); + +/** + * prints packet data if debug is enabled + * + * @pkt: packet + * + */ +void net_tx_pkt_dump(struct NetTxPkt *pkt); + +/** + * reset tx packet private context (needed to be called between packets) + * + * @pkt: packet + * + */ +void net_tx_pkt_reset(struct NetTxPkt *pkt); + +/** + * Send packet to qemu. handles sw offloads if vhdr is not supported. + * + * @pkt: packet + * @nc: NetClientState + * @ret: operation result + * + */ +bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc); + +/** + * parse raw packet data and analyze offload requirements. + * + * @pkt: packet + * + */ +bool net_tx_pkt_parse(struct NetTxPkt *pkt); + +#endif diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 200d2ea..33cd07d 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -30,8 +30,8 @@ #include "vmxnet3.h" #include "vmxnet_debug.h" #include "vmware_utils.h" -#include "vmxnet_tx_pkt.h" -#include "vmxnet_rx_pkt.h" +#include "net_tx_pkt.h" +#include "net_rx_pkt.h" #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1 #define VMXNET3_MSIX_BAR_SIZE 0x2000 @@ -314,13 +314,13 @@ typedef struct { bool peer_has_vhdr; /* TX packets to QEMU interface */ - struct VmxnetTxPkt *tx_pkt; + struct NetTxPkt *tx_pkt; uint32_t offload_mode; uint32_t cso_or_gso_size; uint16_t tci; bool needs_vlan; - struct VmxnetRxPkt *rx_pkt; + struct NetRxPkt *rx_pkt; bool tx_sop; bool skip_current_tx_pkt; @@ -546,18 +546,18 @@ vmxnet3_setup_tx_offloads(VMXNET3State *s) { switch (s->offload_mode) { case VMXNET3_OM_NONE: - vmxnet_tx_pkt_build_vheader(s->tx_pkt, false, false, 0); + net_tx_pkt_build_vheader(s->tx_pkt, false, false, 0); break; case VMXNET3_OM_CSUM: - vmxnet_tx_pkt_build_vheader(s->tx_pkt, false, true, 0); + net_tx_pkt_build_vheader(s->tx_pkt, false, true, 0); VMW_PKPRN("L4 CSO requested\n"); break; case VMXNET3_OM_TSO: - vmxnet_tx_pkt_build_vheader(s->tx_pkt, true, true, + net_tx_pkt_build_vheader(s->tx_pkt, true, true, s->cso_or_gso_size); - vmxnet_tx_pkt_update_ip_checksums(s->tx_pkt); + net_tx_pkt_update_ip_checksums(s->tx_pkt); VMW_PKPRN("GSO offload requested."); break; @@ -590,12 +590,12 @@ static void vmxnet3_on_tx_done_update_stats(VMXNET3State *s, int qidx, Vmxnet3PktStatus status) { - size_t tot_len = vmxnet_tx_pkt_get_total_len(s->tx_pkt); + size_t tot_len = net_tx_pkt_get_total_len(s->tx_pkt); struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats; switch (status) { case VMXNET3_PKT_STATUS_OK: - switch (vmxnet_tx_pkt_get_packet_type(s->tx_pkt)) { + switch (net_tx_pkt_get_packet_type(s->tx_pkt)) { case ETH_PKT_BCAST: stats->bcastPktsTxOK++; stats->bcastBytesTxOK += tot_len; @@ -643,7 +643,7 @@ vmxnet3_on_rx_done_update_stats(VMXNET3State *s, Vmxnet3PktStatus status) { struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats; - size_t tot_len = vmxnet_rx_pkt_get_total_len(s->rx_pkt); + size_t tot_len = net_rx_pkt_get_total_len(s->rx_pkt); switch (status) { case VMXNET3_PKT_STATUS_OUT_OF_BUF: @@ -654,7 +654,7 @@ vmxnet3_on_rx_done_update_stats(VMXNET3State *s, stats->pktsRxError++; break; case VMXNET3_PKT_STATUS_OK: - switch (vmxnet_rx_pkt_get_packet_type(s->rx_pkt)) { + switch (net_rx_pkt_get_packet_type(s->rx_pkt)) { case ETH_PKT_BCAST: stats->bcastPktsRxOK++; stats->bcastBytesRxOK += tot_len; @@ -715,10 +715,10 @@ vmxnet3_send_packet(VMXNET3State *s, uint32_t qidx) } /* debug prints */ - vmxnet3_dump_virt_hdr(vmxnet_tx_pkt_get_vhdr(s->tx_pkt)); - vmxnet_tx_pkt_dump(s->tx_pkt); + vmxnet3_dump_virt_hdr(net_tx_pkt_get_vhdr(s->tx_pkt)); + net_tx_pkt_dump(s->tx_pkt); - if (!vmxnet_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) { + if (!net_tx_pkt_send(s->tx_pkt, qemu_get_queue(s->nic))) { status = VMXNET3_PKT_STATUS_DISCARD; goto func_exit; } @@ -746,7 +746,7 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE; data_pa = le64_to_cpu(txd.addr); - if (!vmxnet_tx_pkt_add_raw_fragment(s->tx_pkt, + if (!net_tx_pkt_add_raw_fragment(s->tx_pkt, data_pa, data_len)) { s->skip_current_tx_pkt = true; @@ -759,9 +759,9 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) } if (txd.eop) { - if (!s->skip_current_tx_pkt && vmxnet_tx_pkt_parse(s->tx_pkt)) { + if (!s->skip_current_tx_pkt && net_tx_pkt_parse(s->tx_pkt)) { if (s->needs_vlan) { - vmxnet_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci); + net_tx_pkt_setup_vlan_header(s->tx_pkt, s->tci); } vmxnet3_send_packet(s, qidx); @@ -773,7 +773,7 @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) vmxnet3_complete_packet(s, qidx, txd_idx); s->tx_sop = true; s->skip_current_tx_pkt = false; - vmxnet_tx_pkt_reset(s->tx_pkt); + net_tx_pkt_reset(s->tx_pkt); } } } @@ -928,7 +928,7 @@ vmxnet3_get_next_rx_descr(VMXNET3State *s, bool is_head, * in the case the host OS performs forwarding, it will forward an * incorrectly checksummed packet. */ -static void vmxnet3_rx_need_csum_calculate(struct VmxnetRxPkt *pkt, +static void vmxnet3_rx_need_csum_calculate(struct NetRxPkt *pkt, const void *pkt_data, size_t pkt_len) { @@ -937,16 +937,16 @@ static void vmxnet3_rx_need_csum_calculate(struct VmxnetRxPkt *pkt, uint8_t *data; int len; - if (!vmxnet_rx_pkt_has_virt_hdr(pkt)) { + if (!net_rx_pkt_has_virt_hdr(pkt)) { return; } - vhdr = vmxnet_rx_pkt_get_vhdr(pkt); + vhdr = net_rx_pkt_get_vhdr(pkt); if (!VMXNET_FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM)) { return; } - vmxnet_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); if (!(isip4 || isip6) || !(istcp || isudp)) { return; } @@ -970,7 +970,7 @@ static void vmxnet3_rx_need_csum_calculate(struct VmxnetRxPkt *pkt, vhdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID; } -static void vmxnet3_rx_update_descr(struct VmxnetRxPkt *pkt, +static void vmxnet3_rx_update_descr(struct NetRxPkt *pkt, struct Vmxnet3_RxCompDesc *rxcd) { int csum_ok, is_gso; @@ -978,16 +978,16 @@ static void vmxnet3_rx_update_descr(struct VmxnetRxPkt *pkt, struct virtio_net_hdr *vhdr; uint8_t offload_type; - if (vmxnet_rx_pkt_is_vlan_stripped(pkt)) { + if (net_rx_pkt_is_vlan_stripped(pkt)) { rxcd->ts = 1; - rxcd->tci = vmxnet_rx_pkt_get_vlan_tag(pkt); + rxcd->tci = net_rx_pkt_get_vlan_tag(pkt); } - if (!vmxnet_rx_pkt_has_virt_hdr(pkt)) { + if (!net_rx_pkt_has_virt_hdr(pkt)) { goto nocsum; } - vhdr = vmxnet_rx_pkt_get_vhdr(pkt); + vhdr = net_rx_pkt_get_vhdr(pkt); /* * Checksum is valid when lower level tell so or when lower level * requires checksum offload telling that packet produced/bridged @@ -1004,7 +1004,7 @@ static void vmxnet3_rx_update_descr(struct VmxnetRxPkt *pkt, goto nocsum; } - vmxnet_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); if ((!istcp && !isudp) || (!isip4 && !isip6)) { goto nocsum; } @@ -1063,13 +1063,13 @@ vmxnet3_indicate_packet(VMXNET3State *s) uint32_t new_rxcd_gen = VMXNET3_INIT_GEN; hwaddr new_rxcd_pa = 0; hwaddr ready_rxcd_pa = 0; - struct iovec *data = vmxnet_rx_pkt_get_iovec(s->rx_pkt); + struct iovec *data = net_rx_pkt_get_iovec(s->rx_pkt); size_t bytes_copied = 0; - size_t bytes_left = vmxnet_rx_pkt_get_total_len(s->rx_pkt); + size_t bytes_left = net_rx_pkt_get_total_len(s->rx_pkt); uint16_t num_frags = 0; size_t chunk_size; - vmxnet_rx_pkt_dump(s->rx_pkt); + net_rx_pkt_dump(s->rx_pkt); while (bytes_left > 0) { @@ -1226,9 +1226,9 @@ static void vmxnet3_deactivate_device(VMXNET3State *s) { if (s->device_active) { VMW_CBPRN("Deactivating vmxnet3..."); - vmxnet_tx_pkt_reset(s->tx_pkt); - vmxnet_tx_pkt_uninit(s->tx_pkt); - vmxnet_rx_pkt_uninit(s->rx_pkt); + net_tx_pkt_reset(s->tx_pkt); + net_tx_pkt_uninit(s->tx_pkt); + net_rx_pkt_uninit(s->rx_pkt); s->device_active = false; } } @@ -1558,8 +1558,8 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* Preallocate TX packet wrapper */ VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags); - vmxnet_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); - vmxnet_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); + net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); + net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); /* Read rings memory locations for RX queues */ for (i = 0; i < s->rxq_num; i++) { @@ -1965,7 +1965,7 @@ vmxnet3_rx_filter_may_indicate(VMXNET3State *s, const void *data, return false; } - switch (vmxnet_rx_pkt_get_packet_type(s->rx_pkt)) { + switch (net_rx_pkt_get_packet_type(s->rx_pkt)) { case ETH_PKT_UCAST: if (!VMXNET_FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) { return false; @@ -2013,7 +2013,7 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) } if (s->peer_has_vhdr) { - vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); + net_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf); buf += sizeof(struct virtio_net_hdr); size -= sizeof(struct virtio_net_hdr); } @@ -2026,13 +2026,13 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) size = sizeof(min_buf); } - vmxnet_rx_pkt_set_packet_type(s->rx_pkt, + net_rx_pkt_set_packet_type(s->rx_pkt, get_eth_packet_type(PKT_GET_ETH_HDR(buf))); if (vmxnet3_rx_filter_may_indicate(s, buf, size)) { - vmxnet_rx_pkt_set_protocols(s->rx_pkt, buf, size); + net_rx_pkt_set_protocols(s->rx_pkt, buf, size); vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size); - vmxnet_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping); + net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping); bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1; if (bytes_indicated < size) { VMW_PKPRN("RX: %zu of %zu bytes indicated", bytes_indicated, size); @@ -2536,8 +2536,8 @@ static int vmxnet3_post_load(void *opaque, int version_id) VMXNET3State *s = opaque; PCIDevice *d = PCI_DEVICE(s); - vmxnet_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); - vmxnet_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); + net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); + net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); if (s->msix_used) { if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) { diff --git a/hw/net/vmxnet_rx_pkt.c b/hw/net/vmxnet_rx_pkt.c deleted file mode 100644 index 21bb46e..0000000 --- a/hw/net/vmxnet_rx_pkt.c +++ /dev/null @@ -1,187 +0,0 @@ -/* - * QEMU VMWARE VMXNET* paravirtual NICs - RX packets abstractions - * - * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) - * - * Developed by Daynix Computing LTD (http://www.daynix.com) - * - * Authors: - * Dmitry Fleytman - * Tamir Shomer - * Yan Vugenfirer - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "vmxnet_rx_pkt.h" -#include "net/eth.h" -#include "qemu-common.h" -#include "qemu/iov.h" -#include "net/checksum.h" -#include "net/tap.h" - -/* - * RX packet may contain up to 2 fragments - rebuilt eth header - * in case of VLAN tag stripping - * and payload received from QEMU - in any case - */ -#define VMXNET_MAX_RX_PACKET_FRAGMENTS (2) - -struct VmxnetRxPkt { - struct virtio_net_hdr virt_hdr; - uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN]; - struct iovec vec[VMXNET_MAX_RX_PACKET_FRAGMENTS]; - uint16_t vec_len; - uint32_t tot_len; - uint16_t tci; - bool vlan_stripped; - bool has_virt_hdr; - eth_pkt_types_e packet_type; - - /* Analysis results */ - bool isip4; - bool isip6; - bool isudp; - bool istcp; -}; - -void vmxnet_rx_pkt_init(struct VmxnetRxPkt **pkt, bool has_virt_hdr) -{ - struct VmxnetRxPkt *p = g_malloc0(sizeof *p); - p->has_virt_hdr = has_virt_hdr; - *pkt = p; -} - -void vmxnet_rx_pkt_uninit(struct VmxnetRxPkt *pkt) -{ - g_free(pkt); -} - -struct virtio_net_hdr *vmxnet_rx_pkt_get_vhdr(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - return &pkt->virt_hdr; -} - -void vmxnet_rx_pkt_attach_data(struct VmxnetRxPkt *pkt, const void *data, - size_t len, bool strip_vlan) -{ - uint16_t tci = 0; - uint16_t ploff; - assert(pkt); - pkt->vlan_stripped = false; - - if (strip_vlan) { - pkt->vlan_stripped = eth_strip_vlan(data, pkt->ehdr_buf, &ploff, &tci); - } - - if (pkt->vlan_stripped) { - pkt->vec[0].iov_base = pkt->ehdr_buf; - pkt->vec[0].iov_len = ploff - sizeof(struct vlan_header); - pkt->vec[1].iov_base = (uint8_t *) data + ploff; - pkt->vec[1].iov_len = len - ploff; - pkt->vec_len = 2; - pkt->tot_len = len - ploff + sizeof(struct eth_header); - } else { - pkt->vec[0].iov_base = (void *)data; - pkt->vec[0].iov_len = len; - pkt->vec_len = 1; - pkt->tot_len = len; - } - - pkt->tci = tci; -} - -void vmxnet_rx_pkt_dump(struct VmxnetRxPkt *pkt) -{ -#ifdef VMXNET_RX_PKT_DEBUG - VmxnetRxPkt *pkt = (VmxnetRxPkt *)pkt; - assert(pkt); - - printf("RX PKT: tot_len: %d, vlan_stripped: %d, vlan_tag: %d\n", - pkt->tot_len, pkt->vlan_stripped, pkt->tci); -#endif -} - -void vmxnet_rx_pkt_set_packet_type(struct VmxnetRxPkt *pkt, - eth_pkt_types_e packet_type) -{ - assert(pkt); - - pkt->packet_type = packet_type; - -} - -eth_pkt_types_e vmxnet_rx_pkt_get_packet_type(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->packet_type; -} - -size_t vmxnet_rx_pkt_get_total_len(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->tot_len; -} - -void vmxnet_rx_pkt_set_protocols(struct VmxnetRxPkt *pkt, const void *data, - size_t len) -{ - assert(pkt); - - eth_get_protocols(data, len, &pkt->isip4, &pkt->isip6, - &pkt->isudp, &pkt->istcp); -} - -void vmxnet_rx_pkt_get_protocols(struct VmxnetRxPkt *pkt, - bool *isip4, bool *isip6, - bool *isudp, bool *istcp) -{ - assert(pkt); - - *isip4 = pkt->isip4; - *isip6 = pkt->isip6; - *isudp = pkt->isudp; - *istcp = pkt->istcp; -} - -struct iovec *vmxnet_rx_pkt_get_iovec(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->vec; -} - -void vmxnet_rx_pkt_set_vhdr(struct VmxnetRxPkt *pkt, - struct virtio_net_hdr *vhdr) -{ - assert(pkt); - - memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); -} - -bool vmxnet_rx_pkt_is_vlan_stripped(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->vlan_stripped; -} - -bool vmxnet_rx_pkt_has_virt_hdr(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->has_virt_hdr; -} - -uint16_t vmxnet_rx_pkt_get_vlan_tag(struct VmxnetRxPkt *pkt) -{ - assert(pkt); - - return pkt->tci; -} diff --git a/hw/net/vmxnet_rx_pkt.h b/hw/net/vmxnet_rx_pkt.h deleted file mode 100644 index 0a45c1b..0000000 --- a/hw/net/vmxnet_rx_pkt.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * QEMU VMWARE VMXNET* paravirtual NICs - RX packets abstraction - * - * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) - * - * Developed by Daynix Computing LTD (http://www.daynix.com) - * - * Authors: - * Dmitry Fleytman - * Tamir Shomer - * Yan Vugenfirer - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef VMXNET_RX_PKT_H -#define VMXNET_RX_PKT_H - -#include "net/eth.h" - -/* defines to enable packet dump functions */ -/*#define VMXNET_RX_PKT_DEBUG*/ - -struct VmxnetRxPkt; - -/** - * Clean all rx packet resources - * - * @pkt: packet - * - */ -void vmxnet_rx_pkt_uninit(struct VmxnetRxPkt *pkt); - -/** - * Init function for rx packet functionality - * - * @pkt: packet pointer - * @has_virt_hdr: device uses virtio header - * - */ -void vmxnet_rx_pkt_init(struct VmxnetRxPkt **pkt, bool has_virt_hdr); - -/** - * returns total length of data attached to rx context - * - * @pkt: packet - * - * Return: nothing - * - */ -size_t vmxnet_rx_pkt_get_total_len(struct VmxnetRxPkt *pkt); - -/** - * parse and set packet analysis results - * - * @pkt: packet - * @data: pointer to the data buffer to be parsed - * @len: data length - * - */ -void vmxnet_rx_pkt_set_protocols(struct VmxnetRxPkt *pkt, const void *data, - size_t len); - -/** - * fetches packet analysis results - * - * @pkt: packet - * @isip4: whether the packet given is IPv4 - * @isip6: whether the packet given is IPv6 - * @isudp: whether the packet given is UDP - * @istcp: whether the packet given is TCP - * - */ -void vmxnet_rx_pkt_get_protocols(struct VmxnetRxPkt *pkt, - bool *isip4, bool *isip6, - bool *isudp, bool *istcp); - -/** - * returns virtio header stored in rx context - * - * @pkt: packet - * @ret: virtio header - * - */ -struct virtio_net_hdr *vmxnet_rx_pkt_get_vhdr(struct VmxnetRxPkt *pkt); - -/** - * returns packet type - * - * @pkt: packet - * @ret: packet type - * - */ -eth_pkt_types_e vmxnet_rx_pkt_get_packet_type(struct VmxnetRxPkt *pkt); - -/** - * returns vlan tag - * - * @pkt: packet - * @ret: VLAN tag - * - */ -uint16_t vmxnet_rx_pkt_get_vlan_tag(struct VmxnetRxPkt *pkt); - -/** - * tells whether vlan was stripped from the packet - * - * @pkt: packet - * @ret: VLAN stripped sign - * - */ -bool vmxnet_rx_pkt_is_vlan_stripped(struct VmxnetRxPkt *pkt); - -/** - * notifies caller if the packet has virtio header - * - * @pkt: packet - * @ret: true if packet has virtio header, false otherwize - * - */ -bool vmxnet_rx_pkt_has_virt_hdr(struct VmxnetRxPkt *pkt); - -/** - * attach data to rx packet - * - * @pkt: packet - * @data: pointer to the data buffer - * @len: data length - * @strip_vlan: should the module strip vlan from data - * - */ -void vmxnet_rx_pkt_attach_data(struct VmxnetRxPkt *pkt, const void *data, - size_t len, bool strip_vlan); - -/** - * returns io vector that holds the attached data - * - * @pkt: packet - * @ret: pointer to IOVec - * - */ -struct iovec *vmxnet_rx_pkt_get_iovec(struct VmxnetRxPkt *pkt); - -/** - * prints rx packet data if debug is enabled - * - * @pkt: packet - * - */ -void vmxnet_rx_pkt_dump(struct VmxnetRxPkt *pkt); - -/** - * copy passed vhdr data to packet context - * - * @pkt: packet - * @vhdr: VHDR buffer - * - */ -void vmxnet_rx_pkt_set_vhdr(struct VmxnetRxPkt *pkt, - struct virtio_net_hdr *vhdr); - -/** - * save packet type in packet context - * - * @pkt: packet - * @packet_type: the packet type - * - */ -void vmxnet_rx_pkt_set_packet_type(struct VmxnetRxPkt *pkt, - eth_pkt_types_e packet_type); - -#endif diff --git a/hw/net/vmxnet_tx_pkt.c b/hw/net/vmxnet_tx_pkt.c deleted file mode 100644 index 91e1e08..0000000 --- a/hw/net/vmxnet_tx_pkt.c +++ /dev/null @@ -1,581 +0,0 @@ -/* - * QEMU VMWARE VMXNET* paravirtual NICs - TX packets abstractions - * - * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) - * - * Developed by Daynix Computing LTD (http://www.daynix.com) - * - * Authors: - * Dmitry Fleytman - * Tamir Shomer - * Yan Vugenfirer - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "hw/hw.h" -#include "vmxnet_tx_pkt.h" -#include "net/eth.h" -#include "qemu-common.h" -#include "qemu/iov.h" -#include "net/checksum.h" -#include "net/tap.h" -#include "net/net.h" - -enum { - VMXNET_TX_PKT_VHDR_FRAG = 0, - VMXNET_TX_PKT_L2HDR_FRAG, - VMXNET_TX_PKT_L3HDR_FRAG, - VMXNET_TX_PKT_PL_START_FRAG -}; - -/* TX packet private context */ -struct VmxnetTxPkt { - struct virtio_net_hdr virt_hdr; - bool has_virt_hdr; - - struct iovec *raw; - uint32_t raw_frags; - uint32_t max_raw_frags; - - struct iovec *vec; - - uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN]; - - uint32_t payload_len; - - uint32_t payload_frags; - uint32_t max_payload_frags; - - uint16_t hdr_len; - eth_pkt_types_e packet_type; - uint8_t l4proto; -}; - -void vmxnet_tx_pkt_init(struct VmxnetTxPkt **pkt, uint32_t max_frags, - bool has_virt_hdr) -{ - struct VmxnetTxPkt *p = g_malloc0(sizeof *p); - - p->vec = g_malloc((sizeof *p->vec) * - (max_frags + VMXNET_TX_PKT_PL_START_FRAG)); - - p->raw = g_malloc((sizeof *p->raw) * max_frags); - - p->max_payload_frags = max_frags; - p->max_raw_frags = max_frags; - p->has_virt_hdr = has_virt_hdr; - p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr; - p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_len = - p->has_virt_hdr ? sizeof p->virt_hdr : 0; - p->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr; - p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL; - p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len = 0; - - *pkt = p; -} - -void vmxnet_tx_pkt_uninit(struct VmxnetTxPkt *pkt) -{ - if (pkt) { - g_free(pkt->vec); - g_free(pkt->raw); - g_free(pkt); - } -} - -void vmxnet_tx_pkt_update_ip_checksums(struct VmxnetTxPkt *pkt) -{ - uint16_t csum; - uint32_t ph_raw_csum; - assert(pkt); - uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; - struct ip_header *ip_hdr; - - if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type && - VIRTIO_NET_HDR_GSO_UDP != gso_type) { - return; - } - - ip_hdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base; - - if (pkt->payload_len + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len > - ETH_MAX_IP_DGRAM_LEN) { - return; - } - - ip_hdr->ip_len = cpu_to_be16(pkt->payload_len + - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len); - - /* Calculate IP header checksum */ - ip_hdr->ip_sum = 0; - csum = net_raw_checksum((uint8_t *)ip_hdr, - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len); - ip_hdr->ip_sum = cpu_to_be16(csum); - - /* Calculate IP pseudo header checksum */ - ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len); - csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum)); - iov_from_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags, - pkt->virt_hdr.csum_offset, &csum, sizeof(csum)); -} - -static void vmxnet_tx_pkt_calculate_hdr_len(struct VmxnetTxPkt *pkt) -{ - pkt->hdr_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len + - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len; -} - -static bool vmxnet_tx_pkt_parse_headers(struct VmxnetTxPkt *pkt) -{ - struct iovec *l2_hdr, *l3_hdr; - size_t bytes_read; - size_t full_ip6hdr_len; - uint16_t l3_proto; - - assert(pkt); - - l2_hdr = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG]; - l3_hdr = &pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG]; - - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base, - ETH_MAX_L2_HDR_LEN); - if (bytes_read < sizeof(struct eth_header)) { - l2_hdr->iov_len = 0; - return false; - } - - l2_hdr->iov_len = sizeof(struct eth_header); - switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) { - case ETH_P_VLAN: - l2_hdr->iov_len += sizeof(struct vlan_header); - break; - case ETH_P_DVLAN: - l2_hdr->iov_len += 2 * sizeof(struct vlan_header); - break; - } - - if (bytes_read < l2_hdr->iov_len) { - l2_hdr->iov_len = 0; - return false; - } - - l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len); - - switch (l3_proto) { - case ETH_P_IP: - l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN); - - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, - l3_hdr->iov_base, sizeof(struct ip_header)); - - if (bytes_read < sizeof(struct ip_header)) { - l3_hdr->iov_len = 0; - return false; - } - - l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base); - pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; - - /* copy optional IPv4 header data */ - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, - l2_hdr->iov_len + sizeof(struct ip_header), - l3_hdr->iov_base + sizeof(struct ip_header), - l3_hdr->iov_len - sizeof(struct ip_header)); - if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { - l3_hdr->iov_len = 0; - return false; - } - break; - - case ETH_P_IPV6: - if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, - &pkt->l4proto, &full_ip6hdr_len)) { - l3_hdr->iov_len = 0; - return false; - } - - l3_hdr->iov_base = g_malloc(full_ip6hdr_len); - - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, - l3_hdr->iov_base, full_ip6hdr_len); - - if (bytes_read < full_ip6hdr_len) { - l3_hdr->iov_len = 0; - return false; - } else { - l3_hdr->iov_len = full_ip6hdr_len; - } - break; - - default: - l3_hdr->iov_len = 0; - break; - } - - vmxnet_tx_pkt_calculate_hdr_len(pkt); - pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); - return true; -} - -static bool vmxnet_tx_pkt_rebuild_payload(struct VmxnetTxPkt *pkt) -{ - size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; - - pkt->payload_frags = iov_copy(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], - pkt->max_payload_frags, - pkt->raw, pkt->raw_frags, - pkt->hdr_len, payload_len); - - if (pkt->payload_frags != (uint32_t) -1) { - pkt->payload_len = payload_len; - return true; - } else { - return false; - } -} - -bool vmxnet_tx_pkt_parse(struct VmxnetTxPkt *pkt) -{ - return vmxnet_tx_pkt_parse_headers(pkt) && - vmxnet_tx_pkt_rebuild_payload(pkt); -} - -struct virtio_net_hdr *vmxnet_tx_pkt_get_vhdr(struct VmxnetTxPkt *pkt) -{ - assert(pkt); - return &pkt->virt_hdr; -} - -static uint8_t vmxnet_tx_pkt_get_gso_type(struct VmxnetTxPkt *pkt, - bool tso_enable) -{ - uint8_t rc = VIRTIO_NET_HDR_GSO_NONE; - uint16_t l3_proto; - - l3_proto = eth_get_l3_proto(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base, - pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len); - - if (!tso_enable) { - goto func_exit; - } - - rc = eth_get_gso_type(l3_proto, pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base, - pkt->l4proto); - -func_exit: - return rc; -} - -void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable, - bool csum_enable, uint32_t gso_size) -{ - struct tcp_hdr l4hdr; - assert(pkt); - - /* csum has to be enabled if tso is. */ - assert(csum_enable || !tso_enable); - - pkt->virt_hdr.gso_type = vmxnet_tx_pkt_get_gso_type(pkt, tso_enable); - - switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { - case VIRTIO_NET_HDR_GSO_NONE: - pkt->virt_hdr.hdr_len = 0; - pkt->virt_hdr.gso_size = 0; - break; - - case VIRTIO_NET_HDR_GSO_UDP: - pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); - pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header); - break; - - case VIRTIO_NET_HDR_GSO_TCPV4: - case VIRTIO_NET_HDR_GSO_TCPV6: - iov_to_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags, - 0, &l4hdr, sizeof(l4hdr)); - pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t); - pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); - break; - - default: - g_assert_not_reached(); - } - - if (csum_enable) { - switch (pkt->l4proto) { - case IP_PROTO_TCP: - pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - pkt->virt_hdr.csum_start = pkt->hdr_len; - pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum); - break; - case IP_PROTO_UDP: - pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - pkt->virt_hdr.csum_start = pkt->hdr_len; - pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum); - break; - default: - break; - } - } -} - -void vmxnet_tx_pkt_setup_vlan_header(struct VmxnetTxPkt *pkt, uint16_t vlan) -{ - bool is_new; - assert(pkt); - - eth_setup_vlan_headers(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base, - vlan, &is_new); - - /* update l2hdrlen */ - if (is_new) { - pkt->hdr_len += sizeof(struct vlan_header); - pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len += - sizeof(struct vlan_header); - } -} - -bool vmxnet_tx_pkt_add_raw_fragment(struct VmxnetTxPkt *pkt, hwaddr pa, - size_t len) -{ - hwaddr mapped_len = 0; - struct iovec *ventry; - assert(pkt); - assert(pkt->max_raw_frags > pkt->raw_frags); - - if (!len) { - return true; - } - - ventry = &pkt->raw[pkt->raw_frags]; - mapped_len = len; - - ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false); - ventry->iov_len = mapped_len; - pkt->raw_frags += !!ventry->iov_base; - - if ((ventry->iov_base == NULL) || (len != mapped_len)) { - return false; - } - - return true; -} - -eth_pkt_types_e vmxnet_tx_pkt_get_packet_type(struct VmxnetTxPkt *pkt) -{ - assert(pkt); - - return pkt->packet_type; -} - -size_t vmxnet_tx_pkt_get_total_len(struct VmxnetTxPkt *pkt) -{ - assert(pkt); - - return pkt->hdr_len + pkt->payload_len; -} - -void vmxnet_tx_pkt_dump(struct VmxnetTxPkt *pkt) -{ -#ifdef VMXNET_TX_PKT_DEBUG - assert(pkt); - - printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %lu, " - "l3hdr_len: %lu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type, - pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len, - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len); -#endif -} - -void vmxnet_tx_pkt_reset(struct VmxnetTxPkt *pkt) -{ - int i; - - /* no assert, as reset can be called before tx_pkt_init */ - if (!pkt) { - return; - } - - memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr)); - - g_free(pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base); - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL; - - assert(pkt->vec); - for (i = VMXNET_TX_PKT_L2HDR_FRAG; - i < pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG; i++) { - pkt->vec[i].iov_len = 0; - } - pkt->payload_len = 0; - pkt->payload_frags = 0; - - assert(pkt->raw); - for (i = 0; i < pkt->raw_frags; i++) { - assert(pkt->raw[i].iov_base); - cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len, - false, pkt->raw[i].iov_len); - pkt->raw[i].iov_len = 0; - } - pkt->raw_frags = 0; - - pkt->hdr_len = 0; - pkt->packet_type = 0; - pkt->l4proto = 0; -} - -static void vmxnet_tx_pkt_do_sw_csum(struct VmxnetTxPkt *pkt) -{ - struct iovec *iov = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG]; - uint32_t csum_cntr; - uint16_t csum = 0; - /* num of iovec without vhdr */ - uint32_t iov_len = pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG - 1; - uint16_t csl; - struct ip_header *iphdr; - size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset; - - /* Put zero to checksum field */ - iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum); - - /* Calculate L4 TCP/UDP checksum */ - csl = pkt->payload_len; - - /* data checksum */ - csum_cntr = - net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl); - /* add pseudo header to csum */ - iphdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base; - csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl); - - /* Put the checksum obtained into the packet */ - csum = cpu_to_be16(net_checksum_finish(csum_cntr)); - iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum); -} - -enum { - VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS = 0, - VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS, - VMXNET_TX_PKT_FRAGMENT_HEADER_NUM -}; - -#define VMXNET_MAX_FRAG_SG_LIST (64) - -static size_t vmxnet_tx_pkt_fetch_fragment(struct VmxnetTxPkt *pkt, - int *src_idx, size_t *src_offset, struct iovec *dst, int *dst_idx) -{ - size_t fetched = 0; - struct iovec *src = pkt->vec; - - *dst_idx = VMXNET_TX_PKT_FRAGMENT_HEADER_NUM; - - while (fetched < pkt->virt_hdr.gso_size) { - - /* no more place in fragment iov */ - if (*dst_idx == VMXNET_MAX_FRAG_SG_LIST) { - break; - } - - /* no more data in iovec */ - if (*src_idx == (pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG)) { - break; - } - - - dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset; - dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset, - pkt->virt_hdr.gso_size - fetched); - - *src_offset += dst[*dst_idx].iov_len; - fetched += dst[*dst_idx].iov_len; - - if (*src_offset == src[*src_idx].iov_len) { - *src_offset = 0; - (*src_idx)++; - } - - (*dst_idx)++; - } - - return fetched; -} - -static bool vmxnet_tx_pkt_do_sw_fragmentation(struct VmxnetTxPkt *pkt, - NetClientState *nc) -{ - struct iovec fragment[VMXNET_MAX_FRAG_SG_LIST]; - size_t fragment_len = 0; - bool more_frags = false; - - /* some pointers for shorter code */ - void *l2_iov_base, *l3_iov_base; - size_t l2_iov_len, l3_iov_len; - int src_idx = VMXNET_TX_PKT_PL_START_FRAG, dst_idx; - size_t src_offset = 0; - size_t fragment_offset = 0; - - l2_iov_base = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base; - l2_iov_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len; - l3_iov_base = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base; - l3_iov_len = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len; - - /* Copy headers */ - fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base; - fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len; - fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base; - fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len; - - - /* Put as much data as possible and send */ - do { - fragment_len = vmxnet_tx_pkt_fetch_fragment(pkt, &src_idx, &src_offset, - fragment, &dst_idx); - - more_frags = (fragment_offset + fragment_len < pkt->payload_len); - - eth_setup_ip4_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base, - l3_iov_len, fragment_len, fragment_offset, more_frags); - - eth_fix_ip4_checksum(l3_iov_base, l3_iov_len); - - qemu_sendv_packet(nc, fragment, dst_idx); - - fragment_offset += fragment_len; - - } while (more_frags); - - return true; -} - -bool vmxnet_tx_pkt_send(struct VmxnetTxPkt *pkt, NetClientState *nc) -{ - assert(pkt); - - if (!pkt->has_virt_hdr && - pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { - vmxnet_tx_pkt_do_sw_csum(pkt); - } - - /* - * Since underlying infrastructure does not support IP datagrams longer - * than 64K we should drop such packets and don't even try to send - */ - if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) { - if (pkt->payload_len > - ETH_MAX_IP_DGRAM_LEN - - pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len) { - return false; - } - } - - if (pkt->has_virt_hdr || - pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) { - qemu_sendv_packet(nc, pkt->vec, - pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG); - return true; - } - - return vmxnet_tx_pkt_do_sw_fragmentation(pkt, nc); -} diff --git a/hw/net/vmxnet_tx_pkt.h b/hw/net/vmxnet_tx_pkt.h deleted file mode 100644 index f51e98a..0000000 --- a/hw/net/vmxnet_tx_pkt.h +++ /dev/null @@ -1,146 +0,0 @@ -/* - * QEMU VMWARE VMXNET* paravirtual NICs - TX packets abstraction - * - * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com) - * - * Developed by Daynix Computing LTD (http://www.daynix.com) - * - * Authors: - * Dmitry Fleytman - * Tamir Shomer - * Yan Vugenfirer - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef VMXNET_TX_PKT_H -#define VMXNET_TX_PKT_H - -#include "net/eth.h" -#include "exec/hwaddr.h" - -/* define to enable packet dump functions */ -/*#define VMXNET_TX_PKT_DEBUG*/ - -struct VmxnetTxPkt; - -/** - * Init function for tx packet functionality - * - * @pkt: packet pointer - * @max_frags: max tx ip fragments - * @has_virt_hdr: device uses virtio header. - */ -void vmxnet_tx_pkt_init(struct VmxnetTxPkt **pkt, uint32_t max_frags, - bool has_virt_hdr); - -/** - * Clean all tx packet resources. - * - * @pkt: packet. - */ -void vmxnet_tx_pkt_uninit(struct VmxnetTxPkt *pkt); - -/** - * get virtio header - * - * @pkt: packet - * @ret: virtio header - */ -struct virtio_net_hdr *vmxnet_tx_pkt_get_vhdr(struct VmxnetTxPkt *pkt); - -/** - * build virtio header (will be stored in module context) - * - * @pkt: packet - * @tso_enable: TSO enabled - * @csum_enable: CSO enabled - * @gso_size: MSS size for TSO - * - */ -void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable, - bool csum_enable, uint32_t gso_size); - -/** - * updates vlan tag, and adds vlan header in case it is missing - * - * @pkt: packet - * @vlan: VLAN tag - * - */ -void vmxnet_tx_pkt_setup_vlan_header(struct VmxnetTxPkt *pkt, uint16_t vlan); - -/** - * populate data fragment into pkt context. - * - * @pkt: packet - * @pa: physical address of fragment - * @len: length of fragment - * - */ -bool vmxnet_tx_pkt_add_raw_fragment(struct VmxnetTxPkt *pkt, hwaddr pa, - size_t len); - -/** - * fix ip header fields and calculate checksums needed. - * - * @pkt: packet - * - */ -void vmxnet_tx_pkt_update_ip_checksums(struct VmxnetTxPkt *pkt); - -/** - * get length of all populated data. - * - * @pkt: packet - * @ret: total data length - * - */ -size_t vmxnet_tx_pkt_get_total_len(struct VmxnetTxPkt *pkt); - -/** - * get packet type - * - * @pkt: packet - * @ret: packet type - * - */ -eth_pkt_types_e vmxnet_tx_pkt_get_packet_type(struct VmxnetTxPkt *pkt); - -/** - * prints packet data if debug is enabled - * - * @pkt: packet - * - */ -void vmxnet_tx_pkt_dump(struct VmxnetTxPkt *pkt); - -/** - * reset tx packet private context (needed to be called between packets) - * - * @pkt: packet - * - */ -void vmxnet_tx_pkt_reset(struct VmxnetTxPkt *pkt); - -/** - * Send packet to qemu. handles sw offloads if vhdr is not supported. - * - * @pkt: packet - * @nc: NetClientState - * @ret: operation result - * - */ -bool vmxnet_tx_pkt_send(struct VmxnetTxPkt *pkt, NetClientState *nc); - -/** - * parse raw packet data and analyze offload requirements. - * - * @pkt: packet - * - */ -bool vmxnet_tx_pkt_parse(struct VmxnetTxPkt *pkt); - -#endif -- cgit v1.1 From 66409b7c8bd0ebb075a6af8cbc7846fc0a95107d Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:40 +0300 Subject: rtl8139: Move more TCP definitions to common header Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/rtl8139.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'hw') diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index 1e5ec14..562c1fd 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -1867,11 +1867,6 @@ static int rtl8139_transmit_one(RTL8139State *s, int descriptor) return 1; } -/* structures and macros for task offloading */ -#define TCP_HEADER_DATA_OFFSET(tcp) (((be16_to_cpu(tcp->th_offset_flags) >> 12)&0xf) << 2) -#define TCP_FLAGS_ONLY(flags) ((flags)&0x3f) -#define TCP_HEADER_FLAGS(tcp) TCP_FLAGS_ONLY(be16_to_cpu(tcp->th_offset_flags)) - #define TCP_HEADER_CLEAR_FLAGS(tcp, off) ((tcp)->th_offset_flags &= cpu_to_be16(~TCP_FLAGS_ONLY(off))) /* produces ones' complement sum of data */ -- cgit v1.1 From eb700029c7836798046191d62d595363d92c84d4 Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:41 +0300 Subject: net_pkt: Extend packet abstraction as required by e1000e functionality This patch extends the TX/RX packet abstractions with features that will be used by the e1000e device implementation. Changes are: 1. Support iovec lists for RX buffers 2. Deeper RX packets parsing 3. Loopback option for TX packets 4. Extended VLAN headers handling 5. RSS processing for RX packets Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/net_rx_pkt.c | 473 ++++++++++++++++++++++++++++++++++++++++++++++++---- hw/net/net_rx_pkt.h | 193 ++++++++++++++++++++- hw/net/net_tx_pkt.c | 204 +++++++++++++--------- hw/net/net_tx_pkt.h | 60 ++++++- 4 files changed, 813 insertions(+), 117 deletions(-) (limited to 'hw') diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c index 8a4f29f..1019b50 100644 --- a/hw/net/net_rx_pkt.c +++ b/hw/net/net_rx_pkt.c @@ -16,24 +16,16 @@ */ #include "qemu/osdep.h" +#include "trace.h" #include "net_rx_pkt.h" -#include "net/eth.h" -#include "qemu-common.h" -#include "qemu/iov.h" #include "net/checksum.h" #include "net/tap.h" -/* - * RX packet may contain up to 2 fragments - rebuilt eth header - * in case of VLAN tag stripping - * and payload received from QEMU - in any case - */ -#define NET_MAX_RX_PACKET_FRAGMENTS (2) - struct NetRxPkt { struct virtio_net_hdr virt_hdr; - uint8_t ehdr_buf[ETH_MAX_L2_HDR_LEN]; - struct iovec vec[NET_MAX_RX_PACKET_FRAGMENTS]; + uint8_t ehdr_buf[sizeof(struct eth_header)]; + struct iovec *vec; + uint16_t vec_len_total; uint16_t vec_len; uint32_t tot_len; uint16_t tci; @@ -46,17 +38,31 @@ struct NetRxPkt { bool isip6; bool isudp; bool istcp; + + size_t l3hdr_off; + size_t l4hdr_off; + size_t l5hdr_off; + + eth_ip6_hdr_info ip6hdr_info; + eth_ip4_hdr_info ip4hdr_info; + eth_l4_hdr_info l4hdr_info; }; void net_rx_pkt_init(struct NetRxPkt **pkt, bool has_virt_hdr) { struct NetRxPkt *p = g_malloc0(sizeof *p); p->has_virt_hdr = has_virt_hdr; + p->vec = NULL; + p->vec_len_total = 0; *pkt = p; } void net_rx_pkt_uninit(struct NetRxPkt *pkt) { + if (pkt->vec_len_total != 0) { + g_free(pkt->vec); + } + g_free(pkt); } @@ -66,33 +72,88 @@ struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt) return &pkt->virt_hdr; } -void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, - size_t len, bool strip_vlan) +static inline void +net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt, + int new_iov_len) +{ + if (pkt->vec_len_total < new_iov_len) { + g_free(pkt->vec); + pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len); + pkt->vec_len_total = new_iov_len; + } +} + +static void +net_rx_pkt_pull_data(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t ploff) +{ + if (pkt->vlan_stripped) { + net_rx_pkt_iovec_realloc(pkt, iovcnt + 1); + + pkt->vec[0].iov_base = pkt->ehdr_buf; + pkt->vec[0].iov_len = sizeof(pkt->ehdr_buf); + + pkt->tot_len = + iov_size(iov, iovcnt) - ploff + sizeof(struct eth_header); + + pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1, + iov, iovcnt, ploff, pkt->tot_len); + } else { + net_rx_pkt_iovec_realloc(pkt, iovcnt); + + pkt->tot_len = iov_size(iov, iovcnt) - ploff; + pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total, + iov, iovcnt, ploff, pkt->tot_len); + } + + eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->isip4, &pkt->isip6, + &pkt->isudp, &pkt->istcp, + &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, + &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); + + trace_net_rx_pkt_parsed(pkt->isip4, pkt->isip6, pkt->isudp, pkt->istcp, + pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off); +} + +void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan) { uint16_t tci = 0; - uint16_t ploff; + uint16_t ploff = iovoff; assert(pkt); pkt->vlan_stripped = false; if (strip_vlan) { - pkt->vlan_stripped = eth_strip_vlan(data, pkt->ehdr_buf, &ploff, &tci); + pkt->vlan_stripped = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf, + &ploff, &tci); } - if (pkt->vlan_stripped) { - pkt->vec[0].iov_base = pkt->ehdr_buf; - pkt->vec[0].iov_len = ploff - sizeof(struct vlan_header); - pkt->vec[1].iov_base = (uint8_t *) data + ploff; - pkt->vec[1].iov_len = len - ploff; - pkt->vec_len = 2; - pkt->tot_len = len - ploff + sizeof(struct eth_header); - } else { - pkt->vec[0].iov_base = (void *)data; - pkt->vec[0].iov_len = len; - pkt->vec_len = 1; - pkt->tot_len = len; + pkt->tci = tci; + + net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); +} + +void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan, + uint16_t vet) +{ + uint16_t tci = 0; + uint16_t ploff = iovoff; + assert(pkt); + pkt->vlan_stripped = false; + + if (strip_vlan) { + pkt->vlan_stripped = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet, + pkt->ehdr_buf, + &ploff, &tci); } pkt->tci = tci; + + net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff); } void net_rx_pkt_dump(struct NetRxPkt *pkt) @@ -132,10 +193,17 @@ size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt) void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, size_t len) { + const struct iovec iov = { + .iov_base = (void *)data, + .iov_len = len + }; + assert(pkt); - eth_get_protocols(data, len, &pkt->isip4, &pkt->isip6, - &pkt->isudp, &pkt->istcp); + eth_get_protocols(&iov, 1, &pkt->isip4, &pkt->isip6, + &pkt->isudp, &pkt->istcp, + &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, + &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); } void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, @@ -150,6 +218,180 @@ void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, *istcp = pkt->istcp; } +size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l3hdr_off; +} + +size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l4hdr_off; +} + +size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt) +{ + assert(pkt); + return pkt->l5hdr_off; +} + +eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt) +{ + return &pkt->ip6hdr_info; +} + +eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt) +{ + return &pkt->ip4hdr_info; +} + +eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt) +{ + return &pkt->l4hdr_info; +} + +static inline void +_net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written, + void *ptr, size_t size) +{ + memcpy(&rss_input[*bytes_written], ptr, size); + trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written); + *bytes_written += size; +} + +static inline void +_net_rx_rss_prepare_ip4(uint8_t *rss_input, + struct NetRxPkt *pkt, + size_t *bytes_written) +{ + struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &ip4_hdr->ip_src, sizeof(uint32_t)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &ip4_hdr->ip_dst, sizeof(uint32_t)); +} + +static inline void +_net_rx_rss_prepare_ip6(uint8_t *rss_input, + struct NetRxPkt *pkt, + bool ipv6ex, size_t *bytes_written) +{ + eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src + : &ip6info->ip6_hdr.ip6_src, + sizeof(struct in6_address)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst + : &ip6info->ip6_hdr.ip6_dst, + sizeof(struct in6_address)); +} + +static inline void +_net_rx_rss_prepare_tcp(uint8_t *rss_input, + struct NetRxPkt *pkt, + size_t *bytes_written) +{ + struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp; + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &tcphdr->th_sport, sizeof(uint16_t)); + + _net_rx_rss_add_chunk(rss_input, bytes_written, + &tcphdr->th_dport, sizeof(uint16_t)); +} + +uint32_t +net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, + NetRxPktRssType type, + uint8_t *key) +{ + uint8_t rss_input[36]; + size_t rss_length = 0; + uint32_t rss_hash = 0; + net_toeplitz_key key_data; + + switch (type) { + case NetPktRssIpV4: + assert(pkt->isip4); + trace_net_rx_pkt_rss_ip4(); + _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV4Tcp: + assert(pkt->isip4); + assert(pkt->istcp); + trace_net_rx_pkt_rss_ip4_tcp(); + _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length); + _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV6Tcp: + assert(pkt->isip6); + assert(pkt->istcp); + trace_net_rx_pkt_rss_ip6_tcp(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); + _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length); + break; + case NetPktRssIpV6: + assert(pkt->isip6); + trace_net_rx_pkt_rss_ip6(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length); + break; + case NetPktRssIpV6Ex: + assert(pkt->isip6); + trace_net_rx_pkt_rss_ip6_ex(); + _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length); + break; + default: + assert(false); + break; + } + + net_toeplitz_key_init(&key_data, key); + net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data); + + trace_net_rx_pkt_rss_hash(rss_length, rss_hash); + + return rss_hash; +} + +uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->isip4) { + return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id); + } + + return 0; +} + +bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->istcp) { + return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK; + } + + return false; +} + +bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt) +{ + assert(pkt); + + if (pkt->istcp) { + return pkt->l4hdr_info.has_tcp_data; + } + + return false; +} + struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) { assert(pkt); @@ -157,6 +399,13 @@ struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt) return pkt->vec; } +uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt) +{ + assert(pkt); + + return pkt->vec_len; +} + void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, struct virtio_net_hdr *vhdr) { @@ -165,6 +414,14 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr); } +void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt) +{ + assert(pkt); + + iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr); +} + bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt) { assert(pkt); @@ -185,3 +442,159 @@ uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt) return pkt->tci; } + +bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid) +{ + uint32_t cntr; + uint16_t csum; + uint32_t csl; + + trace_net_rx_pkt_l3_csum_validate_entry(); + + if (!pkt->isip4) { + trace_net_rx_pkt_l3_csum_validate_not_ip4(); + return false; + } + + csl = pkt->l4hdr_off - pkt->l3hdr_off; + + cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len, + pkt->l3hdr_off, + csl, 0); + + csum = net_checksum_finish(cntr); + + *csum_valid = (csum == 0); + + trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl, + cntr, csum, *csum_valid); + + return true; +} + +static uint16_t +_net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt) +{ + uint32_t cntr; + uint16_t csum; + uint16_t csl; + uint32_t cso; + + trace_net_rx_pkt_l4_csum_calc_entry(); + + if (pkt->isip4) { + if (pkt->isudp) { + csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); + trace_net_rx_pkt_l4_csum_calc_ip4_udp(); + } else { + csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) - + IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr); + trace_net_rx_pkt_l4_csum_calc_ip4_tcp(); + } + + cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr, + csl, &cso); + trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); + } else { + if (pkt->isudp) { + csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen); + trace_net_rx_pkt_l4_csum_calc_ip6_udp(); + } else { + struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr; + size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off; + size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header); + + csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) - + ip6opts_len; + trace_net_rx_pkt_l4_csum_calc_ip6_tcp(); + } + + cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl, + pkt->ip6hdr_info.l4proto, &cso); + trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl); + } + + cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len, + pkt->l4hdr_off, csl, cso); + + csum = net_checksum_finish(cntr); + + trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum); + + return csum; +} + +bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid) +{ + uint16_t csum; + + trace_net_rx_pkt_l4_csum_validate_entry(); + + if (!pkt->istcp && !pkt->isudp) { + trace_net_rx_pkt_l4_csum_validate_not_xxp(); + return false; + } + + if (pkt->isudp && (pkt->l4hdr_info.hdr.udp.uh_sum == 0)) { + trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum(); + return false; + } + + if (pkt->isip4 && pkt->ip4hdr_info.fragment) { + trace_net_rx_pkt_l4_csum_validate_ip4_fragment(); + return false; + } + + csum = _net_rx_pkt_calc_l4_csum(pkt); + + *csum_valid = ((csum == 0) || (csum == 0xFFFF)); + + trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid); + + return true; +} + +bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt) +{ + uint16_t csum = 0; + uint32_t l4_cso; + + trace_net_rx_pkt_l4_csum_fix_entry(); + + if (pkt->istcp) { + l4_cso = offsetof(struct tcp_header, th_sum); + trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso); + } else if (pkt->isudp) { + if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) { + trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum(); + return false; + } + l4_cso = offsetof(struct udp_header, uh_sum); + trace_net_rx_pkt_l4_csum_fix_udp(l4_cso); + } else { + trace_net_rx_pkt_l4_csum_fix_not_xxp(); + return false; + } + + if (pkt->isip4 && pkt->ip4hdr_info.fragment) { + trace_net_rx_pkt_l4_csum_fix_ip4_fragment(); + return false; + } + + /* Set zero to checksum word */ + iov_from_buf(pkt->vec, pkt->vec_len, + pkt->l4hdr_off + l4_cso, + &csum, sizeof(csum)); + + /* Calculate L4 checksum */ + csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt)); + + /* Set calculated checksum to checksum word */ + iov_from_buf(pkt->vec, pkt->vec_len, + pkt->l4hdr_off + l4_cso, + &csum, sizeof(csum)); + + trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum); + + return true; +} diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h index 897330a..7adf0fa 100644 --- a/hw/net/net_rx_pkt.h +++ b/hw/net/net_rx_pkt.h @@ -78,6 +78,103 @@ void net_rx_pkt_get_protocols(struct NetRxPkt *pkt, bool *isudp, bool *istcp); /** +* fetches L3 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt); + +/** +* fetches L4 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt); + +/** +* fetches L5 header offset +* +* @pkt: packet +* +*/ +size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt); + +/** + * fetches IP6 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt); + +/** + * fetches IP4 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt); + +/** + * fetches L4 header analysis results + * + * Return: pointer to analysis results structure which is stored in internal + * packet area. + * + */ +eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt); + +typedef enum { + NetPktRssIpV4, + NetPktRssIpV4Tcp, + NetPktRssIpV6Tcp, + NetPktRssIpV6, + NetPktRssIpV6Ex +} NetRxPktRssType; + +/** +* calculates RSS hash for packet +* +* @pkt: packet +* @type: RSS hash type +* +* Return: Toeplitz RSS hash. +* +*/ +uint32_t +net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt, + NetRxPktRssType type, + uint8_t *key); + +/** +* fetches IP identification for the packet +* +* @pkt: packet +* +*/ +uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt); + +/** +* check if given packet is a TCP ACK packet +* +* @pkt: packet +* +*/ +bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt); + +/** +* check if given packet contains TCP data +* +* @pkt: packet +* +*/ +bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt); + +/** * returns virtio header stored in rx context * * @pkt: packet @@ -123,6 +220,37 @@ bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt); bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt); /** +* attach scatter-gather data to rx packet +* +* @pkt: packet +* @iov: received data scatter-gather list +* @iovcnt number of elements in iov +* @iovoff data start offset in the iov +* @strip_vlan: should the module strip vlan from data +* +*/ +void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, + int iovcnt, size_t iovoff, + bool strip_vlan); + +/** +* attach scatter-gather data to rx packet +* +* @pkt: packet +* @iov: received data scatter-gather list +* @iovcnt number of elements in iov +* @iovoff data start offset in the iov +* @strip_vlan: should the module strip vlan from data +* @vet: VLAN tag Ethernet type +* +*/ +void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt, + size_t iovoff, bool strip_vlan, + uint16_t vet); + +/** * attach data to rx packet * * @pkt: packet @@ -131,8 +259,17 @@ bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt); * @strip_vlan: should the module strip vlan from data * */ -void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, - size_t len, bool strip_vlan); +static inline void +net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, + size_t len, bool strip_vlan) +{ + const struct iovec iov = { + .iov_base = (void *) data, + .iov_len = len + }; + + net_rx_pkt_attach_iovec(pkt, &iov, 1, 0, strip_vlan); +} /** * returns io vector that holds the attached data @@ -144,6 +281,15 @@ void net_rx_pkt_attach_data(struct NetRxPkt *pkt, const void *data, struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt); /** +* returns io vector length that holds the attached data +* +* @pkt: packet +* @ret: IOVec length +* +*/ +uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt); + +/** * prints rx packet data if debug is enabled * * @pkt: packet @@ -162,6 +308,17 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, struct virtio_net_hdr *vhdr); /** +* copy passed vhdr data to packet context +* +* @pkt: packet +* @iov: VHDR iov +* @iovcnt: VHDR iov array size +* +*/ +void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt, + const struct iovec *iov, int iovcnt); + +/** * save packet type in packet context * * @pkt: packet @@ -171,4 +328,36 @@ void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt, void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt, eth_pkt_types_e packet_type); +/** +* validate TCP/UDP checksum of the packet +* +* @pkt: packet +* @csum_valid: checksum validation result +* @ret: true if validation was performed, false in case packet is +* not TCP/UDP or checksum validation is not possible +* +*/ +bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid); + +/** +* validate IPv4 checksum of the packet +* +* @pkt: packet +* @csum_valid: checksum validation result +* @ret: true if validation was performed, false in case packet is +* not TCP/UDP or checksum validation is not possible +* +*/ +bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid); + +/** +* fix IPv4 checksum of the packet +* +* @pkt: packet +* @ret: true if checksum was fixed, false in case packet is +* not TCP/UDP or checksum correction is not possible +* +*/ +bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt); + #endif diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c index 94c7e3d..a64f51c 100644 --- a/hw/net/net_tx_pkt.c +++ b/hw/net/net_tx_pkt.c @@ -15,12 +15,8 @@ * */ -#include "qemu/osdep.h" -#include "hw/hw.h" #include "net_tx_pkt.h" #include "net/eth.h" -#include "qemu-common.h" -#include "qemu/iov.h" #include "net/checksum.h" #include "net/tap.h" #include "net/net.h" @@ -44,6 +40,7 @@ struct NetTxPkt { struct iovec *vec; uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN]; + uint8_t l3_hdr[ETH_MAX_IP_DGRAM_LEN]; uint32_t payload_len; @@ -53,6 +50,8 @@ struct NetTxPkt { uint16_t hdr_len; eth_pkt_types_e packet_type; uint8_t l4proto; + + bool is_loopback; }; void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, @@ -72,8 +71,7 @@ void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = p->has_virt_hdr ? sizeof p->virt_hdr : 0; p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr; - p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL; - p->vec[NET_TX_PKT_L3HDR_FRAG].iov_len = 0; + p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = &p->l3_hdr; *pkt = p; } @@ -87,38 +85,52 @@ void net_tx_pkt_uninit(struct NetTxPkt *pkt) } } -void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt) +void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt) { uint16_t csum; - uint32_t ph_raw_csum; assert(pkt); - uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; struct ip_header *ip_hdr; - - if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type && - VIRTIO_NET_HDR_GSO_UDP != gso_type) { - return; - } - ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; - if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len > - ETH_MAX_IP_DGRAM_LEN) { - return; - } - ip_hdr->ip_len = cpu_to_be16(pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); - /* Calculate IP header checksum */ ip_hdr->ip_sum = 0; csum = net_raw_checksum((uint8_t *)ip_hdr, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len); ip_hdr->ip_sum = cpu_to_be16(csum); +} + +void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt) +{ + uint16_t csum; + uint32_t cntr, cso; + assert(pkt); + uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; + void *ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; + + if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len > + ETH_MAX_IP_DGRAM_LEN) { + return; + } + + if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 || + gso_type == VIRTIO_NET_HDR_GSO_UDP) { + /* Calculate IP header checksum */ + net_tx_pkt_update_ip_hdr_checksum(pkt); + + /* Calculate IP pseudo header checksum */ + cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso); + csum = cpu_to_be16(~net_checksum_finish(cntr)); + } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) { + /* Calculate IP pseudo header checksum */ + cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len, + IP_PROTO_TCP, &cso); + csum = cpu_to_be16(~net_checksum_finish(cntr)); + } else { + return; + } - /* Calculate IP pseudo header checksum */ - ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len); - csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum)); iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, pkt->virt_hdr.csum_offset, &csum, sizeof(csum)); } @@ -160,15 +172,19 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) if (bytes_read < l2_hdr->iov_len) { l2_hdr->iov_len = 0; + l3_hdr->iov_len = 0; + pkt->packet_type = ETH_PKT_UCAST; return false; + } else { + l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN; + l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base); + pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); } - l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len); + l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len); switch (l3_proto) { case ETH_P_IP: - l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN); - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, l3_hdr->iov_base, sizeof(struct ip_header)); @@ -178,27 +194,45 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) } l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base); - pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; - /* copy optional IPv4 header data */ - bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, - l2_hdr->iov_len + sizeof(struct ip_header), - l3_hdr->iov_base + sizeof(struct ip_header), - l3_hdr->iov_len - sizeof(struct ip_header)); - if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { + if (l3_hdr->iov_len < sizeof(struct ip_header)) { l3_hdr->iov_len = 0; return false; } + + pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p; + + if (IP_HDR_GET_LEN(l3_hdr->iov_base) != sizeof(struct ip_header)) { + /* copy optional IPv4 header data if any*/ + bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, + l2_hdr->iov_len + sizeof(struct ip_header), + l3_hdr->iov_base + sizeof(struct ip_header), + l3_hdr->iov_len - sizeof(struct ip_header)); + if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) { + l3_hdr->iov_len = 0; + return false; + } + } + break; case ETH_P_IPV6: + { + eth_ip6_hdr_info hdrinfo; + if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, - &pkt->l4proto, &full_ip6hdr_len)) { + &hdrinfo)) { l3_hdr->iov_len = 0; return false; } - l3_hdr->iov_base = g_malloc(full_ip6hdr_len); + pkt->l4proto = hdrinfo.l4proto; + full_ip6hdr_len = hdrinfo.full_hdr_len; + + if (full_ip6hdr_len > ETH_MAX_IP_DGRAM_LEN) { + l3_hdr->iov_len = 0; + return false; + } bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len, l3_hdr->iov_base, full_ip6hdr_len); @@ -210,40 +244,35 @@ static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt) l3_hdr->iov_len = full_ip6hdr_len; } break; - + } default: l3_hdr->iov_len = 0; break; } net_tx_pkt_calculate_hdr_len(pkt); - pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base); return true; } -static bool net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt) +static void net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt) { - size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; - + pkt->payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len; pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->max_payload_frags, pkt->raw, pkt->raw_frags, - pkt->hdr_len, payload_len); + pkt->hdr_len, pkt->payload_len); +} - if (pkt->payload_frags != (uint32_t) -1) { - pkt->payload_len = payload_len; +bool net_tx_pkt_parse(struct NetTxPkt *pkt) +{ + if (net_tx_pkt_parse_headers(pkt)) { + net_tx_pkt_rebuild_payload(pkt); return true; } else { return false; } } -bool net_tx_pkt_parse(struct NetTxPkt *pkt) -{ - return net_tx_pkt_parse_headers(pkt) && - net_tx_pkt_rebuild_payload(pkt); -} - struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt) { assert(pkt); @@ -256,7 +285,7 @@ static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt, uint8_t rc = VIRTIO_NET_HDR_GSO_NONE; uint16_t l3_proto; - l3_proto = eth_get_l3_proto(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, + l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1, pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len); if (!tso_enable) { @@ -288,7 +317,7 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, break; case VIRTIO_NET_HDR_GSO_UDP: - pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + pkt->virt_hdr.gso_size = gso_size; pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header); break; @@ -297,7 +326,7 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags, 0, &l4hdr, sizeof(l4hdr)); pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t); - pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size); + pkt->virt_hdr.gso_size = gso_size; break; default: @@ -322,13 +351,14 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, } } -void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan) +void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, + uint16_t vlan, uint16_t vlan_ethtype) { bool is_new; assert(pkt); - eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, - vlan, &is_new); + eth_setup_vlan_headers_ex(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, + vlan, vlan_ethtype, &is_new); /* update l2hdrlen */ if (is_new) { @@ -354,14 +384,19 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, mapped_len = len; ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false); - ventry->iov_len = mapped_len; - pkt->raw_frags += !!ventry->iov_base; - if ((ventry->iov_base == NULL) || (len != mapped_len)) { + if ((ventry->iov_base != NULL) && (len == mapped_len)) { + ventry->iov_len = mapped_len; + pkt->raw_frags++; + return true; + } else { return false; } +} - return true; +bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt) +{ + return pkt->raw_frags > 0; } eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt) @@ -401,14 +436,8 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt) memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr)); - g_free(pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base); - pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = NULL; - assert(pkt->vec); - for (i = NET_TX_PKT_L2HDR_FRAG; - i < pkt->payload_frags + NET_TX_PKT_PL_START_FRAG; i++) { - pkt->vec[i].iov_len = 0; - } + pkt->payload_len = 0; pkt->payload_frags = 0; @@ -417,12 +446,10 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt) assert(pkt->raw[i].iov_base); cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len, false, pkt->raw[i].iov_len); - pkt->raw[i].iov_len = 0; } pkt->raw_frags = 0; pkt->hdr_len = 0; - pkt->packet_type = 0; pkt->l4proto = 0; } @@ -431,6 +458,7 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) struct iovec *iov = &pkt->vec[NET_TX_PKT_L2HDR_FRAG]; uint32_t csum_cntr; uint16_t csum = 0; + uint32_t cso; /* num of iovec without vhdr */ uint32_t iov_len = pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1; uint16_t csl; @@ -443,12 +471,13 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt) /* Calculate L4 TCP/UDP checksum */ csl = pkt->payload_len; - /* data checksum */ - csum_cntr = - net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl); /* add pseudo header to csum */ iphdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base; - csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl); + csum_cntr = eth_calc_ip4_pseudo_hdr_csum(iphdr, csl, &cso); + + /* data checksum */ + csum_cntr += + net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso); /* Put the checksum obtained into the packet */ csum = cpu_to_be16(net_checksum_finish(csum_cntr)); @@ -471,7 +500,7 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, *dst_idx = NET_TX_PKT_FRAGMENT_HEADER_NUM; - while (fetched < pkt->virt_hdr.gso_size) { + while (fetched < IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size)) { /* no more place in fragment iov */ if (*dst_idx == NET_MAX_FRAG_SG_LIST) { @@ -486,7 +515,7 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset; dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset, - pkt->virt_hdr.gso_size - fetched); + IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size) - fetched); *src_offset += dst[*dst_idx].iov_len; fetched += dst[*dst_idx].iov_len; @@ -502,6 +531,16 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt, return fetched; } +static inline void net_tx_pkt_sendv(struct NetTxPkt *pkt, + NetClientState *nc, const struct iovec *iov, int iov_cnt) +{ + if (pkt->is_loopback) { + nc->info->receive_iov(nc, iov, iov_cnt); + } else { + qemu_sendv_packet(nc, iov, iov_cnt); + } +} + static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt, NetClientState *nc) { @@ -540,7 +579,7 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt, eth_fix_ip4_checksum(l3_iov_base, l3_iov_len); - qemu_sendv_packet(nc, fragment, dst_idx); + net_tx_pkt_sendv(pkt, nc, fragment, dst_idx); fragment_offset += fragment_len; @@ -572,10 +611,21 @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc) if (pkt->has_virt_hdr || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) { - qemu_sendv_packet(nc, pkt->vec, + net_tx_pkt_sendv(pkt, nc, pkt->vec, pkt->payload_frags + NET_TX_PKT_PL_START_FRAG); return true; } return net_tx_pkt_do_sw_fragmentation(pkt, nc); } + +bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc) +{ + bool res; + + pkt->is_loopback = true; + res = net_tx_pkt_send(pkt, nc); + pkt->is_loopback = false; + + return res; +} diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h index be2e117..e49772d 100644 --- a/hw/net/net_tx_pkt.h +++ b/hw/net/net_tx_pkt.h @@ -18,6 +18,7 @@ #ifndef NET_TX_PKT_H #define NET_TX_PKT_H +#include "qemu/osdep.h" #include "net/eth.h" #include "exec/hwaddr.h" @@ -64,13 +65,29 @@ void net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, bool csum_enable, uint32_t gso_size); /** - * updates vlan tag, and adds vlan header in case it is missing - * - * @pkt: packet - * @vlan: VLAN tag - * - */ -void net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan); +* updates vlan tag, and adds vlan header with custom ethernet type +* in case it is missing. +* +* @pkt: packet +* @vlan: VLAN tag +* @vlan_ethtype: VLAN header Ethernet type +* +*/ +void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, + uint16_t vlan, uint16_t vlan_ethtype); + +/** +* updates vlan tag, and adds vlan header in case it is missing +* +* @pkt: packet +* @vlan: VLAN tag +* +*/ +static inline void +net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan) +{ + net_tx_pkt_setup_vlan_header_ex(pkt, vlan, ETH_P_VLAN); +} /** * populate data fragment into pkt context. @@ -84,7 +101,7 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, size_t len); /** - * fix ip header fields and calculate checksums needed. + * Fix ip header fields and calculate IP header and pseudo header checksums. * * @pkt: packet * @@ -92,6 +109,14 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt); /** + * Calculate the IP header checksum. + * + * @pkt: packet + * + */ +void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt); + +/** * get length of all populated data. * * @pkt: packet @@ -136,6 +161,17 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt); bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc); /** +* Redirect packet directly to receive path (emulate loopback phy). +* Handles sw offloads if vhdr is not supported. +* +* @pkt: packet +* @nc: NetClientState +* @ret: operation result +* +*/ +bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc); + +/** * parse raw packet data and analyze offload requirements. * * @pkt: packet @@ -143,4 +179,12 @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc); */ bool net_tx_pkt_parse(struct NetTxPkt *pkt); +/** +* indicates if there are data fragments held by this packet object. +* +* @pkt: packet +* +*/ +bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt); + #endif -- cgit v1.1 From 111710107dc6041de871af437214d32d1db088d1 Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:42 +0300 Subject: vmxnet3: Use pci_dma_* API instead of cpu_physical_memory_* To make this device and network packets abstractions ready for IOMMU. Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/net_tx_pkt.c | 16 +++++++++++----- hw/net/net_tx_pkt.h | 5 +++-- hw/net/vmxnet3.c | 51 ++++++++++++++++++++++++++++++--------------------- 3 files changed, 44 insertions(+), 28 deletions(-) (limited to 'hw') diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c index a64f51c..e4478be 100644 --- a/hw/net/net_tx_pkt.c +++ b/hw/net/net_tx_pkt.c @@ -20,6 +20,7 @@ #include "net/checksum.h" #include "net/tap.h" #include "net/net.h" +#include "hw/pci/pci.h" enum { NET_TX_PKT_VHDR_FRAG = 0, @@ -30,6 +31,8 @@ enum { /* TX packet private context */ struct NetTxPkt { + PCIDevice *pci_dev; + struct virtio_net_hdr virt_hdr; bool has_virt_hdr; @@ -54,11 +57,13 @@ struct NetTxPkt { bool is_loopback; }; -void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, - bool has_virt_hdr) +void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev, + uint32_t max_frags, bool has_virt_hdr) { struct NetTxPkt *p = g_malloc0(sizeof *p); + p->pci_dev = pci_dev; + p->vec = g_malloc((sizeof *p->vec) * (max_frags + NET_TX_PKT_PL_START_FRAG)); @@ -383,7 +388,8 @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, ventry = &pkt->raw[pkt->raw_frags]; mapped_len = len; - ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false); + ventry->iov_base = pci_dma_map(pkt->pci_dev, pa, + &mapped_len, DMA_DIRECTION_TO_DEVICE); if ((ventry->iov_base != NULL) && (len == mapped_len)) { ventry->iov_len = mapped_len; @@ -444,8 +450,8 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt) assert(pkt->raw); for (i = 0; i < pkt->raw_frags; i++) { assert(pkt->raw[i].iov_base); - cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len, - false, pkt->raw[i].iov_len); + pci_dma_unmap(pkt->pci_dev, pkt->raw[i].iov_base, pkt->raw[i].iov_len, + DMA_DIRECTION_TO_DEVICE, 0); } pkt->raw_frags = 0; diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h index e49772d..07b9a20 100644 --- a/hw/net/net_tx_pkt.h +++ b/hw/net/net_tx_pkt.h @@ -31,11 +31,12 @@ struct NetTxPkt; * Init function for tx packet functionality * * @pkt: packet pointer + * @pci_dev: PCI device processing this packet * @max_frags: max tx ip fragments * @has_virt_hdr: device uses virtio header. */ -void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags, - bool has_virt_hdr); +void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev, + uint32_t max_frags, bool has_virt_hdr); /** * Clean all tx packet resources. diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index 33cd07d..16645e6 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -802,7 +802,9 @@ vmxnet3_pop_rxc_descr(VMXNET3State *s, int qidx, uint32_t *descr_gen) hwaddr daddr = vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring); - cpu_physical_memory_read(daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc)); + pci_dma_read(PCI_DEVICE(s), daddr, + &rxcd, sizeof(struct Vmxnet3_RxCompDesc)); + ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring); if (rxcd.gen != ring_gen) { @@ -1023,10 +1025,11 @@ nocsum: } static void -vmxnet3_physical_memory_writev(const struct iovec *iov, - size_t start_iov_off, - hwaddr target_addr, - size_t bytes_to_copy) +vmxnet3_pci_dma_writev(PCIDevice *pci_dev, + const struct iovec *iov, + size_t start_iov_off, + hwaddr target_addr, + size_t bytes_to_copy) { size_t curr_off = 0; size_t copied = 0; @@ -1036,9 +1039,9 @@ vmxnet3_physical_memory_writev(const struct iovec *iov, size_t chunk_len = MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy); - cpu_physical_memory_write(target_addr + copied, - iov->iov_base + start_iov_off - curr_off, - chunk_len); + pci_dma_write(pci_dev, target_addr + copied, + iov->iov_base + start_iov_off - curr_off, + chunk_len); copied += chunk_len; start_iov_off += chunk_len; @@ -1088,15 +1091,15 @@ vmxnet3_indicate_packet(VMXNET3State *s) } chunk_size = MIN(bytes_left, rxd.len); - vmxnet3_physical_memory_writev(data, bytes_copied, - le64_to_cpu(rxd.addr), chunk_size); + vmxnet3_pci_dma_writev(PCI_DEVICE(s), data, bytes_copied, + le64_to_cpu(rxd.addr), chunk_size); bytes_copied += chunk_size; bytes_left -= chunk_size; vmxnet3_dump_rx_descr(&rxd); if (ready_rxcd_pa != 0) { - cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd)); + pci_dma_write(PCI_DEVICE(s), ready_rxcd_pa, &rxcd, sizeof(rxcd)); } memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc)); @@ -1127,7 +1130,8 @@ vmxnet3_indicate_packet(VMXNET3State *s) if (ready_rxcd_pa != 0) { rxcd.eop = 1; rxcd.err = (bytes_left != 0); - cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd)); + + pci_dma_write(PCI_DEVICE(s), ready_rxcd_pa, &rxcd, sizeof(rxcd)); /* Flush RX descriptor changes */ smp_wmb(); @@ -1298,7 +1302,8 @@ static void vmxnet3_update_mcast_filters(VMXNET3State *s) VMXNET3_READ_DRV_SHARED64(s->drv_shmem, devRead.rxFilterConf.mfTablePA); - cpu_physical_memory_read(mcast_list_pa, s->mcast_list, list_bytes); + pci_dma_read(PCI_DEVICE(s), mcast_list_pa, s->mcast_list, list_bytes); + VMW_CFPRN("Current multicast list len is %d:", s->mcast_list_len); for (i = 0; i < s->mcast_list_len; i++) { VMW_CFPRN("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a)); @@ -1328,15 +1333,17 @@ static void vmxnet3_fill_stats(VMXNET3State *s) return; for (i = 0; i < s->txq_num; i++) { - cpu_physical_memory_write(s->txq_descr[i].tx_stats_pa, - &s->txq_descr[i].txq_stats, - sizeof(s->txq_descr[i].txq_stats)); + pci_dma_write(PCI_DEVICE(s), + s->txq_descr[i].tx_stats_pa, + &s->txq_descr[i].txq_stats, + sizeof(s->txq_descr[i].txq_stats)); } for (i = 0; i < s->rxq_num; i++) { - cpu_physical_memory_write(s->rxq_descr[i].rx_stats_pa, - &s->rxq_descr[i].rxq_stats, - sizeof(s->rxq_descr[i].rxq_stats)); + pci_dma_write(PCI_DEVICE(s), + s->rxq_descr[i].rx_stats_pa, + &s->rxq_descr[i].rxq_stats, + sizeof(s->rxq_descr[i].rxq_stats)); } } @@ -1558,7 +1565,8 @@ static void vmxnet3_activate_device(VMXNET3State *s) /* Preallocate TX packet wrapper */ VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags); - net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); + net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), + s->max_tx_frags, s->peer_has_vhdr); net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); /* Read rings memory locations for RX queues */ @@ -2536,7 +2544,8 @@ static int vmxnet3_post_load(void *opaque, int version_id) VMXNET3State *s = opaque; PCIDevice *d = PCI_DEVICE(s); - net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags, s->peer_has_vhdr); + net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), + s->max_tx_frags, s->peer_has_vhdr); net_rx_pkt_init(&s->rx_pkt, s->peer_has_vhdr); if (s->msix_used) { -- cgit v1.1 From 06e7fa0ad7a0977d741e485c6f2366c8535648fd Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:43 +0300 Subject: e1000_regs: Add definitions for Intel 82574-specific bits Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/e1000_regs.h | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 342 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h index 1c40244..d62b3fa 100644 --- a/hw/net/e1000_regs.h +++ b/hw/net/e1000_regs.h @@ -85,6 +85,7 @@ #define E1000_DEV_ID_82573E 0x108B #define E1000_DEV_ID_82573E_IAMT 0x108C #define E1000_DEV_ID_82573L 0x109A +#define E1000_DEV_ID_82574L 0x10D3 #define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 #define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 #define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 @@ -104,6 +105,7 @@ #define E1000_PHY_ID2_82544x 0xC30 #define E1000_PHY_ID2_8254xx_DEFAULT 0xC20 /* 82540x, 82545x, and 82546x */ #define E1000_PHY_ID2_82573x 0xCC0 +#define E1000_PHY_ID2_82574x 0xCB1 /* Register Set. (82543, 82544) * @@ -135,8 +137,11 @@ #define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ #define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ #define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ +#define E1000_EIAC 0x000DC /* Ext. Interrupt Auto Clear - RW */ #define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ #define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ +#define E1000_IVAR 0x000E4 /* Interrupt Vector Allocation Register - RW */ +#define E1000_EITR 0x000E8 /* Extended Interrupt Throttling Rate - RW */ #define E1000_RCTL 0x00100 /* RX Control - RW */ #define E1000_RDTR1 0x02820 /* RX Delay Timer (1) - RW */ #define E1000_RDBAL1 0x02900 /* RX Descriptor Base Address Low (1) - RW */ @@ -145,6 +150,7 @@ #define E1000_RDH1 0x02910 /* RX Descriptor Head (1) - RW */ #define E1000_RDT1 0x02918 /* RX Descriptor Tail (1) - RW */ #define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ +#define E1000_FCRTV 0x05F40 /* Flow Control Refresh Timer Value - RW */ #define E1000_TXCW 0x00178 /* TX Configuration Word - RW */ #define E1000_RXCW 0x00180 /* RX Configuration Word - RO */ #define E1000_TCTL 0x00400 /* TX Control - RW */ @@ -161,6 +167,10 @@ #define E1000_PBM 0x10000 /* Packet Buffer Memory - RW */ #define E1000_PBS 0x01008 /* Packet Buffer Size - RW */ #define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ +#define E1000_EEMNGDATA 0x01014 /* MNG EEPROM Read/Write data */ +#define E1000_FLMNGCTL 0x01018 /* MNG Flash Control */ +#define E1000_FLMNGDATA 0x0101C /* MNG FLASH Read data */ +#define E1000_FLMNGCNT 0x01020 /* MNG FLASH Read Counter */ #define E1000_FLASH_UPDATES 1000 #define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ #define E1000_FLASHT 0x01028 /* FLASH Timer Register */ @@ -169,9 +179,12 @@ #define E1000_FLSWDATA 0x01034 /* FLASH data register */ #define E1000_FLSWCNT 0x01038 /* FLASH Access Counter */ #define E1000_FLOP 0x0103C /* FLASH Opcode Register */ +#define E1000_FLOL 0x01050 /* FEEP Auto Load */ #define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ #define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ +#define E1000_FCRTL_A 0x00168 /* Alias to FCRTL */ #define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ +#define E1000_FCRTH_A 0x00160 /* Alias to FCRTH */ #define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */ #define E1000_RDBAL 0x02800 /* RX Descriptor Base Address Low - RW */ #define E1000_RDBAH 0x02804 /* RX Descriptor Base Address High - RW */ @@ -179,11 +192,17 @@ #define E1000_RDH 0x02810 /* RX Descriptor Head - RW */ #define E1000_RDT 0x02818 /* RX Descriptor Tail - RW */ #define E1000_RDTR 0x02820 /* RX Delay Timer - RW */ +#define E1000_RDTR_A 0x00108 /* Alias to RDTR */ #define E1000_RDBAL0 E1000_RDBAL /* RX Desc Base Address Low (0) - RW */ +#define E1000_RDBAL0_A 0x00110 /* Alias to RDBAL0 */ #define E1000_RDBAH0 E1000_RDBAH /* RX Desc Base Address High (0) - RW */ +#define E1000_RDBAH0_A 0x00114 /* Alias to RDBAH0 */ #define E1000_RDLEN0 E1000_RDLEN /* RX Desc Length (0) - RW */ +#define E1000_RDLEN0_A 0x00118 /* Alias to RDLEN0 */ #define E1000_RDH0 E1000_RDH /* RX Desc Head (0) - RW */ +#define E1000_RDH0_A 0x00120 /* Alias to RDH0 */ #define E1000_RDT0 E1000_RDT /* RX Desc Tail (0) - RW */ +#define E1000_RDT0_A 0x00128 /* Alias to RDT0 */ #define E1000_RDTR0 E1000_RDTR /* RX Delay Timer (0) - RW */ #define E1000_RXDCTL 0x02828 /* RX Descriptor Control queue 0 - RW */ #define E1000_RXDCTL1 0x02928 /* RX Descriptor Control queue 1 - RW */ @@ -192,22 +211,33 @@ #define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ #define E1000_TXDMAC 0x03000 /* TX DMA Control - RW */ #define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ +#define E1000_POEMB 0x00F10 /* PHY OEM Bits Register - RW */ #define E1000_RDFH 0x02410 /* Receive Data FIFO Head Register - RW */ +#define E1000_RDFH_A 0x08000 /* Alias to RDFH */ #define E1000_RDFT 0x02418 /* Receive Data FIFO Tail Register - RW */ +#define E1000_RDFT_A 0x08008 /* Alias to RDFT */ #define E1000_RDFHS 0x02420 /* Receive Data FIFO Head Saved Register - RW */ #define E1000_RDFTS 0x02428 /* Receive Data FIFO Tail Saved Register - RW */ #define E1000_RDFPC 0x02430 /* Receive Data FIFO Packet Count - RW */ #define E1000_TDFH 0x03410 /* TX Data FIFO Head - RW */ +#define E1000_TDFH_A 0x08010 /* Alias to TDFH */ #define E1000_TDFT 0x03418 /* TX Data FIFO Tail - RW */ +#define E1000_TDFT_A 0x08018 /* Alias to TDFT */ #define E1000_TDFHS 0x03420 /* TX Data FIFO Head Saved - RW */ #define E1000_TDFTS 0x03428 /* TX Data FIFO Tail Saved - RW */ #define E1000_TDFPC 0x03430 /* TX Data FIFO Packet Count - RW */ #define E1000_TDBAL 0x03800 /* TX Descriptor Base Address Low - RW */ +#define E1000_TDBAL_A 0x00420 /* Alias to TDBAL */ #define E1000_TDBAH 0x03804 /* TX Descriptor Base Address High - RW */ +#define E1000_TDBAH_A 0x00424 /* Alias to TDBAH */ #define E1000_TDLEN 0x03808 /* TX Descriptor Length - RW */ +#define E1000_TDLEN_A 0x00428 /* Alias to TDLEN */ #define E1000_TDH 0x03810 /* TX Descriptor Head - RW */ +#define E1000_TDH_A 0x00430 /* Alias to TDH */ #define E1000_TDT 0x03818 /* TX Descripotr Tail - RW */ +#define E1000_TDT_A 0x00438 /* Alias to TDT */ #define E1000_TIDV 0x03820 /* TX Interrupt Delay Value - RW */ +#define E1000_TIDV_A 0x00440 /* Alias to TIDV */ #define E1000_TXDCTL 0x03828 /* TX Descriptor Control - RW */ #define E1000_TADV 0x0382C /* TX Interrupt Absolute Delay Val - RW */ #define E1000_TSPMT 0x03830 /* TCP Segmentation PAD & Min Threshold - RW */ @@ -288,9 +318,15 @@ #define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ #define E1000_RXCSUM 0x05000 /* RX Checksum Control - RW */ #define E1000_RFCTL 0x05008 /* Receive Filter Control*/ +#define E1000_MAVTV0 0x05010 /* Management VLAN TAG Value 0 */ +#define E1000_MAVTV1 0x05014 /* Management VLAN TAG Value 1 */ +#define E1000_MAVTV2 0x05018 /* Management VLAN TAG Value 2 */ +#define E1000_MAVTV3 0x0501c /* Management VLAN TAG Value 3 */ #define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ #define E1000_RA 0x05400 /* Receive Address - RW Array */ +#define E1000_RA_A 0x00040 /* Alias to RA */ #define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ +#define E1000_VFTA_A 0x00600 /* Alias to VFTA */ #define E1000_WUC 0x05800 /* Wakeup Control - RW */ #define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ #define E1000_WUS 0x05810 /* Wakeup Status - RO */ @@ -300,27 +336,57 @@ #define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ #define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ #define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ +#define E1000_MFUTP01 0x05828 /* Management Flex UDP/TCP Ports 0/1 - RW */ +#define E1000_MFUTP23 0x05830 /* Management Flex UDP/TCP Ports 2/3 - RW */ +#define E1000_MFVAL 0x05824 /* Manageability Filters Valid - RW */ +#define E1000_MDEF 0x05890 /* Manageability Decision Filters - RW Array */ #define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ #define E1000_HOST_IF 0x08800 /* Host Interface */ #define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ +#define E1000_FTFT 0x09400 /* Flexible TCO Filter Table - RW Array */ #define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ #define E1000_KUMCTRLSTA 0x00034 /* MAC-PHY interface - RW */ -#define E1000_MDPHYA 0x0003C /* PHY address - RW */ -#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ +#define E1000_MDPHYA 0x0003C /* PHY address - RW */ +#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ #define E1000_SW_FW_SYNC 0x05B5C /* Software-Firmware Synchronization - RW */ #define E1000_GCR 0x05B00 /* PCI-Ex Control */ +#define E1000_FUNCTAG 0x05B08 /* Function-Tag Register */ #define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ #define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ #define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ #define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ +#define E1000_GSCN_0 0x05B20 /* 3GIO Statistic Counter Register #0 */ +#define E1000_GSCN_1 0x05B24 /* 3GIO Statistic Counter Register #1 */ +#define E1000_GSCN_2 0x05B28 /* 3GIO Statistic Counter Register #2 */ +#define E1000_GSCN_3 0x05B2C /* 3GIO Statistic Counter Register #3 */ #define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ #define E1000_SWSM 0x05B50 /* SW Semaphore */ +#define E1000_GCR2 0x05B64 /* 3GIO Control Register 2 */ #define E1000_FWSM 0x05B54 /* FW Semaphore */ +#define E1000_PBACLR 0x05B68 /* MSI-X PBA Clear */ #define E1000_FFLT_DBG 0x05F04 /* Debug Register */ #define E1000_HICR 0x08F00 /* Host Inteface Control */ +#define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */ +#define E1000_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ +#define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ +#define E1000_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */ +#define E1000_RXSTMPH 0x0B628 /* Rx timestamp High - RO */ +#define E1000_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ +#define E1000_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ +#define E1000_SYSTIML 0x0B600 /* System time register Low - RO */ +#define E1000_SYSTIMH 0x0B604 /* System time register High - RO */ +#define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ +#define E1000_RXMTRL 0x0B634 /* Time sync Rx EtherType and Msg Type - RW */ +#define E1000_RXUDP 0x0B638 /* Time Sync Rx UDP Port - RW */ +#define E1000_RXSATRL 0x0B62C /* Rx timestamp attribute low - RO */ +#define E1000_RXSATRH 0x0B630 /* Rx timestamp attribute high - RO */ +#define E1000_TIMADJL 0x0B60C /* Time Adjustment Offset register Low - RW */ +#define E1000_TIMADJH 0x0B610 /* Time Adjustment Offset register High - RW */ +#define E1000_RXCFGL 0x0B634 /* RX Ethertype and Message Type - RW*/ + /* RSS registers */ #define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ #define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ @@ -329,6 +395,81 @@ #define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ #define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ +#define E1000_MRQC_ENABLED(mrqc) (((mrqc) & (BIT(0) | BIT(1))) == BIT(0)) + +#define E1000_RETA_IDX(hash) ((hash) & (BIT(7) - 1)) +#define E1000_RETA_VAL(reta, hash) (((uint8_t *)(reta))[E1000_RETA_IDX(hash)]) +#define E1000_RSS_QUEUE(reta, hash) ((E1000_RETA_VAL(reta, hash) & BIT(7)) >> 7) + +#define E1000_MRQC_EN_TCPIPV4(mrqc) ((mrqc) & BIT(16)) +#define E1000_MRQC_EN_IPV4(mrqc) ((mrqc) & BIT(17)) +#define E1000_MRQC_EN_TCPIPV6(mrqc) ((mrqc) & BIT(18)) +#define E1000_MRQC_EN_IPV6EX(mrqc) ((mrqc) & BIT(19)) +#define E1000_MRQC_EN_IPV6(mrqc) ((mrqc) & BIT(20)) + +#define E1000_MRQ_RSS_TYPE_NONE (0) +#define E1000_MRQ_RSS_TYPE_IPV4TCP (1) +#define E1000_MRQ_RSS_TYPE_IPV4 (2) +#define E1000_MRQ_RSS_TYPE_IPV6TCP (3) +#define E1000_MRQ_RSS_TYPE_IPV6EX (4) +#define E1000_MRQ_RSS_TYPE_IPV6 (5) + +#define E1000_ICR_ASSERTED BIT(31) +#define E1000_EIAC_MASK 0x01F00000 + +/* IVAR register parsing helpers */ +#define E1000_IVAR_INT_ALLOC_VALID (0x8) + +#define E1000_IVAR_RXQ0_SHIFT (0) +#define E1000_IVAR_RXQ1_SHIFT (4) +#define E1000_IVAR_TXQ0_SHIFT (8) +#define E1000_IVAR_TXQ1_SHIFT (12) +#define E1000_IVAR_OTHER_SHIFT (16) + +#define E1000_IVAR_ENTRY_MASK (0xF) +#define E1000_IVAR_ENTRY_VALID_MASK E1000_IVAR_INT_ALLOC_VALID +#define E1000_IVAR_ENTRY_VEC_MASK (0x7) + +#define E1000_IVAR_RXQ0(x) ((x) >> E1000_IVAR_RXQ0_SHIFT) +#define E1000_IVAR_RXQ1(x) ((x) >> E1000_IVAR_RXQ1_SHIFT) +#define E1000_IVAR_TXQ0(x) ((x) >> E1000_IVAR_TXQ0_SHIFT) +#define E1000_IVAR_TXQ1(x) ((x) >> E1000_IVAR_TXQ1_SHIFT) +#define E1000_IVAR_OTHER(x) ((x) >> E1000_IVAR_OTHER_SHIFT) + +#define E1000_IVAR_ENTRY_VALID(x) ((x) & E1000_IVAR_ENTRY_VALID_MASK) +#define E1000_IVAR_ENTRY_VEC(x) ((x) & E1000_IVAR_ENTRY_VEC_MASK) + +#define E1000_IVAR_TX_INT_EVERY_WB BIT(31) + +/* RFCTL register bits */ +#define E1000_RFCTL_ISCSI_DIS 0x00000001 +#define E1000_RFCTL_NFSW_DIS 0x00000040 +#define E1000_RFCTL_NFSR_DIS 0x00000080 +#define E1000_RFCTL_IPV6_DIS 0x00000400 +#define E1000_RFCTL_IPV6_XSUM_DIS 0x00000800 +#define E1000_RFCTL_ACK_DIS 0x00001000 +#define E1000_RFCTL_ACK_DATA_DIS 0x00002000 +#define E1000_RFCTL_IPFRSP_DIS 0x00004000 +#define E1000_RFCTL_EXTEN 0x00008000 +#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 +#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 + +/* PSRCTL parsing */ +#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F +#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 +#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 +#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 + +#define E1000_PSRCTL_BSIZE0_SHIFT 0 +#define E1000_PSRCTL_BSIZE1_SHIFT 8 +#define E1000_PSRCTL_BSIZE2_SHIFT 16 +#define E1000_PSRCTL_BSIZE3_SHIFT 24 + +#define E1000_PSRCTL_BUFFS_PER_DESC 4 + +/* TARC* parsing */ +#define E1000_TARC_ENABLE BIT(10) + /* PHY 1000 MII Register/Bit Definitions */ /* PHY Registers defined by IEEE */ #define PHY_CTRL 0x00 /* Control Register */ @@ -344,6 +485,40 @@ #define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ #define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ +/* 82574-specific registers */ +#define PHY_COPPER_CTRL1 0x10 /* Copper Specific Control Register 1 */ +#define PHY_COPPER_STAT1 0x11 /* Copper Specific Status Register 1 */ +#define PHY_COPPER_INT_ENABLE 0x12 /* Interrupt Enable Register */ +#define PHY_COPPER_STAT2 0x13 /* Copper Specific Status Register 2 */ +#define PHY_COPPER_CTRL3 0x14 /* Copper Specific Control Register 3 */ +#define PHY_COPPER_CTRL2 0x1A /* Copper Specific Control Register 2 */ +#define PHY_RX_ERR_CNTR 0x15 /* Receive Error Counter */ +#define PHY_PAGE 0x16 /* Page Address (Any page) */ +#define PHY_OEM_BITS 0x19 /* OEM Bits (Page 0) */ +#define PHY_BIAS_1 0x1d /* Bias Setting Register */ +#define PHY_BIAS_2 0x1e /* Bias Setting Register */ + +/* 82574-specific registers - page 2 */ +#define PHY_MAC_CTRL1 0x10 /* MAC Specific Control Register 1 */ +#define PHY_MAC_INT_ENABLE 0x12 /* MAC Interrupt Enable Register */ +#define PHY_MAC_STAT 0x13 /* MAC Specific Status Register */ +#define PHY_MAC_CTRL2 0x15 /* MAC Specific Control Register 2 */ + +/* 82574-specific registers - page 3 */ +#define PHY_LED_03_FUNC_CTRL1 0x10 /* LED[3:0] Function Control */ +#define PHY_LED_03_POL_CTRL 0x11 /* LED[3:0] Polarity Control */ +#define PHY_LED_TIMER_CTRL 0x12 /* LED Timer Control */ +#define PHY_LED_45_CTRL 0x13 /* LED[5:4] Function Control and Polarity */ + +/* 82574-specific registers - page 5 */ +#define PHY_1000T_SKEW 0x14 /* 1000 BASE - T Pair Skew Register */ +#define PHY_1000T_SWAP 0x15 /* 1000 BASE - T Pair Swap and Polarity */ + +/* 82574-specific registers - page 6 */ +#define PHY_CRC_COUNTERS 0x11 /* CRC Counters */ + +#define PHY_PAGE_RW_MASK 0x7F /* R/W part of page address register */ + #define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ #define MAX_PHY_MULTI_PAGE_REG 0xF /* Registers equal on all pages */ @@ -423,6 +598,18 @@ #define E1000_ICR_DSW 0x00000020 /* FW changed the status of DISSW bit in the FWSM */ #define E1000_ICR_PHYINT 0x00001000 /* LAN connected device generates an interrupt */ #define E1000_ICR_EPRST 0x00100000 /* ME handware reset occurs */ +#define E1000_ICR_RXQ0 0x00100000 /* Rx Queue 0 Interrupt */ +#define E1000_ICR_RXQ1 0x00200000 /* Rx Queue 1 Interrupt */ +#define E1000_ICR_TXQ0 0x00400000 /* Tx Queue 0 Interrupt */ +#define E1000_ICR_TXQ1 0x00800000 /* Tx Queue 1 Interrupt */ +#define E1000_ICR_OTHER 0x01000000 /* Other Interrupts */ + +#define E1000_ICR_OTHER_CAUSES (E1000_ICR_LSC | \ + E1000_ICR_RXO | \ + E1000_ICR_MDAC | \ + E1000_ICR_SRPD | \ + E1000_ICR_ACK | \ + E1000_ICR_MNG) /* Interrupt Cause Set */ #define E1000_ICS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ @@ -471,6 +658,11 @@ #define E1000_IMS_SRPD E1000_ICR_SRPD #define E1000_IMS_ACK E1000_ICR_ACK /* Receive Ack frame */ #define E1000_IMS_MNG E1000_ICR_MNG /* Manageability event */ +#define E1000_IMS_RXQ0 E1000_ICR_RXQ0 +#define E1000_IMS_RXQ1 E1000_ICR_RXQ1 +#define E1000_IMS_TXQ0 E1000_ICR_TXQ0 +#define E1000_IMS_TXQ1 E1000_ICR_TXQ1 +#define E1000_IMS_OTHER E1000_ICR_OTHER #define E1000_IMS_DOCK E1000_ICR_DOCK /* Dock/Undock */ #define E1000_IMS_RXD_FIFO_PAR0 E1000_ICR_RXD_FIFO_PAR0 /* queue 0 Rx descriptor FIFO parity error */ #define E1000_IMS_TXD_FIFO_PAR0 E1000_ICR_TXD_FIFO_PAR0 /* queue 0 Tx descriptor FIFO parity error */ @@ -562,6 +754,15 @@ #define E1000_EEPROM_RW_ADDR_SHIFT 8 /* Shift to the address bits */ #define E1000_EEPROM_POLL_WRITE 1 /* Flag for polling for write complete */ #define E1000_EEPROM_POLL_READ 0 /* Flag for polling for read complete */ + +/* 82574 EERD/EEWR registers layout */ +#define E1000_EERW_START BIT(0) +#define E1000_EERW_DONE BIT(1) +#define E1000_EERW_ADDR_SHIFT 2 +#define E1000_EERW_ADDR_MASK ((1L << 14) - 1) +#define E1000_EERW_DATA_SHIFT 16 +#define E1000_EERW_DATA_MASK ((1L << 16) - 1) + /* Register Bit Masks */ /* Device Control */ #define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ @@ -584,7 +785,17 @@ #define E1000_CTRL_D_UD_EN 0x00002000 /* Dock/Undock enable */ #define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */ #define E1000_CTRL_FORCE_PHY_RESET 0x00008000 /* Reset both PHY ports, through PHYRST_N pin */ +#define E1000_CTRL_SPD_SHIFT 8 /* Speed Select Shift */ + +#define E1000_CTRL_EXT_ASDCHK 0x00001000 /* auto speed detection check */ +#define E1000_CTRL_EXT_EE_RST 0x00002000 /* EEPROM reset */ #define E1000_CTRL_EXT_LINK_EN 0x00010000 /* enable link status from external LINK_0 and LINK_1 pins */ +#define E1000_CTRL_EXT_EIAME 0x01000000 +#define E1000_CTRL_EXT_IAME 0x08000000 /* Int ACK Auto-mask */ +#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ +#define E1000_CTRL_EXT_INT_TIMERS_CLEAR_ENA 0x20000000 +#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ + #define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ #define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ #define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ @@ -593,6 +804,7 @@ #define E1000_CTRL_SWDPIO1 0x00800000 /* SWDPIN 1 input or output */ #define E1000_CTRL_SWDPIO2 0x01000000 /* SWDPIN 2 input or output */ #define E1000_CTRL_SWDPIO3 0x02000000 /* SWDPIN 3 input or output */ +#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ #define E1000_CTRL_RST 0x04000000 /* Global reset */ #define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ #define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ @@ -617,9 +829,13 @@ #define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Completion by EEPROM/Flash */ #define E1000_STATUS_ASDV 0x00000300 /* Auto speed detect value */ +#define E1000_STATUS_ASDV_10 0x00000000 /* ASDV 10Mb */ +#define E1000_STATUS_ASDV_100 0x00000100 /* ASDV 100Mb */ +#define E1000_STATUS_ASDV_1000 0x00000200 /* ASDV 1Gb */ #define E1000_STATUS_DOCK_CI 0x00000800 /* Change in Dock/Undock state. Clear on write '0'. */ #define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Status of Master requests. */ #define E1000_STATUS_MTXCKOK 0x00000400 /* MTX clock running OK */ +#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ #define E1000_STATUS_PCI66 0x00000800 /* In 66Mhz slot */ #define E1000_STATUS_BUS64 0x00001000 /* In 64 bit slot */ #define E1000_STATUS_PCIX_MODE 0x00002000 /* PCI-X mode */ @@ -634,6 +850,8 @@ #define E1000_STATUS_FUSE_9 0x08000000 #define E1000_STATUS_SERDES0_DIS 0x10000000 /* SERDES disabled on port 0 */ #define E1000_STATUS_SERDES1_DIS 0x20000000 /* SERDES disabled on port 1 */ +#define E1000_STATUS_SPEED_SHIFT 6 +#define E1000_STATUS_ASDV_SHIFT 8 /* EEPROM/Flash Control */ #define E1000_EECD_SK 0x00000001 /* EEPROM Clock */ @@ -664,6 +882,8 @@ #define E1000_EECD_AUPDEN 0x00100000 /* Enable Autonomous FLASH update */ #define E1000_EECD_SHADV 0x00200000 /* Shadow RAM Data Valid */ #define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ + + #define E1000_EECD_SECVAL_SHIFT 22 #define E1000_STM_OPCODE 0xDB00 #define E1000_HICR_FW_RESET 0xC0 @@ -684,6 +904,18 @@ #define E1000_MDIC_INT_EN 0x20000000 #define E1000_MDIC_ERROR 0x40000000 +/* Rx Interrupt Delay Timer */ +#define E1000_RDTR_FPD BIT(31) + +/* Tx Interrupt Delay Timer */ +#define E1000_TIDV_FPD BIT(31) + +/* Delay increments in nanoseconds for delayed interrupts registers */ +#define E1000_INTR_DELAY_NS_RES (1024) + +/* Delay increments in nanoseconds for interrupt throttling registers */ +#define E1000_INTR_THROTTLING_NS_RES (256) + /* EEPROM Commands - Microwire */ #define EEPROM_READ_OPCODE_MICROWIRE 0x6 /* EEPROM read opcode */ #define EEPROM_WRITE_OPCODE_MICROWIRE 0x5 /* EEPROM write opcode */ @@ -711,6 +943,21 @@ #define E1000_EEPROM_CFG_DONE 0x00040000 /* MNG config cycle done */ #define E1000_EEPROM_CFG_DONE_PORT_1 0x00080000 /* ...for second port */ +/* PCI Express Control */ +/* 3GIO Control Register - GCR (0x05B00; RW) */ +#define E1000_L0S_ADJUST (1 << 9) +#define E1000_L1_ENTRY_LATENCY_MSB (1 << 23) +#define E1000_L1_ENTRY_LATENCY_LSB (1 << 25 | 1 << 26) + +#define E1000_L0S_ADJUST (1 << 9) +#define E1000_L1_ENTRY_LATENCY_MSB (1 << 23) +#define E1000_L1_ENTRY_LATENCY_LSB (1 << 25 | 1 << 26) + +#define E1000_GCR_RO_BITS (1 << 23 | 1 << 25 | 1 << 26) + +/* MSI-X PBA Clear register */ +#define E1000_PBACLR_VALID_MASK (BIT(5) - 1) + /* Transmit Descriptor */ struct e1000_tx_desc { uint64_t buffer_addr; /* Address of the descriptor's data buffer */ @@ -752,7 +999,9 @@ struct e1000_tx_desc { #define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ #define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ #define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ +#define E1000_TXD_CMD_SNAP 0x40000000 /* Update SNAP header */ #define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ +#define E1000_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ /* Transmit Control */ #define E1000_TCTL_RST 0x00000001 /* software reset */ @@ -767,7 +1016,7 @@ struct e1000_tx_desc { #define E1000_TCTL_NRTU 0x02000000 /* No Re-transmit on underrun */ #define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ -/* Receive Descriptor */ +/* Legacy Receive Descriptor */ struct e1000_rx_desc { uint64_t buffer_addr; /* Address of the descriptor's data buffer */ uint16_t length; /* Length of data DMAed into data buffer */ @@ -777,6 +1026,78 @@ struct e1000_rx_desc { uint16_t special; }; +/* Extended Receive Descriptor */ +union e1000_rx_desc_extended { + struct { + uint64_t buffer_addr; + uint64_t reserved; + } read; + struct { + struct { + uint32_t mrq; /* Multiple Rx Queues */ + union { + uint32_t rss; /* RSS Hash */ + struct { + uint16_t ip_id; /* IP id */ + uint16_t csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + uint32_t status_error; /* ext status/error */ + uint16_t length; + uint16_t vlan; /* VLAN tag */ + } upper; + } wb; /* writeback */ +}; + +#define MAX_PS_BUFFERS 4 + +/* Number of packet split data buffers (not including the header buffer) */ +#define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1) + +/* Receive Descriptor - Packet Split */ +union e1000_rx_desc_packet_split { + struct { + /* one buffer for protocol header(s), three data buffers */ + uint64_t buffer_addr[MAX_PS_BUFFERS]; + } read; + struct { + struct { + uint32_t mrq; /* Multiple Rx Queues */ + union { + uint32_t rss; /* RSS Hash */ + struct { + uint16_t ip_id; /* IP id */ + uint16_t csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + uint32_t status_error; /* ext status/error */ + uint16_t length0; /* length of buffer 0 */ + uint16_t vlan; /* VLAN tag */ + } middle; + struct { + uint16_t header_status; + /* length of buffers 1-3 */ + uint16_t length[PS_PAGE_BUFFERS]; + } upper; + uint64_t reserved; + } wb; /* writeback */ +}; + +/* Receive Checksum Control bits */ +#define E1000_RXCSUM_IPOFLD 0x100 /* IP Checksum Offload Enable */ +#define E1000_RXCSUM_TUOFLD 0x200 /* TCP/UDP Checksum Offload Enable */ +#define E1000_RXCSUM_PCSD 0x2000 /* Packet Checksum Disable */ + +#define E1000_RING_DESC_LEN (16) +#define E1000_RING_DESC_LEN_SHIFT (4) + +#define E1000_MIN_RX_DESC_LEN E1000_RING_DESC_LEN +#define E1000_MAX_RX_DESC_LEN (sizeof(union e1000_rx_desc_packet_split)) + /* Receive Descriptor bit definitions */ #define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ #define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ @@ -802,6 +1123,15 @@ struct e1000_rx_desc { #define E1000_RXD_SPC_CFI_MASK 0x1000 /* CFI is bit 12 */ #define E1000_RXD_SPC_CFI_SHIFT 12 +/* RX packet types */ +#define E1000_RXD_PKT_MAC (0) +#define E1000_RXD_PKT_IP4 (1) +#define E1000_RXD_PKT_IP4_XDP (2) +#define E1000_RXD_PKT_IP6 (5) +#define E1000_RXD_PKT_IP6_XDP (6) + +#define E1000_RXD_PKT_TYPE(t) ((t) << 16) + #define E1000_RXDEXT_STATERR_CE 0x01000000 #define E1000_RXDEXT_STATERR_SE 0x02000000 #define E1000_RXDEXT_STATERR_SEQ 0x04000000 @@ -879,6 +1209,8 @@ struct e1000_data_desc { #define E1000_MANC_NEIGHBOR_EN 0x00004000 /* Enable Neighbor Discovery * Filtering */ #define E1000_MANC_ARP_RES_EN 0x00008000 /* Enable ARP response Filtering */ +#define E1000_MANC_DIS_IP_CHK_ARP 0x10000000 /* Disable IP address chacking */ + /*for ARP packets - in 82574 */ #define E1000_MANC_TCO_RESET 0x00010000 /* TCO Reset Occurred */ #define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ #define E1000_MANC_REPORT_STATUS 0x00040000 /* Status Reporting Enabled */ @@ -902,7 +1234,14 @@ struct e1000_data_desc { #define E1000_MANC_SMB_DATA_OUT_SHIFT 28 /* SMBus Data Out Shift */ #define E1000_MANC_SMB_CLK_OUT_SHIFT 29 /* SMBus Clock Out Shift */ +/* FACTPS Control */ +#define E1000_FACTPS_LAN0_ON 0x00000004 /* Lan 0 enable */ + /* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. */ #define EEPROM_SUM 0xBABA +/* I/O-Mapped Access to Internal Registers, Memories, and Flash */ +#define E1000_IOADDR 0x00 +#define E1000_IODATA 0x04 + #endif /* _E1000_HW_H_ */ -- cgit v1.1 From 093454e21d4f6799bb2e549394cd918da1530569 Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:44 +0300 Subject: e1000: Move out code that will be reused in e1000e Code that will be shared moved to a separate files. Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/Makefile.objs | 2 +- hw/net/e1000.c | 411 +++++++++++-------------------------------------- hw/net/e1000x_common.c | 267 ++++++++++++++++++++++++++++++++ hw/net/e1000x_common.h | 213 +++++++++++++++++++++++++ 4 files changed, 573 insertions(+), 320 deletions(-) create mode 100644 hw/net/e1000x_common.c create mode 100644 hw/net/e1000x_common.h (limited to 'hw') diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs index 527d264..bc69948 100644 --- a/hw/net/Makefile.objs +++ b/hw/net/Makefile.objs @@ -6,7 +6,7 @@ common-obj-$(CONFIG_NE2000_PCI) += ne2000.o common-obj-$(CONFIG_EEPRO100_PCI) += eepro100.o common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o -common-obj-$(CONFIG_E1000_PCI) += e1000.o +common-obj-$(CONFIG_E1000_PCI) += e1000.o e1000x_common.o common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o common-obj-$(CONFIG_VMXNET3_PCI) += net_tx_pkt.o net_rx_pkt.o common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o diff --git a/hw/net/e1000.c b/hw/net/e1000.c index 8e79b55..36e3dbe 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -36,7 +36,7 @@ #include "qemu/iov.h" #include "qemu/range.h" -#include "e1000_regs.h" +#include "e1000x_common.h" static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -64,11 +64,6 @@ static int debugflags = DBGBIT(TXERR) | DBGBIT(GENERAL); #define PNPMMIO_SIZE 0x20000 #define MIN_BUF_SIZE 60 /* Min. octets in an ethernet frame sans FCS */ -/* this is the size past which hardware will drop packets when setting LPE=0 */ -#define MAXIMUM_ETHERNET_VLAN_SIZE 1522 -/* this is the size past which hardware will drop packets when setting LPE=1 */ -#define MAXIMUM_ETHERNET_LPE_SIZE 16384 - #define MAXIMUM_ETHERNET_HDR_LEN (14+4) /* @@ -102,22 +97,9 @@ typedef struct E1000State_st { unsigned char vlan[4]; unsigned char data[0x10000]; uint16_t size; - unsigned char sum_needed; unsigned char vlan_needed; - uint8_t ipcss; - uint8_t ipcso; - uint16_t ipcse; - uint8_t tucss; - uint8_t tucso; - uint16_t tucse; - uint8_t hdr_len; - uint16_t mss; - uint32_t paylen; + e1000x_txd_props props; uint16_t tso_frames; - char tse; - int8_t ip; - int8_t tcp; - char cptse; // current packet tse bit } tx; struct { @@ -162,52 +144,19 @@ typedef struct E1000BaseClass { #define E1000_DEVICE_GET_CLASS(obj) \ OBJECT_GET_CLASS(E1000BaseClass, (obj), TYPE_E1000_BASE) -#define defreg(x) x = (E1000_##x>>2) -enum { - defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC), - defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC), - defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC), - defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH), - defreg(RDBAL), defreg(RDH), defreg(RDLEN), defreg(RDT), - defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH), - defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT), - defreg(TORH), defreg(TORL), defreg(TOTH), defreg(TOTL), - defreg(TPR), defreg(TPT), defreg(TXDCTL), defreg(WUFC), - defreg(RA), defreg(MTA), defreg(CRCERRS), defreg(VFTA), - defreg(VET), defreg(RDTR), defreg(RADV), defreg(TADV), - defreg(ITR), defreg(FCRUC), defreg(TDFH), defreg(TDFT), - defreg(TDFHS), defreg(TDFTS), defreg(TDFPC), defreg(RDFH), - defreg(RDFT), defreg(RDFHS), defreg(RDFTS), defreg(RDFPC), - defreg(IPAV), defreg(WUC), defreg(WUS), defreg(AIT), - defreg(IP6AT), defreg(IP4AT), defreg(FFLT), defreg(FFMT), - defreg(FFVT), defreg(WUPM), defreg(PBM), defreg(SCC), - defreg(ECOL), defreg(MCC), defreg(LATECOL), defreg(COLC), - defreg(DC), defreg(TNCRS), defreg(SEC), defreg(CEXTERR), - defreg(RLEC), defreg(XONRXC), defreg(XONTXC), defreg(XOFFRXC), - defreg(XOFFTXC), defreg(RFC), defreg(RJC), defreg(RNBC), - defreg(TSCTFC), defreg(MGTPRC), defreg(MGTPDC), defreg(MGTPTC), - defreg(RUC), defreg(ROC), defreg(GORCL), defreg(GORCH), - defreg(GOTCL), defreg(GOTCH), defreg(BPRC), defreg(MPRC), - defreg(TSCTC), defreg(PRC64), defreg(PRC127), defreg(PRC255), - defreg(PRC511), defreg(PRC1023), defreg(PRC1522), defreg(PTC64), - defreg(PTC127), defreg(PTC255), defreg(PTC511), defreg(PTC1023), - defreg(PTC1522), defreg(MPTC), defreg(BPTC) -}; - static void -e1000_link_down(E1000State *s) +e1000_link_up(E1000State *s) { - s->mac_reg[STATUS] &= ~E1000_STATUS_LU; - s->phy_reg[PHY_STATUS] &= ~MII_SR_LINK_STATUS; - s->phy_reg[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE; - s->phy_reg[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK; + e1000x_update_regs_on_link_up(s->mac_reg, s->phy_reg); + + /* E1000_STATUS_LU is tested by e1000_can_receive() */ + qemu_flush_queued_packets(qemu_get_queue(s->nic)); } static void -e1000_link_up(E1000State *s) +e1000_autoneg_done(E1000State *s) { - s->mac_reg[STATUS] |= E1000_STATUS_LU; - s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS; + e1000x_update_regs_on_autoneg_done(s->mac_reg, s->phy_reg); /* E1000_STATUS_LU is tested by e1000_can_receive() */ qemu_flush_queued_packets(qemu_get_queue(s->nic)); @@ -233,10 +182,7 @@ set_phy_ctrl(E1000State *s, int index, uint16_t val) * down. */ if (have_autoneg(s) && (val & MII_CR_RESTART_AUTO_NEG)) { - e1000_link_down(s); - DBGOUT(PHY, "Start link auto negotiation\n"); - timer_mod(s->autoneg_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer); } } @@ -401,43 +347,16 @@ e1000_autoneg_timer(void *opaque) { E1000State *s = opaque; if (!qemu_get_queue(s->nic)->link_down) { - e1000_link_up(s); - s->phy_reg[PHY_LP_ABILITY] |= MII_LPAR_LPACK; - s->phy_reg[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; - DBGOUT(PHY, "Auto negotiation is completed\n"); + e1000_autoneg_done(s); set_ics(s, 0, E1000_ICS_LSC); /* signal link status change to guest */ } } -static int -rxbufsize(uint32_t v) -{ - v &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 | - E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 | - E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256; - switch (v) { - case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384: - return 16384; - case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192: - return 8192; - case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096: - return 4096; - case E1000_RCTL_SZ_1024: - return 1024; - case E1000_RCTL_SZ_512: - return 512; - case E1000_RCTL_SZ_256: - return 256; - } - return 2048; -} - static void e1000_reset(void *opaque) { E1000State *d = opaque; E1000BaseClass *edc = E1000_DEVICE_GET_CLASS(d); uint8_t *macaddr = d->conf.macaddr.a; - int i; timer_del(d->autoneg_timer); timer_del(d->mit_timer); @@ -453,17 +372,10 @@ static void e1000_reset(void *opaque) memset(&d->tx, 0, sizeof d->tx); if (qemu_get_queue(d->nic)->link_down) { - e1000_link_down(d); + e1000x_update_regs_on_link_down(d->mac_reg, d->phy_reg); } - /* Some guests expect pre-initialized RAH/RAL (AddrValid flag + MACaddr) */ - d->mac_reg[RA] = 0; - d->mac_reg[RA + 1] = E1000_RAH_AV; - for (i = 0; i < 4; i++) { - d->mac_reg[RA] |= macaddr[i] << (8 * i); - d->mac_reg[RA + 1] |= (i < 2) ? macaddr[i + 4] << (8 * i) : 0; - } - qemu_format_nic_info_str(qemu_get_queue(d->nic), macaddr); + e1000x_reset_mac_addr(d->nic, d->mac_reg, macaddr); } static void @@ -477,7 +389,7 @@ static void set_rx_control(E1000State *s, int index, uint32_t val) { s->mac_reg[RCTL] = val; - s->rxbuf_size = rxbufsize(val); + s->rxbuf_size = e1000x_rxbufsize(val); s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1; DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT], s->mac_reg[RCTL]); @@ -598,89 +510,15 @@ putsum(uint8_t *data, uint32_t n, uint32_t sloc, uint32_t css, uint32_t cse) } static inline void -inc_reg_if_not_full(E1000State *s, int index) -{ - if (s->mac_reg[index] != 0xffffffff) { - s->mac_reg[index]++; - } -} - -static inline void inc_tx_bcast_or_mcast_count(E1000State *s, const unsigned char *arr) { if (!memcmp(arr, bcast, sizeof bcast)) { - inc_reg_if_not_full(s, BPTC); + e1000x_inc_reg_if_not_full(s->mac_reg, BPTC); } else if (arr[0] & 1) { - inc_reg_if_not_full(s, MPTC); - } -} - -static void -grow_8reg_if_not_full(E1000State *s, int index, int size) -{ - uint64_t sum = s->mac_reg[index] | (uint64_t)s->mac_reg[index+1] << 32; - - if (sum + size < sum) { - sum = ~0ULL; - } else { - sum += size; - } - s->mac_reg[index] = sum; - s->mac_reg[index+1] = sum >> 32; -} - -static void -increase_size_stats(E1000State *s, const int *size_regs, int size) -{ - if (size > 1023) { - inc_reg_if_not_full(s, size_regs[5]); - } else if (size > 511) { - inc_reg_if_not_full(s, size_regs[4]); - } else if (size > 255) { - inc_reg_if_not_full(s, size_regs[3]); - } else if (size > 127) { - inc_reg_if_not_full(s, size_regs[2]); - } else if (size > 64) { - inc_reg_if_not_full(s, size_regs[1]); - } else if (size == 64) { - inc_reg_if_not_full(s, size_regs[0]); + e1000x_inc_reg_if_not_full(s->mac_reg, MPTC); } } -static inline int -vlan_enabled(E1000State *s) -{ - return ((s->mac_reg[CTRL] & E1000_CTRL_VME) != 0); -} - -static inline int -vlan_rx_filter_enabled(E1000State *s) -{ - return ((s->mac_reg[RCTL] & E1000_RCTL_VFE) != 0); -} - -static inline int -is_vlan_packet(E1000State *s, const uint8_t *buf) -{ - return (be16_to_cpup((uint16_t *)(buf + 12)) == - le16_to_cpu(s->mac_reg[VET])); -} - -static inline int -is_vlan_txd(uint32_t txd_lower) -{ - return ((txd_lower & E1000_TXD_CMD_VLE) != 0); -} - -/* FCS aka Ethernet CRC-32. We don't get it from backends and can't - * fill it in, just pad descriptor length by 4 bytes unless guest - * told us to strip it off the packet. */ -static inline int -fcs_len(E1000State *s) -{ - return (s->mac_reg[RCTL] & E1000_RCTL_SECRC) ? 0 : 4; -} - static void e1000_send_packet(E1000State *s, const uint8_t *buf, int size) { @@ -694,7 +532,7 @@ e1000_send_packet(E1000State *s, const uint8_t *buf, int size) qemu_send_packet(nc, buf, size); } inc_tx_bcast_or_mcast_count(s, buf); - increase_size_stats(s, PTCregs, size); + e1000x_increase_size_stats(s->mac_reg, PTCregs, size); } static void @@ -704,34 +542,34 @@ xmit_seg(E1000State *s) unsigned int frames = s->tx.tso_frames, css, sofar; struct e1000_tx *tp = &s->tx; - if (tp->tse && tp->cptse) { - css = tp->ipcss; + if (tp->props.tse && tp->props.cptse) { + css = tp->props.ipcss; DBGOUT(TXSUM, "frames %d size %d ipcss %d\n", frames, tp->size, css); - if (tp->ip) { /* IPv4 */ + if (tp->props.ip) { /* IPv4 */ stw_be_p(tp->data+css+2, tp->size - css); stw_be_p(tp->data+css+4, be16_to_cpup((uint16_t *)(tp->data+css+4))+frames); } else { /* IPv6 */ stw_be_p(tp->data+css+4, tp->size - css); } - css = tp->tucss; + css = tp->props.tucss; len = tp->size - css; - DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->tcp, css, len); - if (tp->tcp) { - sofar = frames * tp->mss; + DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->props.tcp, css, len); + if (tp->props.tcp) { + sofar = frames * tp->props.mss; stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */ - if (tp->paylen - sofar > tp->mss) { + if (tp->props.paylen - sofar > tp->props.mss) { tp->data[css + 13] &= ~9; /* PSH, FIN */ } else if (frames) { - inc_reg_if_not_full(s, TSCTC); + e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC); } } else /* UDP */ stw_be_p(tp->data+css+4, len); - if (tp->sum_needed & E1000_TXD_POPTS_TXSM) { + if (tp->props.sum_needed & E1000_TXD_POPTS_TXSM) { unsigned int phsum; // add pseudo-header length before checksum calculation - sp = (uint16_t *)(tp->data + tp->tucso); + sp = (uint16_t *)(tp->data + tp->props.tucso); phsum = be16_to_cpup(sp) + len; phsum = (phsum >> 16) + (phsum & 0xffff); stw_be_p(sp, phsum); @@ -739,10 +577,14 @@ xmit_seg(E1000State *s) tp->tso_frames++; } - if (tp->sum_needed & E1000_TXD_POPTS_TXSM) - putsum(tp->data, tp->size, tp->tucso, tp->tucss, tp->tucse); - if (tp->sum_needed & E1000_TXD_POPTS_IXSM) - putsum(tp->data, tp->size, tp->ipcso, tp->ipcss, tp->ipcse); + if (tp->props.sum_needed & E1000_TXD_POPTS_TXSM) { + putsum(tp->data, tp->size, tp->props.tucso, + tp->props.tucss, tp->props.tucse); + } + if (tp->props.sum_needed & E1000_TXD_POPTS_IXSM) { + putsum(tp->data, tp->size, tp->props.ipcso, + tp->props.ipcss, tp->props.ipcse); + } if (tp->vlan_needed) { memmove(tp->vlan, tp->data, 4); memmove(tp->data, tp->data + 4, 8); @@ -752,8 +594,8 @@ xmit_seg(E1000State *s) e1000_send_packet(s, tp->data, tp->size); } - inc_reg_if_not_full(s, TPT); - grow_8reg_if_not_full(s, TOTL, s->tx.size); + e1000x_inc_reg_if_not_full(s->mac_reg, TPT); + e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size); s->mac_reg[GPTC] = s->mac_reg[TPT]; s->mac_reg[GOTCL] = s->mac_reg[TOTL]; s->mac_reg[GOTCH] = s->mac_reg[TOTH]; @@ -765,7 +607,7 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) PCIDevice *d = PCI_DEVICE(s); uint32_t txd_lower = le32_to_cpu(dp->lower.data); uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D); - unsigned int split_size = txd_lower & 0xffff, bytes, sz, op; + unsigned int split_size = txd_lower & 0xffff, bytes, sz; unsigned int msh = 0xfffff; uint64_t addr; struct e1000_context_desc *xp = (struct e1000_context_desc *)dp; @@ -773,38 +615,27 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE); if (dtype == E1000_TXD_CMD_DEXT) { /* context descriptor */ - op = le32_to_cpu(xp->cmd_and_length); - tp->ipcss = xp->lower_setup.ip_fields.ipcss; - tp->ipcso = xp->lower_setup.ip_fields.ipcso; - tp->ipcse = le16_to_cpu(xp->lower_setup.ip_fields.ipcse); - tp->tucss = xp->upper_setup.tcp_fields.tucss; - tp->tucso = xp->upper_setup.tcp_fields.tucso; - tp->tucse = le16_to_cpu(xp->upper_setup.tcp_fields.tucse); - tp->paylen = op & 0xfffff; - tp->hdr_len = xp->tcp_seg_setup.fields.hdr_len; - tp->mss = le16_to_cpu(xp->tcp_seg_setup.fields.mss); - tp->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0; - tp->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0; - tp->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0; + e1000x_read_tx_ctx_descr(xp, &tp->props); tp->tso_frames = 0; - if (tp->tucso == 0) { /* this is probably wrong */ + if (tp->props.tucso == 0) { /* this is probably wrong */ DBGOUT(TXSUM, "TCP/UDP: cso 0!\n"); - tp->tucso = tp->tucss + (tp->tcp ? 16 : 6); + tp->props.tucso = tp->props.tucss + (tp->props.tcp ? 16 : 6); } return; } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) { // data descriptor if (tp->size == 0) { - tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8; + tp->props.sum_needed = le32_to_cpu(dp->upper.data) >> 8; } - tp->cptse = ( txd_lower & E1000_TXD_CMD_TSE ) ? 1 : 0; + tp->props.cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0; } else { // legacy descriptor - tp->cptse = 0; + tp->props.cptse = 0; } - if (vlan_enabled(s) && is_vlan_txd(txd_lower) && - (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) { + if (e1000x_vlan_enabled(s->mac_reg) && + e1000x_is_vlan_txd(txd_lower) && + (tp->props.cptse || txd_lower & E1000_TXD_CMD_EOP)) { tp->vlan_needed = 1; stw_be_p(tp->vlan_header, le16_to_cpu(s->mac_reg[VET])); @@ -813,8 +644,8 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) } addr = le64_to_cpu(dp->buffer_addr); - if (tp->tse && tp->cptse) { - msh = tp->hdr_len + tp->mss; + if (tp->props.tse && tp->props.cptse) { + msh = tp->props.hdr_len + tp->props.mss; do { bytes = split_size; if (tp->size + bytes > msh) @@ -823,19 +654,19 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) bytes = MIN(sizeof(tp->data) - tp->size, bytes); pci_dma_read(d, addr, tp->data + tp->size, bytes); sz = tp->size + bytes; - if (sz >= tp->hdr_len && tp->size < tp->hdr_len) { - memmove(tp->header, tp->data, tp->hdr_len); + if (sz >= tp->props.hdr_len && tp->size < tp->props.hdr_len) { + memmove(tp->header, tp->data, tp->props.hdr_len); } tp->size = sz; addr += bytes; if (sz == msh) { xmit_seg(s); - memmove(tp->data, tp->header, tp->hdr_len); - tp->size = tp->hdr_len; + memmove(tp->data, tp->header, tp->props.hdr_len); + tp->size = tp->props.hdr_len; } split_size -= bytes; } while (bytes && split_size); - } else if (!tp->tse && tp->cptse) { + } else if (!tp->props.tse && tp->props.cptse) { // context descriptor TSE is not set, while data descriptor TSE is set DBGOUT(TXERR, "TCP segmentation error\n"); } else { @@ -846,14 +677,14 @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp) if (!(txd_lower & E1000_TXD_CMD_EOP)) return; - if (!(tp->tse && tp->cptse && tp->size < tp->hdr_len)) { + if (!(tp->props.tse && tp->props.cptse && tp->size < tp->props.hdr_len)) { xmit_seg(s); } tp->tso_frames = 0; - tp->sum_needed = 0; + tp->props.sum_needed = 0; tp->vlan_needed = 0; tp->size = 0; - tp->cptse = 0; + tp->props.cptse = 0; } static uint32_t @@ -925,11 +756,11 @@ start_xmit(E1000State *s) static int receive_filter(E1000State *s, const uint8_t *buf, int size) { - static const int mta_shift[] = {4, 3, 2, 0}; - uint32_t f, rctl = s->mac_reg[RCTL], ra[2], *rp; + uint32_t rctl = s->mac_reg[RCTL]; int isbcast = !memcmp(buf, bcast, sizeof bcast), ismcast = (buf[0] & 1); - if (is_vlan_packet(s, buf) && vlan_rx_filter_enabled(s)) { + if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) && + e1000x_vlan_rx_filter_enabled(s->mac_reg)) { uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14)); uint32_t vfta = le32_to_cpup((uint32_t *)(s->mac_reg + VFTA) + ((vid >> 5) & 0x7f)); @@ -942,44 +773,16 @@ receive_filter(E1000State *s, const uint8_t *buf, int size) } if (ismcast && (rctl & E1000_RCTL_MPE)) { /* promiscuous mcast */ - inc_reg_if_not_full(s, MPRC); + e1000x_inc_reg_if_not_full(s->mac_reg, MPRC); return 1; } if (isbcast && (rctl & E1000_RCTL_BAM)) { /* broadcast enabled */ - inc_reg_if_not_full(s, BPRC); - return 1; - } - - for (rp = s->mac_reg + RA; rp < s->mac_reg + RA + 32; rp += 2) { - if (!(rp[1] & E1000_RAH_AV)) - continue; - ra[0] = cpu_to_le32(rp[0]); - ra[1] = cpu_to_le32(rp[1]); - if (!memcmp(buf, (uint8_t *)ra, 6)) { - DBGOUT(RXFILTER, - "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x\n", - (int)(rp - s->mac_reg - RA)/2, - buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); - return 1; - } - } - DBGOUT(RXFILTER, "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x\n", - buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); - - f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3]; - f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff; - if (s->mac_reg[MTA + (f >> 5)] & (1 << (f & 0x1f))) { - inc_reg_if_not_full(s, MPRC); + e1000x_inc_reg_if_not_full(s->mac_reg, BPRC); return 1; } - DBGOUT(RXFILTER, - "dropping, inexact filter mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] %x\n", - buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], - (rctl >> E1000_RCTL_MO_SHIFT) & 3, f >> 5, - s->mac_reg[MTA + (f >> 5)]); - return 0; + return e1000x_rx_group_filter(s->mac_reg, buf); } static void @@ -989,13 +792,11 @@ e1000_set_link_status(NetClientState *nc) uint32_t old_status = s->mac_reg[STATUS]; if (nc->link_down) { - e1000_link_down(s); + e1000x_update_regs_on_link_down(s->mac_reg, s->phy_reg); } else { if (have_autoneg(s) && !(s->phy_reg[PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { - /* emulate auto-negotiation if supported */ - timer_mod(s->autoneg_timer, - qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + e1000x_restart_autoneg(s->mac_reg, s->phy_reg, s->autoneg_timer); } else { e1000_link_up(s); } @@ -1028,9 +829,7 @@ e1000_can_receive(NetClientState *nc) { E1000State *s = qemu_get_nic_opaque(nc); - return (s->mac_reg[STATUS] & E1000_STATUS_LU) && - (s->mac_reg[RCTL] & E1000_RCTL_EN) && - (s->parent_obj.config[PCI_COMMAND] & PCI_COMMAND_MASTER) && + return e1000x_rx_ready(&s->parent_obj, s->mac_reg) && e1000_has_rxbufs(s, 1); } @@ -1061,14 +860,8 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) size_t desc_offset; size_t desc_size; size_t total_size; - static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511, - PRC1023, PRC1522 }; - - if (!(s->mac_reg[STATUS] & E1000_STATUS_LU)) { - return -1; - } - if (!(s->mac_reg[RCTL] & E1000_RCTL_EN)) { + if (!e1000x_hw_rx_enabled(s->mac_reg)) { return -1; } @@ -1076,7 +869,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) if (size < sizeof(min_buf)) { iov_to_buf(iov, iovcnt, 0, min_buf, size); memset(&min_buf[size], 0, sizeof(min_buf) - size); - inc_reg_if_not_full(s, RUC); + e1000x_inc_reg_if_not_full(s->mac_reg, RUC); min_iov.iov_base = filter_buf = min_buf; min_iov.iov_len = size = sizeof(min_buf); iovcnt = 1; @@ -1088,11 +881,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) } /* Discard oversized packets if !LPE and !SBP. */ - if ((size > MAXIMUM_ETHERNET_LPE_SIZE || - (size > MAXIMUM_ETHERNET_VLAN_SIZE - && !(s->mac_reg[RCTL] & E1000_RCTL_LPE))) - && !(s->mac_reg[RCTL] & E1000_RCTL_SBP)) { - inc_reg_if_not_full(s, ROC); + if (e1000x_is_oversized(s->mac_reg, size)) { return size; } @@ -1100,7 +889,8 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) return size; } - if (vlan_enabled(s) && is_vlan_packet(s, filter_buf)) { + if (e1000x_vlan_enabled(s->mac_reg) && + e1000x_is_vlan_packet(filter_buf, le16_to_cpu(s->mac_reg[VET]))) { vlan_special = cpu_to_le16(be16_to_cpup((uint16_t *)(filter_buf + 14))); iov_ofs = 4; @@ -1119,7 +909,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) rdh_start = s->mac_reg[RDH]; desc_offset = 0; - total_size = size + fcs_len(s); + total_size = size + e1000x_fcs_len(s->mac_reg); if (!e1000_has_rxbufs(s, total_size)) { set_ics(s, 0, E1000_ICS_RXO); return -1; @@ -1179,17 +969,7 @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) } } while (desc_offset < total_size); - increase_size_stats(s, PRCregs, total_size); - inc_reg_if_not_full(s, TPR); - s->mac_reg[GPRC] = s->mac_reg[TPR]; - /* TOR - Total Octets Received: - * This register includes bytes received in a packet from the field through the field, inclusively. - * Always include FCS length (4) in size. - */ - grow_8reg_if_not_full(s, TORL, size+4); - s->mac_reg[GORCL] = s->mac_reg[TORL]; - s->mac_reg[GORCH] = s->mac_reg[TORH]; + e1000x_update_rx_total_stats(s->mac_reg, size, total_size); n = E1000_ICS_RXT0; if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH]) @@ -1670,20 +1450,20 @@ static const VMStateDescription vmstate_e1000 = { VMSTATE_UINT16(eecd_state.bitnum_out, E1000State), VMSTATE_UINT16(eecd_state.reading, E1000State), VMSTATE_UINT32(eecd_state.old_eecd, E1000State), - VMSTATE_UINT8(tx.ipcss, E1000State), - VMSTATE_UINT8(tx.ipcso, E1000State), - VMSTATE_UINT16(tx.ipcse, E1000State), - VMSTATE_UINT8(tx.tucss, E1000State), - VMSTATE_UINT8(tx.tucso, E1000State), - VMSTATE_UINT16(tx.tucse, E1000State), - VMSTATE_UINT32(tx.paylen, E1000State), - VMSTATE_UINT8(tx.hdr_len, E1000State), - VMSTATE_UINT16(tx.mss, E1000State), + VMSTATE_UINT8(tx.props.ipcss, E1000State), + VMSTATE_UINT8(tx.props.ipcso, E1000State), + VMSTATE_UINT16(tx.props.ipcse, E1000State), + VMSTATE_UINT8(tx.props.tucss, E1000State), + VMSTATE_UINT8(tx.props.tucso, E1000State), + VMSTATE_UINT16(tx.props.tucse, E1000State), + VMSTATE_UINT32(tx.props.paylen, E1000State), + VMSTATE_UINT8(tx.props.hdr_len, E1000State), + VMSTATE_UINT16(tx.props.mss, E1000State), VMSTATE_UINT16(tx.size, E1000State), VMSTATE_UINT16(tx.tso_frames, E1000State), - VMSTATE_UINT8(tx.sum_needed, E1000State), - VMSTATE_INT8(tx.ip, E1000State), - VMSTATE_INT8(tx.tcp, E1000State), + VMSTATE_UINT8(tx.props.sum_needed, E1000State), + VMSTATE_INT8(tx.props.ip, E1000State), + VMSTATE_INT8(tx.props.tcp, E1000State), VMSTATE_BUFFER(tx.header, E1000State), VMSTATE_BUFFER(tx.data, E1000State), VMSTATE_UINT16_ARRAY(eeprom_data, E1000State, 64), @@ -1806,15 +1586,11 @@ static void e1000_write_config(PCIDevice *pci_dev, uint32_t address, } } - static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) { DeviceState *dev = DEVICE(pci_dev); E1000State *d = E1000(pci_dev); - PCIDeviceClass *pdc = PCI_DEVICE_GET_CLASS(pci_dev); uint8_t *pci_conf; - uint16_t checksum = 0; - int i; uint8_t *macaddr; pci_dev->config_write = e1000_write_config; @@ -1832,17 +1608,14 @@ static void pci_e1000_realize(PCIDevice *pci_dev, Error **errp) pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->io); - memmove(d->eeprom_data, e1000_eeprom_template, - sizeof e1000_eeprom_template); qemu_macaddr_default_if_unset(&d->conf.macaddr); macaddr = d->conf.macaddr.a; - for (i = 0; i < 3; i++) - d->eeprom_data[i] = (macaddr[2*i+1]<<8) | macaddr[2*i]; - d->eeprom_data[11] = d->eeprom_data[13] = pdc->device_id; - for (i = 0; i < EEPROM_CHECKSUM_REG; i++) - checksum += d->eeprom_data[i]; - checksum = (uint16_t) EEPROM_SUM - checksum; - d->eeprom_data[EEPROM_CHECKSUM_REG] = checksum; + + e1000x_core_prepare_eeprom(d->eeprom_data, + e1000_eeprom_template, + sizeof(e1000_eeprom_template), + PCI_DEVICE_GET_CLASS(pci_dev)->device_id, + macaddr); d->nic = qemu_new_nic(&net_e1000_info, &d->conf, object_get_typename(OBJECT(d)), dev->id, d); diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c new file mode 100644 index 0000000..94f85c9 --- /dev/null +++ b/hw/net/e1000x_common.c @@ -0,0 +1,267 @@ +/* +* QEMU e1000(e) emulation - shared code +* +* Copyright (c) 2008 Qumranet +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see . +*/ + +#include "qemu/osdep.h" +#include "hw/hw.h" +#include "hw/pci/pci.h" +#include "net/net.h" + +#include "e1000x_common.h" + +#include "trace.h" + +bool e1000x_rx_ready(PCIDevice *d, uint32_t *mac) +{ + bool link_up = mac[STATUS] & E1000_STATUS_LU; + bool rx_enabled = mac[RCTL] & E1000_RCTL_EN; + bool pci_master = d->config[PCI_COMMAND] & PCI_COMMAND_MASTER; + + if (!link_up || !rx_enabled || !pci_master) { + trace_e1000x_rx_can_recv_disabled(link_up, rx_enabled, pci_master); + return false; + } + + return true; +} + +bool e1000x_is_vlan_packet(const uint8_t *buf, uint16_t vet) +{ + uint16_t eth_proto = be16_to_cpup((uint16_t *)(buf + 12)); + bool res = (eth_proto == vet); + + trace_e1000x_vlan_is_vlan_pkt(res, eth_proto, vet); + + return res; +} + +bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf) +{ + static const int mta_shift[] = { 4, 3, 2, 0 }; + uint32_t f, ra[2], *rp, rctl = mac[RCTL]; + + for (rp = mac + RA; rp < mac + RA + 32; rp += 2) { + if (!(rp[1] & E1000_RAH_AV)) { + continue; + } + ra[0] = cpu_to_le32(rp[0]); + ra[1] = cpu_to_le32(rp[1]); + if (!memcmp(buf, (uint8_t *)ra, 6)) { + trace_e1000x_rx_flt_ucast_match((int)(rp - mac - RA) / 2, + MAC_ARG(buf)); + return true; + } + } + trace_e1000x_rx_flt_ucast_mismatch(MAC_ARG(buf)); + + f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3]; + f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff; + if (mac[MTA + (f >> 5)] & (1 << (f & 0x1f))) { + e1000x_inc_reg_if_not_full(mac, MPRC); + return true; + } + + trace_e1000x_rx_flt_inexact_mismatch(MAC_ARG(buf), + (rctl >> E1000_RCTL_MO_SHIFT) & 3, + f >> 5, + mac[MTA + (f >> 5)]); + + return false; +} + +bool e1000x_hw_rx_enabled(uint32_t *mac) +{ + if (!(mac[STATUS] & E1000_STATUS_LU)) { + trace_e1000x_rx_link_down(mac[STATUS]); + return false; + } + + if (!(mac[RCTL] & E1000_RCTL_EN)) { + trace_e1000x_rx_disabled(mac[RCTL]); + return false; + } + + return true; +} + +bool e1000x_is_oversized(uint32_t *mac, size_t size) +{ + /* this is the size past which hardware will + drop packets when setting LPE=0 */ + static const int maximum_ethernet_vlan_size = 1522; + /* this is the size past which hardware will + drop packets when setting LPE=1 */ + static const int maximum_ethernet_lpe_size = 16384; + + if ((size > maximum_ethernet_lpe_size || + (size > maximum_ethernet_vlan_size + && !(mac[RCTL] & E1000_RCTL_LPE))) + && !(mac[RCTL] & E1000_RCTL_SBP)) { + e1000x_inc_reg_if_not_full(mac, ROC); + trace_e1000x_rx_oversized(size); + return true; + } + + return false; +} + +void e1000x_restart_autoneg(uint32_t *mac, uint16_t *phy, QEMUTimer *timer) +{ + e1000x_update_regs_on_link_down(mac, phy); + trace_e1000x_link_negotiation_start(); + timer_mod(timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); +} + +void e1000x_reset_mac_addr(NICState *nic, uint32_t *mac_regs, + uint8_t *mac_addr) +{ + int i; + + mac_regs[RA] = 0; + mac_regs[RA + 1] = E1000_RAH_AV; + for (i = 0; i < 4; i++) { + mac_regs[RA] |= mac_addr[i] << (8 * i); + mac_regs[RA + 1] |= + (i < 2) ? mac_addr[i + 4] << (8 * i) : 0; + } + + qemu_format_nic_info_str(qemu_get_queue(nic), mac_addr); + trace_e1000x_mac_indicate(MAC_ARG(mac_addr)); +} + +void e1000x_update_regs_on_autoneg_done(uint32_t *mac, uint16_t *phy) +{ + e1000x_update_regs_on_link_up(mac, phy); + phy[PHY_LP_ABILITY] |= MII_LPAR_LPACK; + phy[PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; + trace_e1000x_link_negotiation_done(); +} + +void +e1000x_core_prepare_eeprom(uint16_t *eeprom, + const uint16_t *templ, + uint32_t templ_size, + uint16_t dev_id, + const uint8_t *macaddr) +{ + uint16_t checksum = 0; + int i; + + memmove(eeprom, templ, templ_size); + + for (i = 0; i < 3; i++) { + eeprom[i] = (macaddr[2 * i + 1] << 8) | macaddr[2 * i]; + } + + eeprom[11] = eeprom[13] = dev_id; + + for (i = 0; i < EEPROM_CHECKSUM_REG; i++) { + checksum += eeprom[i]; + } + + checksum = (uint16_t) EEPROM_SUM - checksum; + + eeprom[EEPROM_CHECKSUM_REG] = checksum; +} + +uint32_t +e1000x_rxbufsize(uint32_t rctl) +{ + rctl &= E1000_RCTL_BSEX | E1000_RCTL_SZ_16384 | E1000_RCTL_SZ_8192 | + E1000_RCTL_SZ_4096 | E1000_RCTL_SZ_2048 | E1000_RCTL_SZ_1024 | + E1000_RCTL_SZ_512 | E1000_RCTL_SZ_256; + switch (rctl) { + case E1000_RCTL_BSEX | E1000_RCTL_SZ_16384: + return 16384; + case E1000_RCTL_BSEX | E1000_RCTL_SZ_8192: + return 8192; + case E1000_RCTL_BSEX | E1000_RCTL_SZ_4096: + return 4096; + case E1000_RCTL_SZ_1024: + return 1024; + case E1000_RCTL_SZ_512: + return 512; + case E1000_RCTL_SZ_256: + return 256; + } + return 2048; +} + +void +e1000x_update_rx_total_stats(uint32_t *mac, + size_t data_size, + size_t data_fcs_size) +{ + static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511, + PRC1023, PRC1522 }; + + e1000x_increase_size_stats(mac, PRCregs, data_fcs_size); + e1000x_inc_reg_if_not_full(mac, TPR); + mac[GPRC] = mac[TPR]; + /* TOR - Total Octets Received: + * This register includes bytes received in a packet from the field through the field, inclusively. + * Always include FCS length (4) in size. + */ + e1000x_grow_8reg_if_not_full(mac, TORL, data_size + 4); + mac[GORCL] = mac[TORL]; + mac[GORCH] = mac[TORH]; +} + +void +e1000x_increase_size_stats(uint32_t *mac, const int *size_regs, int size) +{ + if (size > 1023) { + e1000x_inc_reg_if_not_full(mac, size_regs[5]); + } else if (size > 511) { + e1000x_inc_reg_if_not_full(mac, size_regs[4]); + } else if (size > 255) { + e1000x_inc_reg_if_not_full(mac, size_regs[3]); + } else if (size > 127) { + e1000x_inc_reg_if_not_full(mac, size_regs[2]); + } else if (size > 64) { + e1000x_inc_reg_if_not_full(mac, size_regs[1]); + } else if (size == 64) { + e1000x_inc_reg_if_not_full(mac, size_regs[0]); + } +} + +void +e1000x_read_tx_ctx_descr(struct e1000_context_desc *d, + e1000x_txd_props *props) +{ + uint32_t op = le32_to_cpu(d->cmd_and_length); + + props->ipcss = d->lower_setup.ip_fields.ipcss; + props->ipcso = d->lower_setup.ip_fields.ipcso; + props->ipcse = le16_to_cpu(d->lower_setup.ip_fields.ipcse); + props->tucss = d->upper_setup.tcp_fields.tucss; + props->tucso = d->upper_setup.tcp_fields.tucso; + props->tucse = le16_to_cpu(d->upper_setup.tcp_fields.tucse); + props->paylen = op & 0xfffff; + props->hdr_len = d->tcp_seg_setup.fields.hdr_len; + props->mss = le16_to_cpu(d->tcp_seg_setup.fields.mss); + props->ip = (op & E1000_TXD_CMD_IP) ? 1 : 0; + props->tcp = (op & E1000_TXD_CMD_TCP) ? 1 : 0; + props->tse = (op & E1000_TXD_CMD_TSE) ? 1 : 0; +} diff --git a/hw/net/e1000x_common.h b/hw/net/e1000x_common.h new file mode 100644 index 0000000..21bf28e --- /dev/null +++ b/hw/net/e1000x_common.h @@ -0,0 +1,213 @@ +/* +* QEMU e1000(e) emulation - shared code +* +* Copyright (c) 2008 Qumranet +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see . +*/ + +#include "e1000_regs.h" + +#define defreg(x) x = (E1000_##x >> 2) +enum { + defreg(CTRL), defreg(EECD), defreg(EERD), defreg(GPRC), + defreg(GPTC), defreg(ICR), defreg(ICS), defreg(IMC), + defreg(IMS), defreg(LEDCTL), defreg(MANC), defreg(MDIC), + defreg(MPC), defreg(PBA), defreg(RCTL), defreg(RDBAH0), + defreg(RDBAL0), defreg(RDH0), defreg(RDLEN0), defreg(RDT0), + defreg(STATUS), defreg(SWSM), defreg(TCTL), defreg(TDBAH), + defreg(TDBAL), defreg(TDH), defreg(TDLEN), defreg(TDT), + defreg(TDLEN1), defreg(TDBAL1), defreg(TDBAH1), defreg(TDH1), + defreg(TDT1), defreg(TORH), defreg(TORL), defreg(TOTH), + defreg(TOTL), defreg(TPR), defreg(TPT), defreg(TXDCTL), + defreg(WUFC), defreg(RA), defreg(MTA), defreg(CRCERRS), + defreg(VFTA), defreg(VET), defreg(RDTR), defreg(RADV), + defreg(TADV), defreg(ITR), defreg(SCC), defreg(ECOL), + defreg(MCC), defreg(LATECOL), defreg(COLC), defreg(DC), + defreg(TNCRS), defreg(SEC), defreg(CEXTERR), defreg(RLEC), + defreg(XONRXC), defreg(XONTXC), defreg(XOFFRXC), defreg(XOFFTXC), + defreg(FCRUC), defreg(AIT), defreg(TDFH), defreg(TDFT), + defreg(TDFHS), defreg(TDFTS), defreg(TDFPC), defreg(WUC), + defreg(WUS), defreg(POEMB), defreg(PBS), defreg(RDFH), + defreg(RDFT), defreg(RDFHS), defreg(RDFTS), defreg(RDFPC), + defreg(PBM), defreg(IPAV), defreg(IP4AT), defreg(IP6AT), + defreg(WUPM), defreg(FFLT), defreg(FFMT), defreg(FFVT), + defreg(TARC0), defreg(TARC1), defreg(IAM), defreg(EXTCNF_CTRL), + defreg(GCR), defreg(TIMINCA), defreg(EIAC), defreg(CTRL_EXT), + defreg(IVAR), defreg(MFUTP01), defreg(MFUTP23), defreg(MANC2H), + defreg(MFVAL), defreg(MDEF), defreg(FACTPS), defreg(FTFT), + defreg(RUC), defreg(ROC), defreg(RFC), defreg(RJC), + defreg(PRC64), defreg(PRC127), defreg(PRC255), defreg(PRC511), + defreg(PRC1023), defreg(PRC1522), defreg(PTC64), defreg(PTC127), + defreg(PTC255), defreg(PTC511), defreg(PTC1023), defreg(PTC1522), + defreg(GORCL), defreg(GORCH), defreg(GOTCL), defreg(GOTCH), + defreg(RNBC), defreg(BPRC), defreg(MPRC), defreg(RFCTL), + defreg(PSRCTL), defreg(MPTC), defreg(BPTC), defreg(TSCTFC), + defreg(IAC), defreg(MGTPRC), defreg(MGTPDC), defreg(MGTPTC), + defreg(TSCTC), defreg(RXCSUM), defreg(FUNCTAG), defreg(GSCL_1), + defreg(GSCL_2), defreg(GSCL_3), defreg(GSCL_4), defreg(GSCN_0), + defreg(GSCN_1), defreg(GSCN_2), defreg(GSCN_3), defreg(GCR2), + defreg(RAID), defreg(RSRPD), defreg(TIDV), defreg(EITR), + defreg(MRQC), defreg(RETA), defreg(RSSRK), defreg(RDBAH1), + defreg(RDBAL1), defreg(RDLEN1), defreg(RDH1), defreg(RDT1), + defreg(PBACLR), defreg(FCAL), defreg(FCAH), defreg(FCT), + defreg(FCRTH), defreg(FCRTL), defreg(FCTTV), defreg(FCRTV), + defreg(FLA), defreg(EEWR), defreg(FLOP), defreg(FLOL), + defreg(FLSWCTL), defreg(FLSWCNT), defreg(RXDCTL), defreg(RXDCTL1), + defreg(MAVTV0), defreg(MAVTV1), defreg(MAVTV2), defreg(MAVTV3), + defreg(TXSTMPL), defreg(TXSTMPH), defreg(SYSTIML), defreg(SYSTIMH), + defreg(RXCFGL), defreg(RXUDP), defreg(TIMADJL), defreg(TIMADJH), + defreg(RXSTMPH), defreg(RXSTMPL), defreg(RXSATRL), defreg(RXSATRH), + defreg(FLASHT), defreg(TIPG), defreg(RDH), defreg(RDT), + defreg(RDLEN), defreg(RDBAH), defreg(RDBAL), + defreg(TXDCTL1), + defreg(FLSWDATA), + defreg(CTRL_DUP), + defreg(EXTCNF_SIZE), + defreg(EEMNGCTL), + defreg(EEMNGDATA), + defreg(FLMNGCTL), + defreg(FLMNGDATA), + defreg(FLMNGCNT), + defreg(TSYNCRXCTL), + defreg(TSYNCTXCTL), + + /* Aliases */ + defreg(RDH0_A), defreg(RDT0_A), defreg(RDTR_A), defreg(RDFH_A), + defreg(RDFT_A), defreg(TDH_A), defreg(TDT_A), defreg(TIDV_A), + defreg(TDFH_A), defreg(TDFT_A), defreg(RA_A), defreg(RDBAL0_A), + defreg(TDBAL_A), defreg(TDLEN_A), defreg(VFTA_A), defreg(RDLEN0_A), + defreg(FCRTL_A), defreg(FCRTH_A) +}; + +static inline void +e1000x_inc_reg_if_not_full(uint32_t *mac, int index) +{ + if (mac[index] != 0xffffffff) { + mac[index]++; + } +} + +static inline void +e1000x_grow_8reg_if_not_full(uint32_t *mac, int index, int size) +{ + uint64_t sum = mac[index] | (uint64_t)mac[index + 1] << 32; + + if (sum + size < sum) { + sum = ~0ULL; + } else { + sum += size; + } + mac[index] = sum; + mac[index + 1] = sum >> 32; +} + +static inline int +e1000x_vlan_enabled(uint32_t *mac) +{ + return ((mac[CTRL] & E1000_CTRL_VME) != 0); +} + +static inline int +e1000x_is_vlan_txd(uint32_t txd_lower) +{ + return ((txd_lower & E1000_TXD_CMD_VLE) != 0); +} + +static inline int +e1000x_vlan_rx_filter_enabled(uint32_t *mac) +{ + return ((mac[RCTL] & E1000_RCTL_VFE) != 0); +} + +static inline int +e1000x_fcs_len(uint32_t *mac) +{ + /* FCS aka Ethernet CRC-32. We don't get it from backends and can't + * fill it in, just pad descriptor length by 4 bytes unless guest + * told us to strip it off the packet. */ + return (mac[RCTL] & E1000_RCTL_SECRC) ? 0 : 4; +} + +static inline void +e1000x_update_regs_on_link_down(uint32_t *mac, uint16_t *phy) +{ + mac[STATUS] &= ~E1000_STATUS_LU; + phy[PHY_STATUS] &= ~MII_SR_LINK_STATUS; + phy[PHY_STATUS] &= ~MII_SR_AUTONEG_COMPLETE; + phy[PHY_LP_ABILITY] &= ~MII_LPAR_LPACK; +} + +static inline void +e1000x_update_regs_on_link_up(uint32_t *mac, uint16_t *phy) +{ + mac[STATUS] |= E1000_STATUS_LU; + phy[PHY_STATUS] |= MII_SR_LINK_STATUS; +} + +void e1000x_update_rx_total_stats(uint32_t *mac, + size_t data_size, + size_t data_fcs_size); + +void e1000x_core_prepare_eeprom(uint16_t *eeprom, + const uint16_t *templ, + uint32_t templ_size, + uint16_t dev_id, + const uint8_t *macaddr); + +uint32_t e1000x_rxbufsize(uint32_t rctl); + +bool e1000x_rx_ready(PCIDevice *d, uint32_t *mac); + +bool e1000x_is_vlan_packet(const uint8_t *buf, uint16_t vet); + +bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf); + +bool e1000x_hw_rx_enabled(uint32_t *mac); + +bool e1000x_is_oversized(uint32_t *mac, size_t size); + +void e1000x_restart_autoneg(uint32_t *mac, uint16_t *phy, QEMUTimer *timer); + +void e1000x_reset_mac_addr(NICState *nic, uint32_t *mac_regs, + uint8_t *mac_addr); + +void e1000x_update_regs_on_autoneg_done(uint32_t *mac, uint16_t *phy); + +void e1000x_increase_size_stats(uint32_t *mac, const int *size_regs, int size); + +typedef struct e1000x_txd_props { + unsigned char sum_needed; + uint8_t ipcss; + uint8_t ipcso; + uint16_t ipcse; + uint8_t tucss; + uint8_t tucso; + uint16_t tucse; + uint32_t paylen; + uint8_t hdr_len; + uint16_t mss; + int8_t ip; + int8_t tcp; + bool tse; + bool cptse; +} e1000x_txd_props; + +void e1000x_read_tx_ctx_descr(struct e1000_context_desc *d, + e1000x_txd_props *props); -- cgit v1.1 From 6f3fbe4ed06a54b14716e619f1053c073fddae49 Mon Sep 17 00:00:00 2001 From: Dmitry Fleytman Date: Wed, 1 Jun 2016 11:23:45 +0300 Subject: net: Introduce e1000e device emulation This patch introduces emulation for the Intel 82574 adapter, AKA e1000e. This implementation is derived from the e1000 emulation code, and utilizes the TX/RX packet abstractions that were initially developed for the vmxnet3 device. Although some parts of the introduced code may be shared with e1000, the differences are substantial enough so that the only shared resources for the two devices are the definitions in hw/net/e1000_regs.h. Similarly to vmxnet3, the new device uses virtio headers for task offloads (for backends that support virtio extensions). Usage of virtio headers may be forcibly disabled via a boolean device property "vnet" (which is enabled by default). In such case task offloads will be performed in software, in the same way it is done on backends that do not support virtio headers. The device code is split into two parts: 1. hw/net/e1000e.c: QEMU-specific code for a network device; 2. hw/net/e1000e_core.[hc]: Device emulation according to the spec. The new device name is e1000e. Intel specifications for the 82574 controller are available at: http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf Throughput measurement results (iperf2): Fedora 22 guest, TCP, RX 4 ++------------------------------------------+ | | | X X X X X 3.5 ++ X X X X | | X | | | 3 ++ | G | X | b | | / 2.5 ++ | s | | | | 2 ++ | | | | | 1.5 X+ | | | + + + + + + + + + + + + 1 ++--+---+---+---+---+---+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Fedora 22 guest, TCP, TX 18 ++-------------------------------------------+ | X | 16 ++ X X X X X | X | 14 ++ | | | 12 ++ | G | X | b 10 ++ | / | | s 8 ++ | | | 6 ++ X | | | 4 ++ | | X | 2 ++ X | X + + + + + + + + + + + 0 ++--+---+---+---+---+----+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Fedora 22 guest, UDP, RX 3 ++------------------------------------------+ | X | | 2.5 ++ | | | | | 2 ++ X | G | | b | | / 1.5 ++ | s | X | | | 1 ++ | | | | X | 0.5 ++ | | X | X + + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Fedora 22 guest, UDP, TX 1 ++------------------------------------------+ | X 0.9 ++ | | | 0.8 ++ | 0.7 ++ | | | G 0.6 ++ | b | | / 0.5 ++ | s | X | 0.4 ++ | | | 0.3 ++ | 0.2 ++ X | | | 0.1 ++ X | X X + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Windows 2012R2 guest, TCP, RX 3.2 ++------------------------------------------+ | X | 3 ++ | | | 2.8 ++ | | | 2.6 ++ X | G | X X X X X b 2.4 ++ X X | / | | s 2.2 ++ | | | 2 ++ | | X X | 1.8 ++ | | | 1.6 X+ | + + + + + + + + + + + + 1.4 ++--+---+---+---+---+---+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Windows 2012R2 guest, TCP, TX 14 ++-------------------------------------------+ | | | X X 12 ++ | | | 10 ++ | | | G | | b 8 ++ | / | X | s 6 ++ | | | | | 4 ++ X | | | 2 ++ | | X X X | + X X + + X X + + + + + 0 X+--+---+---+---+---+----+---+---+---+---+---+ 32 64 128 256 512 1 2 4 8 16 32 64 B B B B B KB KB KB KB KB KB KB Buffer size Windows 2012R2 guest, UDP, RX 1.6 ++------------------------------------------X | | 1.4 ++ | | | 1.2 ++ | | X | | | G 1 ++ | b | | / 0.8 ++ | s | | 0.6 ++ X | | | 0.4 ++ | | X | | | 0.2 ++ X | X + + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Windows 2012R2 guest, UDP, TX 0.6 ++------------------------------------------+ | X | | 0.5 ++ | | | | | 0.4 ++ | G | | b | | / 0.3 ++ X | s | | | | 0.2 ++ | | | | X | 0.1 ++ | | X | X X + + + + 0 ++-------+--------+-------+--------+--------+ 32 64 128 256 512 1 B B B B B KB Datagram size Signed-off-by: Dmitry Fleytman Signed-off-by: Leonid Bloch Reviewed-by: Michael S. Tsirkin Signed-off-by: Jason Wang --- hw/net/Makefile.objs | 1 + hw/net/e1000_regs.h | 4 + hw/net/e1000e.c | 739 +++++++++++ hw/net/e1000e_core.c | 3476 ++++++++++++++++++++++++++++++++++++++++++++++++++ hw/net/e1000e_core.h | 146 +++ 5 files changed, 4366 insertions(+) create mode 100644 hw/net/e1000e.c create mode 100644 hw/net/e1000e_core.c create mode 100644 hw/net/e1000e_core.h (limited to 'hw') diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs index bc69948..fe61e9f 100644 --- a/hw/net/Makefile.objs +++ b/hw/net/Makefile.objs @@ -7,6 +7,7 @@ common-obj-$(CONFIG_EEPRO100_PCI) += eepro100.o common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o common-obj-$(CONFIG_E1000_PCI) += e1000.o e1000x_common.o +common-obj-$(CONFIG_E1000E_PCI) += e1000e.o e1000e_core.o e1000x_common.o common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o common-obj-$(CONFIG_VMXNET3_PCI) += net_tx_pkt.o net_rx_pkt.o common-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o diff --git a/hw/net/e1000_regs.h b/hw/net/e1000_regs.h index d62b3fa..c1acd45 100644 --- a/hw/net/e1000_regs.h +++ b/hw/net/e1000_regs.h @@ -417,6 +417,10 @@ #define E1000_ICR_ASSERTED BIT(31) #define E1000_EIAC_MASK 0x01F00000 +/* [TR]DBAL and [TR]DLEN masks */ +#define E1000_XDBAL_MASK (~(BIT(4) - 1)) +#define E1000_XDLEN_MASK ((BIT(20) - 1) & (~(BIT(7) - 1))) + /* IVAR register parsing helpers */ #define E1000_IVAR_INT_ALLOC_VALID (0x8) diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c new file mode 100644 index 0000000..61bcbb6 --- /dev/null +++ b/hw/net/e1000e.c @@ -0,0 +1,739 @@ +/* +* QEMU INTEL 82574 GbE NIC emulation +* +* Software developer's manuals: +* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf +* +* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) +* Developed by Daynix Computing LTD (http://www.daynix.com) +* +* Authors: +* Dmitry Fleytman +* Leonid Bloch +* Yan Vugenfirer +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2008 Qumranet +* Based on work done by: +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see . +*/ + +#include "qemu/osdep.h" +#include "net/net.h" +#include "net/tap.h" +#include "qemu/range.h" +#include "sysemu/sysemu.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" + +#include "hw/net/e1000_regs.h" + +#include "e1000x_common.h" +#include "e1000e_core.h" + +#include "trace.h" + +#define TYPE_E1000E "e1000e" +#define E1000E(obj) OBJECT_CHECK(E1000EState, (obj), TYPE_E1000E) + +typedef struct E1000EState { + PCIDevice parent_obj; + NICState *nic; + NICConf conf; + + MemoryRegion mmio; + MemoryRegion flash; + MemoryRegion io; + MemoryRegion msix; + + uint32_t ioaddr; + + uint16_t subsys_ven; + uint16_t subsys; + + uint16_t subsys_ven_used; + uint16_t subsys_used; + + uint32_t intr_state; + bool disable_vnet; + + E1000ECore core; + +} E1000EState; + +#define E1000E_MMIO_IDX 0 +#define E1000E_FLASH_IDX 1 +#define E1000E_IO_IDX 2 +#define E1000E_MSIX_IDX 3 + +#define E1000E_MMIO_SIZE (128 * 1024) +#define E1000E_FLASH_SIZE (128 * 1024) +#define E1000E_IO_SIZE (32) +#define E1000E_MSIX_SIZE (16 * 1024) + +#define E1000E_MSIX_TABLE (0x0000) +#define E1000E_MSIX_PBA (0x2000) + +#define E1000E_USE_MSI BIT(0) +#define E1000E_USE_MSIX BIT(1) + +static uint64_t +e1000e_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + E1000EState *s = opaque; + return e1000e_core_read(&s->core, addr, size); +} + +static void +e1000e_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + E1000EState *s = opaque; + e1000e_core_write(&s->core, addr, val, size); +} + +static bool +e1000e_io_get_reg_index(E1000EState *s, uint32_t *idx) +{ + if (s->ioaddr < 0x1FFFF) { + *idx = s->ioaddr; + return true; + } + + if (s->ioaddr < 0x7FFFF) { + trace_e1000e_wrn_io_addr_undefined(s->ioaddr); + return false; + } + + if (s->ioaddr < 0xFFFFF) { + trace_e1000e_wrn_io_addr_flash(s->ioaddr); + return false; + } + + trace_e1000e_wrn_io_addr_unknown(s->ioaddr); + return false; +} + +static uint64_t +e1000e_io_read(void *opaque, hwaddr addr, unsigned size) +{ + E1000EState *s = opaque; + uint32_t idx; + uint64_t val; + + switch (addr) { + case E1000_IOADDR: + trace_e1000e_io_read_addr(s->ioaddr); + return s->ioaddr; + case E1000_IODATA: + if (e1000e_io_get_reg_index(s, &idx)) { + val = e1000e_core_read(&s->core, idx, sizeof(val)); + trace_e1000e_io_read_data(idx, val); + return val; + } + return 0; + default: + trace_e1000e_wrn_io_read_unknown(addr); + return 0; + } +} + +static void +e1000e_io_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + E1000EState *s = opaque; + uint32_t idx; + + switch (addr) { + case E1000_IOADDR: + trace_e1000e_io_write_addr(val); + s->ioaddr = (uint32_t) val; + return; + case E1000_IODATA: + if (e1000e_io_get_reg_index(s, &idx)) { + trace_e1000e_io_write_data(idx, val); + e1000e_core_write(&s->core, idx, val, sizeof(val)); + } + return; + default: + trace_e1000e_wrn_io_write_unknown(addr); + return; + } +} + +static const MemoryRegionOps mmio_ops = { + .read = e1000e_mmio_read, + .write = e1000e_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static const MemoryRegionOps io_ops = { + .read = e1000e_io_read, + .write = e1000e_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static int +e1000e_nc_can_receive(NetClientState *nc) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + return e1000e_can_receive(&s->core); +} + +static ssize_t +e1000e_nc_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + return e1000e_receive_iov(&s->core, iov, iovcnt); +} + +static ssize_t +e1000e_nc_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + return e1000e_receive(&s->core, buf, size); +} + +static void +e1000e_set_link_status(NetClientState *nc) +{ + E1000EState *s = qemu_get_nic_opaque(nc); + e1000e_core_set_link_status(&s->core); +} + +static NetClientInfo net_e1000e_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = e1000e_nc_can_receive, + .receive = e1000e_nc_receive, + .receive_iov = e1000e_nc_receive_iov, + .link_status_changed = e1000e_set_link_status, +}; + +/* +* EEPROM (NVM) contents documented in Table 36, section 6.1 +* and generally 6.1.2 Software accessed words. +*/ +static const uint16_t e1000e_eeprom_template[64] = { + /* Address | Compat. | ImVer | Compat. */ + 0x0000, 0x0000, 0x0000, 0x0420, 0xf746, 0x2010, 0xffff, 0xffff, + /* PBA |ICtrl1 | SSID | SVID | DevID |-------|ICtrl2 */ + 0x0000, 0x0000, 0x026b, 0x0000, 0x8086, 0x0000, 0x0000, 0x8058, + /* NVM words 1,2,3 |-------------------------------|PCI-EID*/ + 0x0000, 0x2001, 0x7e7c, 0xffff, 0x1000, 0x00c8, 0x0000, 0x2704, + /* PCIe Init. Conf 1,2,3 |PCICtrl|PHY|LD1|-------| RevID | LD0,2 */ + 0x6cc9, 0x3150, 0x070e, 0x460b, 0x2d84, 0x0100, 0xf000, 0x0706, + /* FLPAR |FLANADD|LAN-PWR|FlVndr |ICtrl3 |APTSMBA|APTRxEP|APTSMBC*/ + 0x6000, 0x0080, 0x0f04, 0x7fff, 0x4f01, 0xc600, 0x0000, 0x20ff, + /* APTIF | APTMC |APTuCP |LSWFWID|MSWFWID|NC-SIMC|NC-SIC | VPDP */ + 0x0028, 0x0003, 0x0000, 0x0000, 0x0000, 0x0003, 0x0000, 0xffff, + /* SW Section */ + 0x0100, 0xc000, 0x121c, 0xc007, 0xffff, 0xffff, 0xffff, 0xffff, + /* SW Section |CHKSUM */ + 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0120, 0xffff, 0x0000, +}; + +static void e1000e_core_realize(E1000EState *s) +{ + s->core.owner = &s->parent_obj; + s->core.owner_nic = s->nic; +} + +static void +e1000e_init_msi(E1000EState *s) +{ + int res; + + res = msi_init(PCI_DEVICE(s), + 0xD0, /* MSI capability offset */ + 1, /* MAC MSI interrupts */ + true, /* 64-bit message addresses supported */ + false); /* Per vector mask supported */ + + if (res > 0) { + s->intr_state |= E1000E_USE_MSI; + } else { + trace_e1000e_msi_init_fail(res); + } +} + +static void +e1000e_cleanup_msi(E1000EState *s) +{ + if (s->intr_state & E1000E_USE_MSI) { + msi_uninit(PCI_DEVICE(s)); + } +} + +static void +e1000e_unuse_msix_vectors(E1000EState *s, int num_vectors) +{ + int i; + for (i = 0; i < num_vectors; i++) { + msix_vector_unuse(PCI_DEVICE(s), i); + } +} + +static bool +e1000e_use_msix_vectors(E1000EState *s, int num_vectors) +{ + int i; + for (i = 0; i < num_vectors; i++) { + int res = msix_vector_use(PCI_DEVICE(s), i); + if (res < 0) { + trace_e1000e_msix_use_vector_fail(i, res); + e1000e_unuse_msix_vectors(s, i); + return false; + } + } + return true; +} + +static void +e1000e_init_msix(E1000EState *s) +{ + PCIDevice *d = PCI_DEVICE(s); + int res = msix_init(PCI_DEVICE(s), E1000E_MSIX_VEC_NUM, + &s->msix, + E1000E_MSIX_IDX, E1000E_MSIX_TABLE, + &s->msix, + E1000E_MSIX_IDX, E1000E_MSIX_PBA, + 0xA0); + + if (res < 0) { + trace_e1000e_msix_init_fail(res); + } else { + if (!e1000e_use_msix_vectors(s, E1000E_MSIX_VEC_NUM)) { + msix_uninit(d, &s->msix, &s->msix); + } else { + s->intr_state |= E1000E_USE_MSIX; + } + } +} + +static void +e1000e_cleanup_msix(E1000EState *s) +{ + if (s->intr_state & E1000E_USE_MSIX) { + e1000e_unuse_msix_vectors(s, E1000E_MSIX_VEC_NUM); + msix_uninit(PCI_DEVICE(s), &s->msix, &s->msix); + } +} + +static void +e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr) +{ + DeviceState *dev = DEVICE(pci_dev); + NetClientState *nc; + int i; + + s->nic = qemu_new_nic(&net_e1000e_info, &s->conf, + object_get_typename(OBJECT(s)), dev->id, s); + + s->core.max_queue_num = s->conf.peers.queues - 1; + + trace_e1000e_mac_set_permanent(MAC_ARG(macaddr)); + memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac)); + + qemu_format_nic_info_str(qemu_get_queue(s->nic), macaddr); + + /* Setup virtio headers */ + if (s->disable_vnet) { + s->core.has_vnet = false; + trace_e1000e_cfg_support_virtio(false); + return; + } else { + s->core.has_vnet = true; + } + + for (i = 0; i < s->conf.peers.queues; i++) { + nc = qemu_get_subqueue(s->nic, i); + if (!nc->peer || !qemu_has_vnet_hdr(nc->peer)) { + s->core.has_vnet = false; + trace_e1000e_cfg_support_virtio(false); + return; + } + } + + trace_e1000e_cfg_support_virtio(true); + + for (i = 0; i < s->conf.peers.queues; i++) { + nc = qemu_get_subqueue(s->nic, i); + qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr)); + qemu_using_vnet_hdr(nc->peer, true); + } +} + +static inline uint64_t +e1000e_gen_dsn(uint8_t *mac) +{ + return (uint64_t)(mac[5]) | + (uint64_t)(mac[4]) << 8 | + (uint64_t)(mac[3]) << 16 | + (uint64_t)(0x00FF) << 24 | + (uint64_t)(0x00FF) << 32 | + (uint64_t)(mac[2]) << 40 | + (uint64_t)(mac[1]) << 48 | + (uint64_t)(mac[0]) << 56; +} + +static int +e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) +{ + int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF); + + if (ret >= 0) { + pci_set_word(pdev->config + offset + PCI_PM_PMC, + PCI_PM_CAP_VER_1_1 | + pmc); + + pci_set_word(pdev->wmask + offset + PCI_PM_CTRL, + PCI_PM_CTRL_STATE_MASK | + PCI_PM_CTRL_PME_ENABLE | + PCI_PM_CTRL_DATA_SEL_MASK); + + pci_set_word(pdev->w1cmask + offset + PCI_PM_CTRL, + PCI_PM_CTRL_PME_STATUS); + } + + return ret; +} + +static void e1000e_write_config(PCIDevice *pci_dev, uint32_t address, + uint32_t val, int len) +{ + E1000EState *s = E1000E(pci_dev); + + pci_default_write_config(pci_dev, address, val, len); + + if (range_covers_byte(address, len, PCI_COMMAND) && + (pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { + qemu_flush_queued_packets(qemu_get_queue(s->nic)); + } +} + +static void e1000e_pci_realize(PCIDevice *pci_dev, Error **errp) +{ + static const uint16_t e1000e_pmrb_offset = 0x0C8; + static const uint16_t e1000e_pcie_offset = 0x0E0; + static const uint16_t e1000e_aer_offset = 0x100; + static const uint16_t e1000e_dsn_offset = 0x140; + E1000EState *s = E1000E(pci_dev); + uint8_t *macaddr; + + trace_e1000e_cb_pci_realize(); + + pci_dev->config_write = e1000e_write_config; + + pci_dev->config[PCI_CACHE_LINE_SIZE] = 0x10; + pci_dev->config[PCI_INTERRUPT_PIN] = 1; + + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_VENDOR_ID, s->subsys_ven); + pci_set_word(pci_dev->config + PCI_SUBSYSTEM_ID, s->subsys); + + s->subsys_ven_used = s->subsys_ven; + s->subsys_used = s->subsys; + + /* Define IO/MMIO regions */ + memory_region_init_io(&s->mmio, OBJECT(s), &mmio_ops, s, + "e1000e-mmio", E1000E_MMIO_SIZE); + pci_register_bar(pci_dev, E1000E_MMIO_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio); + + /* + * We provide a dummy implementation for the flash BAR + * for drivers that may theoretically probe for its presence. + */ + memory_region_init(&s->flash, OBJECT(s), + "e1000e-flash", E1000E_FLASH_SIZE); + pci_register_bar(pci_dev, E1000E_FLASH_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->flash); + + memory_region_init_io(&s->io, OBJECT(s), &io_ops, s, + "e1000e-io", E1000E_IO_SIZE); + pci_register_bar(pci_dev, E1000E_IO_IDX, + PCI_BASE_ADDRESS_SPACE_IO, &s->io); + + memory_region_init(&s->msix, OBJECT(s), "e1000e-msix", + E1000E_MSIX_SIZE); + pci_register_bar(pci_dev, E1000E_MSIX_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix); + + /* Create networking backend */ + qemu_macaddr_default_if_unset(&s->conf.macaddr); + macaddr = s->conf.macaddr.a; + + e1000e_init_msix(s); + + if (pcie_endpoint_cap_v1_init(pci_dev, e1000e_pcie_offset) < 0) { + hw_error("Failed to initialize PCIe capability"); + } + + e1000e_init_msi(s); + + if (e1000e_add_pm_capability(pci_dev, e1000e_pmrb_offset, + PCI_PM_CAP_DSI) < 0) { + hw_error("Failed to initialize PM capability"); + } + + if (pcie_aer_init(pci_dev, e1000e_aer_offset, PCI_ERR_SIZEOF) < 0) { + hw_error("Failed to initialize AER capability"); + } + + pcie_dev_ser_num_init(pci_dev, e1000e_dsn_offset, + e1000e_gen_dsn(macaddr)); + + e1000e_init_net_peer(s, pci_dev, macaddr); + + /* Initialize core */ + e1000e_core_realize(s); + + e1000e_core_pci_realize(&s->core, + e1000e_eeprom_template, + sizeof(e1000e_eeprom_template), + macaddr); +} + +static void e1000e_pci_uninit(PCIDevice *pci_dev) +{ + E1000EState *s = E1000E(pci_dev); + + trace_e1000e_cb_pci_uninit(); + + e1000e_core_pci_uninit(&s->core); + + pcie_aer_exit(pci_dev); + pcie_cap_exit(pci_dev); + + qemu_del_nic(s->nic); + + e1000e_cleanup_msix(s); + e1000e_cleanup_msi(s); +} + +static void e1000e_qdev_reset(DeviceState *dev) +{ + E1000EState *s = E1000E(dev); + + trace_e1000e_cb_qdev_reset(); + + e1000e_core_reset(&s->core); +} + +static void e1000e_pre_save(void *opaque) +{ + E1000EState *s = opaque; + + trace_e1000e_cb_pre_save(); + + e1000e_core_pre_save(&s->core); +} + +static int e1000e_post_load(void *opaque, int version_id) +{ + E1000EState *s = opaque; + + trace_e1000e_cb_post_load(); + + if ((s->subsys != s->subsys_used) || + (s->subsys_ven != s->subsys_ven_used)) { + fprintf(stderr, + "ERROR: Cannot migrate while device properties " + "(subsys/subsys_ven) differ"); + return -1; + } + + return e1000e_core_post_load(&s->core); +} + +static const VMStateDescription e1000e_vmstate_tx = { + .name = "e1000e-tx", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(props.sum_needed, struct e1000e_tx), + VMSTATE_UINT8(props.ipcss, struct e1000e_tx), + VMSTATE_UINT8(props.ipcso, struct e1000e_tx), + VMSTATE_UINT16(props.ipcse, struct e1000e_tx), + VMSTATE_UINT8(props.tucss, struct e1000e_tx), + VMSTATE_UINT8(props.tucso, struct e1000e_tx), + VMSTATE_UINT16(props.tucse, struct e1000e_tx), + VMSTATE_UINT8(props.hdr_len, struct e1000e_tx), + VMSTATE_UINT16(props.mss, struct e1000e_tx), + VMSTATE_UINT32(props.paylen, struct e1000e_tx), + VMSTATE_INT8(props.ip, struct e1000e_tx), + VMSTATE_INT8(props.tcp, struct e1000e_tx), + VMSTATE_BOOL(props.tse, struct e1000e_tx), + VMSTATE_BOOL(props.cptse, struct e1000e_tx), + VMSTATE_BOOL(skip_cp, struct e1000e_tx), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription e1000e_vmstate_intr_timer = { + .name = "e1000e-intr-timer", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_TIMER_PTR(timer, E1000IntrDelayTimer), + VMSTATE_BOOL(running, E1000IntrDelayTimer), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_E1000E_INTR_DELAY_TIMER(_f, _s) \ + VMSTATE_STRUCT(_f, _s, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + +#define VMSTATE_E1000E_INTR_DELAY_TIMER_ARRAY(_f, _s, _num) \ + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + e1000e_vmstate_intr_timer, E1000IntrDelayTimer) + +static const VMStateDescription e1000e_vmstate = { + .name = "e1000e", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = e1000e_pre_save, + .post_load = e1000e_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCIE_DEVICE(parent_obj, E1000EState), + VMSTATE_MSIX(parent_obj, E1000EState), + + VMSTATE_UINT32(ioaddr, E1000EState), + VMSTATE_UINT32(intr_state, E1000EState), + VMSTATE_UINT32(core.rxbuf_min_shift, E1000EState), + VMSTATE_UINT8(core.rx_desc_len, E1000EState), + VMSTATE_UINT32_ARRAY(core.rxbuf_sizes, E1000EState, + E1000_PSRCTL_BUFFS_PER_DESC), + VMSTATE_UINT32(core.rx_desc_buf_size, E1000EState), + VMSTATE_UINT16_ARRAY(core.eeprom, E1000EState, E1000E_EEPROM_SIZE), + VMSTATE_UINT16_2DARRAY(core.phy, E1000EState, + E1000E_PHY_PAGES, E1000E_PHY_PAGE_SIZE), + VMSTATE_UINT32_ARRAY(core.mac, E1000EState, E1000E_MAC_SIZE), + VMSTATE_UINT8_ARRAY(core.permanent_mac, E1000EState, ETH_ALEN), + + VMSTATE_UINT32(core.delayed_causes, E1000EState), + + VMSTATE_UINT16(subsys, E1000EState), + VMSTATE_UINT16(subsys_ven, E1000EState), + + VMSTATE_E1000E_INTR_DELAY_TIMER(core.rdtr, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.radv, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.raid, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.tadv, E1000EState), + VMSTATE_E1000E_INTR_DELAY_TIMER(core.tidv, E1000EState), + + VMSTATE_E1000E_INTR_DELAY_TIMER(core.itr, E1000EState), + VMSTATE_BOOL(core.itr_intr_pending, E1000EState), + + VMSTATE_E1000E_INTR_DELAY_TIMER_ARRAY(core.eitr, E1000EState, + E1000E_MSIX_VEC_NUM), + VMSTATE_BOOL_ARRAY(core.eitr_intr_pending, E1000EState, + E1000E_MSIX_VEC_NUM), + + VMSTATE_UINT32(core.itr_guest_value, E1000EState), + VMSTATE_UINT32_ARRAY(core.eitr_guest_value, E1000EState, + E1000E_MSIX_VEC_NUM), + + VMSTATE_UINT16(core.vet, E1000EState), + + VMSTATE_STRUCT_ARRAY(core.tx, E1000EState, E1000E_NUM_QUEUES, 0, + e1000e_vmstate_tx, struct e1000e_tx), + VMSTATE_END_OF_LIST() + } +}; + +static PropertyInfo e1000e_prop_disable_vnet, + e1000e_prop_subsys_ven, + e1000e_prop_subsys; + +static Property e1000e_properties[] = { + DEFINE_NIC_PROPERTIES(E1000EState, conf), + DEFINE_PROP_DEFAULT("disable_vnet_hdr", E1000EState, disable_vnet, false, + e1000e_prop_disable_vnet, bool), + DEFINE_PROP_DEFAULT("subsys_ven", E1000EState, subsys_ven, + PCI_VENDOR_ID_INTEL, + e1000e_prop_subsys_ven, uint16_t), + DEFINE_PROP_DEFAULT("subsys", E1000EState, subsys, 0, + e1000e_prop_subsys, uint16_t), + DEFINE_PROP_END_OF_LIST(), +}; + +static void e1000e_class_init(ObjectClass *class, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(class); + PCIDeviceClass *c = PCI_DEVICE_CLASS(class); + + c->realize = e1000e_pci_realize; + c->exit = e1000e_pci_uninit; + c->vendor_id = PCI_VENDOR_ID_INTEL; + c->device_id = E1000_DEV_ID_82574L; + c->revision = 0; + c->class_id = PCI_CLASS_NETWORK_ETHERNET; + c->is_express = 1; + + dc->desc = "Intel 82574L GbE Controller"; + dc->reset = e1000e_qdev_reset; + dc->vmsd = &e1000e_vmstate; + dc->props = e1000e_properties; + + e1000e_prop_disable_vnet = qdev_prop_uint8; + e1000e_prop_disable_vnet.description = "Do not use virtio headers, " + "perform SW offloads emulation " + "instead"; + + e1000e_prop_subsys_ven = qdev_prop_uint16; + e1000e_prop_subsys_ven.description = "PCI device Subsystem Vendor ID"; + + e1000e_prop_subsys = qdev_prop_uint16; + e1000e_prop_subsys.description = "PCI device Subsystem ID"; + + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); +} + +static void e1000e_instance_init(Object *obj) +{ + E1000EState *s = E1000E(obj); + device_add_bootindex_property(obj, &s->conf.bootindex, + "bootindex", "/ethernet-phy@0", + DEVICE(obj), NULL); +} + +static const TypeInfo e1000e_info = { + .name = TYPE_E1000E, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(E1000EState), + .class_init = e1000e_class_init, + .instance_init = e1000e_instance_init, +}; + +static void e1000e_register_types(void) +{ + type_register_static(&e1000e_info); +} + +type_init(e1000e_register_types) diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c new file mode 100644 index 0000000..6a44ea1 --- /dev/null +++ b/hw/net/e1000e_core.c @@ -0,0 +1,3476 @@ +/* +* Core code for QEMU e1000e emulation +* +* Software developer's manuals: +* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf +* +* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) +* Developed by Daynix Computing LTD (http://www.daynix.com) +* +* Authors: +* Dmitry Fleytman +* Leonid Bloch +* Yan Vugenfirer +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2008 Qumranet +* Based on work done by: +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see . +*/ + +#include "qemu/osdep.h" +#include "sysemu/sysemu.h" +#include "net/net.h" +#include "net/tap.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" + +#include "net_tx_pkt.h" +#include "net_rx_pkt.h" + +#include "e1000x_common.h" +#include "e1000e_core.h" + +#include "trace.h" + +#define E1000E_MIN_XITR (500) /* No more then 7813 interrupts per + second according to spec 10.2.4.2 */ +#define E1000E_MAX_TX_FRAGS (64) + +static void +e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val); + +static inline void +e1000e_process_ts_option(E1000ECore *core, struct e1000_tx_desc *dp) +{ + if (le32_to_cpu(dp->upper.data) & E1000_TXD_EXTCMD_TSTAMP) { + trace_e1000e_wrn_no_ts_support(); + } +} + +static inline void +e1000e_process_snap_option(E1000ECore *core, uint32_t cmd_and_length) +{ + if (cmd_and_length & E1000_TXD_CMD_SNAP) { + trace_e1000e_wrn_no_snap_support(); + } +} + +static inline void +e1000e_raise_legacy_irq(E1000ECore *core) +{ + trace_e1000e_irq_legacy_notify(true); + e1000x_inc_reg_if_not_full(core->mac, IAC); + pci_set_irq(core->owner, 1); +} + +static inline void +e1000e_lower_legacy_irq(E1000ECore *core) +{ + trace_e1000e_irq_legacy_notify(false); + pci_set_irq(core->owner, 0); +} + +static inline void +e1000e_intrmgr_rearm_timer(E1000IntrDelayTimer *timer) +{ + int64_t delay_ns = (int64_t) timer->core->mac[timer->delay_reg] * + timer->delay_resolution_ns; + + trace_e1000e_irq_rearm_timer(timer->delay_reg << 2, delay_ns); + + timer_mod(timer->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + delay_ns); + + timer->running = true; +} + +static void +e1000e_intmgr_timer_resume(E1000IntrDelayTimer *timer) +{ + if (timer->running) { + e1000e_intrmgr_rearm_timer(timer); + } +} + +static void +e1000e_intmgr_timer_pause(E1000IntrDelayTimer *timer) +{ + if (timer->running) { + timer_del(timer->timer); + } +} + +static inline void +e1000e_intrmgr_stop_timer(E1000IntrDelayTimer *timer) +{ + if (timer->running) { + timer_del(timer->timer); + timer->running = false; + } +} + +static inline void +e1000e_intrmgr_fire_delayed_interrupts(E1000ECore *core) +{ + trace_e1000e_irq_fire_delayed_interrupts(); + e1000e_set_interrupt_cause(core, 0); +} + +static void +e1000e_intrmgr_on_timer(void *opaque) +{ + E1000IntrDelayTimer *timer = opaque; + + trace_e1000e_irq_throttling_timer(timer->delay_reg << 2); + + timer->running = false; + e1000e_intrmgr_fire_delayed_interrupts(timer->core); +} + +static void +e1000e_intrmgr_on_throttling_timer(void *opaque) +{ + E1000IntrDelayTimer *timer = opaque; + + assert(!msix_enabled(timer->core->owner)); + + timer->running = false; + + if (!timer->core->itr_intr_pending) { + trace_e1000e_irq_throttling_no_pending_interrupts(); + return; + } + + if (msi_enabled(timer->core->owner)) { + trace_e1000e_irq_msi_notify_postponed(); + e1000e_set_interrupt_cause(timer->core, 0); + } else { + trace_e1000e_irq_legacy_notify_postponed(); + e1000e_set_interrupt_cause(timer->core, 0); + } +} + +static void +e1000e_intrmgr_on_msix_throttling_timer(void *opaque) +{ + E1000IntrDelayTimer *timer = opaque; + int idx = timer - &timer->core->eitr[0]; + + assert(msix_enabled(timer->core->owner)); + + timer->running = false; + + if (!timer->core->eitr_intr_pending[idx]) { + trace_e1000e_irq_throttling_no_pending_vec(idx); + return; + } + + trace_e1000e_irq_msix_notify_postponed_vec(idx); + msix_notify(timer->core->owner, idx); +} + +static void +e1000e_intrmgr_initialize_all_timers(E1000ECore *core, bool create) +{ + int i; + + core->radv.delay_reg = RADV; + core->rdtr.delay_reg = RDTR; + core->raid.delay_reg = RAID; + core->tadv.delay_reg = TADV; + core->tidv.delay_reg = TIDV; + + core->radv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->rdtr.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->raid.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->tadv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + core->tidv.delay_resolution_ns = E1000_INTR_DELAY_NS_RES; + + core->radv.core = core; + core->rdtr.core = core; + core->raid.core = core; + core->tadv.core = core; + core->tidv.core = core; + + core->itr.core = core; + core->itr.delay_reg = ITR; + core->itr.delay_resolution_ns = E1000_INTR_THROTTLING_NS_RES; + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + core->eitr[i].core = core; + core->eitr[i].delay_reg = EITR + i; + core->eitr[i].delay_resolution_ns = E1000_INTR_THROTTLING_NS_RES; + } + + if (!create) { + return; + } + + core->radv.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->radv); + core->rdtr.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->rdtr); + core->raid.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->raid); + + core->tadv.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->tadv); + core->tidv.timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, e1000e_intrmgr_on_timer, &core->tidv); + + core->itr.timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + e1000e_intrmgr_on_throttling_timer, + &core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + core->eitr[i].timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, + e1000e_intrmgr_on_msix_throttling_timer, + &core->eitr[i]); + } +} + +static inline void +e1000e_intrmgr_stop_delay_timers(E1000ECore *core) +{ + e1000e_intrmgr_stop_timer(&core->radv); + e1000e_intrmgr_stop_timer(&core->rdtr); + e1000e_intrmgr_stop_timer(&core->raid); + e1000e_intrmgr_stop_timer(&core->tidv); + e1000e_intrmgr_stop_timer(&core->tadv); +} + +static bool +e1000e_intrmgr_delay_rx_causes(E1000ECore *core, uint32_t *causes) +{ + uint32_t delayable_causes; + uint32_t rdtr = core->mac[RDTR]; + uint32_t radv = core->mac[RADV]; + uint32_t raid = core->mac[RAID]; + + if (msix_enabled(core->owner)) { + return false; + } + + delayable_causes = E1000_ICR_RXQ0 | + E1000_ICR_RXQ1 | + E1000_ICR_RXT0; + + if (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS)) { + delayable_causes |= E1000_ICR_ACK; + } + + /* Clean up all causes that may be delayed */ + core->delayed_causes |= *causes & delayable_causes; + *causes &= ~delayable_causes; + + /* Check if delayed RX interrupts disabled by client + or if there are causes that cannot be delayed */ + if ((rdtr == 0) || (causes != 0)) { + return false; + } + + /* Check if delayed RX ACK interrupts disabled by client + and there is an ACK packet received */ + if ((raid == 0) && (core->delayed_causes & E1000_ICR_ACK)) { + return false; + } + + /* All causes delayed */ + e1000e_intrmgr_rearm_timer(&core->rdtr); + + if (!core->radv.running && (radv != 0)) { + e1000e_intrmgr_rearm_timer(&core->radv); + } + + if (!core->raid.running && (core->delayed_causes & E1000_ICR_ACK)) { + e1000e_intrmgr_rearm_timer(&core->raid); + } + + return true; +} + +static bool +e1000e_intrmgr_delay_tx_causes(E1000ECore *core, uint32_t *causes) +{ + static const uint32_t delayable_causes = E1000_ICR_TXQ0 | + E1000_ICR_TXQ1 | + E1000_ICR_TXQE | + E1000_ICR_TXDW; + + if (msix_enabled(core->owner)) { + return false; + } + + /* Clean up all causes that may be delayed */ + core->delayed_causes |= *causes & delayable_causes; + *causes &= ~delayable_causes; + + /* If there are causes that cannot be delayed */ + if (causes != 0) { + return false; + } + + /* All causes delayed */ + e1000e_intrmgr_rearm_timer(&core->tidv); + + if (!core->tadv.running && (core->mac[TADV] != 0)) { + e1000e_intrmgr_rearm_timer(&core->tadv); + } + + return true; +} + +static uint32_t +e1000e_intmgr_collect_delayed_causes(E1000ECore *core) +{ + uint32_t res; + + if (msix_enabled(core->owner)) { + assert(core->delayed_causes == 0); + return 0; + } + + res = core->delayed_causes; + core->delayed_causes = 0; + + e1000e_intrmgr_stop_delay_timers(core); + + return res; +} + +static void +e1000e_intrmgr_fire_all_timers(E1000ECore *core) +{ + int i; + uint32_t val = e1000e_intmgr_collect_delayed_causes(core); + + trace_e1000e_irq_adding_delayed_causes(val, core->mac[ICR]); + core->mac[ICR] |= val; + + if (core->itr.running) { + timer_del(core->itr.timer); + e1000e_intrmgr_on_throttling_timer(&core->itr); + } + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + if (core->eitr[i].running) { + timer_del(core->eitr[i].timer); + e1000e_intrmgr_on_msix_throttling_timer(&core->eitr[i]); + } + } +} + +static void +e1000e_intrmgr_resume(E1000ECore *core) +{ + int i; + + e1000e_intmgr_timer_resume(&core->radv); + e1000e_intmgr_timer_resume(&core->rdtr); + e1000e_intmgr_timer_resume(&core->raid); + e1000e_intmgr_timer_resume(&core->tidv); + e1000e_intmgr_timer_resume(&core->tadv); + + e1000e_intmgr_timer_resume(&core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + e1000e_intmgr_timer_resume(&core->eitr[i]); + } +} + +static void +e1000e_intrmgr_pause(E1000ECore *core) +{ + int i; + + e1000e_intmgr_timer_pause(&core->radv); + e1000e_intmgr_timer_pause(&core->rdtr); + e1000e_intmgr_timer_pause(&core->raid); + e1000e_intmgr_timer_pause(&core->tidv); + e1000e_intmgr_timer_pause(&core->tadv); + + e1000e_intmgr_timer_pause(&core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + e1000e_intmgr_timer_pause(&core->eitr[i]); + } +} + +static void +e1000e_intrmgr_reset(E1000ECore *core) +{ + int i; + + core->delayed_causes = 0; + + e1000e_intrmgr_stop_delay_timers(core); + + e1000e_intrmgr_stop_timer(&core->itr); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + e1000e_intrmgr_stop_timer(&core->eitr[i]); + } +} + +static void +e1000e_intrmgr_pci_unint(E1000ECore *core) +{ + int i; + + timer_del(core->radv.timer); + timer_free(core->radv.timer); + timer_del(core->rdtr.timer); + timer_free(core->rdtr.timer); + timer_del(core->raid.timer); + timer_free(core->raid.timer); + + timer_del(core->tadv.timer); + timer_free(core->tadv.timer); + timer_del(core->tidv.timer); + timer_free(core->tidv.timer); + + timer_del(core->itr.timer); + timer_free(core->itr.timer); + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + timer_del(core->eitr[i].timer); + timer_free(core->eitr[i].timer); + } +} + +static void +e1000e_intrmgr_pci_realize(E1000ECore *core) +{ + e1000e_intrmgr_initialize_all_timers(core, true); +} + +static inline bool +e1000e_rx_csum_enabled(E1000ECore *core) +{ + return (core->mac[RXCSUM] & E1000_RXCSUM_PCSD) ? false : true; +} + +static inline bool +e1000e_rx_use_legacy_descriptor(E1000ECore *core) +{ + return (core->mac[RFCTL] & E1000_RFCTL_EXTEN) ? false : true; +} + +static inline bool +e1000e_rx_use_ps_descriptor(E1000ECore *core) +{ + return !e1000e_rx_use_legacy_descriptor(core) && + (core->mac[RCTL] & E1000_RCTL_DTYP_PS); +} + +static inline bool +e1000e_rss_enabled(E1000ECore *core) +{ + return E1000_MRQC_ENABLED(core->mac[MRQC]) && + !e1000e_rx_csum_enabled(core) && + !e1000e_rx_use_legacy_descriptor(core); +} + +typedef struct E1000E_RSSInfo_st { + bool enabled; + uint32_t hash; + uint32_t queue; + uint32_t type; +} E1000E_RSSInfo; + +static uint32_t +e1000e_rss_get_hash_type(E1000ECore *core, struct NetRxPkt *pkt) +{ + bool isip4, isip6, isudp, istcp; + + assert(e1000e_rss_enabled(core)); + + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + + if (isip4) { + bool fragment = net_rx_pkt_get_ip4_info(pkt)->fragment; + + trace_e1000e_rx_rss_ip4(fragment, istcp, core->mac[MRQC], + E1000_MRQC_EN_TCPIPV4(core->mac[MRQC]), + E1000_MRQC_EN_IPV4(core->mac[MRQC])); + + if (!fragment && istcp && E1000_MRQC_EN_TCPIPV4(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV4TCP; + } + + if (E1000_MRQC_EN_IPV4(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV4; + } + } else if (isip6) { + eth_ip6_hdr_info *ip6info = net_rx_pkt_get_ip6_info(pkt); + + bool ex_dis = core->mac[RFCTL] & E1000_RFCTL_IPV6_EX_DIS; + bool new_ex_dis = core->mac[RFCTL] & E1000_RFCTL_NEW_IPV6_EXT_DIS; + + trace_e1000e_rx_rss_ip6(core->mac[RFCTL], + ex_dis, new_ex_dis, istcp, + ip6info->has_ext_hdrs, + ip6info->rss_ex_dst_valid, + ip6info->rss_ex_src_valid, + core->mac[MRQC], + E1000_MRQC_EN_TCPIPV6(core->mac[MRQC]), + E1000_MRQC_EN_IPV6EX(core->mac[MRQC]), + E1000_MRQC_EN_IPV6(core->mac[MRQC])); + + if ((!ex_dis || !ip6info->has_ext_hdrs) && + (!new_ex_dis || !(ip6info->rss_ex_dst_valid || + ip6info->rss_ex_src_valid))) { + + if (istcp && !ip6info->fragment && + E1000_MRQC_EN_TCPIPV6(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV6TCP; + } + + if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV6EX; + } + + } + + if (E1000_MRQC_EN_IPV6(core->mac[MRQC])) { + return E1000_MRQ_RSS_TYPE_IPV6; + } + + } + + return E1000_MRQ_RSS_TYPE_NONE; +} + +static uint32_t +e1000e_rss_calc_hash(E1000ECore *core, + struct NetRxPkt *pkt, + E1000E_RSSInfo *info) +{ + NetRxPktRssType type; + + assert(e1000e_rss_enabled(core)); + + switch (info->type) { + case E1000_MRQ_RSS_TYPE_IPV4: + type = NetPktRssIpV4; + break; + case E1000_MRQ_RSS_TYPE_IPV4TCP: + type = NetPktRssIpV4Tcp; + break; + case E1000_MRQ_RSS_TYPE_IPV6TCP: + type = NetPktRssIpV6Tcp; + break; + case E1000_MRQ_RSS_TYPE_IPV6: + type = NetPktRssIpV6; + break; + case E1000_MRQ_RSS_TYPE_IPV6EX: + type = NetPktRssIpV6Ex; + break; + default: + assert(false); + return 0; + } + + return net_rx_pkt_calc_rss_hash(pkt, type, (uint8_t *) &core->mac[RSSRK]); +} + +static void +e1000e_rss_parse_packet(E1000ECore *core, + struct NetRxPkt *pkt, + E1000E_RSSInfo *info) +{ + trace_e1000e_rx_rss_started(); + + if (!e1000e_rss_enabled(core)) { + info->enabled = false; + info->hash = 0; + info->queue = 0; + info->type = 0; + trace_e1000e_rx_rss_disabled(); + return; + } + + info->enabled = true; + + info->type = e1000e_rss_get_hash_type(core, pkt); + + trace_e1000e_rx_rss_type(info->type); + + if (info->type == E1000_MRQ_RSS_TYPE_NONE) { + info->hash = 0; + info->queue = 0; + return; + } + + info->hash = e1000e_rss_calc_hash(core, pkt, info); + info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash); +} + +static void +e1000e_setup_tx_offloads(E1000ECore *core, struct e1000e_tx *tx) +{ + if (tx->props.tse && tx->props.cptse) { + net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->props.mss); + net_tx_pkt_update_ip_checksums(tx->tx_pkt); + e1000x_inc_reg_if_not_full(core->mac, TSCTC); + return; + } + + if (tx->props.sum_needed & E1000_TXD_POPTS_TXSM) { + net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0); + } + + if (tx->props.sum_needed & E1000_TXD_POPTS_IXSM) { + net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt); + } +} + +static bool +e1000e_tx_pkt_send(E1000ECore *core, struct e1000e_tx *tx, int queue_index) +{ + int target_queue = MIN(core->max_queue_num, queue_index); + NetClientState *queue = qemu_get_subqueue(core->owner_nic, target_queue); + + e1000e_setup_tx_offloads(core, tx); + + net_tx_pkt_dump(tx->tx_pkt); + + if ((core->phy[0][PHY_CTRL] & MII_CR_LOOPBACK) || + ((core->mac[RCTL] & E1000_RCTL_LBM_MAC) == E1000_RCTL_LBM_MAC)) { + return net_tx_pkt_send_loopback(tx->tx_pkt, queue); + } else { + return net_tx_pkt_send(tx->tx_pkt, queue); + } +} + +static void +e1000e_on_tx_done_update_stats(E1000ECore *core, struct NetTxPkt *tx_pkt) +{ + static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511, + PTC1023, PTC1522 }; + + size_t tot_len = net_tx_pkt_get_total_len(tx_pkt); + + e1000x_increase_size_stats(core->mac, PTCregs, tot_len); + e1000x_inc_reg_if_not_full(core->mac, TPT); + e1000x_grow_8reg_if_not_full(core->mac, TOTL, tot_len); + + switch (net_tx_pkt_get_packet_type(tx_pkt)) { + case ETH_PKT_BCAST: + e1000x_inc_reg_if_not_full(core->mac, BPTC); + break; + case ETH_PKT_MCAST: + e1000x_inc_reg_if_not_full(core->mac, MPTC); + break; + case ETH_PKT_UCAST: + break; + default: + g_assert_not_reached(); + } + + core->mac[GPTC] = core->mac[TPT]; + core->mac[GOTCL] = core->mac[TOTL]; + core->mac[GOTCH] = core->mac[TOTH]; +} + +static void +e1000e_process_tx_desc(E1000ECore *core, + struct e1000e_tx *tx, + struct e1000_tx_desc *dp, + int queue_index) +{ + uint32_t txd_lower = le32_to_cpu(dp->lower.data); + uint32_t dtype = txd_lower & (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D); + unsigned int split_size = txd_lower & 0xffff; + uint64_t addr; + struct e1000_context_desc *xp = (struct e1000_context_desc *)dp; + bool eop = txd_lower & E1000_TXD_CMD_EOP; + + if (dtype == E1000_TXD_CMD_DEXT) { /* context descriptor */ + e1000x_read_tx_ctx_descr(xp, &tx->props); + e1000e_process_snap_option(core, le32_to_cpu(xp->cmd_and_length)); + return; + } else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) { + /* data descriptor */ + tx->props.sum_needed = le32_to_cpu(dp->upper.data) >> 8; + tx->props.cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0; + e1000e_process_ts_option(core, dp); + } else { + /* legacy descriptor */ + e1000e_process_ts_option(core, dp); + tx->props.cptse = 0; + } + + addr = le64_to_cpu(dp->buffer_addr); + + if (!tx->skip_cp) { + if (!net_tx_pkt_add_raw_fragment(tx->tx_pkt, addr, split_size)) { + tx->skip_cp = true; + } + } + + if (eop) { + if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) { + if (e1000x_vlan_enabled(core->mac) && + e1000x_is_vlan_txd(txd_lower)) { + net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, + le16_to_cpu(dp->upper.fields.special), core->vet); + } + if (e1000e_tx_pkt_send(core, tx, queue_index)) { + e1000e_on_tx_done_update_stats(core, tx->tx_pkt); + } + } + + tx->skip_cp = false; + net_tx_pkt_reset(tx->tx_pkt); + + tx->props.sum_needed = 0; + tx->props.cptse = 0; + } +} + +static inline uint32_t +e1000e_tx_wb_interrupt_cause(E1000ECore *core, int queue_idx) +{ + if (!msix_enabled(core->owner)) { + return E1000_ICR_TXDW; + } + + return (queue_idx == 0) ? E1000_ICR_TXQ0 : E1000_ICR_TXQ1; +} + +static inline uint32_t +e1000e_rx_wb_interrupt_cause(E1000ECore *core, int queue_idx, + bool min_threshold_hit) +{ + if (!msix_enabled(core->owner)) { + return E1000_ICS_RXT0 | (min_threshold_hit ? E1000_ICS_RXDMT0 : 0); + } + + return (queue_idx == 0) ? E1000_ICR_RXQ0 : E1000_ICR_RXQ1; +} + +static uint32_t +e1000e_txdesc_writeback(E1000ECore *core, dma_addr_t base, + struct e1000_tx_desc *dp, bool *ide, int queue_idx) +{ + uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data); + + if (!(txd_lower & E1000_TXD_CMD_RS) && + !(core->mac[IVAR] & E1000_IVAR_TX_INT_EVERY_WB)) { + return 0; + } + + *ide = (txd_lower & E1000_TXD_CMD_IDE) ? true : false; + + txd_upper = le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD; + + dp->upper.data = cpu_to_le32(txd_upper); + pci_dma_write(core->owner, base + ((char *)&dp->upper - (char *)dp), + &dp->upper, sizeof(dp->upper)); + return e1000e_tx_wb_interrupt_cause(core, queue_idx); +} + +typedef struct E1000E_RingInfo_st { + int dbah; + int dbal; + int dlen; + int dh; + int dt; + int idx; +} E1000E_RingInfo; + +static inline bool +e1000e_ring_empty(E1000ECore *core, const E1000E_RingInfo *r) +{ + return core->mac[r->dh] == core->mac[r->dt]; +} + +static inline uint64_t +e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r) +{ + uint64_t bah = core->mac[r->dbah]; + uint64_t bal = core->mac[r->dbal]; + + return (bah << 32) + bal; +} + +static inline uint64_t +e1000e_ring_head_descr(E1000ECore *core, const E1000E_RingInfo *r) +{ + return e1000e_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh]; +} + +static inline void +e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count) +{ + core->mac[r->dh] += count; + + if (core->mac[r->dh] * E1000_RING_DESC_LEN >= core->mac[r->dlen]) { + core->mac[r->dh] = 0; + } +} + +static inline uint32_t +e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r) +{ + trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen], + core->mac[r->dh], core->mac[r->dt]); + + if (core->mac[r->dh] <= core->mac[r->dt]) { + return core->mac[r->dt] - core->mac[r->dh]; + } + + if (core->mac[r->dh] > core->mac[r->dt]) { + return core->mac[r->dlen] / E1000_RING_DESC_LEN + + core->mac[r->dt] - core->mac[r->dh]; + } + + g_assert_not_reached(); + return 0; +} + +static inline bool +e1000e_ring_enabled(E1000ECore *core, const E1000E_RingInfo *r) +{ + return core->mac[r->dlen] > 0; +} + +static inline uint32_t +e1000e_ring_len(E1000ECore *core, const E1000E_RingInfo *r) +{ + return core->mac[r->dlen]; +} + +typedef struct E1000E_TxRing_st { + const E1000E_RingInfo *i; + struct e1000e_tx *tx; +} E1000E_TxRing; + +static inline int +e1000e_mq_queue_idx(int base_reg_idx, int reg_idx) +{ + return (reg_idx - base_reg_idx) / (0x100 >> 2); +} + +static inline void +e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx) +{ + static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = { + { TDBAH, TDBAL, TDLEN, TDH, TDT, 0 }, + { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 } + }; + + assert(idx < ARRAY_SIZE(i)); + + txr->i = &i[idx]; + txr->tx = &core->tx[idx]; +} + +typedef struct E1000E_RxRing_st { + const E1000E_RingInfo *i; +} E1000E_RxRing; + +static inline void +e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx) +{ + static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = { + { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 }, + { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 } + }; + + assert(idx < ARRAY_SIZE(i)); + + rxr->i = &i[idx]; +} + +static void +e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr) +{ + dma_addr_t base; + struct e1000_tx_desc desc; + bool ide = false; + const E1000E_RingInfo *txi = txr->i; + uint32_t cause = E1000_ICS_TXQE; + + if (!(core->mac[TCTL] & E1000_TCTL_EN)) { + trace_e1000e_tx_disabled(); + return; + } + + while (!e1000e_ring_empty(core, txi)) { + base = e1000e_ring_head_descr(core, txi); + + pci_dma_read(core->owner, base, &desc, sizeof(desc)); + + trace_e1000e_tx_descr((void *)(intptr_t)desc.buffer_addr, + desc.lower.data, desc.upper.data); + + e1000e_process_tx_desc(core, txr->tx, &desc, txi->idx); + cause |= e1000e_txdesc_writeback(core, base, &desc, &ide, txi->idx); + + e1000e_ring_advance(core, txi, 1); + } + + if (!ide || !e1000e_intrmgr_delay_tx_causes(core, &cause)) { + e1000e_set_interrupt_cause(core, cause); + } +} + +static bool +e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r, + size_t total_size) +{ + uint32_t bufs = e1000e_ring_free_descr_num(core, r); + + trace_e1000e_rx_has_buffers(r->idx, bufs, total_size, + core->rx_desc_buf_size); + + return total_size <= bufs / (core->rx_desc_len / E1000_MIN_RX_DESC_LEN) * + core->rx_desc_buf_size; +} + +static inline void +e1000e_start_recv(E1000ECore *core) +{ + int i; + + trace_e1000e_rx_start_recv(); + + for (i = 0; i <= core->max_queue_num; i++) { + qemu_flush_queued_packets(qemu_get_subqueue(core->owner_nic, i)); + } +} + +int +e1000e_can_receive(E1000ECore *core) +{ + int i; + + if (!e1000x_rx_ready(core->owner, core->mac)) { + return false; + } + + for (i = 0; i < E1000E_NUM_QUEUES; i++) { + E1000E_RxRing rxr; + + e1000e_rx_ring_init(core, &rxr, i); + if (e1000e_ring_enabled(core, rxr.i) && + e1000e_has_rxbufs(core, rxr.i, 1)) { + trace_e1000e_rx_can_recv(); + return true; + } + } + + trace_e1000e_rx_can_recv_rings_full(); + return false; +} + +ssize_t +e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size) +{ + const struct iovec iov = { + .iov_base = (uint8_t *)buf, + .iov_len = size + }; + + return e1000e_receive_iov(core, &iov, 1); +} + +static inline bool +e1000e_rx_l3_cso_enabled(E1000ECore *core) +{ + return !!(core->mac[RXCSUM] & E1000_RXCSUM_IPOFLD); +} + +static inline bool +e1000e_rx_l4_cso_enabled(E1000ECore *core) +{ + return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD); +} + +static bool +e1000e_receive_filter(E1000ECore *core, const uint8_t *buf, int size) +{ + uint32_t rctl = core->mac[RCTL]; + + if (e1000x_is_vlan_packet(buf, core->vet) && + e1000x_vlan_rx_filter_enabled(core->mac)) { + uint16_t vid = be16_to_cpup((uint16_t *)(buf + 14)); + uint32_t vfta = le32_to_cpup((uint32_t *)(core->mac + VFTA) + + ((vid >> 5) & 0x7f)); + if ((vfta & (1 << (vid & 0x1f))) == 0) { + trace_e1000e_rx_flt_vlan_mismatch(vid); + return false; + } else { + trace_e1000e_rx_flt_vlan_match(vid); + } + } + + switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { + case ETH_PKT_UCAST: + if (rctl & E1000_RCTL_UPE) { + return true; /* promiscuous ucast */ + } + break; + + case ETH_PKT_BCAST: + if (rctl & E1000_RCTL_BAM) { + return true; /* broadcast enabled */ + } + break; + + case ETH_PKT_MCAST: + if (rctl & E1000_RCTL_MPE) { + return true; /* promiscuous mcast */ + } + break; + + default: + g_assert_not_reached(); + } + + return e1000x_rx_group_filter(core->mac, buf); +} + +static inline void +e1000e_read_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr) +{ + struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc; + *buff_addr = le64_to_cpu(d->buffer_addr); +} + +static inline void +e1000e_read_ext_rx_descr(E1000ECore *core, uint8_t *desc, hwaddr *buff_addr) +{ + union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc; + *buff_addr = le64_to_cpu(d->read.buffer_addr); +} + +static inline void +e1000e_read_ps_rx_descr(E1000ECore *core, uint8_t *desc, + hwaddr (*buff_addr)[MAX_PS_BUFFERS]) +{ + int i; + union e1000_rx_desc_packet_split *d = + (union e1000_rx_desc_packet_split *) desc; + + for (i = 0; i < MAX_PS_BUFFERS; i++) { + (*buff_addr)[i] = le64_to_cpu(d->read.buffer_addr[i]); + } + + trace_e1000e_rx_desc_ps_read((*buff_addr)[0], (*buff_addr)[1], + (*buff_addr)[2], (*buff_addr)[3]); +} + +static inline void +e1000e_read_rx_descr(E1000ECore *core, uint8_t *desc, + hwaddr (*buff_addr)[MAX_PS_BUFFERS]) +{ + if (e1000e_rx_use_legacy_descriptor(core)) { + e1000e_read_lgcy_rx_descr(core, desc, &(*buff_addr)[0]); + (*buff_addr)[1] = (*buff_addr)[2] = (*buff_addr)[3] = 0; + } else { + if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) { + e1000e_read_ps_rx_descr(core, desc, buff_addr); + } else { + e1000e_read_ext_rx_descr(core, desc, &(*buff_addr)[0]); + (*buff_addr)[1] = (*buff_addr)[2] = (*buff_addr)[3] = 0; + } + } +} + +static void +e1000e_verify_csum_in_sw(E1000ECore *core, + struct NetRxPkt *pkt, + uint32_t *status_flags, + bool istcp, bool isudp) +{ + bool csum_valid; + uint32_t csum_error; + + if (e1000e_rx_l3_cso_enabled(core)) { + if (!net_rx_pkt_validate_l3_csum(pkt, &csum_valid)) { + trace_e1000e_rx_metadata_l3_csum_validation_failed(); + } else { + csum_error = csum_valid ? 0 : E1000_RXDEXT_STATERR_IPE; + *status_flags |= E1000_RXD_STAT_IPCS | csum_error; + } + } else { + trace_e1000e_rx_metadata_l3_cso_disabled(); + } + + if (!e1000e_rx_l4_cso_enabled(core)) { + trace_e1000e_rx_metadata_l4_cso_disabled(); + return; + } + + if (!net_rx_pkt_validate_l4_csum(pkt, &csum_valid)) { + trace_e1000e_rx_metadata_l4_csum_validation_failed(); + return; + } + + csum_error = csum_valid ? 0 : E1000_RXDEXT_STATERR_TCPE; + + if (istcp) { + *status_flags |= E1000_RXD_STAT_TCPCS | + csum_error; + } else if (isudp) { + *status_flags |= E1000_RXD_STAT_TCPCS | + E1000_RXD_STAT_UDPCS | + csum_error; + } +} + +static inline bool +e1000e_is_tcp_ack(E1000ECore *core, struct NetRxPkt *rx_pkt) +{ + if (!net_rx_pkt_is_tcp_ack(rx_pkt)) { + return false; + } + + if (core->mac[RFCTL] & E1000_RFCTL_ACK_DATA_DIS) { + return !net_rx_pkt_has_tcp_data(rx_pkt); + } + + return true; +} + +static void +e1000e_build_rx_metadata(E1000ECore *core, + struct NetRxPkt *pkt, + bool is_eop, + const E1000E_RSSInfo *rss_info, + uint32_t *rss, uint32_t *mrq, + uint32_t *status_flags, + uint16_t *ip_id, + uint16_t *vlan_tag) +{ + struct virtio_net_hdr *vhdr; + bool isip4, isip6, istcp, isudp; + uint32_t pkt_type; + + *status_flags = E1000_RXD_STAT_DD; + + /* No additional metadata needed for non-EOP descriptors */ + if (!is_eop) { + goto func_exit; + } + + *status_flags |= E1000_RXD_STAT_EOP; + + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + trace_e1000e_rx_metadata_protocols(isip4, isip6, isudp, istcp); + + /* VLAN state */ + if (net_rx_pkt_is_vlan_stripped(pkt)) { + *status_flags |= E1000_RXD_STAT_VP; + *vlan_tag = cpu_to_le16(net_rx_pkt_get_vlan_tag(pkt)); + trace_e1000e_rx_metadata_vlan(*vlan_tag); + } + + /* Packet parsing results */ + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { + if (rss_info->enabled) { + *rss = cpu_to_le32(rss_info->hash); + *mrq = cpu_to_le32(rss_info->type | (rss_info->queue << 8)); + trace_e1000e_rx_metadata_rss(*rss, *mrq); + } + } else if (isip4) { + *status_flags |= E1000_RXD_STAT_IPIDV; + *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); + trace_e1000e_rx_metadata_ip_id(*ip_id); + } + + if (istcp && e1000e_is_tcp_ack(core, pkt)) { + *status_flags |= E1000_RXD_STAT_ACK; + trace_e1000e_rx_metadata_ack(); + } + + if (isip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { + trace_e1000e_rx_metadata_ipv6_filtering_disabled(); + pkt_type = E1000_RXD_PKT_MAC; + } else if (istcp || isudp) { + pkt_type = isip4 ? E1000_RXD_PKT_IP4_XDP : E1000_RXD_PKT_IP6_XDP; + } else if (isip4 || isip6) { + pkt_type = isip4 ? E1000_RXD_PKT_IP4 : E1000_RXD_PKT_IP6; + } else { + pkt_type = E1000_RXD_PKT_MAC; + } + + *status_flags |= E1000_RXD_PKT_TYPE(pkt_type); + trace_e1000e_rx_metadata_pkt_type(pkt_type); + + /* RX CSO information */ + if (isip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { + trace_e1000e_rx_metadata_ipv6_sum_disabled(); + goto func_exit; + } + + if (!net_rx_pkt_has_virt_hdr(pkt)) { + trace_e1000e_rx_metadata_no_virthdr(); + e1000e_verify_csum_in_sw(core, pkt, status_flags, istcp, isudp); + goto func_exit; + } + + vhdr = net_rx_pkt_get_vhdr(pkt); + + if (!(vhdr->flags & VIRTIO_NET_HDR_F_DATA_VALID) && + !(vhdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) { + trace_e1000e_rx_metadata_virthdr_no_csum_info(); + e1000e_verify_csum_in_sw(core, pkt, status_flags, istcp, isudp); + goto func_exit; + } + + if (e1000e_rx_l3_cso_enabled(core)) { + *status_flags |= isip4 ? E1000_RXD_STAT_IPCS : 0; + } else { + trace_e1000e_rx_metadata_l3_cso_disabled(); + } + + if (e1000e_rx_l4_cso_enabled(core)) { + if (istcp) { + *status_flags |= E1000_RXD_STAT_TCPCS; + } else if (isudp) { + *status_flags |= E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS; + } + } else { + trace_e1000e_rx_metadata_l4_cso_disabled(); + } + + trace_e1000e_rx_metadata_status_flags(*status_flags); + +func_exit: + *status_flags = cpu_to_le32(*status_flags); +} + +static inline void +e1000e_write_lgcy_rx_descr(E1000ECore *core, uint8_t *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + uint16_t length) +{ + uint32_t status_flags, rss, mrq; + uint16_t ip_id; + + struct e1000_rx_desc *d = (struct e1000_rx_desc *) desc; + + memset(d, 0, sizeof(*d)); + + assert(!rss_info->enabled); + + d->length = cpu_to_le16(length); + + e1000e_build_rx_metadata(core, pkt, pkt != NULL, + rss_info, + &rss, &mrq, + &status_flags, &ip_id, + &d->special); + d->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); + d->status = (uint8_t) le32_to_cpu(status_flags); +} + +static inline void +e1000e_write_ext_rx_descr(E1000ECore *core, uint8_t *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + uint16_t length) +{ + union e1000_rx_desc_extended *d = (union e1000_rx_desc_extended *) desc; + + memset(d, 0, sizeof(*d)); + + d->wb.upper.length = cpu_to_le16(length); + + e1000e_build_rx_metadata(core, pkt, pkt != NULL, + rss_info, + &d->wb.lower.hi_dword.rss, + &d->wb.lower.mrq, + &d->wb.upper.status_error, + &d->wb.lower.hi_dword.csum_ip.ip_id, + &d->wb.upper.vlan); +} + +static inline void +e1000e_write_ps_rx_descr(E1000ECore *core, uint8_t *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + size_t ps_hdr_len, + uint16_t(*written)[MAX_PS_BUFFERS]) +{ + int i; + union e1000_rx_desc_packet_split *d = + (union e1000_rx_desc_packet_split *) desc; + + memset(d, 0, sizeof(*d)); + + d->wb.middle.length0 = cpu_to_le16((*written)[0]); + + for (i = 0; i < PS_PAGE_BUFFERS; i++) { + d->wb.upper.length[i] = cpu_to_le16((*written)[i + 1]); + } + + e1000e_build_rx_metadata(core, pkt, pkt != NULL, + rss_info, + &d->wb.lower.hi_dword.rss, + &d->wb.lower.mrq, + &d->wb.middle.status_error, + &d->wb.lower.hi_dword.csum_ip.ip_id, + &d->wb.middle.vlan); + + d->wb.upper.header_status = + cpu_to_le16(ps_hdr_len | (ps_hdr_len ? E1000_RXDPS_HDRSTAT_HDRSP : 0)); + + trace_e1000e_rx_desc_ps_write((*written)[0], (*written)[1], + (*written)[2], (*written)[3]); +} + +static inline void +e1000e_write_rx_descr(E1000ECore *core, uint8_t *desc, +struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, + size_t ps_hdr_len, uint16_t(*written)[MAX_PS_BUFFERS]) +{ + if (e1000e_rx_use_legacy_descriptor(core)) { + assert(ps_hdr_len == 0); + e1000e_write_lgcy_rx_descr(core, desc, pkt, rss_info, (*written)[0]); + } else { + if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) { + e1000e_write_ps_rx_descr(core, desc, pkt, rss_info, + ps_hdr_len, written); + } else { + assert(ps_hdr_len == 0); + e1000e_write_ext_rx_descr(core, desc, pkt, rss_info, + (*written)[0]); + } + } +} + +typedef struct e1000e_ba_state_st { + uint16_t written[MAX_PS_BUFFERS]; + uint8_t cur_idx; +} e1000e_ba_state; + +static inline void +e1000e_write_hdr_to_rx_buffers(E1000ECore *core, + hwaddr (*ba)[MAX_PS_BUFFERS], + e1000e_ba_state *bastate, + const char *data, + dma_addr_t data_len) +{ + assert(data_len <= core->rxbuf_sizes[0] - bastate->written[0]); + + pci_dma_write(core->owner, (*ba)[0] + bastate->written[0], data, data_len); + bastate->written[0] += data_len; + + bastate->cur_idx = 1; +} + +static void +e1000e_write_to_rx_buffers(E1000ECore *core, + hwaddr (*ba)[MAX_PS_BUFFERS], + e1000e_ba_state *bastate, + const char *data, + dma_addr_t data_len) +{ + while (data_len > 0) { + uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; + uint32_t cur_buf_bytes_left = cur_buf_len - + bastate->written[bastate->cur_idx]; + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); + + trace_e1000e_rx_desc_buff_write(bastate->cur_idx, + (*ba)[bastate->cur_idx], + bastate->written[bastate->cur_idx], + data, + bytes_to_write); + + pci_dma_write(core->owner, + (*ba)[bastate->cur_idx] + bastate->written[bastate->cur_idx], + data, bytes_to_write); + + bastate->written[bastate->cur_idx] += bytes_to_write; + data += bytes_to_write; + data_len -= bytes_to_write; + + if (bastate->written[bastate->cur_idx] == cur_buf_len) { + bastate->cur_idx++; + } + + assert(bastate->cur_idx < MAX_PS_BUFFERS); + } +} + +static void +e1000e_update_rx_stats(E1000ECore *core, + size_t data_size, + size_t data_fcs_size) +{ + e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size); + + switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { + case ETH_PKT_BCAST: + e1000x_inc_reg_if_not_full(core->mac, BPRC); + break; + + case ETH_PKT_MCAST: + e1000x_inc_reg_if_not_full(core->mac, MPRC); + break; + + default: + break; + } +} + +static inline bool +e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000E_RingInfo *rxi) +{ + return e1000e_ring_free_descr_num(core, rxi) == + e1000e_ring_len(core, rxi) >> core->rxbuf_min_shift; +} + +static bool +e1000e_do_ps(E1000ECore *core, struct NetRxPkt *pkt, size_t *hdr_len) +{ + bool isip4, isip6, isudp, istcp; + bool fragment; + + if (!e1000e_rx_use_ps_descriptor(core)) { + return false; + } + + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + + if (isip4) { + fragment = net_rx_pkt_get_ip4_info(pkt)->fragment; + } else if (isip6) { + fragment = net_rx_pkt_get_ip6_info(pkt)->fragment; + } else { + return false; + } + + if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) { + return false; + } + + if (!fragment && (isudp || istcp)) { + *hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt); + } else { + *hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt); + } + + if ((*hdr_len > core->rxbuf_sizes[0]) || + (*hdr_len > net_rx_pkt_get_total_len(pkt))) { + return false; + } + + return true; +} + +static void +e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, + const E1000E_RxRing *rxr, + const E1000E_RSSInfo *rss_info) +{ + PCIDevice *d = core->owner; + dma_addr_t base; + uint8_t desc[E1000_MAX_RX_DESC_LEN]; + size_t desc_size; + size_t desc_offset = 0; + size_t iov_ofs = 0; + + struct iovec *iov = net_rx_pkt_get_iovec(pkt); + size_t size = net_rx_pkt_get_total_len(pkt); + size_t total_size = size + e1000x_fcs_len(core->mac); + const E1000E_RingInfo *rxi; + size_t ps_hdr_len = 0; + bool do_ps = e1000e_do_ps(core, pkt, &ps_hdr_len); + + rxi = rxr->i; + + do { + hwaddr ba[MAX_PS_BUFFERS]; + e1000e_ba_state bastate = { { 0 } }; + bool is_last = false; + bool is_first = true; + + desc_size = total_size - desc_offset; + + if (desc_size > core->rx_desc_buf_size) { + desc_size = core->rx_desc_buf_size; + } + + base = e1000e_ring_head_descr(core, rxi); + + pci_dma_read(d, base, &desc, core->rx_desc_len); + + trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); + + e1000e_read_rx_descr(core, desc, &ba); + + if (ba[0]) { + if (desc_offset < size) { + static const uint32_t fcs_pad; + size_t iov_copy; + size_t copy_size = size - desc_offset; + if (copy_size > core->rx_desc_buf_size) { + copy_size = core->rx_desc_buf_size; + } + + /* For PS mode copy the packet header first */ + if (do_ps) { + if (is_first) { + size_t ps_hdr_copied = 0; + do { + iov_copy = MIN(ps_hdr_len - ps_hdr_copied, + iov->iov_len - iov_ofs); + + e1000e_write_hdr_to_rx_buffers(core, &ba, &bastate, + iov->iov_base, iov_copy); + + copy_size -= iov_copy; + ps_hdr_copied += iov_copy; + + iov_ofs += iov_copy; + if (iov_ofs == iov->iov_len) { + iov++; + iov_ofs = 0; + } + } while (ps_hdr_copied < ps_hdr_len); + + is_first = false; + } else { + /* Leave buffer 0 of each descriptor except first */ + /* empty as per spec 7.1.5.1 */ + e1000e_write_hdr_to_rx_buffers(core, &ba, &bastate, + NULL, 0); + } + } + + /* Copy packet payload */ + while (copy_size) { + iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); + + e1000e_write_to_rx_buffers(core, &ba, &bastate, + iov->iov_base + iov_ofs, iov_copy); + + copy_size -= iov_copy; + iov_ofs += iov_copy; + if (iov_ofs == iov->iov_len) { + iov++; + iov_ofs = 0; + } + } + + if (desc_offset + desc_size >= total_size) { + /* Simulate FCS checksum presence in the last descriptor */ + e1000e_write_to_rx_buffers(core, &ba, &bastate, + (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); + } + } + desc_offset += desc_size; + if (desc_offset >= total_size) { + is_last = true; + } + } else { /* as per intel docs; skip descriptors with null buf addr */ + trace_e1000e_rx_null_descriptor(); + } + + e1000e_write_rx_descr(core, desc, is_last ? core->rx_pkt : NULL, + rss_info, do_ps ? ps_hdr_len : 0, &bastate.written); + pci_dma_write(d, base, &desc, core->rx_desc_len); + + e1000e_ring_advance(core, rxi, + core->rx_desc_len / E1000_MIN_RX_DESC_LEN); + + } while (desc_offset < total_size); + + e1000e_update_rx_stats(core, size, total_size); +} + +static inline void +e1000e_rx_fix_l4_csum(E1000ECore *core, struct NetRxPkt *pkt) +{ + if (net_rx_pkt_has_virt_hdr(pkt)) { + struct virtio_net_hdr *vhdr = net_rx_pkt_get_vhdr(pkt); + + if (vhdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + net_rx_pkt_fix_l4_csum(pkt); + } + } +} + +ssize_t +e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt) +{ + static const int maximum_ethernet_hdr_len = (14 + 4); + /* Min. octets in an ethernet frame sans FCS */ + static const int min_buf_size = 60; + + uint32_t n = 0; + uint8_t min_buf[min_buf_size]; + struct iovec min_iov; + uint8_t *filter_buf; + size_t size, orig_size; + size_t iov_ofs = 0; + E1000E_RxRing rxr; + E1000E_RSSInfo rss_info; + size_t total_size; + ssize_t retval; + bool rdmts_hit; + + trace_e1000e_rx_receive_iov(iovcnt); + + if (!e1000x_hw_rx_enabled(core->mac)) { + return -1; + } + + /* Pull virtio header in */ + if (core->has_vnet) { + net_rx_pkt_set_vhdr_iovec(core->rx_pkt, iov, iovcnt); + iov_ofs = sizeof(struct virtio_net_hdr); + } + + filter_buf = iov->iov_base + iov_ofs; + orig_size = iov_size(iov, iovcnt); + size = orig_size - iov_ofs; + + /* Pad to minimum Ethernet frame length */ + if (size < sizeof(min_buf)) { + iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size); + memset(&min_buf[size], 0, sizeof(min_buf) - size); + e1000x_inc_reg_if_not_full(core->mac, RUC); + min_iov.iov_base = filter_buf = min_buf; + min_iov.iov_len = size = sizeof(min_buf); + iovcnt = 1; + iov = &min_iov; + iov_ofs = 0; + } else if (iov->iov_len < maximum_ethernet_hdr_len) { + /* This is very unlikely, but may happen. */ + iov_to_buf(iov, iovcnt, iov_ofs, min_buf, maximum_ethernet_hdr_len); + filter_buf = min_buf; + } + + /* Discard oversized packets if !LPE and !SBP. */ + if (e1000x_is_oversized(core->mac, size)) { + return orig_size; + } + + net_rx_pkt_set_packet_type(core->rx_pkt, + get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf))); + + if (!e1000e_receive_filter(core, filter_buf, size)) { + trace_e1000e_rx_flt_dropped(); + return orig_size; + } + + net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs, + e1000x_vlan_enabled(core->mac), core->vet); + + e1000e_rss_parse_packet(core, core->rx_pkt, &rss_info); + e1000e_rx_ring_init(core, &rxr, rss_info.queue); + + trace_e1000e_rx_rss_dispatched_to_queue(rxr.i->idx); + + total_size = net_rx_pkt_get_total_len(core->rx_pkt) + + e1000x_fcs_len(core->mac); + + if (e1000e_has_rxbufs(core, rxr.i, total_size)) { + e1000e_rx_fix_l4_csum(core, core->rx_pkt); + + e1000e_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info); + + retval = orig_size; + + /* Perform small receive detection (RSRPD) */ + if (total_size < core->mac[RSRPD]) { + n |= E1000_ICS_SRPD; + } + + /* Perform ACK receive detection */ + if (e1000e_is_tcp_ack(core, core->rx_pkt)) { + n |= E1000_ICS_ACK; + } + + /* Check if receive descriptor minimum threshold hit */ + rdmts_hit = e1000e_rx_descr_threshold_hit(core, rxr.i); + n |= e1000e_rx_wb_interrupt_cause(core, rxr.i->idx, rdmts_hit); + + trace_e1000e_rx_written_to_guest(n); + } else { + n |= E1000_ICS_RXO; + retval = 0; + + trace_e1000e_rx_not_written_to_guest(n); + } + + if (!e1000e_intrmgr_delay_rx_causes(core, &n)) { + trace_e1000e_rx_interrupt_set(n); + e1000e_set_interrupt_cause(core, n); + } else { + trace_e1000e_rx_interrupt_delayed(n); + } + + return retval; +} + +static inline bool +e1000e_have_autoneg(E1000ECore *core) +{ + return core->phy[0][PHY_CTRL] & MII_CR_AUTO_NEG_EN; +} + +static void e1000e_update_flowctl_status(E1000ECore *core) +{ + if (e1000e_have_autoneg(core) && + core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE) { + trace_e1000e_link_autoneg_flowctl(true); + core->mac[CTRL] |= E1000_CTRL_TFCE | E1000_CTRL_RFCE; + } else { + trace_e1000e_link_autoneg_flowctl(false); + } +} + +static inline void +e1000e_link_down(E1000ECore *core) +{ + e1000x_update_regs_on_link_down(core->mac, core->phy[0]); + e1000e_update_flowctl_status(core); +} + +static inline void +e1000e_set_phy_ctrl(E1000ECore *core, int index, uint16_t val) +{ + /* bits 0-5 reserved; MII_CR_[RESTART_AUTO_NEG,RESET] are self clearing */ + core->phy[0][PHY_CTRL] = val & ~(0x3f | + MII_CR_RESET | + MII_CR_RESTART_AUTO_NEG); + + if ((val & MII_CR_RESTART_AUTO_NEG) && + e1000e_have_autoneg(core)) { + e1000x_restart_autoneg(core->mac, core->phy[0], core->autoneg_timer); + } +} + +static void +e1000e_set_phy_oem_bits(E1000ECore *core, int index, uint16_t val) +{ + core->phy[0][PHY_OEM_BITS] = val & ~BIT(10); + + if (val & BIT(10)) { + e1000x_restart_autoneg(core->mac, core->phy[0], core->autoneg_timer); + } +} + +static void +e1000e_set_phy_page(E1000ECore *core, int index, uint16_t val) +{ + core->phy[0][PHY_PAGE] = val & PHY_PAGE_RW_MASK; +} + +void +e1000e_core_set_link_status(E1000ECore *core) +{ + NetClientState *nc = qemu_get_queue(core->owner_nic); + uint32_t old_status = core->mac[STATUS]; + + trace_e1000e_link_status_changed(nc->link_down ? false : true); + + if (nc->link_down) { + e1000x_update_regs_on_link_down(core->mac, core->phy[0]); + } else { + if (e1000e_have_autoneg(core) && + !(core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { + e1000x_restart_autoneg(core->mac, core->phy[0], + core->autoneg_timer); + } else { + e1000x_update_regs_on_link_up(core->mac, core->phy[0]); + } + } + + if (core->mac[STATUS] != old_status) { + e1000e_set_interrupt_cause(core, E1000_ICR_LSC); + } +} + +static void +e1000e_set_ctrl(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_core_ctrl_write(index, val); + + /* RST is self clearing */ + core->mac[CTRL] = val & ~E1000_CTRL_RST; + core->mac[CTRL_DUP] = core->mac[CTRL]; + + trace_e1000e_link_set_params( + !!(val & E1000_CTRL_ASDE), + (val & E1000_CTRL_SPD_SEL) >> E1000_CTRL_SPD_SHIFT, + !!(val & E1000_CTRL_FRCSPD), + !!(val & E1000_CTRL_FRCDPX), + !!(val & E1000_CTRL_RFCE), + !!(val & E1000_CTRL_TFCE)); + + if (val & E1000_CTRL_RST) { + trace_e1000e_core_ctrl_sw_reset(); + e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac); + } + + if (val & E1000_CTRL_PHY_RST) { + trace_e1000e_core_ctrl_phy_reset(); + core->mac[STATUS] |= E1000_STATUS_PHYRA; + } +} + +static void +e1000e_set_rfctl(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_rx_set_rfctl(val); + + if (!(val & E1000_RFCTL_ISCSI_DIS)) { + trace_e1000e_wrn_iscsi_filtering_not_supported(); + } + + if (!(val & E1000_RFCTL_NFSW_DIS)) { + trace_e1000e_wrn_nfsw_filtering_not_supported(); + } + + if (!(val & E1000_RFCTL_NFSR_DIS)) { + trace_e1000e_wrn_nfsr_filtering_not_supported(); + } + + core->mac[RFCTL] = val; +} + +static void +e1000e_calc_per_desc_buf_size(E1000ECore *core) +{ + int i; + core->rx_desc_buf_size = 0; + + for (i = 0; i < ARRAY_SIZE(core->rxbuf_sizes); i++) { + core->rx_desc_buf_size += core->rxbuf_sizes[i]; + } +} + +static void +e1000e_parse_rxbufsize(E1000ECore *core) +{ + uint32_t rctl = core->mac[RCTL]; + + memset(core->rxbuf_sizes, 0, sizeof(core->rxbuf_sizes)); + + if (rctl & E1000_RCTL_DTYP_MASK) { + uint32_t bsize; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE0_MASK; + core->rxbuf_sizes[0] = (bsize >> E1000_PSRCTL_BSIZE0_SHIFT) * 128; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE1_MASK; + core->rxbuf_sizes[1] = (bsize >> E1000_PSRCTL_BSIZE1_SHIFT) * 1024; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE2_MASK; + core->rxbuf_sizes[2] = (bsize >> E1000_PSRCTL_BSIZE2_SHIFT) * 1024; + + bsize = core->mac[PSRCTL] & E1000_PSRCTL_BSIZE3_MASK; + core->rxbuf_sizes[3] = (bsize >> E1000_PSRCTL_BSIZE3_SHIFT) * 1024; + } else if (rctl & E1000_RCTL_FLXBUF_MASK) { + int flxbuf = rctl & E1000_RCTL_FLXBUF_MASK; + core->rxbuf_sizes[0] = (flxbuf >> E1000_RCTL_FLXBUF_SHIFT) * 1024; + } else { + core->rxbuf_sizes[0] = e1000x_rxbufsize(rctl); + } + + trace_e1000e_rx_desc_buff_sizes(core->rxbuf_sizes[0], core->rxbuf_sizes[1], + core->rxbuf_sizes[2], core->rxbuf_sizes[3]); + + e1000e_calc_per_desc_buf_size(core); +} + +static void +e1000e_calc_rxdesclen(E1000ECore *core) +{ + if (e1000e_rx_use_legacy_descriptor(core)) { + core->rx_desc_len = sizeof(struct e1000_rx_desc); + } else { + if (core->mac[RCTL] & E1000_RCTL_DTYP_PS) { + core->rx_desc_len = sizeof(union e1000_rx_desc_packet_split); + } else { + core->rx_desc_len = sizeof(union e1000_rx_desc_extended); + } + } + trace_e1000e_rx_desc_len(core->rx_desc_len); +} + +static void +e1000e_set_rx_control(E1000ECore *core, int index, uint32_t val) +{ + core->mac[RCTL] = val; + trace_e1000e_rx_set_rctl(core->mac[RCTL]); + + if (val & E1000_RCTL_EN) { + e1000e_parse_rxbufsize(core); + e1000e_calc_rxdesclen(core); + core->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1 + + E1000_RING_DESC_LEN_SHIFT; + + e1000e_start_recv(core); + } +} + +static +void(*e1000e_phyreg_writeops[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE]) +(E1000ECore *, int, uint16_t) = { + [0] = { + [PHY_CTRL] = e1000e_set_phy_ctrl, + [PHY_PAGE] = e1000e_set_phy_page, + [PHY_OEM_BITS] = e1000e_set_phy_oem_bits + } +}; + +static inline void +e1000e_clear_ims_bits(E1000ECore *core, uint32_t bits) +{ + trace_e1000e_irq_clear_ims(bits, core->mac[IMS], core->mac[IMS] & ~bits); + core->mac[IMS] &= ~bits; +} + +static inline bool +e1000e_postpone_interrupt(bool *interrupt_pending, + E1000IntrDelayTimer *timer) +{ + if (timer->running) { + trace_e1000e_irq_postponed_by_xitr(timer->delay_reg << 2); + + *interrupt_pending = true; + return true; + } + + if (timer->core->mac[timer->delay_reg] != 0) { + e1000e_intrmgr_rearm_timer(timer); + } + + return false; +} + +static inline bool +e1000e_itr_should_postpone(E1000ECore *core) +{ + return e1000e_postpone_interrupt(&core->itr_intr_pending, &core->itr); +} + +static inline bool +e1000e_eitr_should_postpone(E1000ECore *core, int idx) +{ + return e1000e_postpone_interrupt(&core->eitr_intr_pending[idx], + &core->eitr[idx]); +} + +static void +e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg) +{ + uint32_t effective_eiac; + + if (E1000_IVAR_ENTRY_VALID(int_cfg)) { + uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg); + if (vec < E1000E_MSIX_VEC_NUM) { + if (!e1000e_eitr_should_postpone(core, vec)) { + trace_e1000e_irq_msix_notify_vec(vec); + msix_notify(core->owner, vec); + } + } else { + trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg); + } + } else { + trace_e1000e_wrn_msix_invalid(cause, int_cfg); + } + + if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_EIAME) { + trace_e1000e_irq_ims_clear_eiame(core->mac[IAM], cause); + e1000e_clear_ims_bits(core, core->mac[IAM] & cause); + } + + trace_e1000e_irq_icr_clear_eiac(core->mac[ICR], core->mac[EIAC]); + + if (core->mac[EIAC] & E1000_ICR_OTHER) { + effective_eiac = (core->mac[EIAC] & E1000_EIAC_MASK) | + E1000_ICR_OTHER_CAUSES; + } else { + effective_eiac = core->mac[EIAC] & E1000_EIAC_MASK; + } + core->mac[ICR] &= ~effective_eiac; +} + +static void +e1000e_msix_notify(E1000ECore *core, uint32_t causes) +{ + if (causes & E1000_ICR_RXQ0) { + e1000e_msix_notify_one(core, E1000_ICR_RXQ0, + E1000_IVAR_RXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_RXQ1) { + e1000e_msix_notify_one(core, E1000_ICR_RXQ1, + E1000_IVAR_RXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ0) { + e1000e_msix_notify_one(core, E1000_ICR_TXQ0, + E1000_IVAR_TXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ1) { + e1000e_msix_notify_one(core, E1000_ICR_TXQ1, + E1000_IVAR_TXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_OTHER) { + e1000e_msix_notify_one(core, E1000_ICR_OTHER, + E1000_IVAR_OTHER(core->mac[IVAR])); + } +} + +static void +e1000e_msix_clear_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg) +{ + if (E1000_IVAR_ENTRY_VALID(int_cfg)) { + uint32_t vec = E1000_IVAR_ENTRY_VEC(int_cfg); + if (vec < E1000E_MSIX_VEC_NUM) { + trace_e1000e_irq_msix_pending_clearing(cause, int_cfg, vec); + msix_clr_pending(core->owner, vec); + } else { + trace_e1000e_wrn_msix_vec_wrong(cause, int_cfg); + } + } else { + trace_e1000e_wrn_msix_invalid(cause, int_cfg); + } +} + +static void +e1000e_msix_clear(E1000ECore *core, uint32_t causes) +{ + if (causes & E1000_ICR_RXQ0) { + e1000e_msix_clear_one(core, E1000_ICR_RXQ0, + E1000_IVAR_RXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_RXQ1) { + e1000e_msix_clear_one(core, E1000_ICR_RXQ1, + E1000_IVAR_RXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ0) { + e1000e_msix_clear_one(core, E1000_ICR_TXQ0, + E1000_IVAR_TXQ0(core->mac[IVAR])); + } + + if (causes & E1000_ICR_TXQ1) { + e1000e_msix_clear_one(core, E1000_ICR_TXQ1, + E1000_IVAR_TXQ1(core->mac[IVAR])); + } + + if (causes & E1000_ICR_OTHER) { + e1000e_msix_clear_one(core, E1000_ICR_OTHER, + E1000_IVAR_OTHER(core->mac[IVAR])); + } +} + +static inline void +e1000e_fix_icr_asserted(E1000ECore *core) +{ + core->mac[ICR] &= ~E1000_ICR_ASSERTED; + if (core->mac[ICR]) { + core->mac[ICR] |= E1000_ICR_ASSERTED; + } + + trace_e1000e_irq_fix_icr_asserted(core->mac[ICR]); +} + +static void +e1000e_send_msi(E1000ECore *core, bool msix) +{ + uint32_t causes = core->mac[ICR] & core->mac[IMS] & ~E1000_ICR_ASSERTED; + + if (msix) { + e1000e_msix_notify(core, causes); + } else { + if (!e1000e_itr_should_postpone(core)) { + trace_e1000e_irq_msi_notify(causes); + msi_notify(core->owner, 0); + } + } +} + +static void +e1000e_update_interrupt_state(E1000ECore *core) +{ + bool interrupts_pending; + bool is_msix = msix_enabled(core->owner); + + /* Set ICR[OTHER] for MSI-X */ + if (is_msix) { + if (core->mac[ICR] & core->mac[IMS] & E1000_ICR_OTHER_CAUSES) { + core->mac[ICR] |= E1000_ICR_OTHER; + trace_e1000e_irq_add_msi_other(core->mac[ICR]); + } + } + + e1000e_fix_icr_asserted(core); + + /* + * Make sure ICR and ICS registers have the same value. + * The spec says that the ICS register is write-only. However in practice, + * on real hardware ICS is readable, and for reads it has the same value as + * ICR (except that ICS does not have the clear on read behaviour of ICR). + * + * The VxWorks PRO/1000 driver uses this behaviour. + */ + core->mac[ICS] = core->mac[ICR]; + + interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true : false; + + trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS], + core->mac[ICR], core->mac[IMS]); + + if (is_msix || msi_enabled(core->owner)) { + if (interrupts_pending) { + e1000e_send_msi(core, is_msix); + } + } else { + if (interrupts_pending) { + if (!e1000e_itr_should_postpone(core)) { + e1000e_raise_legacy_irq(core); + } + } else { + e1000e_lower_legacy_irq(core); + } + } +} + +static inline void +e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val) +{ + trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]); + + val |= e1000e_intmgr_collect_delayed_causes(core); + core->mac[ICR] |= val; + + trace_e1000e_irq_set_cause_exit(val, core->mac[ICR]); + + e1000e_update_interrupt_state(core); +} + +static inline void +e1000e_autoneg_timer(void *opaque) +{ + E1000ECore *core = opaque; + if (!qemu_get_queue(core->owner_nic)->link_down) { + e1000x_update_regs_on_autoneg_done(core->mac, core->phy[0]); + e1000e_update_flowctl_status(core); + /* signal link status change to the guest */ + e1000e_set_interrupt_cause(core, E1000_ICR_LSC); + } +} + +static inline uint16_t +e1000e_get_reg_index_with_offset(const uint16_t *mac_reg_access, hwaddr addr) +{ + uint16_t index = (addr & 0x1ffff) >> 2; + return index + (mac_reg_access[index] & 0xfffe); +} + +static const char e1000e_phy_regcap[E1000E_PHY_PAGES][0x20] = { + [0] = { + [PHY_CTRL] = PHY_ANYPAGE | PHY_RW, + [PHY_STATUS] = PHY_ANYPAGE | PHY_R, + [PHY_ID1] = PHY_ANYPAGE | PHY_R, + [PHY_ID2] = PHY_ANYPAGE | PHY_R, + [PHY_AUTONEG_ADV] = PHY_ANYPAGE | PHY_RW, + [PHY_LP_ABILITY] = PHY_ANYPAGE | PHY_R, + [PHY_AUTONEG_EXP] = PHY_ANYPAGE | PHY_R, + [PHY_NEXT_PAGE_TX] = PHY_ANYPAGE | PHY_RW, + [PHY_LP_NEXT_PAGE] = PHY_ANYPAGE | PHY_R, + [PHY_1000T_CTRL] = PHY_ANYPAGE | PHY_RW, + [PHY_1000T_STATUS] = PHY_ANYPAGE | PHY_R, + [PHY_EXT_STATUS] = PHY_ANYPAGE | PHY_R, + [PHY_PAGE] = PHY_ANYPAGE | PHY_RW, + + [PHY_COPPER_CTRL1] = PHY_RW, + [PHY_COPPER_STAT1] = PHY_R, + [PHY_COPPER_CTRL3] = PHY_RW, + [PHY_RX_ERR_CNTR] = PHY_R, + [PHY_OEM_BITS] = PHY_RW, + [PHY_BIAS_1] = PHY_RW, + [PHY_BIAS_2] = PHY_RW, + [PHY_COPPER_INT_ENABLE] = PHY_RW, + [PHY_COPPER_STAT2] = PHY_R, + [PHY_COPPER_CTRL2] = PHY_RW + }, + [2] = { + [PHY_MAC_CTRL1] = PHY_RW, + [PHY_MAC_INT_ENABLE] = PHY_RW, + [PHY_MAC_STAT] = PHY_R, + [PHY_MAC_CTRL2] = PHY_RW + }, + [3] = { + [PHY_LED_03_FUNC_CTRL1] = PHY_RW, + [PHY_LED_03_POL_CTRL] = PHY_RW, + [PHY_LED_TIMER_CTRL] = PHY_RW, + [PHY_LED_45_CTRL] = PHY_RW + }, + [5] = { + [PHY_1000T_SKEW] = PHY_R, + [PHY_1000T_SWAP] = PHY_R + }, + [6] = { + [PHY_CRC_COUNTERS] = PHY_R + } +}; + +static bool +e1000e_phy_reg_check_cap(E1000ECore *core, uint32_t addr, + char cap, uint8_t *page) +{ + *page = + (e1000e_phy_regcap[0][addr] & PHY_ANYPAGE) ? 0 + : core->phy[0][PHY_PAGE]; + + if (*page >= E1000E_PHY_PAGES) { + return false; + } + + return e1000e_phy_regcap[*page][addr] & cap; +} + +static void +e1000e_phy_reg_write(E1000ECore *core, uint8_t page, + uint32_t addr, uint16_t data) +{ + assert(page < E1000E_PHY_PAGES); + assert(addr < E1000E_PHY_PAGE_SIZE); + + if (e1000e_phyreg_writeops[page][addr]) { + e1000e_phyreg_writeops[page][addr](core, addr, data); + } else { + core->phy[page][addr] = data; + } +} + +static void +e1000e_set_mdic(E1000ECore *core, int index, uint32_t val) +{ + uint32_t data = val & E1000_MDIC_DATA_MASK; + uint32_t addr = ((val & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + uint8_t page; + + if ((val & E1000_MDIC_PHY_MASK) >> E1000_MDIC_PHY_SHIFT != 1) { /* phy # */ + val = core->mac[MDIC] | E1000_MDIC_ERROR; + } else if (val & E1000_MDIC_OP_READ) { + if (!e1000e_phy_reg_check_cap(core, addr, PHY_R, &page)) { + trace_e1000e_core_mdic_read_unhandled(page, addr); + val |= E1000_MDIC_ERROR; + } else { + val = (val ^ data) | core->phy[page][addr]; + trace_e1000e_core_mdic_read(page, addr, val); + } + } else if (val & E1000_MDIC_OP_WRITE) { + if (!e1000e_phy_reg_check_cap(core, addr, PHY_W, &page)) { + trace_e1000e_core_mdic_write_unhandled(page, addr); + val |= E1000_MDIC_ERROR; + } else { + trace_e1000e_core_mdic_write(page, addr, data); + e1000e_phy_reg_write(core, page, addr, data); + } + } + core->mac[MDIC] = val | E1000_MDIC_READY; + + if (val & E1000_MDIC_INT_EN) { + e1000e_set_interrupt_cause(core, E1000_ICR_MDAC); + } +} + +static void +e1000e_set_rdt(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & 0xffff; + trace_e1000e_rx_set_rdt(e1000e_mq_queue_idx(RDT0, index), val); + e1000e_start_recv(core); +} + +static void +e1000e_set_status(E1000ECore *core, int index, uint32_t val) +{ + if ((val & E1000_STATUS_PHYRA) == 0) { + core->mac[index] &= ~E1000_STATUS_PHYRA; + } +} + +static void +e1000e_set_ctrlext(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK), + !!(val & E1000_CTRL_EXT_SPD_BYPS)); + + /* Zero self-clearing bits */ + val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST); + core->mac[CTRL_EXT] = val; +} + +static void +e1000e_set_pbaclr(E1000ECore *core, int index, uint32_t val) +{ + int i; + + core->mac[PBACLR] = val & E1000_PBACLR_VALID_MASK; + + if (msix_enabled(core->owner)) { + return; + } + + for (i = 0; i < E1000E_MSIX_VEC_NUM; i++) { + if (core->mac[PBACLR] & BIT(i)) { + msix_clr_pending(core->owner, i); + } + } +} + +static void +e1000e_set_fcrth(E1000ECore *core, int index, uint32_t val) +{ + core->mac[FCRTH] = val & 0xFFF8; +} + +static void +e1000e_set_fcrtl(E1000ECore *core, int index, uint32_t val) +{ + core->mac[FCRTL] = val & 0x8000FFF8; +} + +static inline void +e1000e_set_16bit(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & 0xffff; +} + +static void +e1000e_set_12bit(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & 0xfff; +} + +static void +e1000e_set_vet(E1000ECore *core, int index, uint32_t val) +{ + core->mac[VET] = val & 0xffff; + core->vet = le16_to_cpu(core->mac[VET]); + trace_e1000e_vlan_vet(core->vet); +} + +static void +e1000e_set_dlen(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & E1000_XDLEN_MASK; +} + +static void +e1000e_set_dbal(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val & E1000_XDBAL_MASK; +} + +static void +e1000e_set_tctl(E1000ECore *core, int index, uint32_t val) +{ + E1000E_TxRing txr; + core->mac[index] = val; + + if (core->mac[TARC0] & E1000_TARC_ENABLE) { + e1000e_tx_ring_init(core, &txr, 0); + e1000e_start_xmit(core, &txr); + } + + if (core->mac[TARC1] & E1000_TARC_ENABLE) { + e1000e_tx_ring_init(core, &txr, 1); + e1000e_start_xmit(core, &txr); + } +} + +static void +e1000e_set_tdt(E1000ECore *core, int index, uint32_t val) +{ + E1000E_TxRing txr; + int qidx = e1000e_mq_queue_idx(TDT, index); + uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1; + + core->mac[index] = val & 0xffff; + + if (core->mac[tarc_reg] & E1000_TARC_ENABLE) { + e1000e_tx_ring_init(core, &txr, qidx); + e1000e_start_xmit(core, &txr); + } +} + +static void +e1000e_set_ics(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_irq_write_ics(val); + e1000e_set_interrupt_cause(core, val); +} + +static void +e1000e_set_icr(E1000ECore *core, int index, uint32_t val) +{ + if ((core->mac[ICR] & E1000_ICR_ASSERTED) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { + trace_e1000e_irq_icr_process_iame(); + e1000e_clear_ims_bits(core, core->mac[IAM]); + } + + trace_e1000e_irq_icr_write(val, core->mac[ICR], core->mac[ICR] & ~val); + core->mac[ICR] &= ~val; + e1000e_update_interrupt_state(core); +} + +static void +e1000e_set_imc(E1000ECore *core, int index, uint32_t val) +{ + trace_e1000e_irq_ims_clear_set_imc(val); + e1000e_clear_ims_bits(core, val); + e1000e_update_interrupt_state(core); +} + +static void +e1000e_set_ims(E1000ECore *core, int index, uint32_t val) +{ + static const uint32_t ims_ext_mask = + E1000_IMS_RXQ0 | E1000_IMS_RXQ1 | + E1000_IMS_TXQ0 | E1000_IMS_TXQ1 | + E1000_IMS_OTHER; + + static const uint32_t ims_valid_mask = + E1000_IMS_TXDW | E1000_IMS_TXQE | E1000_IMS_LSC | + E1000_IMS_RXDMT0 | E1000_IMS_RXO | E1000_IMS_RXT0 | + E1000_IMS_MDAC | E1000_IMS_TXD_LOW | E1000_IMS_SRPD | + E1000_IMS_ACK | E1000_IMS_MNG | E1000_IMS_RXQ0 | + E1000_IMS_RXQ1 | E1000_IMS_TXQ0 | E1000_IMS_TXQ1 | + E1000_IMS_OTHER; + + uint32_t valid_val = val & ims_valid_mask; + + trace_e1000e_irq_set_ims(val, core->mac[IMS], core->mac[IMS] | valid_val); + core->mac[IMS] |= valid_val; + + if ((valid_val & ims_ext_mask) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PBA_CLR) && + msix_enabled(core->owner)) { + e1000e_msix_clear(core, valid_val); + } + + if ((valid_val == ims_valid_mask) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_INT_TIMERS_CLEAR_ENA)) { + trace_e1000e_irq_fire_all_timers(val); + e1000e_intrmgr_fire_all_timers(core); + } + + e1000e_update_interrupt_state(core); +} + +static void +e1000e_set_rdtr(E1000ECore *core, int index, uint32_t val) +{ + e1000e_set_16bit(core, index, val); + + if ((val & E1000_RDTR_FPD) && (core->rdtr.running)) { + trace_e1000e_irq_rdtr_fpd_running(); + e1000e_intrmgr_fire_delayed_interrupts(core); + } else { + trace_e1000e_irq_rdtr_fpd_not_running(); + } +} + +static void +e1000e_set_tidv(E1000ECore *core, int index, uint32_t val) +{ + e1000e_set_16bit(core, index, val); + + if ((val & E1000_TIDV_FPD) && (core->tidv.running)) { + trace_e1000e_irq_tidv_fpd_running(); + e1000e_intrmgr_fire_delayed_interrupts(core); + } else { + trace_e1000e_irq_tidv_fpd_not_running(); + } +} + +static uint32_t +e1000e_mac_readreg(E1000ECore *core, int index) +{ + return core->mac[index]; +} + +static uint32_t +e1000e_mac_ics_read(E1000ECore *core, int index) +{ + trace_e1000e_irq_read_ics(core->mac[ICS]); + return core->mac[ICS]; +} + +static uint32_t +e1000e_mac_ims_read(E1000ECore *core, int index) +{ + trace_e1000e_irq_read_ims(core->mac[IMS]); + return core->mac[IMS]; +} + +#define E1000E_LOW_BITS_READ_FUNC(num) \ + static uint32_t \ + e1000e_mac_low##num##_read(E1000ECore *core, int index) \ + { \ + return core->mac[index] & (BIT(num) - 1); \ + } \ + +#define E1000E_LOW_BITS_READ(num) \ + e1000e_mac_low##num##_read + +E1000E_LOW_BITS_READ_FUNC(4); +E1000E_LOW_BITS_READ_FUNC(6); +E1000E_LOW_BITS_READ_FUNC(11); +E1000E_LOW_BITS_READ_FUNC(13); +E1000E_LOW_BITS_READ_FUNC(16); + +static uint32_t +e1000e_mac_swsm_read(E1000ECore *core, int index) +{ + uint32_t val = core->mac[SWSM]; + core->mac[SWSM] = val | 1; + return val; +} + +static uint32_t +e1000e_mac_itr_read(E1000ECore *core, int index) +{ + return core->itr_guest_value; +} + +static uint32_t +e1000e_mac_eitr_read(E1000ECore *core, int index) +{ + return core->eitr_guest_value[index - EITR]; +} + +static uint32_t +e1000e_mac_icr_read(E1000ECore *core, int index) +{ + uint32_t ret = core->mac[ICR]; + trace_e1000e_irq_icr_read_entry(ret); + + if (core->mac[IMS] == 0) { + trace_e1000e_irq_icr_clear_zero_ims(); + core->mac[ICR] = 0; + } + + if ((core->mac[ICR] & E1000_ICR_ASSERTED) && + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { + trace_e1000e_irq_icr_clear_iame(); + core->mac[ICR] = 0; + trace_e1000e_irq_icr_process_iame(); + e1000e_clear_ims_bits(core, core->mac[IAM]); + } + + trace_e1000e_irq_icr_read_exit(core->mac[ICR]); + e1000e_update_interrupt_state(core); + return ret; +} + +static uint32_t +e1000e_mac_read_clr4(E1000ECore *core, int index) +{ + uint32_t ret = core->mac[index]; + + core->mac[index] = 0; + return ret; +} + +static uint32_t +e1000e_mac_read_clr8(E1000ECore *core, int index) +{ + uint32_t ret = core->mac[index]; + + core->mac[index] = 0; + core->mac[index - 1] = 0; + return ret; +} + +static uint32_t +e1000e_get_ctrl(E1000ECore *core, int index) +{ + uint32_t val = core->mac[CTRL]; + + trace_e1000e_link_read_params( + !!(val & E1000_CTRL_ASDE), + (val & E1000_CTRL_SPD_SEL) >> E1000_CTRL_SPD_SHIFT, + !!(val & E1000_CTRL_FRCSPD), + !!(val & E1000_CTRL_FRCDPX), + !!(val & E1000_CTRL_RFCE), + !!(val & E1000_CTRL_TFCE)); + + return val; +} + +static uint32_t +e1000e_get_status(E1000ECore *core, int index) +{ + uint32_t res = core->mac[STATUS]; + + if (!(core->mac[CTRL] & E1000_CTRL_GIO_MASTER_DISABLE)) { + res |= E1000_STATUS_GIO_MASTER_ENABLE; + } + + if (core->mac[CTRL] & E1000_CTRL_FRCDPX) { + res |= (core->mac[CTRL] & E1000_CTRL_FD) ? E1000_STATUS_FD : 0; + } else { + res |= E1000_STATUS_FD; + } + + if ((core->mac[CTRL] & E1000_CTRL_FRCSPD) || + (core->mac[CTRL_EXT] & E1000_CTRL_EXT_SPD_BYPS)) { + switch (core->mac[CTRL] & E1000_CTRL_SPD_SEL) { + case E1000_CTRL_SPD_10: + res |= E1000_STATUS_SPEED_10; + break; + case E1000_CTRL_SPD_100: + res |= E1000_STATUS_SPEED_100; + break; + case E1000_CTRL_SPD_1000: + default: + res |= E1000_STATUS_SPEED_1000; + break; + } + } else { + res |= E1000_STATUS_SPEED_1000; + } + + trace_e1000e_link_status( + !!(res & E1000_STATUS_LU), + !!(res & E1000_STATUS_FD), + (res & E1000_STATUS_SPEED_MASK) >> E1000_STATUS_SPEED_SHIFT, + (res & E1000_STATUS_ASDV) >> E1000_STATUS_ASDV_SHIFT); + + return res; +} + +static uint32_t +e1000e_get_tarc(E1000ECore *core, int index) +{ + return core->mac[index] & ((BIT(11) - 1) | + BIT(27) | + BIT(28) | + BIT(29) | + BIT(30)); +} + +static void +e1000e_mac_writereg(E1000ECore *core, int index, uint32_t val) +{ + core->mac[index] = val; +} + +static void +e1000e_mac_setmacaddr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t macaddr[2]; + + core->mac[index] = val; + + macaddr[0] = cpu_to_le32(core->mac[RA]); + macaddr[1] = cpu_to_le32(core->mac[RA + 1]); + qemu_format_nic_info_str(qemu_get_queue(core->owner_nic), + (uint8_t *) macaddr); + + trace_e1000e_mac_set_sw(MAC_ARG(macaddr)); +} + +static void +e1000e_set_eecd(E1000ECore *core, int index, uint32_t val) +{ + static const uint32_t ro_bits = E1000_EECD_PRES | + E1000_EECD_AUTO_RD | + E1000_EECD_SIZE_EX_MASK; + + core->mac[EECD] = (core->mac[EECD] & ro_bits) | (val & ~ro_bits); +} + +static void +e1000e_set_eerd(E1000ECore *core, int index, uint32_t val) +{ + uint32_t addr = (val >> E1000_EERW_ADDR_SHIFT) & E1000_EERW_ADDR_MASK; + uint32_t flags = 0; + uint32_t data = 0; + + if ((addr < E1000E_EEPROM_SIZE) && (val & E1000_EERW_START)) { + data = core->eeprom[addr]; + flags = E1000_EERW_DONE; + } + + core->mac[EERD] = flags | + (addr << E1000_EERW_ADDR_SHIFT) | + (data << E1000_EERW_DATA_SHIFT); +} + +static void +e1000e_set_eewr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t addr = (val >> E1000_EERW_ADDR_SHIFT) & E1000_EERW_ADDR_MASK; + uint32_t data = (val >> E1000_EERW_DATA_SHIFT) & E1000_EERW_DATA_MASK; + uint32_t flags = 0; + + if ((addr < E1000E_EEPROM_SIZE) && (val & E1000_EERW_START)) { + core->eeprom[addr] = data; + flags = E1000_EERW_DONE; + } + + core->mac[EERD] = flags | + (addr << E1000_EERW_ADDR_SHIFT) | + (data << E1000_EERW_DATA_SHIFT); +} + +static void +e1000e_set_rxdctl(E1000ECore *core, int index, uint32_t val) +{ + core->mac[RXDCTL] = core->mac[RXDCTL1] = val; +} + +static void +e1000e_set_itr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t interval = val & 0xffff; + + trace_e1000e_irq_itr_set(val); + + core->itr_guest_value = interval; + core->mac[index] = MAX(interval, E1000E_MIN_XITR); +} + +static void +e1000e_set_eitr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t interval = val & 0xffff; + uint32_t eitr_num = index - EITR; + + trace_e1000e_irq_eitr_set(eitr_num, val); + + core->eitr_guest_value[eitr_num] = interval; + core->mac[index] = MAX(interval, E1000E_MIN_XITR); +} + +static void +e1000e_set_psrctl(E1000ECore *core, int index, uint32_t val) +{ + if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) { + hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero"); + } + + if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) { + hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero"); + } + + core->mac[PSRCTL] = val; +} + +static void +e1000e_update_rx_offloads(E1000ECore *core) +{ + int cso_state = e1000e_rx_l4_cso_enabled(core); + + trace_e1000e_rx_set_cso(cso_state); + + if (core->has_vnet) { + qemu_set_offload(qemu_get_queue(core->owner_nic)->peer, + cso_state, 0, 0, 0, 0); + } +} + +static void +e1000e_set_rxcsum(E1000ECore *core, int index, uint32_t val) +{ + core->mac[RXCSUM] = val; + e1000e_update_rx_offloads(core); +} + +static void +e1000e_set_gcr(E1000ECore *core, int index, uint32_t val) +{ + uint32_t ro_bits = core->mac[GCR] & E1000_GCR_RO_BITS; + core->mac[GCR] = (val & ~E1000_GCR_RO_BITS) | ro_bits; +} + +#define e1000e_getreg(x) [x] = e1000e_mac_readreg +static uint32_t (*e1000e_macreg_readops[])(E1000ECore *, int) = { + e1000e_getreg(PBA), + e1000e_getreg(WUFC), + e1000e_getreg(MANC), + e1000e_getreg(TOTL), + e1000e_getreg(RDT0), + e1000e_getreg(RDBAH0), + e1000e_getreg(TDBAL1), + e1000e_getreg(RDLEN0), + e1000e_getreg(RDH1), + e1000e_getreg(LATECOL), + e1000e_getreg(SEC), + e1000e_getreg(XONTXC), + e1000e_getreg(WUS), + e1000e_getreg(GORCL), + e1000e_getreg(MGTPRC), + e1000e_getreg(EERD), + e1000e_getreg(EIAC), + e1000e_getreg(PSRCTL), + e1000e_getreg(MANC2H), + e1000e_getreg(RXCSUM), + e1000e_getreg(GSCL_3), + e1000e_getreg(GSCN_2), + e1000e_getreg(RSRPD), + e1000e_getreg(RDBAL1), + e1000e_getreg(FCAH), + e1000e_getreg(FCRTH), + e1000e_getreg(FLOP), + e1000e_getreg(FLASHT), + e1000e_getreg(RXSTMPH), + e1000e_getreg(TXSTMPL), + e1000e_getreg(TIMADJL), + e1000e_getreg(TXDCTL), + e1000e_getreg(RDH0), + e1000e_getreg(TDT1), + e1000e_getreg(TNCRS), + e1000e_getreg(RJC), + e1000e_getreg(IAM), + e1000e_getreg(GSCL_2), + e1000e_getreg(RDBAH1), + e1000e_getreg(FLSWDATA), + e1000e_getreg(RXSATRH), + e1000e_getreg(TIPG), + e1000e_getreg(FLMNGCTL), + e1000e_getreg(FLMNGCNT), + e1000e_getreg(TSYNCTXCTL), + e1000e_getreg(EXTCNF_SIZE), + e1000e_getreg(EXTCNF_CTRL), + e1000e_getreg(EEMNGDATA), + e1000e_getreg(CTRL_EXT), + e1000e_getreg(SYSTIMH), + e1000e_getreg(EEMNGCTL), + e1000e_getreg(FLMNGDATA), + e1000e_getreg(TSYNCRXCTL), + e1000e_getreg(TDH), + e1000e_getreg(LEDCTL), + e1000e_getreg(STATUS), + e1000e_getreg(TCTL), + e1000e_getreg(TDBAL), + e1000e_getreg(TDLEN), + e1000e_getreg(TDH1), + e1000e_getreg(RADV), + e1000e_getreg(ECOL), + e1000e_getreg(DC), + e1000e_getreg(RLEC), + e1000e_getreg(XOFFTXC), + e1000e_getreg(RFC), + e1000e_getreg(RNBC), + e1000e_getreg(MGTPTC), + e1000e_getreg(TIMINCA), + e1000e_getreg(RXCFGL), + e1000e_getreg(MFUTP01), + e1000e_getreg(FACTPS), + e1000e_getreg(GSCL_1), + e1000e_getreg(GSCN_0), + e1000e_getreg(GCR2), + e1000e_getreg(RDT1), + e1000e_getreg(PBACLR), + e1000e_getreg(FCTTV), + e1000e_getreg(EEWR), + e1000e_getreg(FLSWCTL), + e1000e_getreg(RXDCTL1), + e1000e_getreg(RXSATRL), + e1000e_getreg(SYSTIML), + e1000e_getreg(RXUDP), + e1000e_getreg(TORL), + e1000e_getreg(TDLEN1), + e1000e_getreg(MCC), + e1000e_getreg(WUC), + e1000e_getreg(EECD), + e1000e_getreg(MFUTP23), + e1000e_getreg(RAID), + e1000e_getreg(FCRTV), + e1000e_getreg(TXDCTL1), + e1000e_getreg(RCTL), + e1000e_getreg(TDT), + e1000e_getreg(MDIC), + e1000e_getreg(FCRUC), + e1000e_getreg(VET), + e1000e_getreg(RDBAL0), + e1000e_getreg(TDBAH1), + e1000e_getreg(RDTR), + e1000e_getreg(SCC), + e1000e_getreg(COLC), + e1000e_getreg(CEXTERR), + e1000e_getreg(XOFFRXC), + e1000e_getreg(IPAV), + e1000e_getreg(GOTCL), + e1000e_getreg(MGTPDC), + e1000e_getreg(GCR), + e1000e_getreg(IVAR), + e1000e_getreg(POEMB), + e1000e_getreg(MFVAL), + e1000e_getreg(FUNCTAG), + e1000e_getreg(GSCL_4), + e1000e_getreg(GSCN_3), + e1000e_getreg(MRQC), + e1000e_getreg(RDLEN1), + e1000e_getreg(FCT), + e1000e_getreg(FLA), + e1000e_getreg(FLOL), + e1000e_getreg(RXDCTL), + e1000e_getreg(RXSTMPL), + e1000e_getreg(TXSTMPH), + e1000e_getreg(TIMADJH), + e1000e_getreg(FCRTL), + e1000e_getreg(TDBAH), + e1000e_getreg(TADV), + e1000e_getreg(XONRXC), + e1000e_getreg(TSCTFC), + e1000e_getreg(RFCTL), + e1000e_getreg(GSCN_1), + e1000e_getreg(FCAL), + e1000e_getreg(FLSWCNT), + + [TOTH] = e1000e_mac_read_clr8, + [GOTCH] = e1000e_mac_read_clr8, + [PRC64] = e1000e_mac_read_clr4, + [PRC255] = e1000e_mac_read_clr4, + [PRC1023] = e1000e_mac_read_clr4, + [PTC64] = e1000e_mac_read_clr4, + [PTC255] = e1000e_mac_read_clr4, + [PTC1023] = e1000e_mac_read_clr4, + [GPRC] = e1000e_mac_read_clr4, + [TPT] = e1000e_mac_read_clr4, + [RUC] = e1000e_mac_read_clr4, + [BPRC] = e1000e_mac_read_clr4, + [MPTC] = e1000e_mac_read_clr4, + [IAC] = e1000e_mac_read_clr4, + [ICR] = e1000e_mac_icr_read, + [RDFH] = E1000E_LOW_BITS_READ(13), + [RDFHS] = E1000E_LOW_BITS_READ(13), + [RDFPC] = E1000E_LOW_BITS_READ(13), + [TDFH] = E1000E_LOW_BITS_READ(13), + [TDFHS] = E1000E_LOW_BITS_READ(13), + [STATUS] = e1000e_get_status, + [TARC0] = e1000e_get_tarc, + [PBS] = E1000E_LOW_BITS_READ(6), + [ICS] = e1000e_mac_ics_read, + [AIT] = E1000E_LOW_BITS_READ(16), + [TORH] = e1000e_mac_read_clr8, + [GORCH] = e1000e_mac_read_clr8, + [PRC127] = e1000e_mac_read_clr4, + [PRC511] = e1000e_mac_read_clr4, + [PRC1522] = e1000e_mac_read_clr4, + [PTC127] = e1000e_mac_read_clr4, + [PTC511] = e1000e_mac_read_clr4, + [PTC1522] = e1000e_mac_read_clr4, + [GPTC] = e1000e_mac_read_clr4, + [TPR] = e1000e_mac_read_clr4, + [ROC] = e1000e_mac_read_clr4, + [MPRC] = e1000e_mac_read_clr4, + [BPTC] = e1000e_mac_read_clr4, + [TSCTC] = e1000e_mac_read_clr4, + [ITR] = e1000e_mac_itr_read, + [RDFT] = E1000E_LOW_BITS_READ(13), + [RDFTS] = E1000E_LOW_BITS_READ(13), + [TDFPC] = E1000E_LOW_BITS_READ(13), + [TDFT] = E1000E_LOW_BITS_READ(13), + [TDFTS] = E1000E_LOW_BITS_READ(13), + [CTRL] = e1000e_get_ctrl, + [TARC1] = e1000e_get_tarc, + [SWSM] = e1000e_mac_swsm_read, + [IMS] = e1000e_mac_ims_read, + + [CRCERRS ... MPC] = e1000e_mac_readreg, + [IP6AT ... IP6AT + 3] = e1000e_mac_readreg, + [IP4AT ... IP4AT + 6] = e1000e_mac_readreg, + [RA ... RA + 31] = e1000e_mac_readreg, + [WUPM ... WUPM + 31] = e1000e_mac_readreg, + [MTA ... MTA + 127] = e1000e_mac_readreg, + [VFTA ... VFTA + 127] = e1000e_mac_readreg, + [FFMT ... FFMT + 254] = E1000E_LOW_BITS_READ(4), + [FFVT ... FFVT + 254] = e1000e_mac_readreg, + [MDEF ... MDEF + 7] = e1000e_mac_readreg, + [FFLT ... FFLT + 10] = E1000E_LOW_BITS_READ(11), + [FTFT ... FTFT + 254] = e1000e_mac_readreg, + [PBM ... PBM + 10239] = e1000e_mac_readreg, + [RETA ... RETA + 31] = e1000e_mac_readreg, + [RSSRK ... RSSRK + 31] = e1000e_mac_readreg, + [MAVTV0 ... MAVTV3] = e1000e_mac_readreg, + [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = e1000e_mac_eitr_read +}; +enum { E1000E_NREADOPS = ARRAY_SIZE(e1000e_macreg_readops) }; + +#define e1000e_putreg(x) [x] = e1000e_mac_writereg +static void (*e1000e_macreg_writeops[])(E1000ECore *, int, uint32_t) = { + e1000e_putreg(PBA), + e1000e_putreg(SWSM), + e1000e_putreg(WUFC), + e1000e_putreg(RDBAH1), + e1000e_putreg(TDBAH), + e1000e_putreg(TXDCTL), + e1000e_putreg(RDBAH0), + e1000e_putreg(LEDCTL), + e1000e_putreg(FCAL), + e1000e_putreg(FCRUC), + e1000e_putreg(AIT), + e1000e_putreg(TDFH), + e1000e_putreg(TDFT), + e1000e_putreg(TDFHS), + e1000e_putreg(TDFTS), + e1000e_putreg(TDFPC), + e1000e_putreg(WUC), + e1000e_putreg(WUS), + e1000e_putreg(RDFH), + e1000e_putreg(RDFT), + e1000e_putreg(RDFHS), + e1000e_putreg(RDFTS), + e1000e_putreg(RDFPC), + e1000e_putreg(IPAV), + e1000e_putreg(TDBAH1), + e1000e_putreg(TIMINCA), + e1000e_putreg(IAM), + e1000e_putreg(EIAC), + e1000e_putreg(IVAR), + e1000e_putreg(TARC0), + e1000e_putreg(TARC1), + e1000e_putreg(FLSWDATA), + e1000e_putreg(POEMB), + e1000e_putreg(PBS), + e1000e_putreg(MFUTP01), + e1000e_putreg(MFUTP23), + e1000e_putreg(MANC), + e1000e_putreg(MANC2H), + e1000e_putreg(MFVAL), + e1000e_putreg(EXTCNF_CTRL), + e1000e_putreg(FACTPS), + e1000e_putreg(FUNCTAG), + e1000e_putreg(GSCL_1), + e1000e_putreg(GSCL_2), + e1000e_putreg(GSCL_3), + e1000e_putreg(GSCL_4), + e1000e_putreg(GSCN_0), + e1000e_putreg(GSCN_1), + e1000e_putreg(GSCN_2), + e1000e_putreg(GSCN_3), + e1000e_putreg(GCR2), + e1000e_putreg(MRQC), + e1000e_putreg(FLOP), + e1000e_putreg(FLOL), + e1000e_putreg(FLSWCTL), + e1000e_putreg(FLSWCNT), + e1000e_putreg(FLA), + e1000e_putreg(RXDCTL1), + e1000e_putreg(TXDCTL1), + e1000e_putreg(TIPG), + e1000e_putreg(RXSTMPH), + e1000e_putreg(RXSTMPL), + e1000e_putreg(RXSATRL), + e1000e_putreg(RXSATRH), + e1000e_putreg(TXSTMPL), + e1000e_putreg(TXSTMPH), + e1000e_putreg(SYSTIML), + e1000e_putreg(SYSTIMH), + e1000e_putreg(TIMADJL), + e1000e_putreg(TIMADJH), + e1000e_putreg(RXUDP), + e1000e_putreg(RXCFGL), + e1000e_putreg(TSYNCRXCTL), + e1000e_putreg(TSYNCTXCTL), + e1000e_putreg(FLSWDATA), + e1000e_putreg(EXTCNF_SIZE), + e1000e_putreg(EEMNGCTL), + e1000e_putreg(RA), + + [TDH1] = e1000e_set_16bit, + [TDT1] = e1000e_set_tdt, + [TCTL] = e1000e_set_tctl, + [TDT] = e1000e_set_tdt, + [MDIC] = e1000e_set_mdic, + [ICS] = e1000e_set_ics, + [TDH] = e1000e_set_16bit, + [RDH0] = e1000e_set_16bit, + [RDT0] = e1000e_set_rdt, + [IMC] = e1000e_set_imc, + [IMS] = e1000e_set_ims, + [ICR] = e1000e_set_icr, + [EECD] = e1000e_set_eecd, + [RCTL] = e1000e_set_rx_control, + [CTRL] = e1000e_set_ctrl, + [RDTR] = e1000e_set_rdtr, + [RADV] = e1000e_set_16bit, + [TADV] = e1000e_set_16bit, + [ITR] = e1000e_set_itr, + [EERD] = e1000e_set_eerd, + [GCR] = e1000e_set_gcr, + [PSRCTL] = e1000e_set_psrctl, + [RXCSUM] = e1000e_set_rxcsum, + [RAID] = e1000e_set_16bit, + [RSRPD] = e1000e_set_12bit, + [TIDV] = e1000e_set_tidv, + [TDLEN1] = e1000e_set_dlen, + [TDLEN] = e1000e_set_dlen, + [RDLEN0] = e1000e_set_dlen, + [RDLEN1] = e1000e_set_dlen, + [TDBAL] = e1000e_set_dbal, + [TDBAL1] = e1000e_set_dbal, + [RDBAL0] = e1000e_set_dbal, + [RDBAL1] = e1000e_set_dbal, + [RDH1] = e1000e_set_16bit, + [RDT1] = e1000e_set_rdt, + [STATUS] = e1000e_set_status, + [PBACLR] = e1000e_set_pbaclr, + [CTRL_EXT] = e1000e_set_ctrlext, + [FCAH] = e1000e_set_16bit, + [FCT] = e1000e_set_16bit, + [FCTTV] = e1000e_set_16bit, + [FCRTV] = e1000e_set_16bit, + [FCRTH] = e1000e_set_fcrth, + [FCRTL] = e1000e_set_fcrtl, + [VET] = e1000e_set_vet, + [RXDCTL] = e1000e_set_rxdctl, + [FLASHT] = e1000e_set_16bit, + [EEWR] = e1000e_set_eewr, + [CTRL_DUP] = e1000e_set_ctrl, + [RFCTL] = e1000e_set_rfctl, + [RA + 1] = e1000e_mac_setmacaddr, + + [IP6AT ... IP6AT + 3] = e1000e_mac_writereg, + [IP4AT ... IP4AT + 6] = e1000e_mac_writereg, + [RA + 2 ... RA + 31] = e1000e_mac_writereg, + [WUPM ... WUPM + 31] = e1000e_mac_writereg, + [MTA ... MTA + 127] = e1000e_mac_writereg, + [VFTA ... VFTA + 127] = e1000e_mac_writereg, + [FFMT ... FFMT + 254] = e1000e_mac_writereg, + [FFVT ... FFVT + 254] = e1000e_mac_writereg, + [PBM ... PBM + 10239] = e1000e_mac_writereg, + [MDEF ... MDEF + 7] = e1000e_mac_writereg, + [FFLT ... FFLT + 10] = e1000e_mac_writereg, + [FTFT ... FTFT + 254] = e1000e_mac_writereg, + [RETA ... RETA + 31] = e1000e_mac_writereg, + [RSSRK ... RSSRK + 31] = e1000e_mac_writereg, + [MAVTV0 ... MAVTV3] = e1000e_mac_writereg, + [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = e1000e_set_eitr +}; +enum { E1000E_NWRITEOPS = ARRAY_SIZE(e1000e_macreg_writeops) }; + +enum { MAC_ACCESS_PARTIAL = 1 }; + +/* The array below combines alias offsets of the index values for the + * MAC registers that have aliases, with the indication of not fully + * implemented registers (lowest bit). This combination is possible + * because all of the offsets are even. */ +static const uint16_t mac_reg_access[E1000E_MAC_SIZE] = { + /* Alias index offsets */ + [FCRTL_A] = 0x07fe, [FCRTH_A] = 0x0802, + [RDH0_A] = 0x09bc, [RDT0_A] = 0x09bc, [RDTR_A] = 0x09c6, + [RDFH_A] = 0xe904, [RDFT_A] = 0xe904, + [TDH_A] = 0x0cf8, [TDT_A] = 0x0cf8, [TIDV_A] = 0x0cf8, + [TDFH_A] = 0xed00, [TDFT_A] = 0xed00, + [RA_A ... RA_A + 31] = 0x14f0, + [VFTA_A ... VFTA_A + 127] = 0x1400, + [RDBAL0_A ... RDLEN0_A] = 0x09bc, + [TDBAL_A ... TDLEN_A] = 0x0cf8, + /* Access options */ + [RDFH] = MAC_ACCESS_PARTIAL, [RDFT] = MAC_ACCESS_PARTIAL, + [RDFHS] = MAC_ACCESS_PARTIAL, [RDFTS] = MAC_ACCESS_PARTIAL, + [RDFPC] = MAC_ACCESS_PARTIAL, + [TDFH] = MAC_ACCESS_PARTIAL, [TDFT] = MAC_ACCESS_PARTIAL, + [TDFHS] = MAC_ACCESS_PARTIAL, [TDFTS] = MAC_ACCESS_PARTIAL, + [TDFPC] = MAC_ACCESS_PARTIAL, [EECD] = MAC_ACCESS_PARTIAL, + [PBM] = MAC_ACCESS_PARTIAL, [FLA] = MAC_ACCESS_PARTIAL, + [FCAL] = MAC_ACCESS_PARTIAL, [FCAH] = MAC_ACCESS_PARTIAL, + [FCT] = MAC_ACCESS_PARTIAL, [FCTTV] = MAC_ACCESS_PARTIAL, + [FCRTV] = MAC_ACCESS_PARTIAL, [FCRTL] = MAC_ACCESS_PARTIAL, + [FCRTH] = MAC_ACCESS_PARTIAL, [TXDCTL] = MAC_ACCESS_PARTIAL, + [TXDCTL1] = MAC_ACCESS_PARTIAL, + [MAVTV0 ... MAVTV3] = MAC_ACCESS_PARTIAL +}; + +void +e1000e_core_write(E1000ECore *core, hwaddr addr, uint64_t val, unsigned size) +{ + uint16_t index = e1000e_get_reg_index_with_offset(mac_reg_access, addr); + + if (index < E1000E_NWRITEOPS && e1000e_macreg_writeops[index]) { + if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) { + trace_e1000e_wrn_regs_write_trivial(index << 2); + } + trace_e1000e_core_write(index << 2, size, val); + e1000e_macreg_writeops[index](core, index, val); + } else if (index < E1000E_NREADOPS && e1000e_macreg_readops[index]) { + trace_e1000e_wrn_regs_write_ro(index << 2, size, val); + } else { + trace_e1000e_wrn_regs_write_unknown(index << 2, size, val); + } +} + +uint64_t +e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size) +{ + uint64_t val; + uint16_t index = e1000e_get_reg_index_with_offset(mac_reg_access, addr); + + if (index < E1000E_NREADOPS && e1000e_macreg_readops[index]) { + if (mac_reg_access[index] & MAC_ACCESS_PARTIAL) { + trace_e1000e_wrn_regs_read_trivial(index << 2); + } + val = e1000e_macreg_readops[index](core, index); + trace_e1000e_core_read(index << 2, size, val); + return val; + } else { + trace_e1000e_wrn_regs_read_unknown(index << 2, size); + } + return 0; +} + +static inline void +e1000e_autoneg_pause(E1000ECore *core) +{ + timer_del(core->autoneg_timer); +} + +static void +e1000e_autoneg_resume(E1000ECore *core) +{ + if (e1000e_have_autoneg(core) && + !(core->phy[0][PHY_STATUS] & MII_SR_AUTONEG_COMPLETE)) { + qemu_get_queue(core->owner_nic)->link_down = false; + timer_mod(core->autoneg_timer, + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 500); + } +} + +static void +e1000e_vm_state_change(void *opaque, int running, RunState state) +{ + E1000ECore *core = opaque; + + if (running) { + trace_e1000e_vm_state_running(); + e1000e_intrmgr_resume(core); + e1000e_autoneg_resume(core); + } else { + trace_e1000e_vm_state_stopped(); + e1000e_autoneg_pause(core); + e1000e_intrmgr_pause(core); + } +} + +void +e1000e_core_pci_realize(E1000ECore *core, + const uint16_t *eeprom_templ, + uint32_t eeprom_size, + const uint8_t *macaddr) +{ + int i; + + core->autoneg_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + e1000e_autoneg_timer, core); + e1000e_intrmgr_pci_realize(core); + + core->vmstate = + qemu_add_vm_change_state_handler(e1000e_vm_state_change, core); + + for (i = 0; i < E1000E_NUM_QUEUES; i++) { + net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, + E1000E_MAX_TX_FRAGS, core->has_vnet); + } + + net_rx_pkt_init(&core->rx_pkt, core->has_vnet); + + e1000x_core_prepare_eeprom(core->eeprom, + eeprom_templ, + eeprom_size, + PCI_DEVICE_GET_CLASS(core->owner)->device_id, + macaddr); + e1000e_update_rx_offloads(core); +} + +void +e1000e_core_pci_uninit(E1000ECore *core) +{ + int i; + + timer_del(core->autoneg_timer); + timer_free(core->autoneg_timer); + + e1000e_intrmgr_pci_unint(core); + + qemu_del_vm_change_state_handler(core->vmstate); + + for (i = 0; i < E1000E_NUM_QUEUES; i++) { + net_tx_pkt_reset(core->tx[i].tx_pkt); + net_tx_pkt_uninit(core->tx[i].tx_pkt); + } + + net_rx_pkt_uninit(core->rx_pkt); +} + +static const uint16_t +e1000e_phy_reg_init[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE] = { + [0] = { + [PHY_CTRL] = MII_CR_SPEED_SELECT_MSB | + MII_CR_FULL_DUPLEX | + MII_CR_AUTO_NEG_EN, + + [PHY_STATUS] = MII_SR_EXTENDED_CAPS | + MII_SR_LINK_STATUS | + MII_SR_AUTONEG_CAPS | + MII_SR_PREAMBLE_SUPPRESS | + MII_SR_EXTENDED_STATUS | + MII_SR_10T_HD_CAPS | + MII_SR_10T_FD_CAPS | + MII_SR_100X_HD_CAPS | + MII_SR_100X_FD_CAPS, + + [PHY_ID1] = 0x141, + [PHY_ID2] = E1000_PHY_ID2_82574x, + [PHY_AUTONEG_ADV] = 0xde1, + [PHY_LP_ABILITY] = 0x7e0, + [PHY_AUTONEG_EXP] = BIT(2), + [PHY_NEXT_PAGE_TX] = BIT(0) | BIT(13), + [PHY_1000T_CTRL] = BIT(8) | BIT(9) | BIT(10) | BIT(11), + [PHY_1000T_STATUS] = 0x3c00, + [PHY_EXT_STATUS] = BIT(12) | BIT(13), + + [PHY_COPPER_CTRL1] = BIT(5) | BIT(6) | BIT(8) | BIT(9) | + BIT(12) | BIT(13), + [PHY_COPPER_STAT1] = BIT(3) | BIT(10) | BIT(11) | BIT(13) | BIT(15) + }, + [2] = { + [PHY_MAC_CTRL1] = BIT(3) | BIT(7), + [PHY_MAC_CTRL2] = BIT(1) | BIT(2) | BIT(6) | BIT(12) + }, + [3] = { + [PHY_LED_TIMER_CTRL] = BIT(0) | BIT(2) | BIT(14) + } +}; + +static const uint32_t e1000e_mac_reg_init[] = { + [PBA] = 0x00140014, + [LEDCTL] = BIT(1) | BIT(8) | BIT(9) | BIT(15) | BIT(17) | BIT(18), + [EXTCNF_CTRL] = BIT(3), + [EEMNGCTL] = BIT(31), + [FLASHT] = 0x2, + [FLSWCTL] = BIT(30) | BIT(31), + [FLOL] = BIT(0), + [RXDCTL] = BIT(16), + [RXDCTL1] = BIT(16), + [TIPG] = 0x8 | (0x8 << 10) | (0x6 << 20), + [RXCFGL] = 0x88F7, + [RXUDP] = 0x319, + [CTRL] = E1000_CTRL_FD | E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN0 | + E1000_CTRL_SPD_1000 | E1000_CTRL_SLU | + E1000_CTRL_ADVD3WUC, + [STATUS] = E1000_STATUS_ASDV_1000 | E1000_STATUS_LU, + [PSRCTL] = (2 << E1000_PSRCTL_BSIZE0_SHIFT) | + (4 << E1000_PSRCTL_BSIZE1_SHIFT) | + (4 << E1000_PSRCTL_BSIZE2_SHIFT), + [TARC0] = 0x3 | E1000_TARC_ENABLE, + [TARC1] = 0x3 | E1000_TARC_ENABLE, + [EECD] = E1000_EECD_AUTO_RD | E1000_EECD_PRES, + [EERD] = E1000_EERW_DONE, + [EEWR] = E1000_EERW_DONE, + [GCR] = E1000_L0S_ADJUST | + E1000_L1_ENTRY_LATENCY_MSB | + E1000_L1_ENTRY_LATENCY_LSB, + [TDFH] = 0x600, + [TDFT] = 0x600, + [TDFHS] = 0x600, + [TDFTS] = 0x600, + [POEMB] = 0x30D, + [PBS] = 0x028, + [MANC] = E1000_MANC_DIS_IP_CHK_ARP, + [FACTPS] = E1000_FACTPS_LAN0_ON | 0x20000000, + [SWSM] = 1, + [RXCSUM] = E1000_RXCSUM_IPOFLD | E1000_RXCSUM_TUOFLD, + [ITR] = E1000E_MIN_XITR, + [EITR...EITR + E1000E_MSIX_VEC_NUM - 1] = E1000E_MIN_XITR, +}; + +void +e1000e_core_reset(E1000ECore *core) +{ + int i; + + timer_del(core->autoneg_timer); + + e1000e_intrmgr_reset(core); + + memset(core->phy, 0, sizeof core->phy); + memmove(core->phy, e1000e_phy_reg_init, sizeof e1000e_phy_reg_init); + memset(core->mac, 0, sizeof core->mac); + memmove(core->mac, e1000e_mac_reg_init, sizeof e1000e_mac_reg_init); + + core->rxbuf_min_shift = 1 + E1000_RING_DESC_LEN_SHIFT; + + if (qemu_get_queue(core->owner_nic)->link_down) { + e1000e_link_down(core); + } + + e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac); + + for (i = 0; i < ARRAY_SIZE(core->tx); i++) { + net_tx_pkt_reset(core->tx[i].tx_pkt); + memset(&core->tx[i].props, 0, sizeof(core->tx[i].props)); + core->tx[i].skip_cp = false; + } +} + +void e1000e_core_pre_save(E1000ECore *core) +{ + int i; + NetClientState *nc = qemu_get_queue(core->owner_nic); + + /* + * If link is down and auto-negotiation is supported and ongoing, + * complete auto-negotiation immediately. This allows us to look + * at MII_SR_AUTONEG_COMPLETE to infer link status on load. + */ + if (nc->link_down && e1000e_have_autoneg(core)) { + core->phy[0][PHY_STATUS] |= MII_SR_AUTONEG_COMPLETE; + e1000e_update_flowctl_status(core); + } + + for (i = 0; i < ARRAY_SIZE(core->tx); i++) { + if (net_tx_pkt_has_fragments(core->tx[i].tx_pkt)) { + core->tx[i].skip_cp = true; + } + } +} + +int +e1000e_core_post_load(E1000ECore *core) +{ + NetClientState *nc = qemu_get_queue(core->owner_nic); + + /* nc.link_down can't be migrated, so infer link_down according + * to link status bit in core.mac[STATUS]. + */ + nc->link_down = (core->mac[STATUS] & E1000_STATUS_LU) == 0; + + return 0; +} diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h new file mode 100644 index 0000000..5f413a9 --- /dev/null +++ b/hw/net/e1000e_core.h @@ -0,0 +1,146 @@ +/* +* Core code for QEMU e1000e emulation +* +* Software developer's manuals: +* http://www.intel.com/content/dam/doc/datasheet/82574l-gbe-controller-datasheet.pdf +* +* Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) +* Developed by Daynix Computing LTD (http://www.daynix.com) +* +* Authors: +* Dmitry Fleytman +* Leonid Bloch +* Yan Vugenfirer +* +* Based on work done by: +* Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. +* Copyright (c) 2008 Qumranet +* Based on work done by: +* Copyright (c) 2007 Dan Aloni +* Copyright (c) 2004 Antony T Curtis +* +* This library is free software; you can redistribute it and/or +* modify it under the terms of the GNU Lesser General Public +* License as published by the Free Software Foundation; either +* version 2 of the License, or (at your option) any later version. +* +* This library is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* Lesser General Public License for more details. +* +* You should have received a copy of the GNU Lesser General Public +* License along with this library; if not, see . +*/ + +#define E1000E_PHY_PAGE_SIZE (0x20) +#define E1000E_PHY_PAGES (0x07) +#define E1000E_MAC_SIZE (0x8000) +#define E1000E_EEPROM_SIZE (64) +#define E1000E_MSIX_VEC_NUM (5) +#define E1000E_NUM_QUEUES (2) + +typedef struct E1000Core E1000ECore; + +enum { PHY_R = BIT(0), + PHY_W = BIT(1), + PHY_RW = PHY_R | PHY_W, + PHY_ANYPAGE = BIT(2) }; + +typedef struct E1000IntrDelayTimer_st { + QEMUTimer *timer; + bool running; + uint32_t delay_reg; + uint32_t delay_resolution_ns; + E1000ECore *core; +} E1000IntrDelayTimer; + +struct E1000Core { + uint32_t mac[E1000E_MAC_SIZE]; + uint16_t phy[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE]; + uint16_t eeprom[E1000E_EEPROM_SIZE]; + + uint32_t rxbuf_sizes[E1000_PSRCTL_BUFFS_PER_DESC]; + uint32_t rx_desc_buf_size; + uint32_t rxbuf_min_shift; + uint8_t rx_desc_len; + + QEMUTimer *autoneg_timer; + + struct e1000e_tx { + e1000x_txd_props props; + + bool skip_cp; + struct NetTxPkt *tx_pkt; + } tx[E1000E_NUM_QUEUES]; + + struct NetRxPkt *rx_pkt; + + bool has_vnet; + int max_queue_num; + + /* Interrupt moderation management */ + uint32_t delayed_causes; + + E1000IntrDelayTimer radv; + E1000IntrDelayTimer rdtr; + E1000IntrDelayTimer raid; + + E1000IntrDelayTimer tadv; + E1000IntrDelayTimer tidv; + + E1000IntrDelayTimer itr; + bool itr_intr_pending; + + E1000IntrDelayTimer eitr[E1000E_MSIX_VEC_NUM]; + bool eitr_intr_pending[E1000E_MSIX_VEC_NUM]; + + VMChangeStateEntry *vmstate; + + uint32_t itr_guest_value; + uint32_t eitr_guest_value[E1000E_MSIX_VEC_NUM]; + + uint16_t vet; + + uint8_t permanent_mac[ETH_ALEN]; + + NICState *owner_nic; + PCIDevice *owner; + void (*owner_start_recv)(PCIDevice *d); +}; + +void +e1000e_core_write(E1000ECore *core, hwaddr addr, uint64_t val, unsigned size); + +uint64_t +e1000e_core_read(E1000ECore *core, hwaddr addr, unsigned size); + +void +e1000e_core_pci_realize(E1000ECore *regs, + const uint16_t *eeprom_templ, + uint32_t eeprom_size, + const uint8_t *macaddr); + +void +e1000e_core_reset(E1000ECore *core); + +void +e1000e_core_pre_save(E1000ECore *core); + +int +e1000e_core_post_load(E1000ECore *core); + +void +e1000e_core_set_link_status(E1000ECore *core); + +void +e1000e_core_pci_uninit(E1000ECore *core); + +int +e1000e_can_receive(E1000ECore *core); + +ssize_t +e1000e_receive(E1000ECore *core, const uint8_t *buf, size_t size); + +ssize_t +e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt); -- cgit v1.1 From 4816dc168b5745708eba4c005f5e8771623ee405 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Dubois Date: Mon, 30 May 2016 19:25:51 +0200 Subject: i.MX: Fix FEC code for MDIO operation selection According to the FEC chapter of i.MX25 reference manual When writing the MMFR register, bit 29 and 28 select the requested operation. * 10 means read operation with valid MII mgmt frame * 11 means read operation with non compliant MII mgmt frame * 01 means write operation with valid MII mgmt frame * 00 means write operation with non compliant MII mgmt frame So while bit 28 does change beween read/write for valid MII mgmt frame, the mening is inverted for non compliant MII mgmt frame. Bit 29 on the other hand means read/write whatever the type of mgmt frame involved. So this patch change the operation selection from bit 28 to bit 29 as it is more generic. Signed-off-by: Jean-Christophe Dubois Signed-off-by: Jason Wang --- hw/net/imx_fec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'hw') diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index 9055ea8..fce3661 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -459,10 +459,10 @@ static void imx_fec_write(void *opaque, hwaddr addr, case 0x040: /* MMFR */ /* store the value */ s->mmfr = value; - if (extract32(value, 28, 1)) { - do_phy_write(s, extract32(value, 18, 9), extract32(value, 0, 16)); - } else { + if (extract32(value, 29, 1)) { s->mmfr = do_phy_read(s, extract32(value, 18, 9)); + } else { + do_phy_write(s, extract32(value, 18, 9), extract32(value, 0, 16)); } /* raise the interrupt as the PHY operation is done */ s->eir |= FEC_INT_MII; -- cgit v1.1 From b413643a5cdac809a1374481c990387b732f9e60 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Dubois Date: Mon, 30 May 2016 19:25:53 +0200 Subject: i.MX: Fix FEC code for MDIO address selection According to the FEC chapter of i.MX25 reference manual When writing to MMFR register, the MDIO device and adress are selected by bit 27 to 23 and bit 22 to 18 respectively. This is a total of 10 bits that need to be used by the Phy chip/address decoding function. This patch fixes the number of bits used from 9 to 10. Signed-off-by: Jean-Christophe Dubois Signed-off-by: Jason Wang --- hw/net/imx_fec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'hw') diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index fce3661..bf68ce6 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -460,9 +460,9 @@ static void imx_fec_write(void *opaque, hwaddr addr, /* store the value */ s->mmfr = value; if (extract32(value, 29, 1)) { - s->mmfr = do_phy_read(s, extract32(value, 18, 9)); + s->mmfr = do_phy_read(s, extract32(value, 18, 10)); } else { - do_phy_write(s, extract32(value, 18, 9), extract32(value, 0, 16)); + do_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16)); } /* raise the interrupt as the PHY operation is done */ s->eir |= FEC_INT_MII; -- cgit v1.1 From ccdb81d3274d281d770703417257bd40bcdf4c0e Mon Sep 17 00:00:00 2001 From: Jean-Christophe Dubois Date: Mon, 30 May 2016 19:25:56 +0200 Subject: i.MX: Fix FEC code for ECR register reset value. According to the FEC chapter of i.MX25 reference manual ECR register is initialized at 0xf0000000 at reset time. We fix the value. Signed-off-by: Jean-Christophe Dubois Signed-off-by: Jason Wang --- hw/net/imx_fec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'hw') diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index bf68ce6..768181e 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -339,7 +339,7 @@ static void imx_fec_reset(DeviceState *d) s->eir = 0; s->eimr = 0; s->rx_enabled = 0; - s->ecr = 0; + s->ecr = 0xf0000000; s->mscr = 0; s->mibc = 0xc0000000; s->rcr = 0x05ee0001; -- cgit v1.1 From ff4b325f5e1d68cd089e71edcaa3fe3432ca18dc Mon Sep 17 00:00:00 2001 From: Jean-Christophe Dubois Date: Mon, 30 May 2016 19:26:00 +0200 Subject: i.MX: reset TX/RX descriptors when FEC is disabled. According to the FEC chapter of i.MX25 reference manual RX adn TX descriptors are reseted when the FEC device is disabled through ECR. Signed-off-by: Jean-Christophe Dubois Signed-off-by: Jason Wang --- hw/net/imx_fec.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'hw') diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index 768181e..7369cfa 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -454,6 +454,8 @@ static void imx_fec_write(void *opaque, hwaddr addr, } if ((s->ecr & FEC_EN) == 0) { s->rx_enabled = 0; + s->rx_descriptor = s->erdsr; + s->tx_descriptor = s->etdsr; } break; case 0x040: /* MMFR */ -- cgit v1.1 From 1bb3c37182399c618485aefe158ea1f94182c67b Mon Sep 17 00:00:00 2001 From: Jean-Christophe Dubois Date: Mon, 30 May 2016 19:26:02 +0200 Subject: i.MX: Rename i.MX FEC defines to ENET_XXX Signed-off-by: Jean-Christophe Dubois Signed-off-by: Jason Wang --- hw/net/imx_fec.c | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'hw') diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index 7369cfa..f5eede8 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -266,7 +266,7 @@ static void imx_fec_update(IMXFECState *s) static void imx_fec_do_tx(IMXFECState *s) { int frame_size = 0; - uint8_t frame[FEC_MAX_FRAME_SIZE]; + uint8_t frame[ENET_MAX_FRAME_SIZE]; uint8_t *ptr = frame; uint32_t addr = s->tx_descriptor; @@ -277,31 +277,31 @@ static void imx_fec_do_tx(IMXFECState *s) imx_fec_read_bd(&bd, addr); FEC_PRINTF("tx_bd %x flags %04x len %d data %08x\n", addr, bd.flags, bd.length, bd.data); - if ((bd.flags & FEC_BD_R) == 0) { + if ((bd.flags & ENET_BD_R) == 0) { /* Run out of descriptors to transmit. */ break; } len = bd.length; - if (frame_size + len > FEC_MAX_FRAME_SIZE) { - len = FEC_MAX_FRAME_SIZE - frame_size; - s->eir |= FEC_INT_BABT; + if (frame_size + len > ENET_MAX_FRAME_SIZE) { + len = ENET_MAX_FRAME_SIZE - frame_size; + s->eir |= ENET_INT_BABT; } dma_memory_read(&address_space_memory, bd.data, ptr, len); ptr += len; frame_size += len; - if (bd.flags & FEC_BD_L) { + if (bd.flags & ENET_BD_L) { /* Last buffer in frame. */ qemu_send_packet(qemu_get_queue(s->nic), frame, len); ptr = frame; frame_size = 0; - s->eir |= FEC_INT_TXF; + s->eir |= ENET_INT_TXF; } - s->eir |= FEC_INT_TXB; - bd.flags &= ~FEC_BD_R; + s->eir |= ENET_INT_TXB; + bd.flags &= ~ENET_BD_R; /* Write back the modified descriptor. */ imx_fec_write_bd(&bd, addr); /* Advance to the next descriptor. */ - if ((bd.flags & FEC_BD_W) != 0) { + if ((bd.flags & ENET_BD_W) != 0) { addr = s->etdsr; } else { addr += 8; @@ -320,7 +320,7 @@ static void imx_fec_enable_rx(IMXFECState *s) imx_fec_read_bd(&bd, s->rx_descriptor); - tmp = ((bd.flags & FEC_BD_E) != 0); + tmp = ((bd.flags & ENET_BD_E) != 0); if (!tmp) { FEC_PRINTF("RX buffer full\n"); @@ -438,21 +438,21 @@ static void imx_fec_write(void *opaque, hwaddr addr, s->eimr = value; break; case 0x010: /* RDAR */ - if ((s->ecr & FEC_EN) && !s->rx_enabled) { + if ((s->ecr & ENET_ECR_ETHEREN) && !s->rx_enabled) { imx_fec_enable_rx(s); } break; case 0x014: /* TDAR */ - if (s->ecr & FEC_EN) { + if (s->ecr & ENET_ECR_ETHEREN) { imx_fec_do_tx(s); } break; case 0x024: /* ECR */ s->ecr = value; - if (value & FEC_RESET) { + if (value & ENET_ECR_RESET) { imx_fec_reset(DEVICE(s)); } - if ((s->ecr & FEC_EN) == 0) { + if ((s->ecr & ENET_ECR_ETHEREN) == 0) { s->rx_enabled = 0; s->rx_descriptor = s->erdsr; s->tx_descriptor = s->etdsr; @@ -467,7 +467,7 @@ static void imx_fec_write(void *opaque, hwaddr addr, do_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16)); } /* raise the interrupt as the PHY operation is done */ - s->eir |= FEC_INT_MII; + s->eir |= ENET_INT_MII; break; case 0x044: /* MSCR */ s->mscr = value & 0xfe; @@ -484,7 +484,7 @@ static void imx_fec_write(void *opaque, hwaddr addr, /* We transmit immediately, so raise GRA immediately. */ s->tcr = value; if (value & 1) { - s->eir |= FEC_INT_GRA; + s->eir |= ENET_INT_GRA; } break; case 0x0e4: /* PALR */ @@ -574,20 +574,20 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, crc_ptr = (uint8_t *) &crc; /* Huge frames are truncted. */ - if (size > FEC_MAX_FRAME_SIZE) { - size = FEC_MAX_FRAME_SIZE; - flags |= FEC_BD_TR | FEC_BD_LG; + if (size > ENET_MAX_FRAME_SIZE) { + size = ENET_MAX_FRAME_SIZE; + flags |= ENET_BD_TR | ENET_BD_LG; } /* Frames larger than the user limit just set error flags. */ if (size > (s->rcr >> 16)) { - flags |= FEC_BD_LG; + flags |= ENET_BD_LG; } addr = s->rx_descriptor; while (size > 0) { imx_fec_read_bd(&bd, addr); - if ((bd.flags & FEC_BD_E) == 0) { + if ((bd.flags & ENET_BD_E) == 0) { /* No descriptors available. Bail out. */ /* * FIXME: This is wrong. We should probably either @@ -616,18 +616,18 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, crc_ptr, 4 - size); crc_ptr += 4 - size; } - bd.flags &= ~FEC_BD_E; + bd.flags &= ~ENET_BD_E; if (size == 0) { /* Last buffer in frame. */ - bd.flags |= flags | FEC_BD_L; + bd.flags |= flags | ENET_BD_L; FEC_PRINTF("rx frame flags %04x\n", bd.flags); - s->eir |= FEC_INT_RXF; + s->eir |= ENET_INT_RXF; } else { - s->eir |= FEC_INT_RXB; + s->eir |= ENET_INT_RXB; } imx_fec_write_bd(&bd, addr); /* Advance to the next descriptor. */ - if ((bd.flags & FEC_BD_W) != 0) { + if ((bd.flags & ENET_BD_W) != 0) { addr = s->erdsr; } else { addr += 8; -- cgit v1.1 From db0de35268066108c693db52db24e89581878dd4 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Dubois Date: Mon, 30 May 2016 19:26:05 +0200 Subject: i.MX: move FEC device to a register array structure. This is to prepare for the ENET Gb device of the i.MX6. Signed-off-by: Jean-Christophe Dubois Signed-off-by: Jason Wang --- hw/net/imx_fec.c | 398 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 222 insertions(+), 176 deletions(-) (limited to 'hw') diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index f5eede8..d8e4145 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -53,30 +53,75 @@ } \ } while (0) +static const char *imx_fec_reg_name(IMXFECState *s, uint32_t index) +{ + static char tmp[20]; + + switch (index) { + case ENET_EIR: + return "EIR"; + case ENET_EIMR: + return "EIMR"; + case ENET_RDAR: + return "RDAR"; + case ENET_TDAR: + return "TDAR"; + case ENET_ECR: + return "ECR"; + case ENET_MMFR: + return "MMFR"; + case ENET_MSCR: + return "MSCR"; + case ENET_MIBC: + return "MIBC"; + case ENET_RCR: + return "RCR"; + case ENET_TCR: + return "TCR"; + case ENET_PALR: + return "PALR"; + case ENET_PAUR: + return "PAUR"; + case ENET_OPD: + return "OPD"; + case ENET_IAUR: + return "IAUR"; + case ENET_IALR: + return "IALR"; + case ENET_GAUR: + return "GAUR"; + case ENET_GALR: + return "GALR"; + case ENET_TFWR: + return "TFWR"; + case ENET_RDSR: + return "RDSR"; + case ENET_TDSR: + return "TDSR"; + case ENET_MRBR: + return "MRBR"; + case ENET_FRBR: + return "FRBR"; + case ENET_FRSR: + return "FRSR"; + case ENET_MIIGSK_CFGR: + return "MIIGSK_CFGR"; + case ENET_MIIGSK_ENR: + return "MIIGSK_ENR"; + default: + sprintf(tmp, "index %d", index); + return tmp; + } +} + static const VMStateDescription vmstate_imx_fec = { .name = TYPE_IMX_FEC, - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (VMStateField[]) { - VMSTATE_UINT32(irq_state, IMXFECState), - VMSTATE_UINT32(eir, IMXFECState), - VMSTATE_UINT32(eimr, IMXFECState), - VMSTATE_UINT32(rx_enabled, IMXFECState), + VMSTATE_UINT32_ARRAY(regs, IMXFECState, ENET_MAX), VMSTATE_UINT32(rx_descriptor, IMXFECState), VMSTATE_UINT32(tx_descriptor, IMXFECState), - VMSTATE_UINT32(ecr, IMXFECState), - VMSTATE_UINT32(mmfr, IMXFECState), - VMSTATE_UINT32(mscr, IMXFECState), - VMSTATE_UINT32(mibc, IMXFECState), - VMSTATE_UINT32(rcr, IMXFECState), - VMSTATE_UINT32(tcr, IMXFECState), - VMSTATE_UINT32(tfwr, IMXFECState), - VMSTATE_UINT32(frsr, IMXFECState), - VMSTATE_UINT32(erdsr, IMXFECState), - VMSTATE_UINT32(etdsr, IMXFECState), - VMSTATE_UINT32(emrbr, IMXFECState), - VMSTATE_UINT32(miigsk_cfgr, IMXFECState), - VMSTATE_UINT32(miigsk_enr, IMXFECState), VMSTATE_UINT32(phy_status, IMXFECState), VMSTATE_UINT32(phy_control, IMXFECState), @@ -252,15 +297,13 @@ static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr) static void imx_fec_update(IMXFECState *s) { - uint32_t active; - uint32_t changed; - - active = s->eir & s->eimr; - changed = active ^ s->irq_state; - if (changed) { - qemu_set_irq(s->irq, active); + if (s->regs[ENET_EIR] & s->regs[ENET_EIMR]) { + FEC_PRINTF("interrupt raised\n"); + qemu_set_irq(s->irq, 1); + } else { + FEC_PRINTF("interrupt lowered\n"); + qemu_set_irq(s->irq, 0); } - s->irq_state = active; } static void imx_fec_do_tx(IMXFECState *s) @@ -284,7 +327,7 @@ static void imx_fec_do_tx(IMXFECState *s) len = bd.length; if (frame_size + len > ENET_MAX_FRAME_SIZE) { len = ENET_MAX_FRAME_SIZE - frame_size; - s->eir |= ENET_INT_BABT; + s->regs[ENET_EIR] |= ENET_INT_BABT; } dma_memory_read(&address_space_memory, bd.data, ptr, len); ptr += len; @@ -294,17 +337,17 @@ static void imx_fec_do_tx(IMXFECState *s) qemu_send_packet(qemu_get_queue(s->nic), frame, len); ptr = frame; frame_size = 0; - s->eir |= ENET_INT_TXF; + s->regs[ENET_EIR] |= ENET_INT_TXF; } - s->eir |= ENET_INT_TXB; + s->regs[ENET_EIR] |= ENET_INT_TXB; bd.flags &= ~ENET_BD_R; /* Write back the modified descriptor. */ imx_fec_write_bd(&bd, addr); /* Advance to the next descriptor. */ if ((bd.flags & ENET_BD_W) != 0) { - addr = s->etdsr; + addr = s->regs[ENET_TDSR]; } else { - addr += 8; + addr += sizeof(bd); } } @@ -316,7 +359,7 @@ static void imx_fec_do_tx(IMXFECState *s) static void imx_fec_enable_rx(IMXFECState *s) { IMXFECBufDesc bd; - uint32_t tmp; + bool tmp; imx_fec_read_bd(&bd, s->rx_descriptor); @@ -324,11 +367,11 @@ static void imx_fec_enable_rx(IMXFECState *s) if (!tmp) { FEC_PRINTF("RX buffer full\n"); - } else if (!s->rx_enabled) { + } else if (!s->regs[ENET_RDAR]) { qemu_flush_queued_packets(qemu_get_queue(s->nic)); } - s->rx_enabled = tmp; + s->regs[ENET_RDAR] = tmp ? ENET_RDAR_RDAR : 0; } static void imx_fec_reset(DeviceState *d) @@ -336,18 +379,26 @@ static void imx_fec_reset(DeviceState *d) IMXFECState *s = IMX_FEC(d); /* Reset the FEC */ - s->eir = 0; - s->eimr = 0; - s->rx_enabled = 0; - s->ecr = 0xf0000000; - s->mscr = 0; - s->mibc = 0xc0000000; - s->rcr = 0x05ee0001; - s->tcr = 0; - s->tfwr = 0; - s->frsr = 0x500; - s->miigsk_cfgr = 0; - s->miigsk_enr = 0x6; + memset(s->regs, 0, sizeof(s->regs)); + s->regs[ENET_ECR] = 0xf0000000; + s->regs[ENET_MIBC] = 0xc0000000; + s->regs[ENET_RCR] = 0x05ee0001; + s->regs[ENET_OPD] = 0x00010000; + + s->regs[ENET_PALR] = (s->conf.macaddr.a[0] << 24) + | (s->conf.macaddr.a[1] << 16) + | (s->conf.macaddr.a[2] << 8) + | s->conf.macaddr.a[3]; + s->regs[ENET_PAUR] = (s->conf.macaddr.a[4] << 24) + | (s->conf.macaddr.a[5] << 16) + | 0x8808; + + s->regs[ENET_FRBR] = 0x00000600; + s->regs[ENET_FRSR] = 0x00000500; + s->regs[ENET_MIIGSK_ENR] = 0x00000006; + + s->rx_descriptor = 0; + s->tx_descriptor = 0; /* We also reset the PHY */ phy_reset(s); @@ -355,185 +406,180 @@ static void imx_fec_reset(DeviceState *d) static uint64_t imx_fec_read(void *opaque, hwaddr addr, unsigned size) { + uint32_t value = 0; IMXFECState *s = IMX_FEC(opaque); - - FEC_PRINTF("reading from @ 0x%" HWADDR_PRIx "\n", addr); - - switch (addr & 0x3ff) { - case 0x004: - return s->eir; - case 0x008: - return s->eimr; - case 0x010: - return s->rx_enabled ? (1 << 24) : 0; /* RDAR */ - case 0x014: - return 0; /* TDAR */ - case 0x024: - return s->ecr; - case 0x040: - return s->mmfr; - case 0x044: - return s->mscr; - case 0x064: - return s->mibc; /* MIBC */ - case 0x084: - return s->rcr; - case 0x0c4: - return s->tcr; - case 0x0e4: /* PALR */ - return (s->conf.macaddr.a[0] << 24) - | (s->conf.macaddr.a[1] << 16) - | (s->conf.macaddr.a[2] << 8) - | s->conf.macaddr.a[3]; - break; - case 0x0e8: /* PAUR */ - return (s->conf.macaddr.a[4] << 24) - | (s->conf.macaddr.a[5] << 16) - | 0x8808; - case 0x0ec: - return 0x10000; /* OPD */ - case 0x118: - return 0; - case 0x11c: - return 0; - case 0x120: - return 0; - case 0x124: - return 0; - case 0x144: - return s->tfwr; - case 0x14c: - return 0x600; - case 0x150: - return s->frsr; - case 0x180: - return s->erdsr; - case 0x184: - return s->etdsr; - case 0x188: - return s->emrbr; - case 0x300: - return s->miigsk_cfgr; - case 0x308: - return s->miigsk_enr; + uint32_t index = addr >> 2; + + switch (index) { + case ENET_EIR: + case ENET_EIMR: + case ENET_RDAR: + case ENET_TDAR: + case ENET_ECR: + case ENET_MMFR: + case ENET_MSCR: + case ENET_MIBC: + case ENET_RCR: + case ENET_TCR: + case ENET_PALR: + case ENET_PAUR: + case ENET_OPD: + case ENET_IAUR: + case ENET_IALR: + case ENET_GAUR: + case ENET_GALR: + case ENET_TFWR: + case ENET_RDSR: + case ENET_TDSR: + case ENET_MRBR: + case ENET_FRBR: + case ENET_FRSR: + case ENET_MIIGSK_CFGR: + case ENET_MIIGSK_ENR: + value = s->regs[index]; + break; default: - qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%" - HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr); - return 0; + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%" + PRIx32 "\n", TYPE_IMX_FEC, __func__, index * 4); + break; } + + FEC_PRINTF("reg[%s] => 0x%" PRIx32 "\n", imx_fec_reg_name(s, index), + value); + + return value; } static void imx_fec_write(void *opaque, hwaddr addr, uint64_t value, unsigned size) { IMXFECState *s = IMX_FEC(opaque); + uint32_t index = addr >> 2; - FEC_PRINTF("writing 0x%08x @ 0x%" HWADDR_PRIx "\n", (int)value, addr); + FEC_PRINTF("reg[%s] <= 0x%" PRIx32 "\n", imx_fec_reg_name(s, index), + (uint32_t)value); - switch (addr & 0x3ff) { - case 0x004: /* EIR */ - s->eir &= ~value; + switch (index) { + case ENET_EIR: + s->regs[index] &= ~value; break; - case 0x008: /* EIMR */ - s->eimr = value; + case ENET_EIMR: + s->regs[index] = value; break; - case 0x010: /* RDAR */ - if ((s->ecr & ENET_ECR_ETHEREN) && !s->rx_enabled) { - imx_fec_enable_rx(s); + case ENET_RDAR: + if (s->regs[ENET_ECR] & ENET_ECR_ETHEREN) { + if (!s->regs[index]) { + s->regs[index] = ENET_RDAR_RDAR; + imx_fec_enable_rx(s); + } + } else { + s->regs[index] = 0; } break; - case 0x014: /* TDAR */ - if (s->ecr & ENET_ECR_ETHEREN) { + case ENET_TDAR: + if (s->regs[ENET_ECR] & ENET_ECR_ETHEREN) { + s->regs[index] = ENET_TDAR_TDAR; imx_fec_do_tx(s); } + s->regs[index] = 0; break; - case 0x024: /* ECR */ - s->ecr = value; + case ENET_ECR: if (value & ENET_ECR_RESET) { - imx_fec_reset(DEVICE(s)); + return imx_fec_reset(DEVICE(s)); } - if ((s->ecr & ENET_ECR_ETHEREN) == 0) { - s->rx_enabled = 0; - s->rx_descriptor = s->erdsr; - s->tx_descriptor = s->etdsr; + s->regs[index] = value; + if ((s->regs[index] & ENET_ECR_ETHEREN) == 0) { + s->regs[ENET_RDAR] = 0; + s->rx_descriptor = s->regs[ENET_RDSR]; + s->regs[ENET_TDAR] = 0; + s->tx_descriptor = s->regs[ENET_TDSR]; } break; - case 0x040: /* MMFR */ - /* store the value */ - s->mmfr = value; + case ENET_MMFR: + s->regs[index] = value; if (extract32(value, 29, 1)) { - s->mmfr = do_phy_read(s, extract32(value, 18, 10)); + /* This is a read operation */ + s->regs[ENET_MMFR] = deposit32(s->regs[ENET_MMFR], 0, 16, + do_phy_read(s, + extract32(value, + 18, 10))); } else { + /* This a write operation */ do_phy_write(s, extract32(value, 18, 10), extract32(value, 0, 16)); } /* raise the interrupt as the PHY operation is done */ - s->eir |= ENET_INT_MII; + s->regs[ENET_EIR] |= ENET_INT_MII; break; - case 0x044: /* MSCR */ - s->mscr = value & 0xfe; + case ENET_MSCR: + s->regs[index] = value & 0xfe; break; - case 0x064: /* MIBC */ + case ENET_MIBC: /* TODO: Implement MIB. */ - s->mibc = (value & 0x80000000) ? 0xc0000000 : 0; + s->regs[index] = (value & 0x80000000) ? 0xc0000000 : 0; break; - case 0x084: /* RCR */ - s->rcr = value & 0x07ff003f; + case ENET_RCR: + s->regs[index] = value & 0x07ff003f; /* TODO: Implement LOOP mode. */ break; - case 0x0c4: /* TCR */ + case ENET_TCR: /* We transmit immediately, so raise GRA immediately. */ - s->tcr = value; + s->regs[index] = value; if (value & 1) { - s->eir |= ENET_INT_GRA; + s->regs[ENET_EIR] |= ENET_INT_GRA; } break; - case 0x0e4: /* PALR */ + case ENET_PALR: + s->regs[index] = value; s->conf.macaddr.a[0] = value >> 24; s->conf.macaddr.a[1] = value >> 16; s->conf.macaddr.a[2] = value >> 8; s->conf.macaddr.a[3] = value; break; - case 0x0e8: /* PAUR */ + case ENET_PAUR: + s->regs[index] = (value | 0x0000ffff) & 0xffff8808; s->conf.macaddr.a[4] = value >> 24; s->conf.macaddr.a[5] = value >> 16; break; - case 0x0ec: /* OPDR */ + case ENET_OPD: + s->regs[index] = (value & 0x0000ffff) | 0x00010000; break; - case 0x118: /* IAUR */ - case 0x11c: /* IALR */ - case 0x120: /* GAUR */ - case 0x124: /* GALR */ + case ENET_IAUR: + case ENET_IALR: + case ENET_GAUR: + case ENET_GALR: /* TODO: implement MAC hash filtering. */ break; - case 0x144: /* TFWR */ - s->tfwr = value & 3; + case ENET_TFWR: + s->regs[index] = value & 3; break; - case 0x14c: /* FRBR */ - /* FRBR writes ignored. */ + case ENET_FRBR: + /* FRBR is read only */ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Register FRBR is read only\n", + TYPE_IMX_FEC, __func__); break; - case 0x150: /* FRSR */ - s->frsr = (value & 0x3fc) | 0x400; + case ENET_FRSR: + s->regs[index] = (value & 0x000003fc) | 0x00000400; break; - case 0x180: /* ERDSR */ - s->erdsr = value & ~3; - s->rx_descriptor = s->erdsr; + case ENET_RDSR: + s->regs[index] = value & ~3; + s->rx_descriptor = s->regs[index]; break; - case 0x184: /* ETDSR */ - s->etdsr = value & ~3; - s->tx_descriptor = s->etdsr; + case ENET_TDSR: + s->regs[index] = value & ~3; + s->tx_descriptor = s->regs[index]; break; - case 0x188: /* EMRBR */ - s->emrbr = value & 0x7f0; + case ENET_MRBR: + s->regs[index] = value & 0x000007f0; break; - case 0x300: /* MIIGSK_CFGR */ - s->miigsk_cfgr = value & 0x53; + case ENET_MIIGSK_CFGR: + s->regs[index] = value & 0x00000053; break; - case 0x308: /* MIIGSK_ENR */ - s->miigsk_enr = (value & 0x2) ? 0x6 : 0; + case ENET_MIIGSK_ENR: + s->regs[index] = (value & 0x00000002) ? 0x00000006 : 0; break; default: qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%" - HWADDR_PRIx "\n", TYPE_IMX_FEC, __func__, addr); + PRIx32 "\n", TYPE_IMX_FEC, __func__, index * 4); break; } @@ -544,7 +590,7 @@ static int imx_fec_can_receive(NetClientState *nc) { IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); - return s->rx_enabled; + return s->regs[ENET_RDAR] ? 1 : 0; } static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, @@ -562,7 +608,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, FEC_PRINTF("len %d\n", (int)size); - if (!s->rx_enabled) { + if (!s->regs[ENET_RDAR]) { qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n", TYPE_IMX_FEC, __func__); return 0; @@ -580,7 +626,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, } /* Frames larger than the user limit just set error flags. */ - if (size > (s->rcr >> 16)) { + if (size > (s->regs[ENET_RCR] >> 16)) { flags |= ENET_BD_LG; } @@ -598,7 +644,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, TYPE_IMX_FEC, __func__); break; } - buf_len = (size <= s->emrbr) ? size : s->emrbr; + buf_len = (size <= s->regs[ENET_MRBR]) ? size : s->regs[ENET_MRBR]; bd.length = buf_len; size -= buf_len; @@ -621,16 +667,16 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, /* Last buffer in frame. */ bd.flags |= flags | ENET_BD_L; FEC_PRINTF("rx frame flags %04x\n", bd.flags); - s->eir |= ENET_INT_RXF; + s->regs[ENET_EIR] |= ENET_INT_RXF; } else { - s->eir |= ENET_INT_RXB; + s->regs[ENET_EIR] |= ENET_INT_RXB; } imx_fec_write_bd(&bd, addr); /* Advance to the next descriptor. */ if ((bd.flags & ENET_BD_W) != 0) { - addr = s->erdsr; + addr = s->regs[ENET_RDSR]; } else { - addr += 8; + addr += sizeof(bd); } } s->rx_descriptor = addr; -- cgit v1.1 From a699b410d7120dd67bc6c9beae44868780866f09 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Dubois Date: Mon, 30 May 2016 19:26:10 +0200 Subject: Add ENET/Gbps Ethernet support to FEC device The ENET device (present in i.MX6) is "derived" from FEC and backward compatible with it. This patch adds the necessary support of the added feature in the ENET device to allow Linux to use it (on supported processors). Signed-off-by: Jean-Christophe Dubois Signed-off-by: Jason Wang --- hw/arm/fsl-imx25.c | 1 + hw/net/imx_fec.c | 673 +++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 573 insertions(+), 101 deletions(-) (limited to 'hw') diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c index 2f878b9..1cd749a 100644 --- a/hw/arm/fsl-imx25.c +++ b/hw/arm/fsl-imx25.c @@ -191,6 +191,7 @@ static void fsl_imx25_realize(DeviceState *dev, Error **errp) } qdev_set_nic_properties(DEVICE(&s->fec), &nd_table[0]); + object_property_set_bool(OBJECT(&s->fec), true, "realized", &err); if (err) { error_propagate(errp, err); diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index d8e4145..d91e029 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -25,6 +25,8 @@ #include "hw/net/imx_fec.h" #include "sysemu/dma.h" #include "qemu/log.h" +#include "net/checksum.h" +#include "net/eth.h" /* For crc32 */ #include @@ -53,10 +55,93 @@ } \ } while (0) -static const char *imx_fec_reg_name(IMXFECState *s, uint32_t index) +static const char *imx_default_reg_name(IMXFECState *s, uint32_t index) { static char tmp[20]; + sprintf(tmp, "index %d", index); + return tmp; +} + +static const char *imx_fec_reg_name(IMXFECState *s, uint32_t index) +{ + switch (index) { + case ENET_FRBR: + return "FRBR"; + case ENET_FRSR: + return "FRSR"; + case ENET_MIIGSK_CFGR: + return "MIIGSK_CFGR"; + case ENET_MIIGSK_ENR: + return "MIIGSK_ENR"; + default: + return imx_default_reg_name(s, index); + } +} + +static const char *imx_enet_reg_name(IMXFECState *s, uint32_t index) +{ + switch (index) { + case ENET_RSFL: + return "RSFL"; + case ENET_RSEM: + return "RSEM"; + case ENET_RAEM: + return "RAEM"; + case ENET_RAFL: + return "RAFL"; + case ENET_TSEM: + return "TSEM"; + case ENET_TAEM: + return "TAEM"; + case ENET_TAFL: + return "TAFL"; + case ENET_TIPG: + return "TIPG"; + case ENET_FTRL: + return "FTRL"; + case ENET_TACC: + return "TACC"; + case ENET_RACC: + return "RACC"; + case ENET_ATCR: + return "ATCR"; + case ENET_ATVR: + return "ATVR"; + case ENET_ATOFF: + return "ATOFF"; + case ENET_ATPER: + return "ATPER"; + case ENET_ATCOR: + return "ATCOR"; + case ENET_ATINC: + return "ATINC"; + case ENET_ATSTMP: + return "ATSTMP"; + case ENET_TGSR: + return "TGSR"; + case ENET_TCSR0: + return "TCSR0"; + case ENET_TCCR0: + return "TCCR0"; + case ENET_TCSR1: + return "TCSR1"; + case ENET_TCCR1: + return "TCCR1"; + case ENET_TCSR2: + return "TCSR2"; + case ENET_TCCR2: + return "TCCR2"; + case ENET_TCSR3: + return "TCSR3"; + case ENET_TCCR3: + return "TCCR3"; + default: + return imx_default_reg_name(s, index); + } +} +static const char *imx_eth_reg_name(IMXFECState *s, uint32_t index) +{ switch (index) { case ENET_EIR: return "EIR"; @@ -100,21 +185,16 @@ static const char *imx_fec_reg_name(IMXFECState *s, uint32_t index) return "TDSR"; case ENET_MRBR: return "MRBR"; - case ENET_FRBR: - return "FRBR"; - case ENET_FRSR: - return "FRSR"; - case ENET_MIIGSK_CFGR: - return "MIIGSK_CFGR"; - case ENET_MIIGSK_ENR: - return "MIIGSK_ENR"; default: - sprintf(tmp, "index %d", index); - return tmp; + if (s->is_fec) { + return imx_fec_reg_name(s, index); + } else { + return imx_enet_reg_name(s, index); + } } } -static const VMStateDescription vmstate_imx_fec = { +static const VMStateDescription vmstate_imx_eth = { .name = TYPE_IMX_FEC, .version_id = 2, .minimum_version_id = 2, @@ -140,7 +220,7 @@ static const VMStateDescription vmstate_imx_fec = { #define PHY_INT_PARFAULT (1 << 2) #define PHY_INT_AUTONEG_PAGE (1 << 1) -static void imx_fec_update(IMXFECState *s); +static void imx_eth_update(IMXFECState *s); /* * The MII phy could raise a GPIO to the processor which in turn @@ -150,7 +230,7 @@ static void imx_fec_update(IMXFECState *s); */ static void phy_update_irq(IMXFECState *s) { - imx_fec_update(s); + imx_eth_update(s); } static void phy_update_link(IMXFECState *s) @@ -169,7 +249,7 @@ static void phy_update_link(IMXFECState *s) phy_update_irq(s); } -static void imx_fec_set_link(NetClientState *nc) +static void imx_eth_set_link(NetClientState *nc) { phy_update_link(IMX_FEC(qemu_get_nic_opaque(nc))); } @@ -295,14 +375,28 @@ static void imx_fec_write_bd(IMXFECBufDesc *bd, dma_addr_t addr) dma_memory_write(&address_space_memory, addr, bd, sizeof(*bd)); } -static void imx_fec_update(IMXFECState *s) +static void imx_enet_read_bd(IMXENETBufDesc *bd, dma_addr_t addr) +{ + dma_memory_read(&address_space_memory, addr, bd, sizeof(*bd)); +} + +static void imx_enet_write_bd(IMXENETBufDesc *bd, dma_addr_t addr) { - if (s->regs[ENET_EIR] & s->regs[ENET_EIMR]) { - FEC_PRINTF("interrupt raised\n"); - qemu_set_irq(s->irq, 1); + dma_memory_write(&address_space_memory, addr, bd, sizeof(*bd)); +} + +static void imx_eth_update(IMXFECState *s) +{ + if (s->regs[ENET_EIR] & s->regs[ENET_EIMR] & ENET_INT_TS_TIMER) { + qemu_set_irq(s->irq[1], 1); + } else { + qemu_set_irq(s->irq[1], 0); + } + + if (s->regs[ENET_EIR] & s->regs[ENET_EIMR] & ENET_INT_MAC) { + qemu_set_irq(s->irq[0], 1); } else { - FEC_PRINTF("interrupt lowered\n"); - qemu_set_irq(s->irq, 0); + qemu_set_irq(s->irq[0], 0); } } @@ -322,6 +416,7 @@ static void imx_fec_do_tx(IMXFECState *s) addr, bd.flags, bd.length, bd.data); if ((bd.flags & ENET_BD_R) == 0) { /* Run out of descriptors to transmit. */ + FEC_PRINTF("tx_bd ran out of descriptors to transmit\n"); break; } len = bd.length; @@ -353,10 +448,90 @@ static void imx_fec_do_tx(IMXFECState *s) s->tx_descriptor = addr; - imx_fec_update(s); + imx_eth_update(s); +} + +static void imx_enet_do_tx(IMXFECState *s) +{ + int frame_size = 0; + uint8_t frame[ENET_MAX_FRAME_SIZE]; + uint8_t *ptr = frame; + uint32_t addr = s->tx_descriptor; + + while (1) { + IMXENETBufDesc bd; + int len; + + imx_enet_read_bd(&bd, addr); + FEC_PRINTF("tx_bd %x flags %04x len %d data %08x option %04x " + "status %04x\n", addr, bd.flags, bd.length, bd.data, + bd.option, bd.status); + if ((bd.flags & ENET_BD_R) == 0) { + /* Run out of descriptors to transmit. */ + break; + } + len = bd.length; + if (frame_size + len > ENET_MAX_FRAME_SIZE) { + len = ENET_MAX_FRAME_SIZE - frame_size; + s->regs[ENET_EIR] |= ENET_INT_BABT; + } + dma_memory_read(&address_space_memory, bd.data, ptr, len); + ptr += len; + frame_size += len; + if (bd.flags & ENET_BD_L) { + if (bd.option & ENET_BD_PINS) { + struct ip_header *ip_hd = PKT_GET_IP_HDR(frame); + if (IP_HEADER_VERSION(ip_hd) == 4) { + net_checksum_calculate(frame, frame_size); + } + } + if (bd.option & ENET_BD_IINS) { + struct ip_header *ip_hd = PKT_GET_IP_HDR(frame); + /* We compute checksum only for IPv4 frames */ + if (IP_HEADER_VERSION(ip_hd) == 4) { + uint16_t csum; + ip_hd->ip_sum = 0; + csum = net_raw_checksum((uint8_t *)ip_hd, sizeof(*ip_hd)); + ip_hd->ip_sum = cpu_to_be16(csum); + } + } + /* Last buffer in frame. */ + qemu_send_packet(qemu_get_queue(s->nic), frame, len); + ptr = frame; + frame_size = 0; + if (bd.option & ENET_BD_TX_INT) { + s->regs[ENET_EIR] |= ENET_INT_TXF; + } + } + if (bd.option & ENET_BD_TX_INT) { + s->regs[ENET_EIR] |= ENET_INT_TXB; + } + bd.flags &= ~ENET_BD_R; + /* Write back the modified descriptor. */ + imx_enet_write_bd(&bd, addr); + /* Advance to the next descriptor. */ + if ((bd.flags & ENET_BD_W) != 0) { + addr = s->regs[ENET_TDSR]; + } else { + addr += sizeof(bd); + } + } + + s->tx_descriptor = addr; + + imx_eth_update(s); +} + +static void imx_eth_do_tx(IMXFECState *s) +{ + if (!s->is_fec && (s->regs[ENET_ECR] & ENET_ECR_EN1588)) { + imx_enet_do_tx(s); + } else { + imx_fec_do_tx(s); + } } -static void imx_fec_enable_rx(IMXFECState *s) +static void imx_eth_enable_rx(IMXFECState *s) { IMXFECBufDesc bd; bool tmp; @@ -374,11 +549,11 @@ static void imx_fec_enable_rx(IMXFECState *s) s->regs[ENET_RDAR] = tmp ? ENET_RDAR_RDAR : 0; } -static void imx_fec_reset(DeviceState *d) +static void imx_eth_reset(DeviceState *d) { IMXFECState *s = IMX_FEC(d); - /* Reset the FEC */ + /* Reset the Device */ memset(s->regs, 0, sizeof(s->regs)); s->regs[ENET_ECR] = 0xf0000000; s->regs[ENET_MIBC] = 0xc0000000; @@ -393,9 +568,19 @@ static void imx_fec_reset(DeviceState *d) | (s->conf.macaddr.a[5] << 16) | 0x8808; - s->regs[ENET_FRBR] = 0x00000600; - s->regs[ENET_FRSR] = 0x00000500; - s->regs[ENET_MIIGSK_ENR] = 0x00000006; + if (s->is_fec) { + s->regs[ENET_FRBR] = 0x00000600; + s->regs[ENET_FRSR] = 0x00000500; + s->regs[ENET_MIIGSK_ENR] = 0x00000006; + } else { + s->regs[ENET_RAEM] = 0x00000004; + s->regs[ENET_RAFL] = 0x00000004; + s->regs[ENET_TAEM] = 0x00000004; + s->regs[ENET_TAFL] = 0x00000008; + s->regs[ENET_TIPG] = 0x0000000c; + s->regs[ENET_FTRL] = 0x000007ff; + s->regs[ENET_ATPER] = 0x3b9aca00; + } s->rx_descriptor = 0; s->tx_descriptor = 0; @@ -404,11 +589,67 @@ static void imx_fec_reset(DeviceState *d) phy_reset(s); } -static uint64_t imx_fec_read(void *opaque, hwaddr addr, unsigned size) +static uint32_t imx_default_read(IMXFECState *s, uint32_t index) +{ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%" + PRIx32 "\n", TYPE_IMX_FEC, __func__, index * 4); + return 0; +} + +static uint32_t imx_fec_read(IMXFECState *s, uint32_t index) +{ + switch (index) { + case ENET_FRBR: + case ENET_FRSR: + case ENET_MIIGSK_CFGR: + case ENET_MIIGSK_ENR: + return s->regs[index]; + default: + return imx_default_read(s, index); + } +} + +static uint32_t imx_enet_read(IMXFECState *s, uint32_t index) +{ + switch (index) { + case ENET_RSFL: + case ENET_RSEM: + case ENET_RAEM: + case ENET_RAFL: + case ENET_TSEM: + case ENET_TAEM: + case ENET_TAFL: + case ENET_TIPG: + case ENET_FTRL: + case ENET_TACC: + case ENET_RACC: + case ENET_ATCR: + case ENET_ATVR: + case ENET_ATOFF: + case ENET_ATPER: + case ENET_ATCOR: + case ENET_ATINC: + case ENET_ATSTMP: + case ENET_TGSR: + case ENET_TCSR0: + case ENET_TCCR0: + case ENET_TCSR1: + case ENET_TCCR1: + case ENET_TCSR2: + case ENET_TCCR2: + case ENET_TCSR3: + case ENET_TCCR3: + return s->regs[index]; + default: + return imx_default_read(s, index); + } +} + +static uint64_t imx_eth_read(void *opaque, hwaddr offset, unsigned size) { uint32_t value = 0; IMXFECState *s = IMX_FEC(opaque); - uint32_t index = addr >> 2; + uint32_t index = offset >> 2; switch (index) { case ENET_EIR: @@ -432,32 +673,126 @@ static uint64_t imx_fec_read(void *opaque, hwaddr addr, unsigned size) case ENET_RDSR: case ENET_TDSR: case ENET_MRBR: - case ENET_FRBR: - case ENET_FRSR: - case ENET_MIIGSK_CFGR: - case ENET_MIIGSK_ENR: value = s->regs[index]; break; default: - qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad register at offset 0x%" - PRIx32 "\n", TYPE_IMX_FEC, __func__, index * 4); + if (s->is_fec) { + value = imx_fec_read(s, index); + } else { + value = imx_enet_read(s, index); + } break; } - FEC_PRINTF("reg[%s] => 0x%" PRIx32 "\n", imx_fec_reg_name(s, index), + FEC_PRINTF("reg[%s] => 0x%" PRIx32 "\n", imx_eth_reg_name(s, index), value); return value; } -static void imx_fec_write(void *opaque, hwaddr addr, - uint64_t value, unsigned size) +static void imx_default_write(IMXFECState *s, uint32_t index, uint32_t value) +{ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%" + PRIx32 "\n", TYPE_IMX_FEC, __func__, index * 4); + return; +} + +static void imx_fec_write(IMXFECState *s, uint32_t index, uint32_t value) +{ + switch (index) { + case ENET_FRBR: + /* FRBR is read only */ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Register FRBR is read only\n", + TYPE_IMX_FEC, __func__); + break; + case ENET_FRSR: + s->regs[index] = (value & 0x000003fc) | 0x00000400; + break; + case ENET_MIIGSK_CFGR: + s->regs[index] = value & 0x00000053; + break; + case ENET_MIIGSK_ENR: + s->regs[index] = (value & 0x00000002) ? 0x00000006 : 0; + break; + default: + imx_default_write(s, index, value); + break; + } +} + +static void imx_enet_write(IMXFECState *s, uint32_t index, uint32_t value) +{ + switch (index) { + case ENET_RSFL: + case ENET_RSEM: + case ENET_RAEM: + case ENET_RAFL: + case ENET_TSEM: + case ENET_TAEM: + case ENET_TAFL: + s->regs[index] = value & 0x000001ff; + break; + case ENET_TIPG: + s->regs[index] = value & 0x0000001f; + break; + case ENET_FTRL: + s->regs[index] = value & 0x00003fff; + break; + case ENET_TACC: + s->regs[index] = value & 0x00000019; + break; + case ENET_RACC: + s->regs[index] = value & 0x000000C7; + break; + case ENET_ATCR: + s->regs[index] = value & 0x00002a9d; + break; + case ENET_ATVR: + case ENET_ATOFF: + case ENET_ATPER: + s->regs[index] = value; + break; + case ENET_ATSTMP: + /* ATSTMP is read only */ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Register ATSTMP is read only\n", + TYPE_IMX_FEC, __func__); + break; + case ENET_ATCOR: + s->regs[index] = value & 0x7fffffff; + break; + case ENET_ATINC: + s->regs[index] = value & 0x00007f7f; + break; + case ENET_TGSR: + /* implement clear timer flag */ + value = value & 0x0000000f; + break; + case ENET_TCSR0: + case ENET_TCSR1: + case ENET_TCSR2: + case ENET_TCSR3: + value = value & 0x000000fd; + break; + case ENET_TCCR0: + case ENET_TCCR1: + case ENET_TCCR2: + case ENET_TCCR3: + s->regs[index] = value; + break; + default: + imx_default_write(s, index, value); + break; + } +} + +static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value, + unsigned size) { IMXFECState *s = IMX_FEC(opaque); - uint32_t index = addr >> 2; + uint32_t index = offset >> 2; - FEC_PRINTF("reg[%s] <= 0x%" PRIx32 "\n", imx_fec_reg_name(s, index), - (uint32_t)value); + FEC_PRINTF("reg[%s] <= 0x%" PRIx32 "\n", imx_eth_reg_name(s, index), + (uint32_t)value); switch (index) { case ENET_EIR: @@ -470,7 +805,7 @@ static void imx_fec_write(void *opaque, hwaddr addr, if (s->regs[ENET_ECR] & ENET_ECR_ETHEREN) { if (!s->regs[index]) { s->regs[index] = ENET_RDAR_RDAR; - imx_fec_enable_rx(s); + imx_eth_enable_rx(s); } } else { s->regs[index] = 0; @@ -479,13 +814,13 @@ static void imx_fec_write(void *opaque, hwaddr addr, case ENET_TDAR: if (s->regs[ENET_ECR] & ENET_ECR_ETHEREN) { s->regs[index] = ENET_TDAR_TDAR; - imx_fec_do_tx(s); + imx_eth_do_tx(s); } s->regs[index] = 0; break; case ENET_ECR: if (value & ENET_ECR_RESET) { - return imx_fec_reset(DEVICE(s)); + return imx_eth_reset(DEVICE(s)); } s->regs[index] = value; if ((s->regs[index] & ENET_ECR_ETHEREN) == 0) { @@ -550,46 +885,49 @@ static void imx_fec_write(void *opaque, hwaddr addr, /* TODO: implement MAC hash filtering. */ break; case ENET_TFWR: - s->regs[index] = value & 3; - break; - case ENET_FRBR: - /* FRBR is read only */ - qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Register FRBR is read only\n", - TYPE_IMX_FEC, __func__); - break; - case ENET_FRSR: - s->regs[index] = (value & 0x000003fc) | 0x00000400; + if (s->is_fec) { + s->regs[index] = value & 0x3; + } else { + s->regs[index] = value & 0x13f; + } break; case ENET_RDSR: - s->regs[index] = value & ~3; + if (s->is_fec) { + s->regs[index] = value & ~3; + } else { + s->regs[index] = value & ~7; + } s->rx_descriptor = s->regs[index]; break; case ENET_TDSR: - s->regs[index] = value & ~3; + if (s->is_fec) { + s->regs[index] = value & ~3; + } else { + s->regs[index] = value & ~7; + } s->tx_descriptor = s->regs[index]; break; case ENET_MRBR: - s->regs[index] = value & 0x000007f0; - break; - case ENET_MIIGSK_CFGR: - s->regs[index] = value & 0x00000053; - break; - case ENET_MIIGSK_ENR: - s->regs[index] = (value & 0x00000002) ? 0x00000006 : 0; + s->regs[index] = value & 0x00003ff0; break; default: - qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Bad address at offset 0x%" - PRIx32 "\n", TYPE_IMX_FEC, __func__, index * 4); - break; + if (s->is_fec) { + imx_fec_write(s, index, value); + } else { + imx_enet_write(s, index, value); + } + return; } - imx_fec_update(s); + imx_eth_update(s); } -static int imx_fec_can_receive(NetClientState *nc) +static int imx_eth_can_receive(NetClientState *nc) { IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); + FEC_PRINTF("\n"); + return s->regs[ENET_RDAR] ? 1 : 0; } @@ -619,7 +957,7 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, crc = cpu_to_be32(crc32(~0, buf, size)); crc_ptr = (uint8_t *) &crc; - /* Huge frames are truncted. */ + /* Huge frames are truncated. */ if (size > ENET_MAX_FRAME_SIZE) { size = ENET_MAX_FRAME_SIZE; flags |= ENET_BD_TR | ENET_BD_LG; @@ -680,81 +1018,214 @@ static ssize_t imx_fec_receive(NetClientState *nc, const uint8_t *buf, } } s->rx_descriptor = addr; - imx_fec_enable_rx(s); - imx_fec_update(s); + imx_eth_enable_rx(s); + imx_eth_update(s); + return len; +} + +static ssize_t imx_enet_receive(NetClientState *nc, const uint8_t *buf, + size_t len) +{ + IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); + IMXENETBufDesc bd; + uint32_t flags = 0; + uint32_t addr; + uint32_t crc; + uint32_t buf_addr; + uint8_t *crc_ptr; + unsigned int buf_len; + size_t size = len; + + FEC_PRINTF("len %d\n", (int)size); + + if (!s->regs[ENET_RDAR]) { + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Unexpected packet\n", + TYPE_IMX_FEC, __func__); + return 0; + } + + /* 4 bytes for the CRC. */ + size += 4; + crc = cpu_to_be32(crc32(~0, buf, size)); + crc_ptr = (uint8_t *) &crc; + + /* Huge frames are truncted. */ + if (size > ENET_MAX_FRAME_SIZE) { + size = ENET_MAX_FRAME_SIZE; + flags |= ENET_BD_TR | ENET_BD_LG; + } + + /* Frames larger than the user limit just set error flags. */ + if (size > (s->regs[ENET_RCR] >> 16)) { + flags |= ENET_BD_LG; + } + + addr = s->rx_descriptor; + while (size > 0) { + imx_enet_read_bd(&bd, addr); + if ((bd.flags & ENET_BD_E) == 0) { + /* No descriptors available. Bail out. */ + /* + * FIXME: This is wrong. We should probably either + * save the remainder for when more RX buffers are + * available, or flag an error. + */ + qemu_log_mask(LOG_GUEST_ERROR, "[%s]%s: Lost end of frame\n", + TYPE_IMX_FEC, __func__); + break; + } + buf_len = (size <= s->regs[ENET_MRBR]) ? size : s->regs[ENET_MRBR]; + bd.length = buf_len; + size -= buf_len; + + FEC_PRINTF("rx_bd 0x%x length %d\n", addr, bd.length); + + /* The last 4 bytes are the CRC. */ + if (size < 4) { + buf_len += size - 4; + } + buf_addr = bd.data; + dma_memory_write(&address_space_memory, buf_addr, buf, buf_len); + buf += buf_len; + if (size < 4) { + dma_memory_write(&address_space_memory, buf_addr + buf_len, + crc_ptr, 4 - size); + crc_ptr += 4 - size; + } + bd.flags &= ~ENET_BD_E; + if (size == 0) { + /* Last buffer in frame. */ + bd.flags |= flags | ENET_BD_L; + FEC_PRINTF("rx frame flags %04x\n", bd.flags); + if (bd.option & ENET_BD_RX_INT) { + s->regs[ENET_EIR] |= ENET_INT_RXF; + } + } else { + if (bd.option & ENET_BD_RX_INT) { + s->regs[ENET_EIR] |= ENET_INT_RXB; + } + } + imx_enet_write_bd(&bd, addr); + /* Advance to the next descriptor. */ + if ((bd.flags & ENET_BD_W) != 0) { + addr = s->regs[ENET_RDSR]; + } else { + addr += sizeof(bd); + } + } + s->rx_descriptor = addr; + imx_eth_enable_rx(s); + imx_eth_update(s); return len; } -static const MemoryRegionOps imx_fec_ops = { - .read = imx_fec_read, - .write = imx_fec_write, +static ssize_t imx_eth_receive(NetClientState *nc, const uint8_t *buf, + size_t len) +{ + IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); + + if (!s->is_fec && (s->regs[ENET_ECR] & ENET_ECR_EN1588)) { + return imx_enet_receive(nc, buf, len); + } else { + return imx_fec_receive(nc, buf, len); + } +} + +static const MemoryRegionOps imx_eth_ops = { + .read = imx_eth_read, + .write = imx_eth_write, .valid.min_access_size = 4, .valid.max_access_size = 4, - .endianness = DEVICE_NATIVE_ENDIAN, + .endianness = DEVICE_NATIVE_ENDIAN, }; -static void imx_fec_cleanup(NetClientState *nc) +static void imx_eth_cleanup(NetClientState *nc) { IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); s->nic = NULL; } -static NetClientInfo net_imx_fec_info = { - .type = NET_CLIENT_OPTIONS_KIND_NIC, - .size = sizeof(NICState), - .can_receive = imx_fec_can_receive, - .receive = imx_fec_receive, - .cleanup = imx_fec_cleanup, - .link_status_changed = imx_fec_set_link, +static NetClientInfo imx_eth_net_info = { + .type = NET_CLIENT_OPTIONS_KIND_NIC, + .size = sizeof(NICState), + .can_receive = imx_eth_can_receive, + .receive = imx_eth_receive, + .cleanup = imx_eth_cleanup, + .link_status_changed = imx_eth_set_link, }; -static void imx_fec_realize(DeviceState *dev, Error **errp) +static void imx_eth_realize(DeviceState *dev, Error **errp) { IMXFECState *s = IMX_FEC(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - memory_region_init_io(&s->iomem, OBJECT(dev), &imx_fec_ops, s, + memory_region_init_io(&s->iomem, OBJECT(dev), &imx_eth_ops, s, TYPE_IMX_FEC, 0x400); sysbus_init_mmio(sbd, &s->iomem); - sysbus_init_irq(sbd, &s->irq); + sysbus_init_irq(sbd, &s->irq[0]); + sysbus_init_irq(sbd, &s->irq[1]); + qemu_macaddr_default_if_unset(&s->conf.macaddr); s->conf.peers.ncs[0] = nd_table[0].netdev; - s->nic = qemu_new_nic(&net_imx_fec_info, &s->conf, - object_get_typename(OBJECT(dev)), DEVICE(dev)->id, - s); + s->nic = qemu_new_nic(&imx_eth_net_info, &s->conf, + object_get_typename(OBJECT(dev)), + DEVICE(dev)->id, s); + qemu_format_nic_info_str(qemu_get_queue(s->nic), s->conf.macaddr.a); } -static Property imx_fec_properties[] = { +static Property imx_eth_properties[] = { DEFINE_NIC_PROPERTIES(IMXFECState, conf), DEFINE_PROP_END_OF_LIST(), }; -static void imx_fec_class_init(ObjectClass *klass, void *data) +static void imx_eth_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); - dc->vmsd = &vmstate_imx_fec; - dc->reset = imx_fec_reset; - dc->props = imx_fec_properties; - dc->realize = imx_fec_realize; - dc->desc = "i.MX FEC Ethernet Controller"; + dc->vmsd = &vmstate_imx_eth; + dc->reset = imx_eth_reset; + dc->props = imx_eth_properties; + dc->realize = imx_eth_realize; + dc->desc = "i.MX FEC/ENET Ethernet Controller"; +} + +static void imx_fec_init(Object *obj) +{ + IMXFECState *s = IMX_FEC(obj); + + s->is_fec = true; +} + +static void imx_enet_init(Object *obj) +{ + IMXFECState *s = IMX_FEC(obj); + + s->is_fec = false; } static const TypeInfo imx_fec_info = { - .name = TYPE_IMX_FEC, - .parent = TYPE_SYS_BUS_DEVICE, + .name = TYPE_IMX_FEC, + .parent = TYPE_SYS_BUS_DEVICE, .instance_size = sizeof(IMXFECState), - .class_init = imx_fec_class_init, + .instance_init = imx_fec_init, + .class_init = imx_eth_class_init, +}; + +static const TypeInfo imx_enet_info = { + .name = TYPE_IMX_ENET, + .parent = TYPE_IMX_FEC, + .instance_init = imx_enet_init, }; -static void imx_fec_register_types(void) +static void imx_eth_register_types(void) { type_register_static(&imx_fec_info); + type_register_static(&imx_enet_info); } -type_init(imx_fec_register_types) +type_init(imx_eth_register_types) -- cgit v1.1 From 517b5e9a175fe7d47cc0fab6c2310241fd33c115 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Dubois Date: Mon, 30 May 2016 19:26:14 +0200 Subject: Add ENET device to i.MX6 SOC. This adds the ENET device to the i.MX6 SOC. This was tested by booting Linux on an Qemu i.MX6 instance and accessing the internet from the linux guest. Reviewed-by: Peter Maydell Signed-off-by: Jean-Christophe Dubois Signed-off-by: Jason Wang --- hw/arm/fsl-imx6.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'hw') diff --git a/hw/arm/fsl-imx6.c b/hw/arm/fsl-imx6.c index a5331bf..0c00e7a 100644 --- a/hw/arm/fsl-imx6.c +++ b/hw/arm/fsl-imx6.c @@ -105,6 +105,10 @@ static void fsl_imx6_init(Object *obj) snprintf(name, NAME_SIZE, "spi%d", i + 1); object_property_add_child(obj, name, OBJECT(&s->spi[i]), NULL); } + + object_initialize(&s->eth, sizeof(s->eth), TYPE_IMX_ENET); + qdev_set_parent_bus(DEVICE(&s->eth), sysbus_get_default()); + object_property_add_child(obj, "eth", OBJECT(&s->eth), NULL); } static void fsl_imx6_realize(DeviceState *dev, Error **errp) @@ -381,6 +385,19 @@ static void fsl_imx6_realize(DeviceState *dev, Error **errp) spi_table[i].irq)); } + object_property_set_bool(OBJECT(&s->eth), true, "realized", &err); + if (err) { + error_propagate(errp, err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(&s->eth), 0, FSL_IMX6_ENET_ADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->eth), 0, + qdev_get_gpio_in(DEVICE(&s->a9mpcore), + FSL_IMX6_ENET_MAC_IRQ)); + sysbus_connect_irq(SYS_BUS_DEVICE(&s->eth), 1, + qdev_get_gpio_in(DEVICE(&s->a9mpcore), + FSL_IMX6_ENET_MAC_1588_IRQ)); + /* ROM memory */ memory_region_init_rom_device(&s->rom, NULL, NULL, NULL, "imx6.rom", FSL_IMX6_ROM_SIZE, &err); -- cgit v1.1