diff options
Diffstat (limited to 'hw/net/igb.c')
-rw-r--r-- | hw/net/igb.c | 623 |
1 files changed, 623 insertions, 0 deletions
diff --git a/hw/net/igb.c b/hw/net/igb.c new file mode 100644 index 0000000..c6d753d --- /dev/null +++ b/hw/net/igb.c @@ -0,0 +1,623 @@ +/* + * QEMU Intel 82576 SR/IOV Ethernet Controller Emulation + * + * Datasheet: + * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/82576eg-gbe-datasheet.pdf + * + * Copyright (c) 2020-2023 Red Hat, Inc. + * Copyright (c) 2015 Ravello Systems LTD (http://ravellosystems.com) + * Developed by Daynix Computing LTD (http://www.daynix.com) + * + * Authors: + * Akihiko Odaki <akihiko.odaki@daynix.com> + * Gal Hammmer <gal.hammer@sap.com> + * Marcel Apfelbaum <marcel.apfelbaum@gmail.com> + * Dmitry Fleytman <dmitry@daynix.com> + * Leonid Bloch <leonid@daynix.com> + * Yan Vugenfirer <yan@daynix.com> + * + * Based on work done by: + * Nir Peleg, Tutis Systems Ltd. for Qumranet Inc. + * Copyright (c) 2008 Qumranet + * Based on work done by: + * Copyright (c) 2007 Dan Aloni + * Copyright (c) 2004 Antony T Curtis + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "net/eth.h" +#include "net/net.h" +#include "net/tap.h" +#include "qemu/module.h" +#include "qemu/range.h" +#include "sysemu/sysemu.h" +#include "hw/hw.h" +#include "hw/net/mii.h" +#include "hw/pci/pci.h" +#include "hw/pci/pcie.h" +#include "hw/pci/pcie_sriov.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" + +#include "igb_common.h" +#include "igb_core.h" + +#include "trace.h" +#include "qapi/error.h" +#include "qom/object.h" + +#define TYPE_IGB "igb" +OBJECT_DECLARE_SIMPLE_TYPE(IGBState, IGB) + +struct IGBState { + PCIDevice parent_obj; + NICState *nic; + NICConf conf; + + MemoryRegion mmio; + MemoryRegion flash; + MemoryRegion io; + MemoryRegion msix; + + uint32_t ioaddr; + + IGBCore core; +}; + +#define IGB_CAP_SRIOV_OFFSET (0x160) +#define IGB_VF_OFFSET (0x80) +#define IGB_VF_STRIDE (2) + +#define E1000E_MMIO_IDX 0 +#define E1000E_FLASH_IDX 1 +#define E1000E_IO_IDX 2 +#define E1000E_MSIX_IDX 3 + +#define E1000E_MMIO_SIZE (128 * KiB) +#define E1000E_FLASH_SIZE (128 * KiB) +#define E1000E_IO_SIZE (32) +#define E1000E_MSIX_SIZE (16 * KiB) + +static void igb_write_config(PCIDevice *dev, uint32_t addr, + uint32_t val, int len) +{ + IGBState *s = IGB(dev); + + trace_igb_write_config(addr, val, len); + pci_default_write_config(dev, addr, val, len); + + if (range_covers_byte(addr, len, PCI_COMMAND) && + (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { + igb_start_recv(&s->core); + } +} + +uint64_t +igb_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + IGBState *s = opaque; + return igb_core_read(&s->core, addr, size); +} + +void +igb_mmio_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) +{ + IGBState *s = opaque; + igb_core_write(&s->core, addr, val, size); +} + +static bool +igb_io_get_reg_index(IGBState *s, uint32_t *idx) +{ + if (s->ioaddr < 0x1FFFF) { + *idx = s->ioaddr; + return true; + } + + if (s->ioaddr < 0x7FFFF) { + trace_e1000e_wrn_io_addr_undefined(s->ioaddr); + return false; + } + + if (s->ioaddr < 0xFFFFF) { + trace_e1000e_wrn_io_addr_flash(s->ioaddr); + return false; + } + + trace_e1000e_wrn_io_addr_unknown(s->ioaddr); + return false; +} + +static uint64_t +igb_io_read(void *opaque, hwaddr addr, unsigned size) +{ + IGBState *s = opaque; + uint32_t idx = 0; + uint64_t val; + + switch (addr) { + case E1000_IOADDR: + trace_e1000e_io_read_addr(s->ioaddr); + return s->ioaddr; + case E1000_IODATA: + if (igb_io_get_reg_index(s, &idx)) { + val = igb_core_read(&s->core, idx, sizeof(val)); + trace_e1000e_io_read_data(idx, val); + return val; + } + return 0; + default: + trace_e1000e_wrn_io_read_unknown(addr); + return 0; + } +} + +static void +igb_io_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) +{ + IGBState *s = opaque; + uint32_t idx = 0; + + switch (addr) { + case E1000_IOADDR: + trace_e1000e_io_write_addr(val); + s->ioaddr = (uint32_t) val; + return; + case E1000_IODATA: + if (igb_io_get_reg_index(s, &idx)) { + trace_e1000e_io_write_data(idx, val); + igb_core_write(&s->core, idx, val, sizeof(val)); + } + return; + default: + trace_e1000e_wrn_io_write_unknown(addr); + return; + } +} + +static const MemoryRegionOps mmio_ops = { + .read = igb_mmio_read, + .write = igb_mmio_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static const MemoryRegionOps io_ops = { + .read = igb_io_read, + .write = igb_io_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static bool +igb_nc_can_receive(NetClientState *nc) +{ + IGBState *s = qemu_get_nic_opaque(nc); + return igb_can_receive(&s->core); +} + +static ssize_t +igb_nc_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) +{ + IGBState *s = qemu_get_nic_opaque(nc); + return igb_receive_iov(&s->core, iov, iovcnt); +} + +static ssize_t +igb_nc_receive(NetClientState *nc, const uint8_t *buf, size_t size) +{ + IGBState *s = qemu_get_nic_opaque(nc); + return igb_receive(&s->core, buf, size); +} + +static void +igb_set_link_status(NetClientState *nc) +{ + IGBState *s = qemu_get_nic_opaque(nc); + igb_core_set_link_status(&s->core); +} + +static NetClientInfo net_igb_info = { + .type = NET_CLIENT_DRIVER_NIC, + .size = sizeof(NICState), + .can_receive = igb_nc_can_receive, + .receive = igb_nc_receive, + .receive_iov = igb_nc_receive_iov, + .link_status_changed = igb_set_link_status, +}; + +/* + * EEPROM (NVM) contents documented in section 6.1, table 6-1: + * and in 6.10 Software accessed words. + */ +static const uint16_t igb_eeprom_template[] = { + /* Address |Compat.|OEM sp.| ImRev | OEM sp. */ + 0x0000, 0x0000, 0x0000, 0x0d34, 0xffff, 0x2010, 0xffff, 0xffff, + /* PBA |ICtrl1 | SSID | SVID | DevID |-------|ICtrl2 */ + 0x1040, 0xffff, 0x002b, 0x0000, 0x8086, 0x10c9, 0x0000, 0x70c3, + /* SwPin0| DevID | EESZ |-------|ICtrl3 |PCI-tc | MSIX | APtr */ + 0x0004, 0x10c9, 0x5c00, 0x0000, 0x2880, 0x0014, 0x4a40, 0x0060, + /* PCIe Init. Conf 1,2,3 |PCICtrl| LD1,3 |DDevID |DevRev | LD0,2 */ + 0x6cfb, 0xc7b0, 0x0abe, 0x0403, 0x0783, 0x10a6, 0x0001, 0x0602, + /* SwPin1| FunC |LAN-PWR|ManHwC |ICtrl3 | IOVct |VDevID |-------*/ + 0x0004, 0x0020, 0x0000, 0x004a, 0x2080, 0x00f5, 0x10ca, 0x0000, + /*---------------| LD1,3 | LD0,2 | ROEnd | ROSta | Wdog | VPD */ + 0x0000, 0x0000, 0x4784, 0x4602, 0x0000, 0x0000, 0x1000, 0xffff, + /* PCSet0| Ccfg0 |PXEver |IBAcap |PCSet1 | Ccfg1 |iSCVer | ?? */ + 0x0100, 0x4000, 0x131f, 0x4013, 0x0100, 0x4000, 0xffff, 0xffff, + /* PCSet2| Ccfg2 |PCSet3 | Ccfg3 | ?? |AltMacP| ?? |CHKSUM */ + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x00e0, 0xffff, 0x0000, + /* NC-SIC */ + 0x0003, +}; + +static void igb_core_realize(IGBState *s) +{ + s->core.owner = &s->parent_obj; + s->core.owner_nic = s->nic; +} + +static void +igb_init_msix(IGBState *s) +{ + int i, res; + + res = msix_init(PCI_DEVICE(s), IGB_MSIX_VEC_NUM, + &s->msix, + E1000E_MSIX_IDX, 0, + &s->msix, + E1000E_MSIX_IDX, 0x2000, + 0x70, NULL); + + if (res < 0) { + trace_e1000e_msix_init_fail(res); + } else { + for (i = 0; i < IGB_MSIX_VEC_NUM; i++) { + msix_vector_use(PCI_DEVICE(s), i); + } + } +} + +static void +igb_cleanup_msix(IGBState *s) +{ + msix_unuse_all_vectors(PCI_DEVICE(s)); + msix_uninit(PCI_DEVICE(s), &s->msix, &s->msix); +} + +static void +igb_init_net_peer(IGBState *s, PCIDevice *pci_dev, uint8_t *macaddr) +{ + DeviceState *dev = DEVICE(pci_dev); + NetClientState *nc; + int i; + + s->nic = qemu_new_nic(&net_igb_info, &s->conf, + object_get_typename(OBJECT(s)), dev->id, s); + + s->core.max_queue_num = s->conf.peers.queues ? s->conf.peers.queues - 1 : 0; + + trace_e1000e_mac_set_permanent(MAC_ARG(macaddr)); + memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac)); + + qemu_format_nic_info_str(qemu_get_queue(s->nic), macaddr); + + /* Setup virtio headers */ + for (i = 0; i < s->conf.peers.queues; i++) { + nc = qemu_get_subqueue(s->nic, i); + if (!nc->peer || !qemu_has_vnet_hdr(nc->peer)) { + trace_e1000e_cfg_support_virtio(false); + return; + } + } + + trace_e1000e_cfg_support_virtio(true); + s->core.has_vnet = true; + + for (i = 0; i < s->conf.peers.queues; i++) { + nc = qemu_get_subqueue(s->nic, i); + qemu_set_vnet_hdr_len(nc->peer, sizeof(struct virtio_net_hdr)); + qemu_using_vnet_hdr(nc->peer, true); + } +} + +static int +igb_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) +{ + Error *local_err = NULL; + int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, + PCI_PM_SIZEOF, &local_err); + + if (local_err) { + error_report_err(local_err); + return ret; + } + + pci_set_word(pdev->config + offset + PCI_PM_PMC, + PCI_PM_CAP_VER_1_1 | + pmc); + + pci_set_word(pdev->wmask + offset + PCI_PM_CTRL, + PCI_PM_CTRL_STATE_MASK | + PCI_PM_CTRL_PME_ENABLE | + PCI_PM_CTRL_DATA_SEL_MASK); + + pci_set_word(pdev->w1cmask + offset + PCI_PM_CTRL, + PCI_PM_CTRL_PME_STATUS); + + return ret; +} + +static void igb_pci_realize(PCIDevice *pci_dev, Error **errp) +{ + IGBState *s = IGB(pci_dev); + uint8_t *macaddr; + int ret; + + trace_e1000e_cb_pci_realize(); + + pci_dev->config_write = igb_write_config; + + pci_dev->config[PCI_CACHE_LINE_SIZE] = 0x10; + pci_dev->config[PCI_INTERRUPT_PIN] = 1; + + /* Define IO/MMIO regions */ + memory_region_init_io(&s->mmio, OBJECT(s), &mmio_ops, s, + "igb-mmio", E1000E_MMIO_SIZE); + pci_register_bar(pci_dev, E1000E_MMIO_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio); + + /* + * We provide a dummy implementation for the flash BAR + * for drivers that may theoretically probe for its presence. + */ + memory_region_init(&s->flash, OBJECT(s), + "igb-flash", E1000E_FLASH_SIZE); + pci_register_bar(pci_dev, E1000E_FLASH_IDX, + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->flash); + + memory_region_init_io(&s->io, OBJECT(s), &io_ops, s, + "igb-io", E1000E_IO_SIZE); + pci_register_bar(pci_dev, E1000E_IO_IDX, + PCI_BASE_ADDRESS_SPACE_IO, &s->io); + + memory_region_init(&s->msix, OBJECT(s), "igb-msix", + E1000E_MSIX_SIZE); + pci_register_bar(pci_dev, E1000E_MSIX_IDX, + PCI_BASE_ADDRESS_MEM_TYPE_64, &s->msix); + + /* Create networking backend */ + qemu_macaddr_default_if_unset(&s->conf.macaddr); + macaddr = s->conf.macaddr.a; + + /* Add PCI capabilities in reverse order */ + assert(pcie_endpoint_cap_init(pci_dev, 0xa0) > 0); + + igb_init_msix(s); + + ret = msi_init(pci_dev, 0x50, 1, true, true, NULL); + if (ret) { + trace_e1000e_msi_init_fail(ret); + } + + if (igb_add_pm_capability(pci_dev, 0x40, PCI_PM_CAP_DSI) < 0) { + hw_error("Failed to initialize PM capability"); + } + + /* PCIe extended capabilities (in order) */ + if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) { + hw_error("Failed to initialize AER capability"); + } + + pcie_ari_init(pci_dev, 0x150, 1); + + pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, "igbvf", + IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS, + IGB_VF_OFFSET, IGB_VF_STRIDE); + + pcie_sriov_pf_init_vf_bar(pci_dev, 0, + PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, + 16 * KiB); + pcie_sriov_pf_init_vf_bar(pci_dev, 3, + PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, + 16 * KiB); + + igb_init_net_peer(s, pci_dev, macaddr); + + /* Initialize core */ + igb_core_realize(s); + + igb_core_pci_realize(&s->core, + igb_eeprom_template, + sizeof(igb_eeprom_template), + macaddr); +} + +static void igb_pci_uninit(PCIDevice *pci_dev) +{ + IGBState *s = IGB(pci_dev); + + trace_e1000e_cb_pci_uninit(); + + igb_core_pci_uninit(&s->core); + + pcie_sriov_pf_exit(pci_dev); + pcie_cap_exit(pci_dev); + + qemu_del_nic(s->nic); + + igb_cleanup_msix(s); + msi_uninit(pci_dev); +} + +static void igb_qdev_reset_hold(Object *obj) +{ + PCIDevice *d = PCI_DEVICE(obj); + IGBState *s = IGB(obj); + + trace_e1000e_cb_qdev_reset_hold(); + + pcie_sriov_pf_disable_vfs(d); + igb_core_reset(&s->core); +} + +static int igb_pre_save(void *opaque) +{ + IGBState *s = opaque; + + trace_e1000e_cb_pre_save(); + + igb_core_pre_save(&s->core); + + return 0; +} + +static int igb_post_load(void *opaque, int version_id) +{ + IGBState *s = opaque; + + trace_e1000e_cb_post_load(); + return igb_core_post_load(&s->core); +} + +static const VMStateDescription igb_vmstate_tx = { + .name = "igb-tx", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT16(vlan, struct igb_tx), + VMSTATE_UINT16(mss, struct igb_tx), + VMSTATE_BOOL(tse, struct igb_tx), + VMSTATE_BOOL(ixsm, struct igb_tx), + VMSTATE_BOOL(txsm, struct igb_tx), + VMSTATE_BOOL(first, struct igb_tx), + VMSTATE_BOOL(skip_cp, struct igb_tx), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription igb_vmstate_intr_timer = { + .name = "igb-intr-timer", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_TIMER_PTR(timer, IGBIntrDelayTimer), + VMSTATE_BOOL(running, IGBIntrDelayTimer), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_IGB_INTR_DELAY_TIMER(_f, _s) \ + VMSTATE_STRUCT(_f, _s, 0, \ + igb_vmstate_intr_timer, IGBIntrDelayTimer) + +#define VMSTATE_IGB_INTR_DELAY_TIMER_ARRAY(_f, _s, _num) \ + VMSTATE_STRUCT_ARRAY(_f, _s, _num, 0, \ + igb_vmstate_intr_timer, IGBIntrDelayTimer) + +static const VMStateDescription igb_vmstate = { + .name = "igb", + .version_id = 1, + .minimum_version_id = 1, + .pre_save = igb_pre_save, + .post_load = igb_post_load, + .fields = (VMStateField[]) { + VMSTATE_PCI_DEVICE(parent_obj, IGBState), + VMSTATE_MSIX(parent_obj, IGBState), + + VMSTATE_UINT32(ioaddr, IGBState), + VMSTATE_UINT8(core.rx_desc_len, IGBState), + VMSTATE_UINT16_ARRAY(core.eeprom, IGBState, IGB_EEPROM_SIZE), + VMSTATE_UINT16_ARRAY(core.phy, IGBState, MAX_PHY_REG_ADDRESS + 1), + VMSTATE_UINT32_ARRAY(core.mac, IGBState, E1000E_MAC_SIZE), + VMSTATE_UINT8_ARRAY(core.permanent_mac, IGBState, ETH_ALEN), + + VMSTATE_IGB_INTR_DELAY_TIMER_ARRAY(core.eitr, IGBState, + IGB_INTR_NUM), + + VMSTATE_UINT32_ARRAY(core.eitr_guest_value, IGBState, IGB_INTR_NUM), + + VMSTATE_STRUCT_ARRAY(core.tx, IGBState, IGB_NUM_QUEUES, 0, + igb_vmstate_tx, struct igb_tx), + + VMSTATE_INT64(core.timadj, IGBState), + + VMSTATE_END_OF_LIST() + } +}; + +static Property igb_properties[] = { + DEFINE_NIC_PROPERTIES(IGBState, conf), + DEFINE_PROP_END_OF_LIST(), +}; + +static void igb_class_init(ObjectClass *class, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(class); + ResettableClass *rc = RESETTABLE_CLASS(class); + PCIDeviceClass *c = PCI_DEVICE_CLASS(class); + + c->realize = igb_pci_realize; + c->exit = igb_pci_uninit; + c->vendor_id = PCI_VENDOR_ID_INTEL; + c->device_id = E1000_DEV_ID_82576; + c->revision = 1; + c->class_id = PCI_CLASS_NETWORK_ETHERNET; + + rc->phases.hold = igb_qdev_reset_hold; + + dc->desc = "Intel 82576 Gigabit Ethernet Controller"; + dc->vmsd = &igb_vmstate; + + device_class_set_props(dc, igb_properties); + set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); +} + +static void igb_instance_init(Object *obj) +{ + IGBState *s = IGB(obj); + device_add_bootindex_property(obj, &s->conf.bootindex, + "bootindex", "/ethernet-phy@0", + DEVICE(obj)); +} + +static const TypeInfo igb_info = { + .name = TYPE_IGB, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(IGBState), + .class_init = igb_class_init, + .instance_init = igb_instance_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { } + }, +}; + +static void igb_register_types(void) +{ + type_register_static(&igb_info); +} + +type_init(igb_register_types) |