diff options
Diffstat (limited to 'hw/net/virtio-net.c')
-rw-r--r-- | hw/net/virtio-net.c | 609 |
1 files changed, 425 insertions, 184 deletions
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 8f30972..c4c49b0 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -26,7 +26,7 @@ #include "qemu/option.h" #include "qemu/option_int.h" #include "qemu/config-file.h" -#include "qapi/qmp/qdict.h" +#include "qobject/qdict.h" #include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" #include "net/announce.h" @@ -39,14 +39,15 @@ #include "hw/virtio/virtio-access.h" #include "migration/misc.h" #include "standard-headers/linux/ethtool.h" -#include "sysemu/sysemu.h" +#include "system/system.h" +#include "system/replay.h" #include "trace.h" #include "monitor/qdev.h" #include "monitor/monitor.h" #include "hw/pci/pci_device.h" #include "net_rx_pkt.h" #include "hw/virtio/vhost.h" -#include "sysemu/qtest.h" +#include "system/qtest.h" #define VIRTIO_NET_VM_VERSION 11 @@ -157,7 +158,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); virtio_stl_p(vdev, &netcfg.supported_hash_types, - VIRTIO_NET_RSS_SUPPORTED_HASHES); + n->rss_data.supported_hash_types); memcpy(config, &netcfg, n->config_size); /* @@ -381,7 +382,7 @@ static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq) } } -static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) +static int virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) { VirtIONet *n = VIRTIO_NET(vdev); VirtIONetQueue *q; @@ -417,7 +418,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) timer_mod(q->tx_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); } else { - qemu_bh_schedule(q->tx_bh); + replay_bh_schedule_event(q->tx_bh); } } else { if (q->tx_timer) { @@ -436,6 +437,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) } } } + return 0; } static void virtio_net_set_link_status(NetClientState *nc) @@ -668,34 +670,36 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, static int virtio_net_max_tx_queue_size(VirtIONet *n) { NetClientState *peer = n->nic_conf.peers.ncs[0]; + struct vhost_net *net; - /* - * Backends other than vhost-user or vhost-vdpa don't support max queue - * size. - */ if (!peer) { - return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; + goto default_value; } - switch(peer->info->type) { - case NET_CLIENT_DRIVER_VHOST_USER: - case NET_CLIENT_DRIVER_VHOST_VDPA: - return VIRTQUEUE_MAX_SIZE; - default: - return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; - }; + net = get_vhost_net(peer); + + if (!net || !net->max_tx_queue_size) { + goto default_value; + } + + return net->max_tx_queue_size; + +default_value: + return VIRTIO_NET_TX_QUEUE_DEFAULT_SIZE; } static int peer_attach(VirtIONet *n, int index) { NetClientState *nc = qemu_get_subqueue(n->nic, index); + struct vhost_net *net; if (!nc->peer) { return 0; } - if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { - vhost_set_vring_enable(nc->peer, 1); + net = get_vhost_net(nc->peer); + if (net && net->is_vhost_user) { + vhost_net_set_vring_enable(nc->peer, 1); } if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { @@ -712,13 +716,15 @@ static int peer_attach(VirtIONet *n, int index) static int peer_detach(VirtIONet *n, int index) { NetClientState *nc = qemu_get_subqueue(n->nic, index); + struct vhost_net *net; if (!nc->peer) { return 0; } - if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) { - vhost_set_vring_enable(nc->peer, 0); + net = get_vhost_net(nc->peer); + if (net && net->is_vhost_user) { + vhost_net_set_vring_enable(nc->peer, 0); } if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) { @@ -750,79 +756,6 @@ static void virtio_net_set_queue_pairs(VirtIONet *n) static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue); -static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, - Error **errp) -{ - VirtIONet *n = VIRTIO_NET(vdev); - NetClientState *nc = qemu_get_queue(n->nic); - - /* Firstly sync all virtio-net possible supported features */ - features |= n->host_features; - - virtio_add_feature(&features, VIRTIO_NET_F_MAC); - - if (!peer_has_vnet_hdr(n)) { - virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); - - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); - - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); - - virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); - } - - if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); - } - - if (!peer_has_uso(n)) { - virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); - } - - if (!get_vhost_net(nc->peer)) { - return features; - } - - if (!ebpf_rss_is_loaded(&n->ebpf_rss)) { - virtio_clear_feature(&features, VIRTIO_NET_F_RSS); - } - features = vhost_net_get_features(get_vhost_net(nc->peer), features); - vdev->backend_features = features; - - if (n->mtu_bypass_backend && - (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { - features |= (1ULL << VIRTIO_NET_F_MTU); - } - - /* - * Since GUEST_ANNOUNCE is emulated the feature bit could be set without - * enabled. This happens in the vDPA case. - * - * Make sure the feature set is not incoherent, as the driver could refuse - * to start. - * - * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes, - * helping guest to notify the new location with vDPA devices that does not - * support it. - */ - if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { - virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); - } - - return features; -} - static uint64_t virtio_net_bad_features(VirtIODevice *vdev) { uint64_t features = 0; @@ -1240,6 +1173,7 @@ static bool virtio_net_attach_ebpf_to_backend(NICState *nic, int prog_fd) return false; } + trace_virtio_net_rss_attach_ebpf(nic, prog_fd); return nc->info->set_steering_ebpf(nc, prog_fd); } @@ -1248,12 +1182,12 @@ static void rss_data_to_rss_config(struct VirtioNetRssData *data, { config->redirect = data->redirect; config->populate_hash = data->populate_hash; - config->hash_types = data->hash_types; + config->hash_types = data->runtime_hash_types; config->indirections_len = data->indirections_len; config->default_queue = data->default_queue; } -static bool virtio_net_attach_epbf_rss(VirtIONet *n) +static bool virtio_net_attach_ebpf_rss(VirtIONet *n) { struct EBPFRSSConfig config = {}; @@ -1264,7 +1198,8 @@ static bool virtio_net_attach_epbf_rss(VirtIONet *n) rss_data_to_rss_config(&n->rss_data, &config); if (!ebpf_rss_set_all(&n->ebpf_rss, &config, - n->rss_data.indirections_table, n->rss_data.key)) { + n->rss_data.indirections_table, n->rss_data.key, + NULL)) { return false; } @@ -1275,18 +1210,22 @@ static bool virtio_net_attach_epbf_rss(VirtIONet *n) return true; } -static void virtio_net_detach_epbf_rss(VirtIONet *n) +static void virtio_net_detach_ebpf_rss(VirtIONet *n) { virtio_net_attach_ebpf_to_backend(n->nic, -1); } static void virtio_net_commit_rss_config(VirtIONet *n) { + if (n->rss_data.peer_hash_available) { + return; + } + if (n->rss_data.enabled) { n->rss_data.enabled_software_rss = n->rss_data.populate_hash; if (n->rss_data.populate_hash) { - virtio_net_detach_epbf_rss(n); - } else if (!virtio_net_attach_epbf_rss(n)) { + virtio_net_detach_ebpf_rss(n); + } else if (!virtio_net_attach_ebpf_rss(n)) { if (get_vhost_net(qemu_get_queue(n->nic)->peer)) { warn_report("Can't load eBPF RSS for vhost"); } else { @@ -1295,12 +1234,13 @@ static void virtio_net_commit_rss_config(VirtIONet *n) } } - trace_virtio_net_rss_enable(n->rss_data.hash_types, + trace_virtio_net_rss_enable(n, + n->rss_data.runtime_hash_types, n->rss_data.indirections_len, sizeof(n->rss_data.key)); } else { - virtio_net_detach_epbf_rss(n); - trace_virtio_net_rss_disable(); + virtio_net_detach_ebpf_rss(n); + trace_virtio_net_rss_disable(n); } } @@ -1314,28 +1254,27 @@ static void virtio_net_disable_rss(VirtIONet *n) virtio_net_commit_rss_config(n); } -static bool virtio_net_load_ebpf_fds(VirtIONet *n) +static bool virtio_net_load_ebpf_fds(VirtIONet *n, Error **errp) { int fds[EBPF_RSS_MAX_FDS] = { [0 ... EBPF_RSS_MAX_FDS - 1] = -1}; int ret = true; int i = 0; if (n->nr_ebpf_rss_fds != EBPF_RSS_MAX_FDS) { - warn_report("Expected %d file descriptors but got %d", - EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds); - return false; - } + error_setg(errp, "Expected %d file descriptors but got %d", + EBPF_RSS_MAX_FDS, n->nr_ebpf_rss_fds); + return false; + } for (i = 0; i < n->nr_ebpf_rss_fds; i++) { - fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], - &error_warn); + fds[i] = monitor_fd_param(monitor_cur(), n->ebpf_rss_fds[i], errp); if (fds[i] < 0) { ret = false; goto exit; } } - ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3]); + ret = ebpf_rss_load_fds(&n->ebpf_rss, fds[0], fds[1], fds[2], fds[3], errp); exit: if (!ret) { @@ -1347,17 +1286,27 @@ exit: return ret; } -static bool virtio_net_load_ebpf(VirtIONet *n) +static bool virtio_net_load_ebpf(VirtIONet *n, Error **errp) { - bool ret = false; + if (!virtio_net_attach_ebpf_to_backend(n->nic, -1)) { + return true; + } - if (virtio_net_attach_ebpf_to_backend(n->nic, -1)) { - if (!(n->ebpf_rss_fds && virtio_net_load_ebpf_fds(n))) { - ret = ebpf_rss_load(&n->ebpf_rss); - } + trace_virtio_net_rss_load(n, n->nr_ebpf_rss_fds, n->ebpf_rss_fds); + + /* + * If user explicitly gave QEMU RSS FDs to use, then + * failing to use them must be considered a fatal + * error. If no RSS FDs were provided, QEMU is trying + * eBPF on a "best effort" basis only, so report a + * warning and allow fallback to software RSS. + */ + if (n->ebpf_rss_fds) { + return virtio_net_load_ebpf_fds(n, errp); } - return ret; + ebpf_rss_load(&n->ebpf_rss, &error_warn); + return true; } static void virtio_net_unload_ebpf(VirtIONet *n) @@ -1397,20 +1346,20 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, err_value = (uint32_t)s; goto error; } - n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); + n->rss_data.runtime_hash_types = virtio_ldl_p(vdev, &cfg.hash_types); n->rss_data.indirections_len = virtio_lduw_p(vdev, &cfg.indirection_table_mask); - n->rss_data.indirections_len++; if (!do_rss) { - n->rss_data.indirections_len = 1; + n->rss_data.indirections_len = 0; } - if (!is_power_of_2(n->rss_data.indirections_len)) { - err_msg = "Invalid size of indirection table"; + if (n->rss_data.indirections_len >= VIRTIO_NET_RSS_MAX_TABLE_LEN) { + err_msg = "Too large indirection table"; err_value = n->rss_data.indirections_len; goto error; } - if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { - err_msg = "Too large indirection table"; + n->rss_data.indirections_len++; + if (!is_power_of_2(n->rss_data.indirections_len)) { + err_msg = "Invalid size of indirection table"; err_value = n->rss_data.indirections_len; goto error; } @@ -1460,12 +1409,12 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, err_value = temp.b; goto error; } - if (!temp.b && n->rss_data.hash_types) { + if (!temp.b && n->rss_data.runtime_hash_types) { err_msg = "No key provided"; err_value = 0; goto error; } - if (!temp.b && !n->rss_data.hash_types) { + if (!temp.b && !n->rss_data.runtime_hash_types) { virtio_net_disable_rss(n); return queue_pairs; } @@ -1481,7 +1430,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, virtio_net_commit_rss_config(n); return queue_pairs; error: - trace_virtio_net_rss_error(err_msg, err_value); + trace_virtio_net_rss_error(n, err_msg, err_value); virtio_net_disable_rss(n); return 0; } @@ -1641,24 +1590,28 @@ static bool virtio_net_can_receive(NetClientState *nc) static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) { + int opaque; + unsigned int in_bytes; VirtIONet *n = q->n; - if (virtio_queue_empty(q->rx_vq) || - (n->mergeable_rx_bufs && - !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { - virtio_queue_set_notification(q->rx_vq, 1); - - /* To avoid a race condition where the guest has made some buffers - * available after the above check but before notification was - * enabled, check for available buffers again. - */ - if (virtio_queue_empty(q->rx_vq) || - (n->mergeable_rx_bufs && - !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) { + + while (virtio_queue_empty(q->rx_vq) || n->mergeable_rx_bufs) { + opaque = virtqueue_get_avail_bytes(q->rx_vq, &in_bytes, NULL, + bufsize, 0); + /* Buffer is enough, disable notifiaction */ + if (bufsize <= in_bytes) { + break; + } + + if (virtio_queue_enable_notification_and_check(q->rx_vq, opaque)) { + /* Guest has added some buffers, try again */ + continue; + } else { return 0; } } virtio_queue_set_notification(q->rx_vq, 0); + return 1; } @@ -1687,8 +1640,11 @@ static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr) static void work_around_broken_dhclient(struct virtio_net_hdr *hdr, uint8_t *buf, size_t size) { + size_t csum_size = ETH_HLEN + sizeof(struct ip_header) + + sizeof(struct udp_header); + if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */ - (size > 27 && size < 1500) && /* normal sized MTU */ + (size >= csum_size && size < 1500) && /* normal sized MTU */ (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */ (buf[23] == 17) && /* ip.protocol == UDP */ (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */ @@ -1860,7 +1816,7 @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len); net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto, - n->rss_data.hash_types); + n->rss_data.runtime_hash_types); if (net_hash_type > NetPktRssIpV6UdpEx) { if (n->rss_data.populate_hash) { hdr->hash_value = VIRTIO_NET_HASH_REPORT_NONE; @@ -1885,31 +1841,34 @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, } static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, - size_t size, bool no_rss) + size_t size) { VirtIONet *n = qemu_get_nic_opaque(nc); - VirtIONetQueue *q = virtio_net_get_subqueue(nc); + VirtIONetQueue *q; VirtIODevice *vdev = VIRTIO_DEVICE(n); - VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; - size_t lens[VIRTQUEUE_MAX_SIZE]; - struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; + QEMU_UNINITIALIZED VirtQueueElement *elems[VIRTQUEUE_MAX_SIZE]; + QEMU_UNINITIALIZED size_t lens[VIRTQUEUE_MAX_SIZE]; + QEMU_UNINITIALIZED struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE]; struct virtio_net_hdr_v1_hash extra_hdr; unsigned mhdr_cnt = 0; size_t offset, i, guest_offset, j; ssize_t err; - if (!virtio_net_can_receive(nc)) { - return -1; - } + memset(&extra_hdr, 0, sizeof(extra_hdr)); - if (!no_rss && n->rss_data.enabled && n->rss_data.enabled_software_rss) { + if (n->rss_data.enabled && n->rss_data.enabled_software_rss) { int index = virtio_net_process_rss(nc, buf, size, &extra_hdr); if (index >= 0) { - NetClientState *nc2 = qemu_get_subqueue(n->nic, index); - return virtio_net_receive_rcu(nc2, buf, size, true); + nc = qemu_get_subqueue(n->nic, index % n->curr_queue_pairs); } } + if (!virtio_net_can_receive(nc)) { + return -1; + } + + q = virtio_net_get_subqueue(nc); + /* hdr_len refers to the header we supply to the guest */ if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { return 0; @@ -1965,6 +1924,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, sg, elem->in_num, offsetof(typeof(extra_hdr), hdr.num_buffers), sizeof(extra_hdr.hdr.num_buffers)); + } else { + extra_hdr.hdr.num_buffers = cpu_to_le16(1); } receive_header(n, sg, elem->in_num, buf, size); @@ -2035,7 +1996,22 @@ static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, { RCU_READ_LOCK_GUARD(); - return virtio_net_receive_rcu(nc, buf, size, false); + return virtio_net_receive_rcu(nc, buf, size); +} + +/* + * Accessors to read and write the IP packet data length field. This + * is a potentially unaligned network-byte-order 16 bit unsigned integer + * pointed to by unit->ip_len. + */ +static uint16_t read_unit_ip_len(VirtioNetRscUnit *unit) +{ + return lduw_be_p(unit->ip_plen); +} + +static void write_unit_ip_len(VirtioNetRscUnit *unit, uint16_t l) +{ + stw_be_p(unit->ip_plen, l); } static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, @@ -2052,7 +2028,7 @@ static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, unit->ip_plen = &ip->ip_len; unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; - unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; + unit->payload = read_unit_ip_len(unit) - ip_hdrlen - unit->tcp_hdrlen; } static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, @@ -2071,7 +2047,7 @@ static void virtio_net_rsc_extract_unit6(VirtioNetRscChain *chain, /* There is a difference between payload length in ipv4 and v6, ip header is excluded in ipv6 */ - unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; + unit->payload = read_unit_ip_len(unit) - unit->tcp_hdrlen; } static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, @@ -2118,7 +2094,7 @@ static void virtio_net_rsc_purge(void *opq) chain->stat.timer++; if (!QTAILQ_EMPTY(&chain->buffers)) { timer_mod(chain->drain_timer, - qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout); } } @@ -2220,7 +2196,7 @@ static int32_t virtio_net_rsc_coalesce_data(VirtioNetRscChain *chain, VirtioNetRscUnit *o_unit; o_unit = &seg->unit; - o_ip_len = htons(*o_unit->ip_plen); + o_ip_len = read_unit_ip_len(o_unit); nseq = htonl(n_unit->tcp->th_seq); oseq = htonl(o_unit->tcp->th_seq); @@ -2256,7 +2232,7 @@ coalesce: o_unit->payload += n_unit->payload; /* update new data len */ /* update field in ip header */ - *o_unit->ip_plen = htons(o_ip_len + n_unit->payload); + write_unit_ip_len(o_unit, o_ip_len + n_unit->payload); /* Bring 'PUSH' big, the whql test guide says 'PUSH' can be coalesced for windows guest, while this may change the behavior for linux @@ -2354,7 +2330,7 @@ static size_t virtio_net_rsc_do_coalesce(VirtioNetRscChain *chain, chain->stat.empty_cache++; virtio_net_rsc_cache_buf(chain, nc, buf, size); timer_mod(chain->drain_timer, - qemu_clock_get_ns(QEMU_CLOCK_HOST) + chain->n->rsc_timeout); + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + chain->n->rsc_timeout); return size; } @@ -2592,7 +2568,7 @@ static VirtioNetRscChain *virtio_net_rsc_lookup_chain(VirtIONet *n, chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; } - chain->drain_timer = timer_new_ns(QEMU_CLOCK_HOST, + chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, virtio_net_rsc_purge, chain); memset(&chain->stat, 0, sizeof(chain->stat)); @@ -2667,7 +2643,7 @@ static void virtio_net_tx_complete(NetClientState *nc, ssize_t len) */ virtio_queue_set_notification(q->tx_vq, 0); if (q->tx_bh) { - qemu_bh_schedule(q->tx_bh); + replay_bh_schedule_event(q->tx_bh); } else { timer_mod(q->tx_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout); @@ -2833,7 +2809,7 @@ static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq) return; } virtio_queue_set_notification(vq, 0); - qemu_bh_schedule(q->tx_bh); + replay_bh_schedule_event(q->tx_bh); } static void virtio_net_tx_timer(void *opaque) @@ -2916,7 +2892,7 @@ static void virtio_net_tx_bh(void *opaque) /* If we flush a full burst of packets, assume there are * more coming and immediately reschedule */ if (ret >= n->tx_burst) { - qemu_bh_schedule(q->tx_bh); + replay_bh_schedule_event(q->tx_bh); q->tx_waiting = 1; return; } @@ -2930,7 +2906,7 @@ static void virtio_net_tx_bh(void *opaque) return; } else if (ret > 0) { virtio_queue_set_notification(q->tx_vq, 0); - qemu_bh_schedule(q->tx_bh); + replay_bh_schedule_event(q->tx_bh); q->tx_waiting = 1; } } @@ -2981,11 +2957,10 @@ static void virtio_net_del_queue(VirtIONet *n, int index) virtio_del_queue(vdev, index * 2 + 1); } -static void virtio_net_change_num_queue_pairs(VirtIONet *n, int new_max_queue_pairs) +static void virtio_net_change_num_queues(VirtIONet *n, int new_num_queues) { VirtIODevice *vdev = VIRTIO_DEVICE(n); int old_num_queues = virtio_get_num_queues(vdev); - int new_num_queues = new_max_queue_pairs * 2 + 1; int i; assert(old_num_queues >= 3); @@ -3021,11 +2996,115 @@ static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue) int max = multiqueue ? n->max_queue_pairs : 1; n->multiqueue = multiqueue; - virtio_net_change_num_queue_pairs(n, max); + virtio_net_change_num_queues(n, max * 2 + 1); virtio_net_set_queue_pairs(n); } +static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n) +{ + virtio_net_change_num_queues(VIRTIO_NET(vdev), n); + + return 0; +} + +static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc = qemu_get_queue(n->nic); + uint32_t supported_hash_types = n->rss_data.supported_hash_types; + uint32_t peer_hash_types = n->rss_data.peer_hash_types; + bool use_own_hash = + (supported_hash_types & VIRTIO_NET_RSS_SUPPORTED_HASHES) == + supported_hash_types; + bool use_peer_hash = + n->rss_data.peer_hash_available && + (supported_hash_types & peer_hash_types) == supported_hash_types; + + /* Firstly sync all virtio-net possible supported features */ + features |= n->host_features; + + virtio_add_feature(&features, VIRTIO_NET_F_MAC); + + if (!peer_has_vnet_hdr(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); + + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); + + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); + + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); + } + + if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); + } + + if (!peer_has_uso(n)) { + virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); + } + + if (!get_vhost_net(nc->peer)) { + if (!use_own_hash) { + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); + virtio_clear_feature(&features, VIRTIO_NET_F_RSS); + } else if (virtio_has_feature(features, VIRTIO_NET_F_RSS)) { + virtio_net_load_ebpf(n, errp); + } + + return features; + } + + if (!use_peer_hash) { + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); + + if (!use_own_hash || !virtio_net_attach_ebpf_to_backend(n->nic, -1)) { + if (!virtio_net_load_ebpf(n, errp)) { + return features; + } + + virtio_clear_feature(&features, VIRTIO_NET_F_RSS); + } + } + + features = vhost_net_get_features(get_vhost_net(nc->peer), features); + vdev->backend_features = features; + + if (n->mtu_bypass_backend && + (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { + features |= (1ULL << VIRTIO_NET_F_MTU); + } + + /* + * Since GUEST_ANNOUNCE is emulated the feature bit could be set without + * enabled. This happens in the vDPA case. + * + * Make sure the feature set is not incoherent, as the driver could refuse + * to start. + * + * TODO: QEMU is able to emulate a CVQ just for guest_announce purposes, + * helping guest to notify the new location with vDPA devices that does not + * support it. + */ + if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { + virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); + } + + return features; +} + static int virtio_net_post_load_device(void *opaque, int version_id) { VirtIONet *n = opaque; @@ -3264,6 +3343,17 @@ static const VMStateDescription vmstate_virtio_net_has_vnet = { }, }; +static int virtio_net_rss_post_load(void *opaque, int version_id) +{ + VirtIONet *n = VIRTIO_NET(opaque); + + if (version_id == 1) { + n->rss_data.supported_hash_types = VIRTIO_NET_RSS_SUPPORTED_HASHES; + } + + return 0; +} + static bool virtio_net_rss_needed(void *opaque) { return VIRTIO_NET(opaque)->rss_data.enabled; @@ -3271,14 +3361,16 @@ static bool virtio_net_rss_needed(void *opaque) static const VMStateDescription vmstate_virtio_net_rss = { .name = "virtio-net-device/rss", - .version_id = 1, + .version_id = 2, .minimum_version_id = 1, + .post_load = virtio_net_rss_post_load, .needed = virtio_net_rss_needed, .fields = (const VMStateField[]) { VMSTATE_BOOL(rss_data.enabled, VirtIONet), VMSTATE_BOOL(rss_data.redirect, VirtIONet), VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), - VMSTATE_UINT32(rss_data.hash_types, VirtIONet), + VMSTATE_UINT32(rss_data.runtime_hash_types, VirtIONet), + VMSTATE_UINT32_V(rss_data.supported_hash_types, VirtIONet, 2), VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), VMSTATE_UINT16(rss_data.default_queue, VirtIONet), VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, @@ -3290,6 +3382,117 @@ static const VMStateDescription vmstate_virtio_net_rss = { }, }; +static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc; + struct vhost_net *net; + + if (!n->nic) { + return NULL; + } + + nc = qemu_get_queue(n->nic); + if (!nc) { + return NULL; + } + + net = get_vhost_net(nc->peer); + if (!net) { + return NULL; + } + + return &net->dev; +} + +static int vhost_user_net_save_state(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, + JSONWriter *vmdesc) +{ + VirtIONet *n = pv; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + Error *local_error = NULL; + int ret; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + error_reportf_err(local_error, + "Error getting vhost back-end of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return -1; + } + + ret = vhost_save_backend_state(vhdev, f, &local_error); + if (ret < 0) { + error_reportf_err(local_error, + "Error saving back-end state of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return ret; + } + + return 0; +} + +static int vhost_user_net_load_state(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + VirtIONet *n = pv; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + Error *local_error = NULL; + int ret; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + error_reportf_err(local_error, + "Error getting vhost back-end of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return -1; + } + + ret = vhost_load_backend_state(vhdev, f, &local_error); + if (ret < 0) { + error_reportf_err(local_error, + "Error loading back-end state of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return ret; + } + + return 0; +} + +static bool vhost_user_net_is_internal_migration(void *opaque) +{ + VirtIONet *n = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + return false; + } + + return vhost_supports_device_state(vhdev); +} + +static const VMStateDescription vhost_user_net_backend_state = { + .name = "virtio-net-device/backend", + .version_id = 0, + .needed = vhost_user_net_is_internal_migration, + .fields = (const VMStateField[]) { + { + .name = "backend", + .info = &(const VMStateInfo) { + .name = "virtio-net vhost-user backend state", + .get = vhost_user_net_load_state, + .put = vhost_user_net_save_state, + }, + }, + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio_net_device = { .name = "virtio-net-device", .version_id = VIRTIO_NET_VM_VERSION, @@ -3342,6 +3545,7 @@ static const VMStateDescription vmstate_virtio_net_device = { }, .subsections = (const VMStateDescription * const []) { &vmstate_virtio_net_rss, + &vhost_user_net_backend_state, NULL } }; @@ -3753,8 +3957,17 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) net_rx_pkt_init(&n->rx_pkt); - if (virtio_has_feature(n->host_features, VIRTIO_NET_F_RSS)) { - virtio_net_load_ebpf(n); + if (qemu_get_vnet_hash_supported_types(qemu_get_queue(n->nic)->peer, + &n->rss_data.peer_hash_types)) { + n->rss_data.peer_hash_available = true; + n->rss_data.supported_hash_types = + n->rss_data.specified_hash_types.on_bits | + (n->rss_data.specified_hash_types.auto_bits & + n->rss_data.peer_hash_types); + } else { + n->rss_data.supported_hash_types = + n->rss_data.specified_hash_types.on_bits | + n->rss_data.specified_hash_types.auto_bits; } } @@ -3887,14 +4100,6 @@ static bool dev_unplug_pending(void *opaque) return vdc->primary_unplug_pending(dev); } -static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) -{ - VirtIONet *n = VIRTIO_NET(vdev); - NetClientState *nc = qemu_get_queue(n->nic); - struct vhost_net *net = get_vhost_net(nc->peer); - return &net->dev; -} - static const VMStateDescription vmstate_virtio_net = { .name = "virtio-net", .minimum_version_id = VIRTIO_NET_VM_VERSION, @@ -3907,7 +4112,7 @@ static const VMStateDescription vmstate_virtio_net = { .dev_unplug_pending = dev_unplug_pending, }; -static Property virtio_net_properties[] = { +static const Property virtio_net_properties[] = { DEFINE_PROP_BIT64("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true), DEFINE_PROP_BIT64("guest_csum", VirtIONet, host_features, @@ -3979,10 +4184,45 @@ static Property virtio_net_properties[] = { VIRTIO_NET_F_GUEST_USO6, true), DEFINE_PROP_BIT64("host_uso", VirtIONet, host_features, VIRTIO_NET_F_HOST_USO, true), - DEFINE_PROP_END_OF_LIST(), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv4", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_IPv4 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp4", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_TCPv4 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp4", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_UDPv4 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_IPv6 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_TCPv6 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_UDPv6 - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-ipv6ex", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_IPv6_EX - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-tcp6ex", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_TCPv6_EX - 1, + ON_OFF_AUTO_AUTO), + DEFINE_PROP_ON_OFF_AUTO_BIT64("hash-udp6ex", VirtIONet, + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_UDPv6_EX - 1, + ON_OFF_AUTO_AUTO), }; -static void virtio_net_class_init(ObjectClass *klass, void *data) +static void virtio_net_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); @@ -4004,6 +4244,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) vdc->guest_notifier_mask = virtio_net_guest_notifier_mask; vdc->guest_notifier_pending = virtio_net_guest_notifier_pending; vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO); + vdc->pre_load_queues = virtio_net_pre_load_queues; vdc->post_load = virtio_net_post_load_virtio; vdc->vmsd = &vmstate_virtio_net_device; vdc->primary_unplug_pending = primary_unplug_pending; |