diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/af-xdp.c | 99 | ||||
-rw-r--r-- | net/clients.h | 4 | ||||
-rw-r--r-- | net/dgram.c | 28 | ||||
-rw-r--r-- | net/hub.c | 3 | ||||
-rw-r--r-- | net/l2tpv3.c | 5 | ||||
-rw-r--r-- | net/meson.build | 6 | ||||
-rw-r--r-- | net/net.c | 62 | ||||
-rw-r--r-- | net/netmap.c | 3 | ||||
-rw-r--r-- | net/passt.c | 745 | ||||
-rw-r--r-- | net/slirp.c | 123 | ||||
-rw-r--r-- | net/socket.c | 29 | ||||
-rw-r--r-- | net/stream.c | 289 | ||||
-rw-r--r-- | net/stream_data.c | 191 | ||||
-rw-r--r-- | net/stream_data.h | 31 | ||||
-rw-r--r-- | net/tap-bsd.c | 20 | ||||
-rw-r--r-- | net/tap-linux.c | 45 | ||||
-rw-r--r-- | net/tap-linux.h | 9 | ||||
-rw-r--r-- | net/tap-solaris.c | 16 | ||||
-rw-r--r-- | net/tap-stub.c | 8 | ||||
-rw-r--r-- | net/tap-win32.c | 5 | ||||
-rw-r--r-- | net/tap.c | 92 | ||||
-rw-r--r-- | net/tap_int.h | 5 | ||||
-rw-r--r-- | net/vhost-user-stub.c | 1 | ||||
-rw-r--r-- | net/vhost-user.c | 69 | ||||
-rw-r--r-- | net/vhost-vdpa.c | 84 |
25 files changed, 1546 insertions, 426 deletions
diff --git a/net/af-xdp.c b/net/af-xdp.c index 01c5fb9..14f302e 100644 --- a/net/af-xdp.c +++ b/net/af-xdp.c @@ -49,9 +49,12 @@ typedef struct AFXDPState { char *buffer; struct xsk_umem *umem; - uint32_t n_queues; uint32_t xdp_flags; bool inhibit; + + char *map_path; + int map_fd; + uint32_t map_start_index; } AFXDPState; #define AF_XDP_BATCH_SIZE 64 @@ -261,6 +264,7 @@ static void af_xdp_send(void *opaque) static void af_xdp_cleanup(NetClientState *nc) { AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc); + int idx; qemu_purge_queued_packets(nc); @@ -275,13 +279,17 @@ static void af_xdp_cleanup(NetClientState *nc) qemu_vfree(s->buffer); s->buffer = NULL; - /* Remove the program if it's the last open queue. */ - if (!s->inhibit && nc->queue_index == s->n_queues - 1 && s->xdp_flags - && bpf_xdp_detach(s->ifindex, s->xdp_flags, NULL) != 0) { - fprintf(stderr, - "af-xdp: unable to remove XDP program from '%s', ifindex: %d\n", - s->ifname, s->ifindex); + if (s->map_fd >= 0) { + idx = nc->queue_index + s->map_start_index; + if (bpf_map_delete_elem(s->map_fd, &idx)) { + fprintf(stderr, "af-xdp: unable to remove AF_XDP socket from map" + " %s\n", s->map_path); + } + close(s->map_fd); + s->map_fd = -1; } + g_free(s->map_path); + s->map_path = NULL; } static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp) @@ -323,7 +331,7 @@ static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp) s->pool = g_new(uint64_t, n_descs); /* Fill the pool in the opposite order, because it's a LIFO queue. */ - for (i = n_descs; i >= 0; i--) { + for (i = n_descs - 1; i >= 0; i--) { s->pool[i] = i * XSK_UMEM__DEFAULT_FRAME_SIZE; } s->n_pool = n_descs; @@ -345,7 +353,6 @@ static int af_xdp_socket_create(AFXDPState *s, }; int queue_id, error = 0; - s->inhibit = opts->has_inhibit && opts->inhibit; if (s->inhibit) { cfg.libxdp_flags |= XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD; } @@ -396,6 +403,35 @@ static int af_xdp_socket_create(AFXDPState *s, return 0; } +static int af_xdp_update_xsk_map(AFXDPState *s, Error **errp) +{ + int xsk_fd, idx, error = 0; + + if (!s->map_path) { + return 0; + } + + s->map_fd = bpf_obj_get(s->map_path); + if (s->map_fd < 0) { + error = errno; + } else { + xsk_fd = xsk_socket__fd(s->xsk); + idx = s->nc.queue_index + s->map_start_index; + if (bpf_map_update_elem(s->map_fd, &idx, &xsk_fd, 0)) { + error = errno; + } + } + + if (error) { + error_setg_errno(errp, error, + "failed to insert AF_XDP socket into map %s", + s->map_path); + return -1; + } + + return 0; +} + /* NetClientInfo methods. */ static NetClientInfo net_af_xdp_info = { .type = NET_CLIENT_DRIVER_AF_XDP, @@ -444,12 +480,14 @@ int net_init_af_xdp(const Netdev *netdev, { const NetdevAFXDPOptions *opts = &netdev->u.af_xdp; NetClientState *nc, *nc0 = NULL; + int32_t map_start_index; unsigned int ifindex; uint32_t prog_id = 0; g_autofree int *sock_fds = NULL; int64_t i, queues; Error *err = NULL; AFXDPState *s; + bool inhibit; ifindex = if_nametoindex(opts->ifname); if (!ifindex) { @@ -465,8 +503,28 @@ int net_init_af_xdp(const Netdev *netdev, return -1; } - if ((opts->has_inhibit && opts->inhibit) != !!opts->sock_fds) { - error_setg(errp, "'inhibit=on' requires 'sock-fds' and vice versa"); + inhibit = opts->has_inhibit && opts->inhibit; + if (inhibit && !opts->sock_fds && !opts->map_path) { + error_setg(errp, "'inhibit=on' requires 'sock-fds' or 'map-path'"); + return -1; + } + if (!inhibit && (opts->sock_fds || opts->map_path)) { + error_setg(errp, "'sock-fds' and 'map-path' require 'inhibit=on'"); + return -1; + } + if (opts->sock_fds && opts->map_path) { + error_setg(errp, "'sock-fds' and 'map-path' are mutually exclusive"); + return -1; + } + if (!opts->map_path && opts->has_map_start_index) { + error_setg(errp, "'map-start-index' requires 'map-path'"); + return -1; + } + + map_start_index = opts->has_map_start_index ? opts->map_start_index : 0; + if (map_start_index < 0) { + error_setg(errp, "'map-start-index' cannot be negative (%d)", + map_start_index); return -1; } @@ -490,21 +548,23 @@ int net_init_af_xdp(const Netdev *netdev, pstrcpy(s->ifname, sizeof(s->ifname), opts->ifname); s->ifindex = ifindex; - s->n_queues = queues; + s->inhibit = inhibit; + + s->map_path = g_strdup(opts->map_path); + s->map_start_index = map_start_index; + s->map_fd = -1; - if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, errp) - || af_xdp_socket_create(s, opts, errp)) { - /* Make sure the XDP program will be removed. */ - s->n_queues = i; - error_propagate(errp, err); + if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, &err) || + af_xdp_socket_create(s, opts, &err) || + af_xdp_update_xsk_map(s, &err)) { goto err; } } - if (nc0) { + if (nc0 && !inhibit) { s = DO_UPCAST(AFXDPState, nc, nc0); if (bpf_xdp_query_id(s->ifindex, s->xdp_flags, &prog_id) || !prog_id) { - error_setg_errno(errp, errno, + error_setg_errno(&err, errno, "no XDP program loaded on '%s', ifindex: %d", s->ifname, s->ifindex); goto err; @@ -518,6 +578,7 @@ int net_init_af_xdp(const Netdev *netdev, err: if (nc0) { qemu_del_net_client(nc0); + error_propagate(errp, err); } return -1; diff --git a/net/clients.h b/net/clients.h index be53794..e786ab4 100644 --- a/net/clients.h +++ b/net/clients.h @@ -29,6 +29,10 @@ int net_init_dump(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); +#ifdef CONFIG_PASST +int net_init_passt(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp); +#endif #ifdef CONFIG_SLIRP int net_init_slirp(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); diff --git a/net/dgram.c b/net/dgram.c index 48f653b..baa126d 100644 --- a/net/dgram.c +++ b/net/dgram.c @@ -226,7 +226,10 @@ static int net_dgram_mcast_create(struct sockaddr_in *mcastaddr, } } - qemu_socket_set_nonblock(fd); + if (!qemu_set_blocking(fd, false, errp)) { + goto fail; + } + return fd; fail: if (fd >= 0) { @@ -284,7 +287,7 @@ static int net_dgram_mcast_init(NetClientState *peer, Error **errp) { NetDgramState *s; - int fd, ret; + int fd; struct sockaddr_in *saddr; if (remote->type != SOCKET_ADDRESS_TYPE_INET) { @@ -332,11 +335,8 @@ static int net_dgram_mcast_init(NetClientState *peer, g_free(saddr); return -1; } - ret = qemu_socket_try_set_nonblock(fd); - if (ret < 0) { + if (!qemu_set_blocking(fd, false, errp)) { g_free(saddr); - error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", - name, fd); return -1; } @@ -504,7 +504,11 @@ int net_init_dgram(const Netdev *netdev, const char *name, close(fd); return -1; } - qemu_socket_set_nonblock(fd); + + if (!qemu_set_blocking(fd, false, errp)) { + close(fd); + return -1; + } dest_len = sizeof(raddr_in); dest_addr = g_malloc(dest_len); @@ -551,7 +555,10 @@ int net_init_dgram(const Netdev *netdev, const char *name, close(fd); return -1; } - qemu_socket_set_nonblock(fd); + if (!qemu_set_blocking(fd, false, errp)) { + close(fd); + return -1; + } dest_len = sizeof(raddr_un); dest_addr = g_malloc(dest_len); @@ -562,10 +569,7 @@ int net_init_dgram(const Netdev *netdev, const char *name, if (fd == -1) { return -1; } - ret = qemu_socket_try_set_nonblock(fd); - if (ret < 0) { - error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", - name, fd); + if (!qemu_set_blocking(fd, false, errp)) { return -1; } dest_addr = NULL; @@ -285,6 +285,9 @@ void net_hub_check_clients(void) case NET_CLIENT_DRIVER_NIC: has_nic = 1; break; +#ifdef CONFIG_PASST + case NET_CLIENT_DRIVER_PASST: +#endif case NET_CLIENT_DRIVER_USER: case NET_CLIENT_DRIVER_TAP: case NET_CLIENT_DRIVER_SOCKET: diff --git a/net/l2tpv3.c b/net/l2tpv3.c index b5547cb..cdfc641 100644 --- a/net/l2tpv3.c +++ b/net/l2tpv3.c @@ -648,6 +648,9 @@ int net_init_l2tpv3(const Netdev *netdev, error_setg(errp, "could not bind socket err=%i", errno); goto outerr; } + if (!qemu_set_blocking(fd, false, errp)) { + goto outerr; + } freeaddrinfo(result); @@ -709,8 +712,6 @@ int net_init_l2tpv3(const Netdev *netdev, s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT); s->header_buf = g_malloc(s->header_size); - qemu_socket_set_nonblock(fd); - s->fd = fd; s->counter = 0; diff --git a/net/meson.build b/net/meson.build index bb97b4d..da6ea63 100644 --- a/net/meson.build +++ b/net/meson.build @@ -1,6 +1,7 @@ system_ss.add(files( 'announce.c', 'checksum.c', + 'dgram.c', 'dump.c', 'eth.c', 'filter-buffer.c', @@ -12,7 +13,7 @@ system_ss.add(files( 'queue.c', 'socket.c', 'stream.c', - 'dgram.c', + 'stream_data.c', 'util.c', )) @@ -33,6 +34,9 @@ system_ss.add(when: 'CONFIG_TCG', if_true: files('filter-replay.c')) if have_l2tpv3 system_ss.add(files('l2tpv3.c')) endif +if enable_passt + system_ss.add(files('passt.c')) +endif system_ss.add(when: slirp, if_true: files('slirp.c')) system_ss.add(when: vde, if_true: files('vde.c')) if have_netmap @@ -522,6 +522,15 @@ bool qemu_has_uso(NetClientState *nc) return nc->info->has_uso(nc); } +bool qemu_has_tunnel(NetClientState *nc) +{ + if (!nc || !nc->info->has_tunnel) { + return false; + } + + return nc->info->has_tunnel(nc); +} + bool qemu_has_vnet_hdr(NetClientState *nc) { if (!nc || !nc->info->has_vnet_hdr) { @@ -540,14 +549,13 @@ bool qemu_has_vnet_hdr_len(NetClientState *nc, int len) return nc->info->has_vnet_hdr_len(nc, len); } -void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6, - int ecn, int ufo, int uso4, int uso6) +void qemu_set_offload(NetClientState *nc, const NetOffloads *ol) { if (!nc || !nc->info->set_offload) { return; } - nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo, uso4, uso6); + nc->info->set_offload(nc, ol); } int qemu_get_vnet_hdr_len(NetClientState *nc) @@ -567,12 +575,22 @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len) assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || len == sizeof(struct virtio_net_hdr) || - len == sizeof(struct virtio_net_hdr_v1_hash)); + len == sizeof(struct virtio_net_hdr_v1_hash) || + len == sizeof(struct virtio_net_hdr_v1_hash_tunnel)); nc->vnet_hdr_len = len; nc->info->set_vnet_hdr_len(nc, len); } +bool qemu_get_vnet_hash_supported_types(NetClientState *nc, uint32_t *types) +{ + if (!nc || !nc->info->get_vnet_hash_supported_types) { + return false; + } + + return nc->info->get_vnet_hash_supported_types(nc, types); +} + int qemu_set_vnet_le(NetClientState *nc, bool is_le) { #if HOST_BIG_ENDIAN @@ -1248,6 +1266,9 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])( const char *name, NetClientState *peer, Error **errp) = { [NET_CLIENT_DRIVER_NIC] = net_init_nic, +#ifdef CONFIG_PASST + [NET_CLIENT_DRIVER_PASST] = net_init_passt, +#endif #ifdef CONFIG_SLIRP [NET_CLIENT_DRIVER_USER] = net_init_slirp, #endif @@ -1353,6 +1374,7 @@ void show_netdevs(void) "dgram", "hubport", "tap", + "passt", #ifdef CONFIG_SLIRP "user", #endif @@ -1601,21 +1623,11 @@ void colo_notify_filters_event(int event, Error **errp) } } -void qmp_set_link(const char *name, bool up, Error **errp) +void net_client_set_link(NetClientState **ncs, int queues, bool up) { - NetClientState *ncs[MAX_QUEUE_NUM]; NetClientState *nc; - int queues, i; - - queues = qemu_find_net_clients_except(name, ncs, - NET_CLIENT_DRIVER__MAX, - MAX_QUEUE_NUM); + int i; - if (queues == 0) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", name); - return; - } nc = ncs[0]; for (i = 0; i < queues; i++) { @@ -1646,6 +1658,24 @@ void qmp_set_link(const char *name, bool up, Error **errp) } } +void qmp_set_link(const char *name, bool up, Error **errp) +{ + NetClientState *ncs[MAX_QUEUE_NUM]; + int queues; + + queues = qemu_find_net_clients_except(name, ncs, + NET_CLIENT_DRIVER__MAX, + MAX_QUEUE_NUM); + + if (queues == 0) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", name); + return; + } + + net_client_set_link(ncs, queues, up); +} + static void net_vm_change_state_handler(void *opaque, bool running, RunState state) { diff --git a/net/netmap.c b/net/netmap.c index 297510e..6cd8f2b 100644 --- a/net/netmap.c +++ b/net/netmap.c @@ -366,8 +366,7 @@ static void netmap_set_vnet_hdr_len(NetClientState *nc, int len) } } -static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, - int ecn, int ufo, int uso4, int uso6) +static void netmap_set_offload(NetClientState *nc, const NetOffloads *ol) { NetmapState *s = DO_UPCAST(NetmapState, nc, nc); diff --git a/net/passt.c b/net/passt.c new file mode 100644 index 0000000..32ecffb --- /dev/null +++ b/net/passt.c @@ -0,0 +1,745 @@ +/* + * passt network backend + * + * Copyright Red Hat + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include <glib/gstdio.h> +#include "qemu/error-report.h" +#include <gio/gio.h> +#include "net/net.h" +#include "clients.h" +#include "qapi/error.h" +#include "io/net-listener.h" +#include "chardev/char-fe.h" +#include "net/vhost_net.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "standard-headers/linux/virtio_net.h" +#include "stream_data.h" + +#ifdef CONFIG_VHOST_USER +static const int user_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_NOTIFICATION_DATA, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + + VIRTIO_F_ANY_LAYOUT, + VIRTIO_F_VERSION_1, + VIRTIO_NET_F_CSUM, + VIRTIO_NET_F_GUEST_CSUM, + VIRTIO_NET_F_GSO, + VIRTIO_NET_F_GUEST_TSO4, + VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, + VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_HOST_TSO4, + VIRTIO_NET_F_HOST_TSO6, + VIRTIO_NET_F_HOST_ECN, + VIRTIO_NET_F_HOST_UFO, + VIRTIO_NET_F_MRG_RXBUF, + VIRTIO_NET_F_MTU, + VIRTIO_F_IOMMU_PLATFORM, + VIRTIO_F_RING_PACKED, + VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, + VIRTIO_NET_F_RSS, + VIRTIO_NET_F_RSC_EXT, + VIRTIO_NET_F_HASH_REPORT, + VIRTIO_NET_F_GUEST_USO4, + VIRTIO_NET_F_GUEST_USO6, + VIRTIO_NET_F_HOST_USO, + + /* This bit implies RARP isn't sent by QEMU out of band */ + VIRTIO_NET_F_GUEST_ANNOUNCE, + + VIRTIO_NET_F_MQ, + + VHOST_INVALID_FEATURE_BIT +}; +#endif + +typedef struct NetPasstState { + NetStreamData data; + GPtrArray *args; + gchar *pidfile; + pid_t pid; +#ifdef CONFIG_VHOST_USER + /* vhost user */ + VhostUserState *vhost_user; + VHostNetState *vhost_net; + CharBackend vhost_chr; + guint vhost_watch; + uint64_t acked_features; + bool started; +#endif +} NetPasstState; + +static int net_passt_stream_start(NetPasstState *s, Error **errp); + +static void net_passt_cleanup(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + +#ifdef CONFIG_VHOST_USER + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); + s->vhost_net = NULL; + } + if (s->vhost_watch) { + g_source_remove(s->vhost_watch); + s->vhost_watch = 0; + } + qemu_chr_fe_deinit(&s->vhost_chr, true); + if (s->vhost_user) { + vhost_user_cleanup(s->vhost_user); + g_free(s->vhost_user); + s->vhost_user = NULL; + } +#endif + + kill(s->pid, SIGTERM); + if (g_remove(s->pidfile) != 0) { + warn_report("Failed to remove passt pidfile %s: %s", + s->pidfile, strerror(errno)); + } + g_free(s->pidfile); + g_ptr_array_free(s->args, TRUE); +} + +static ssize_t net_passt_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + NetStreamData *d = DO_UPCAST(NetStreamData, nc, nc); + + return net_stream_data_receive(d, buf, size); +} + +static gboolean net_passt_send(QIOChannel *ioc, GIOCondition condition, + gpointer data) +{ + if (net_stream_data_send(ioc, condition, data) == G_SOURCE_REMOVE) { + NetPasstState *s = DO_UPCAST(NetPasstState, data, data); + Error *error = NULL; + + /* we need to restart passt */ + kill(s->pid, SIGTERM); + if (net_passt_stream_start(s, &error) == -1) { + error_report_err(error); + } + + return G_SOURCE_REMOVE; + } + + return G_SOURCE_CONTINUE; +} + +#ifdef CONFIG_VHOST_USER +static int passt_set_vnet_endianness(NetClientState *nc, bool enable) +{ + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + return 0; +} + +static bool passt_has_vnet_hdr(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + return s->vhost_user != NULL; +} + +static bool passt_has_ufo(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + return s->vhost_user != NULL; +} + +static bool passt_check_peer_type(NetClientState *nc, ObjectClass *oc, + Error **errp) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + const char *driver = object_class_get_name(oc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + if (s->vhost_user == NULL) { + return true; + } + + if (!g_str_has_prefix(driver, "virtio-net-")) { + error_setg(errp, "vhost-user requires frontend driver virtio-net-*"); + return false; + } + + return true; +} + +static struct vhost_net *passt_get_vhost_net(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + return s->vhost_net; +} + +static uint64_t passt_get_acked_features(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + return s->acked_features; +} + +static void passt_save_acked_features(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + if (s->vhost_net) { + uint64_t features = vhost_net_get_acked_features(s->vhost_net); + if (features) { + s->acked_features = features; + } + } +} +#endif + +static NetClientInfo net_passt_info = { + .type = NET_CLIENT_DRIVER_PASST, + .size = sizeof(NetPasstState), + .receive = net_passt_receive, + .cleanup = net_passt_cleanup, +#ifdef CONFIG_VHOST_USER + .has_vnet_hdr = passt_has_vnet_hdr, + .has_ufo = passt_has_ufo, + .set_vnet_be = passt_set_vnet_endianness, + .set_vnet_le = passt_set_vnet_endianness, + .check_peer_type = passt_check_peer_type, + .get_vhost_net = passt_get_vhost_net, +#endif +}; + +static void net_passt_client_connected(QIOTask *task, gpointer opaque) +{ + NetPasstState *s = opaque; + + if (net_stream_data_client_connected(task, &s->data) == 0) { + qemu_set_info_str(&s->data.nc, "stream,connected to pid %d", s->pid); + } +} + +static int net_passt_start_daemon(NetPasstState *s, int sock, Error **errp) +{ + g_autoptr(GSubprocess) daemon = NULL; + g_autofree gchar *contents = NULL; + g_autoptr(GError) error = NULL; + GSubprocessLauncher *launcher; + + qemu_set_info_str(&s->data.nc, "launching passt"); + + launcher = g_subprocess_launcher_new(G_SUBPROCESS_FLAGS_NONE); + g_subprocess_launcher_take_fd(launcher, sock, 3); + + daemon = g_subprocess_launcher_spawnv(launcher, + (const gchar *const *)s->args->pdata, + &error); + g_object_unref(launcher); + + if (!daemon) { + error_setg(errp, "Error creating daemon: %s", error->message); + return -1; + } + + if (!g_subprocess_wait(daemon, NULL, &error)) { + error_setg(errp, "Error waiting for daemon: %s", error->message); + return -1; + } + + if (g_subprocess_get_if_exited(daemon) && + g_subprocess_get_exit_status(daemon)) { + return -1; + } + + if (!g_file_get_contents(s->pidfile, &contents, NULL, &error)) { + error_setg(errp, "Cannot read passt pid: %s", error->message); + return -1; + } + + s->pid = (pid_t)g_ascii_strtoll(contents, NULL, 10); + if (s->pid <= 0) { + error_setg(errp, "File '%s' did not contain a valid PID.", s->pidfile); + return -1; + } + + return 0; +} + +static int net_passt_stream_start(NetPasstState *s, Error **errp) +{ + QIOChannelSocket *sioc; + SocketAddress *addr; + int sv[2]; + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { + error_setg_errno(errp, errno, "socketpair() failed"); + return -1; + } + + /* connect to passt */ + qemu_set_info_str(&s->data.nc, "connecting to passt"); + + /* create socket channel */ + sioc = qio_channel_socket_new(); + s->data.ioc = QIO_CHANNEL(sioc); + s->data.nc.link_down = true; + s->data.send = net_passt_send; + + addr = g_new0(SocketAddress, 1); + addr->type = SOCKET_ADDRESS_TYPE_FD; + addr->u.fd.str = g_strdup_printf("%d", sv[0]); + + qio_channel_socket_connect_async(sioc, addr, + net_passt_client_connected, s, + NULL, NULL); + + qapi_free_SocketAddress(addr); + + /* start passt */ + if (net_passt_start_daemon(s, sv[1], errp) == -1) { + close(sv[0]); + close(sv[1]); + return -1; + } + close(sv[1]); + + return 0; +} + +#ifdef CONFIG_VHOST_USER +static gboolean passt_vhost_user_watch(void *do_not_use, GIOCondition cond, + void *opaque) +{ + NetPasstState *s = opaque; + + qemu_chr_fe_disconnect(&s->vhost_chr); + + return G_SOURCE_CONTINUE; +} + +static void passt_vhost_user_event(void *opaque, QEMUChrEvent event); + +static void chr_closed_bh(void *opaque) +{ + NetPasstState *s = opaque; + + passt_save_acked_features(&s->data.nc); + + net_client_set_link(&(NetClientState *){ &s->data.nc }, 1, false); + + qemu_chr_fe_set_handlers(&s->vhost_chr, NULL, NULL, passt_vhost_user_event, + NULL, s, NULL, true); +} + +static void passt_vhost_user_stop(NetPasstState *s) +{ + passt_save_acked_features(&s->data.nc); + vhost_net_cleanup(s->vhost_net); +} + +static int passt_vhost_user_start(NetPasstState *s, VhostUserState *be) +{ + struct vhost_net *net = NULL; + VhostNetOptions options; + + options.backend_type = VHOST_BACKEND_TYPE_USER; + options.net_backend = &s->data.nc; + options.opaque = be; + options.busyloop_timeout = 0; + options.nvqs = 2; + options.feature_bits = user_feature_bits; + options.max_tx_queue_size = VIRTQUEUE_MAX_SIZE; + options.get_acked_features = passt_get_acked_features; + options.save_acked_features = passt_save_acked_features; + options.is_vhost_user = true; + + net = vhost_net_init(&options); + if (!net) { + error_report("failed to init passt vhost_net"); + passt_vhost_user_stop(s); + return -1; + } + + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); + } + s->vhost_net = net; + + return 0; +} + +static void passt_vhost_user_event(void *opaque, QEMUChrEvent event) +{ + NetPasstState *s = opaque; + + switch (event) { + case CHR_EVENT_OPENED: + if (passt_vhost_user_start(s, s->vhost_user) < 0) { + qemu_chr_fe_disconnect(&s->vhost_chr); + return; + } + s->vhost_watch = qemu_chr_fe_add_watch(&s->vhost_chr, G_IO_HUP, + passt_vhost_user_watch, s); + net_client_set_link(&(NetClientState *){ &s->data.nc }, 1, true); + s->started = true; + break; + case CHR_EVENT_CLOSED: + if (s->vhost_watch) { + AioContext *ctx = qemu_get_current_aio_context(); + + g_source_remove(s->vhost_watch); + s->vhost_watch = 0; + qemu_chr_fe_set_handlers(&s->vhost_chr, NULL, NULL, NULL, NULL, + NULL, NULL, false); + + aio_bh_schedule_oneshot(ctx, chr_closed_bh, s); + } + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static int net_passt_vhost_user_init(NetPasstState *s, Error **errp) +{ + Chardev *chr; + int sv[2]; + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { + error_setg_errno(errp, errno, "socketpair() failed"); + return -1; + } + + /* connect to passt */ + qemu_set_info_str(&s->data.nc, "connecting to passt"); + + /* create chardev */ + + chr = CHARDEV(object_new(TYPE_CHARDEV_SOCKET)); + if (!chr || qemu_chr_add_client(chr, sv[0]) == -1) { + object_unref(OBJECT(chr)); + error_setg(errp, "Failed to make socket chardev"); + goto err; + } + + s->vhost_user = g_new0(struct VhostUserState, 1); + if (!qemu_chr_fe_init(&s->vhost_chr, chr, errp) || + !vhost_user_init(s->vhost_user, &s->vhost_chr, errp)) { + goto err; + } + + /* start passt */ + if (net_passt_start_daemon(s, sv[1], errp) == -1) { + goto err; + } + + do { + if (qemu_chr_fe_wait_connected(&s->vhost_chr, errp) < 0) { + goto err; + } + + qemu_chr_fe_set_handlers(&s->vhost_chr, NULL, NULL, + passt_vhost_user_event, NULL, s, NULL, + true); + } while (!s->started); + + qemu_set_info_str(&s->data.nc, "vhost-user,connected to pid %d", s->pid); + + close(sv[1]); + return 0; +err: + close(sv[0]); + close(sv[1]); + + return -1; +} +#else +static int net_passt_vhost_user_init(NetPasstState *s, Error **errp) +{ + error_setg(errp, "vhost-user support has not been built"); + + return -1; +} +#endif + +static GPtrArray *net_passt_decode_args(const NetDevPasstOptions *passt, + gchar *pidfile, Error **errp) +{ + GPtrArray *args = g_ptr_array_new_with_free_func(g_free); + + if (passt->path) { + g_ptr_array_add(args, g_strdup(passt->path)); + } else { + g_ptr_array_add(args, g_strdup("passt")); + } + + if (passt->has_vhost_user && passt->vhost_user) { + g_ptr_array_add(args, g_strdup("--vhost-user")); + } + + /* by default, be quiet */ + if (!passt->has_quiet || passt->quiet) { + g_ptr_array_add(args, g_strdup("--quiet")); + } + + if (passt->has_mtu) { + g_ptr_array_add(args, g_strdup("--mtu")); + g_ptr_array_add(args, g_strdup_printf("%"PRId64, passt->mtu)); + } + + if (passt->address) { + g_ptr_array_add(args, g_strdup("--address")); + g_ptr_array_add(args, g_strdup(passt->address)); + } + + if (passt->netmask) { + g_ptr_array_add(args, g_strdup("--netmask")); + g_ptr_array_add(args, g_strdup(passt->netmask)); + } + + if (passt->mac) { + g_ptr_array_add(args, g_strdup("--mac-addr")); + g_ptr_array_add(args, g_strdup(passt->mac)); + } + + if (passt->gateway) { + g_ptr_array_add(args, g_strdup("--gateway")); + g_ptr_array_add(args, g_strdup(passt->gateway)); + } + + if (passt->interface) { + g_ptr_array_add(args, g_strdup("--interface")); + g_ptr_array_add(args, g_strdup(passt->interface)); + } + + if (passt->outbound) { + g_ptr_array_add(args, g_strdup("--outbound")); + g_ptr_array_add(args, g_strdup(passt->outbound)); + } + + if (passt->outbound_if4) { + g_ptr_array_add(args, g_strdup("--outbound-if4")); + g_ptr_array_add(args, g_strdup(passt->outbound_if4)); + } + + if (passt->outbound_if6) { + g_ptr_array_add(args, g_strdup("--outbound-if6")); + g_ptr_array_add(args, g_strdup(passt->outbound_if6)); + } + + if (passt->dns) { + g_ptr_array_add(args, g_strdup("--dns")); + g_ptr_array_add(args, g_strdup(passt->dns)); + } + if (passt->fqdn) { + g_ptr_array_add(args, g_strdup("--fqdn")); + g_ptr_array_add(args, g_strdup(passt->fqdn)); + } + + if (passt->has_dhcp_dns && !passt->dhcp_dns) { + g_ptr_array_add(args, g_strdup("--no-dhcp-dns")); + } + + if (passt->has_dhcp_search && !passt->dhcp_search) { + g_ptr_array_add(args, g_strdup("--no-dhcp-search")); + } + + if (passt->map_host_loopback) { + g_ptr_array_add(args, g_strdup("--map-host-loopback")); + g_ptr_array_add(args, g_strdup(passt->map_host_loopback)); + } + + if (passt->map_guest_addr) { + g_ptr_array_add(args, g_strdup("--map-guest-addr")); + g_ptr_array_add(args, g_strdup(passt->map_guest_addr)); + } + + if (passt->dns_forward) { + g_ptr_array_add(args, g_strdup("--dns-forward")); + g_ptr_array_add(args, g_strdup(passt->dns_forward)); + } + + if (passt->dns_host) { + g_ptr_array_add(args, g_strdup("--dns-host")); + g_ptr_array_add(args, g_strdup(passt->dns_host)); + } + + if (passt->has_tcp && !passt->tcp) { + g_ptr_array_add(args, g_strdup("--no-tcp")); + } + + if (passt->has_udp && !passt->udp) { + g_ptr_array_add(args, g_strdup("--no-udp")); + } + + if (passt->has_icmp && !passt->icmp) { + g_ptr_array_add(args, g_strdup("--no-icmp")); + } + + if (passt->has_dhcp && !passt->dhcp) { + g_ptr_array_add(args, g_strdup("--no-dhcp")); + } + + if (passt->has_ndp && !passt->ndp) { + g_ptr_array_add(args, g_strdup("--no-ndp")); + } + if (passt->has_dhcpv6 && !passt->dhcpv6) { + g_ptr_array_add(args, g_strdup("--no-dhcpv6")); + } + + if (passt->has_ra && !passt->ra) { + g_ptr_array_add(args, g_strdup("--no-ra")); + } + + if (passt->has_freebind && passt->freebind) { + g_ptr_array_add(args, g_strdup("--freebind")); + } + + if (passt->has_ipv4 && !passt->ipv4) { + g_ptr_array_add(args, g_strdup("--ipv6-only")); + } + + if (passt->has_ipv6 && !passt->ipv6) { + g_ptr_array_add(args, g_strdup("--ipv4-only")); + } + + if (passt->has_search && passt->search) { + const StringList *list = passt->search; + GString *domains = g_string_new(list->value->str); + + list = list->next; + while (list) { + g_string_append(domains, " "); + g_string_append(domains, list->value->str); + list = list->next; + } + + g_ptr_array_add(args, g_strdup("--search")); + g_ptr_array_add(args, g_string_free(domains, FALSE)); + } + + if (passt->has_tcp_ports && passt->tcp_ports) { + const StringList *list = passt->tcp_ports; + GString *tcp_ports = g_string_new(list->value->str); + + list = list->next; + while (list) { + g_string_append(tcp_ports, ","); + g_string_append(tcp_ports, list->value->str); + list = list->next; + } + + g_ptr_array_add(args, g_strdup("--tcp-ports")); + g_ptr_array_add(args, g_string_free(tcp_ports, FALSE)); + } + + if (passt->has_udp_ports && passt->udp_ports) { + const StringList *list = passt->udp_ports; + GString *udp_ports = g_string_new(list->value->str); + + list = list->next; + while (list) { + g_string_append(udp_ports, ","); + g_string_append(udp_ports, list->value->str); + list = list->next; + } + + g_ptr_array_add(args, g_strdup("--udp-ports")); + g_ptr_array_add(args, g_string_free(udp_ports, FALSE)); + } + + if (passt->has_param && passt->param) { + const StringList *list = passt->param; + + while (list) { + g_ptr_array_add(args, g_strdup(list->value->str)); + list = list->next; + } + } + + /* provide a pid file to be able to kil passt on exit */ + g_ptr_array_add(args, g_strdup("--pid")); + g_ptr_array_add(args, g_strdup(pidfile)); + + /* g_subprocess_launcher_take_fd() will set the socket on fd 3 */ + g_ptr_array_add(args, g_strdup("--fd")); + g_ptr_array_add(args, g_strdup("3")); + + g_ptr_array_add(args, NULL); + + return args; +} + +int net_init_passt(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) +{ + g_autoptr(GError) error = NULL; + NetClientState *nc; + NetPasstState *s; + GPtrArray *args; + gchar *pidfile; + int pidfd; + + assert(netdev->type == NET_CLIENT_DRIVER_PASST); + + pidfd = g_file_open_tmp("passt-XXXXXX.pid", &pidfile, &error); + if (pidfd == -1) { + error_setg(errp, "Failed to create temporary file: %s", error->message); + return -1; + } + close(pidfd); + + args = net_passt_decode_args(&netdev->u.passt, pidfile, errp); + if (args == NULL) { + g_free(pidfile); + return -1; + } + + nc = qemu_new_net_client(&net_passt_info, peer, "passt", name); + s = DO_UPCAST(NetPasstState, data.nc, nc); + + s->args = args; + s->pidfile = pidfile; + + if (netdev->u.passt.has_vhost_user && netdev->u.passt.vhost_user) { + if (net_passt_vhost_user_init(s, errp) == -1) { + qemu_del_net_client(nc); + return -1; + } + + return 0; + } + + if (net_passt_stream_start(s, errp) == -1) { + qemu_del_net_client(nc); + return -1; + } + + return 0; +} diff --git a/net/slirp.c b/net/slirp.c index 9657e86..c627a9d 100644 --- a/net/slirp.c +++ b/net/slirp.c @@ -258,11 +258,13 @@ static void net_slirp_register_poll_sock(slirp_os_socket fd, void *opaque) { #ifdef WIN32 AioContext *ctxt = qemu_get_aio_context(); + g_autofree char *msg = NULL; if (WSAEventSelect(fd, event_notifier_get_handle(&ctxt->notifier), FD_READ | FD_ACCEPT | FD_CLOSE | FD_CONNECT | FD_WRITE | FD_OOB) != 0) { - error_setg_win32(&error_warn, WSAGetLastError(), "failed to WSAEventSelect()"); + msg = g_win32_error_message(WSAGetLastError()); + warn_report("failed to WSAEventSelect(): %s", msg); } #endif } @@ -270,8 +272,11 @@ static void net_slirp_register_poll_sock(slirp_os_socket fd, void *opaque) static void net_slirp_unregister_poll_sock(slirp_os_socket fd, void *opaque) { #ifdef WIN32 + g_autofree char *msg = NULL; + if (WSAEventSelect(fd, NULL, 0) != 0) { - error_setg_win32(&error_warn, WSAGetLastError(), "failed to WSAEventSelect()"); + msg = g_win32_error_message(WSAGetLastError()); + warn_report("failed to WSAEventSelect(): %s", msg); } #endif } @@ -727,6 +732,7 @@ static SlirpState *slirp_lookup(Monitor *mon, const char *id) void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict) { + /* TODO: support removing unix fwd */ struct sockaddr_in host_addr = { .sin_family = AF_INET, .sin_addr = { @@ -795,12 +801,13 @@ void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict) static int slirp_hostfwd(SlirpState *s, const char *redir_str, Error **errp) { - struct sockaddr_in host_addr = { - .sin_family = AF_INET, - .sin_addr = { - .s_addr = INADDR_ANY, - }, - }; + union { + struct sockaddr_in in; +#if !defined(WIN32) && SLIRP_CHECK_VERSION(4, 7, 0) + struct sockaddr_un un; +#endif + } host_addr = {0}; + struct sockaddr_in guest_addr = { .sin_family = AF_INET, .sin_addr = { @@ -811,9 +818,13 @@ static int slirp_hostfwd(SlirpState *s, const char *redir_str, Error **errp) int host_port, guest_port; const char *p; char buf[256]; - int is_udp; + int is_udp = 0; +#if !defined(WIN32) && SLIRP_CHECK_VERSION(4, 7, 0) + int is_unix = 0; +#endif const char *end; const char *fail_reason = "Unknown reason"; + socklen_t host_addr_size; p = redir_str; if (!p || get_str_sep(buf, sizeof(buf), &p, ':') < 0) { @@ -824,30 +835,83 @@ static int slirp_hostfwd(SlirpState *s, const char *redir_str, Error **errp) is_udp = 0; } else if (!strcmp(buf, "udp")) { is_udp = 1; - } else { - fail_reason = "Bad protocol name"; - goto fail_syntax; } - - if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) { - fail_reason = "Missing : separator"; - goto fail_syntax; +#if !defined(WIN32) && SLIRP_CHECK_VERSION(4, 7, 0) + else if (!strcmp(buf, "unix")) { + is_unix = 1; } - if (buf[0] != '\0' && !inet_aton(buf, &host_addr.sin_addr)) { - fail_reason = "Bad host address"; +#endif + else { + fail_reason = "Bad protocol name"; goto fail_syntax; } - if (get_str_sep(buf, sizeof(buf), &p, '-') < 0) { - fail_reason = "Bad host port separator"; - goto fail_syntax; - } - err = qemu_strtoi(buf, &end, 0, &host_port); - if (err || host_port < 0 || host_port > 65535) { - fail_reason = "Bad host port"; - goto fail_syntax; +#if !defined(WIN32) && SLIRP_CHECK_VERSION(4, 7, 0) + if (is_unix) { + if (get_str_sep(buf, sizeof(buf), &p, '-') < 0) { + fail_reason = "Missing - separator"; + goto fail_syntax; + } + if (buf[0] == '\0') { + fail_reason = "Missing unix socket path"; + goto fail_syntax; + } + if (buf[0] != '/') { + fail_reason = "unix socket path must be absolute"; + goto fail_syntax; + } + + size_t path_len = strlen(buf); + if (path_len > sizeof(host_addr.un.sun_path) - 1) { + fail_reason = "Unix socket path is too long"; + goto fail_syntax; + } + + struct stat st; + if (stat(buf, &st) == 0) { + if (!S_ISSOCK(st.st_mode)) { + fail_reason = "file exists and it's not unix socket"; + goto fail_syntax; + } + + if (unlink(buf) < 0) { + error_setg_errno(errp, errno, "Failed to unlink '%s'", buf); + goto fail_syntax; + } + } + host_addr.un.sun_family = AF_UNIX; + memcpy(host_addr.un.sun_path, buf, path_len); + host_addr_size = sizeof(host_addr.un); + } else +#endif + { + host_addr.in.sin_family = AF_INET; + host_addr.in.sin_addr.s_addr = INADDR_ANY; + + if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) { + fail_reason = "Missing : separator"; + goto fail_syntax; + } + + if (buf[0] != '\0' && !inet_aton(buf, &host_addr.in.sin_addr)) { + fail_reason = "Bad host address"; + goto fail_syntax; + } + + if (get_str_sep(buf, sizeof(buf), &p, '-') < 0) { + fail_reason = "Bad host port separator"; + goto fail_syntax; + } + + err = qemu_strtoi(buf, &end, 0, &host_port); + if (err || host_port < 0 || host_port > 65535) { + fail_reason = "Bad host port"; + goto fail_syntax; + } + + host_addr.in.sin_port = htons(host_port); + host_addr_size = sizeof(host_addr.in); } - host_addr.sin_port = htons(host_port); if (get_str_sep(buf, sizeof(buf), &p, ':') < 0) { fail_reason = "Missing guest address"; @@ -867,12 +931,13 @@ static int slirp_hostfwd(SlirpState *s, const char *redir_str, Error **errp) #if SLIRP_CHECK_VERSION(4, 5, 0) err = slirp_add_hostxfwd(s->slirp, - (struct sockaddr *) &host_addr, sizeof(host_addr), + (struct sockaddr *) &host_addr, host_addr_size, (struct sockaddr *) &guest_addr, sizeof(guest_addr), is_udp ? SLIRP_HOSTFWD_UDP : 0); #else + (void) host_addr_size; err = slirp_add_hostfwd(s->slirp, is_udp, - host_addr.sin_addr, host_port, + host_addr.in.sin_addr, host_port, guest_addr.sin_addr, guest_port); #endif diff --git a/net/socket.c b/net/socket.c index 8e3702e..1ad03fc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -157,7 +157,7 @@ static void net_socket_send(void *opaque) NetSocketState *s = opaque; int size; int ret; - uint8_t buf1[NET_BUFSIZE]; + QEMU_UNINITIALIZED uint8_t buf1[NET_BUFSIZE]; const uint8_t *buf; size = recv(s->fd, buf1, sizeof(buf1), 0); @@ -295,7 +295,10 @@ static int net_socket_mcast_create(struct sockaddr_in *mcastaddr, } } - qemu_socket_set_nonblock(fd); + if (!qemu_set_blocking(fd, false, errp)) { + goto fail; + } + return fd; fail: if (fd >= 0) @@ -508,7 +511,10 @@ static int net_socket_listen_init(NetClientState *peer, error_setg_errno(errp, errno, "can't create stream socket"); return -1; } - qemu_socket_set_nonblock(fd); + if (!qemu_set_blocking(fd, false, errp)) { + close(fd); + return -1; + } socket_set_fast_reuse(fd); @@ -556,7 +562,10 @@ static int net_socket_connect_init(NetClientState *peer, error_setg_errno(errp, errno, "can't create stream socket"); return -1; } - qemu_socket_set_nonblock(fd); + if (!qemu_set_blocking(fd, false, errp)) { + close(fd); + return -1; + } connected = 0; for(;;) { @@ -671,7 +680,10 @@ static int net_socket_udp_init(NetClientState *peer, close(fd); return -1; } - qemu_socket_set_nonblock(fd); + if (!qemu_set_blocking(fd, false, errp)) { + close(fd); + return -1; + } s = net_socket_fd_init_dgram(peer, model, name, fd, 0, NULL, errp); if (!s) { @@ -706,7 +718,7 @@ int net_init_socket(const Netdev *netdev, const char *name, } if (sock->fd) { - int fd, ret, so_type; + int fd, so_type; fd = monitor_fd_param(monitor_cur(), sock->fd, errp); if (fd == -1) { @@ -716,10 +728,7 @@ int net_init_socket(const Netdev *netdev, const char *name, if (so_type < 0) { return -1; } - ret = qemu_socket_try_set_nonblock(fd); - if (ret < 0) { - error_setg_errno(errp, -ret, "%s: Can't use file descriptor %d", - name, fd); + if (!qemu_set_blocking(fd, false, errp)) { return -1; } switch (so_type) { diff --git a/net/stream.c b/net/stream.c index 4de5613..94f823a 100644 --- a/net/stream.c +++ b/net/stream.c @@ -27,173 +27,50 @@ #include "net/net.h" #include "clients.h" -#include "monitor/monitor.h" #include "qapi/error.h" -#include "qemu/error-report.h" -#include "qemu/option.h" -#include "qemu/sockets.h" -#include "qemu/iov.h" -#include "qemu/main-loop.h" -#include "qemu/cutils.h" -#include "io/channel.h" -#include "io/channel-socket.h" #include "io/net-listener.h" #include "qapi/qapi-events-net.h" #include "qapi/qapi-visit-sockets.h" #include "qapi/clone-visitor.h" +#include "stream_data.h" + typedef struct NetStreamState { - NetClientState nc; - QIOChannel *listen_ioc; - QIONetListener *listener; - QIOChannel *ioc; - guint ioc_read_tag; - guint ioc_write_tag; - SocketReadState rs; - unsigned int send_index; /* number of bytes sent*/ + NetStreamData data; uint32_t reconnect_ms; guint timer_tag; SocketAddress *addr; } NetStreamState; -static void net_stream_listen(QIONetListener *listener, - QIOChannelSocket *cioc, - void *opaque); static void net_stream_arm_reconnect(NetStreamState *s); -static gboolean net_stream_writable(QIOChannel *ioc, - GIOCondition condition, - gpointer data) -{ - NetStreamState *s = data; - - s->ioc_write_tag = 0; - - qemu_flush_queued_packets(&s->nc); - - return G_SOURCE_REMOVE; -} - static ssize_t net_stream_receive(NetClientState *nc, const uint8_t *buf, size_t size) { - NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); - uint32_t len = htonl(size); - struct iovec iov[] = { - { - .iov_base = &len, - .iov_len = sizeof(len), - }, { - .iov_base = (void *)buf, - .iov_len = size, - }, - }; - struct iovec local_iov[2]; - unsigned int nlocal_iov; - size_t remaining; - ssize_t ret; - - remaining = iov_size(iov, 2) - s->send_index; - nlocal_iov = iov_copy(local_iov, 2, iov, 2, s->send_index, remaining); - ret = qio_channel_writev(s->ioc, local_iov, nlocal_iov, NULL); - if (ret == QIO_CHANNEL_ERR_BLOCK) { - ret = 0; /* handled further down */ - } - if (ret == -1) { - s->send_index = 0; - return -errno; - } - if (ret < (ssize_t)remaining) { - s->send_index += ret; - s->ioc_write_tag = qio_channel_add_watch(s->ioc, G_IO_OUT, - net_stream_writable, s, NULL); - return 0; - } - s->send_index = 0; - return size; -} - -static gboolean net_stream_send(QIOChannel *ioc, - GIOCondition condition, - gpointer data); - -static void net_stream_send_completed(NetClientState *nc, ssize_t len) -{ - NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); + NetStreamData *d = DO_UPCAST(NetStreamData, nc, nc); - if (!s->ioc_read_tag) { - s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, - net_stream_send, s, NULL); - } -} - -static void net_stream_rs_finalize(SocketReadState *rs) -{ - NetStreamState *s = container_of(rs, NetStreamState, rs); - - if (qemu_send_packet_async(&s->nc, rs->buf, - rs->packet_len, - net_stream_send_completed) == 0) { - if (s->ioc_read_tag) { - g_source_remove(s->ioc_read_tag); - s->ioc_read_tag = 0; - } - } + return net_stream_data_receive(d, buf, size); } static gboolean net_stream_send(QIOChannel *ioc, GIOCondition condition, gpointer data) { - NetStreamState *s = data; - int size; - int ret; - char buf1[NET_BUFSIZE]; - const char *buf; - - size = qio_channel_read(s->ioc, buf1, sizeof(buf1), NULL); - if (size < 0) { - if (errno != EWOULDBLOCK) { - goto eoc; - } - } else if (size == 0) { - /* end of connection */ - eoc: - s->ioc_read_tag = 0; - if (s->ioc_write_tag) { - g_source_remove(s->ioc_write_tag); - s->ioc_write_tag = 0; - } - if (s->listener) { - qemu_set_info_str(&s->nc, "listening"); - qio_net_listener_set_client_func(s->listener, net_stream_listen, - s, NULL); - } - object_unref(OBJECT(s->ioc)); - s->ioc = NULL; - - net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); - s->nc.link_down = true; + if (net_stream_data_send(ioc, condition, data) == G_SOURCE_REMOVE) { + NetStreamState *s = DO_UPCAST(NetStreamState, data, data); - qapi_event_send_netdev_stream_disconnected(s->nc.name); + qapi_event_send_netdev_stream_disconnected(s->data.nc.name); net_stream_arm_reconnect(s); return G_SOURCE_REMOVE; } - buf = buf1; - - ret = net_fill_rstate(&s->rs, (const uint8_t *)buf, size); - - if (ret == -1) { - goto eoc; - } return G_SOURCE_CONTINUE; } static void net_stream_cleanup(NetClientState *nc) { - NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); + NetStreamState *s = DO_UPCAST(NetStreamState, data.nc, nc); if (s->timer_tag) { g_source_remove(s->timer_tag); s->timer_tag = 0; @@ -202,28 +79,28 @@ static void net_stream_cleanup(NetClientState *nc) qapi_free_SocketAddress(s->addr); s->addr = NULL; } - if (s->ioc) { - if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) { - if (s->ioc_read_tag) { - g_source_remove(s->ioc_read_tag); - s->ioc_read_tag = 0; + if (s->data.ioc) { + if (QIO_CHANNEL_SOCKET(s->data.ioc)->fd != -1) { + if (s->data.ioc_read_tag) { + g_source_remove(s->data.ioc_read_tag); + s->data.ioc_read_tag = 0; } - if (s->ioc_write_tag) { - g_source_remove(s->ioc_write_tag); - s->ioc_write_tag = 0; + if (s->data.ioc_write_tag) { + g_source_remove(s->data.ioc_write_tag); + s->data.ioc_write_tag = 0; } } - object_unref(OBJECT(s->ioc)); - s->ioc = NULL; + object_unref(OBJECT(s->data.ioc)); + s->data.ioc = NULL; } - if (s->listen_ioc) { - if (s->listener) { - qio_net_listener_disconnect(s->listener); - object_unref(OBJECT(s->listener)); - s->listener = NULL; + if (s->data.listen_ioc) { + if (s->data.listener) { + qio_net_listener_disconnect(s->data.listener); + object_unref(OBJECT(s->data.listener)); + s->data.listener = NULL; } - object_unref(OBJECT(s->listen_ioc)); - s->listen_ioc = NULL; + object_unref(OBJECT(s->data.listen_ioc)); + s->data.listen_ioc = NULL; } } @@ -235,23 +112,13 @@ static NetClientInfo net_stream_info = { }; static void net_stream_listen(QIONetListener *listener, - QIOChannelSocket *cioc, - void *opaque) + QIOChannelSocket *cioc, gpointer data) { - NetStreamState *s = opaque; + NetStreamData *d = data; SocketAddress *addr; char *uri; - object_ref(OBJECT(cioc)); - - qio_net_listener_set_client_func(s->listener, NULL, s, NULL); - - s->ioc = QIO_CHANNEL(cioc); - qio_channel_set_name(s->ioc, "stream-server"); - s->nc.link_down = false; - - s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send, - s, NULL); + net_stream_data_listen(listener, cioc, data); if (cioc->localAddr.ss_family == AF_UNIX) { addr = qio_channel_socket_get_local_address(cioc, NULL); @@ -260,44 +127,42 @@ static void net_stream_listen(QIONetListener *listener, } g_assert(addr != NULL); uri = socket_uri(addr); - qemu_set_info_str(&s->nc, "%s", uri); + qemu_set_info_str(&d->nc, "%s", uri); g_free(uri); - qapi_event_send_netdev_stream_connected(s->nc.name, addr); + qapi_event_send_netdev_stream_connected(d->nc.name, addr); qapi_free_SocketAddress(addr); } static void net_stream_server_listening(QIOTask *task, gpointer opaque) { - NetStreamState *s = opaque; - QIOChannelSocket *listen_sioc = QIO_CHANNEL_SOCKET(s->listen_ioc); + NetStreamData *d = opaque; + QIOChannelSocket *listen_sioc = QIO_CHANNEL_SOCKET(d->listen_ioc); SocketAddress *addr; - int ret; Error *err = NULL; if (qio_task_propagate_error(task, &err)) { - qemu_set_info_str(&s->nc, "error: %s", error_get_pretty(err)); + qemu_set_info_str(&d->nc, "error: %s", error_get_pretty(err)); error_free(err); return; } addr = qio_channel_socket_get_local_address(listen_sioc, NULL); g_assert(addr != NULL); - ret = qemu_socket_try_set_nonblock(listen_sioc->fd); - if (addr->type == SOCKET_ADDRESS_TYPE_FD && ret < 0) { - qemu_set_info_str(&s->nc, "can't use file descriptor %s (errno %d)", - addr->u.fd.str, -ret); + if (!qemu_set_blocking(listen_sioc->fd, false, &err)) { + qemu_set_info_str(&d->nc, "error: %s", error_get_pretty(err)); + error_free(err); return; } - g_assert(ret == 0); qapi_free_SocketAddress(addr); - s->nc.link_down = true; - s->listener = qio_net_listener_new(); + d->nc.link_down = true; + d->listener = qio_net_listener_new(); - qemu_set_info_str(&s->nc, "listening"); - net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); - qio_net_listener_set_client_func(s->listener, net_stream_listen, s, NULL); - qio_net_listener_add(s->listener, listen_sioc); + qemu_set_info_str(&d->nc, "listening"); + net_socket_rs_init(&d->rs, net_stream_data_rs_finalize, false); + qio_net_listener_set_client_func(d->listener, d->listen, d, + NULL); + qio_net_listener_add(d->listener, listen_sioc); } static int net_stream_server_init(NetClientState *peer, @@ -307,16 +172,18 @@ static int net_stream_server_init(NetClientState *peer, Error **errp) { NetClientState *nc; - NetStreamState *s; + NetStreamData *d; QIOChannelSocket *listen_sioc = qio_channel_socket_new(); nc = qemu_new_net_client(&net_stream_info, peer, model, name); - s = DO_UPCAST(NetStreamState, nc, nc); - qemu_set_info_str(&s->nc, "initializing"); + d = DO_UPCAST(NetStreamData, nc, nc); + d->send = net_stream_send; + d->listen = net_stream_listen; + qemu_set_info_str(&d->nc, "initializing"); - s->listen_ioc = QIO_CHANNEL(listen_sioc); + d->listen_ioc = QIO_CHANNEL(listen_sioc); qio_channel_socket_listen_async(listen_sioc, addr, 0, - net_stream_server_listening, s, + net_stream_server_listening, d, NULL, NULL); return 0; @@ -325,49 +192,23 @@ static int net_stream_server_init(NetClientState *peer, static void net_stream_client_connected(QIOTask *task, gpointer opaque) { NetStreamState *s = opaque; - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(s->ioc); + NetStreamData *d = &s->data; + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(d->ioc); SocketAddress *addr; gchar *uri; - int ret; - Error *err = NULL; - if (qio_task_propagate_error(task, &err)) { - qemu_set_info_str(&s->nc, "error: %s", error_get_pretty(err)); - error_free(err); - goto error; + if (net_stream_data_client_connected(task, d) == -1) { + net_stream_arm_reconnect(s); + return; } addr = qio_channel_socket_get_remote_address(sioc, NULL); g_assert(addr != NULL); uri = socket_uri(addr); - qemu_set_info_str(&s->nc, "%s", uri); + qemu_set_info_str(&d->nc, "%s", uri); g_free(uri); - - ret = qemu_socket_try_set_nonblock(sioc->fd); - if (addr->type == SOCKET_ADDRESS_TYPE_FD && ret < 0) { - qemu_set_info_str(&s->nc, "can't use file descriptor %s (errno %d)", - addr->u.fd.str, -ret); - qapi_free_SocketAddress(addr); - goto error; - } - g_assert(ret == 0); - - net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); - - /* Disable Nagle algorithm on TCP sockets to reduce latency */ - qio_channel_set_delay(s->ioc, false); - - s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send, - s, NULL); - s->nc.link_down = false; - qapi_event_send_netdev_stream_connected(s->nc.name, addr); + qapi_event_send_netdev_stream_connected(d->nc.name, addr); qapi_free_SocketAddress(addr); - - return; -error: - object_unref(OBJECT(s->ioc)); - s->ioc = NULL; - net_stream_arm_reconnect(s); } static gboolean net_stream_reconnect(gpointer data) @@ -378,7 +219,7 @@ static gboolean net_stream_reconnect(gpointer data) s->timer_tag = 0; sioc = qio_channel_socket_new(); - s->ioc = QIO_CHANNEL(sioc); + s->data.ioc = QIO_CHANNEL(sioc); qio_channel_socket_connect_async(sioc, s->addr, net_stream_client_connected, s, NULL, NULL); @@ -388,7 +229,7 @@ static gboolean net_stream_reconnect(gpointer data) static void net_stream_arm_reconnect(NetStreamState *s) { if (s->reconnect_ms && s->timer_tag == 0) { - qemu_set_info_str(&s->nc, "connecting"); + qemu_set_info_str(&s->data.nc, "connecting"); s->timer_tag = g_timeout_add(s->reconnect_ms, net_stream_reconnect, s); } } @@ -405,11 +246,13 @@ static int net_stream_client_init(NetClientState *peer, QIOChannelSocket *sioc = qio_channel_socket_new(); nc = qemu_new_net_client(&net_stream_info, peer, model, name); - s = DO_UPCAST(NetStreamState, nc, nc); - qemu_set_info_str(&s->nc, "connecting"); + s = DO_UPCAST(NetStreamState, data.nc, nc); + qemu_set_info_str(&s->data.nc, "connecting"); - s->ioc = QIO_CHANNEL(sioc); - s->nc.link_down = true; + s->data.ioc = QIO_CHANNEL(sioc); + s->data.nc.link_down = true; + s->data.send = net_stream_send; + s->data.listen = net_stream_listen; s->reconnect_ms = reconnect_ms; if (reconnect_ms) { diff --git a/net/stream_data.c b/net/stream_data.c new file mode 100644 index 0000000..03740e9 --- /dev/null +++ b/net/stream_data.c @@ -0,0 +1,191 @@ +/* + * net stream generic functions + * + * Copyright Red Hat + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/iov.h" +#include "qapi/error.h" +#include "net/net.h" +#include "io/channel.h" +#include "io/net-listener.h" +#include "qemu/sockets.h" + +#include "stream_data.h" + +static gboolean net_stream_data_writable(QIOChannel *ioc, + GIOCondition condition, gpointer data) +{ + NetStreamData *d = data; + + d->ioc_write_tag = 0; + + qemu_flush_queued_packets(&d->nc); + + return G_SOURCE_REMOVE; +} + +ssize_t net_stream_data_receive(NetStreamData *d, const uint8_t *buf, + size_t size) +{ + uint32_t len = htonl(size); + struct iovec iov[] = { + { + .iov_base = &len, + .iov_len = sizeof(len), + }, { + .iov_base = (void *)buf, + .iov_len = size, + }, + }; + struct iovec local_iov[2]; + unsigned int nlocal_iov; + size_t remaining; + ssize_t ret; + + remaining = iov_size(iov, 2) - d->send_index; + nlocal_iov = iov_copy(local_iov, 2, iov, 2, d->send_index, remaining); + ret = qio_channel_writev(d->ioc, local_iov, nlocal_iov, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + ret = 0; /* handled further down */ + } + if (ret == -1) { + d->send_index = 0; + return -errno; + } + if (ret < (ssize_t)remaining) { + d->send_index += ret; + d->ioc_write_tag = qio_channel_add_watch(d->ioc, G_IO_OUT, + net_stream_data_writable, d, + NULL); + return 0; + } + d->send_index = 0; + return size; +} + +static void net_stream_data_send_completed(NetClientState *nc, ssize_t len) +{ + NetStreamData *d = DO_UPCAST(NetStreamData, nc, nc); + + if (!d->ioc_read_tag) { + d->ioc_read_tag = qio_channel_add_watch(d->ioc, G_IO_IN, d->send, d, + NULL); + } +} + +void net_stream_data_rs_finalize(SocketReadState *rs) +{ + NetStreamData *d = container_of(rs, NetStreamData, rs); + + if (qemu_send_packet_async(&d->nc, rs->buf, + rs->packet_len, + net_stream_data_send_completed) == 0) { + if (d->ioc_read_tag) { + g_source_remove(d->ioc_read_tag); + d->ioc_read_tag = 0; + } + } +} + +gboolean net_stream_data_send(QIOChannel *ioc, GIOCondition condition, + NetStreamData *d) +{ + int size; + int ret; + QEMU_UNINITIALIZED char buf1[NET_BUFSIZE]; + const char *buf; + + size = qio_channel_read(d->ioc, buf1, sizeof(buf1), NULL); + if (size < 0) { + if (errno != EWOULDBLOCK) { + goto eoc; + } + } else if (size == 0) { + /* end of connection */ + eoc: + d->ioc_read_tag = 0; + if (d->ioc_write_tag) { + g_source_remove(d->ioc_write_tag); + d->ioc_write_tag = 0; + } + if (d->listener) { + qemu_set_info_str(&d->nc, "listening"); + qio_net_listener_set_client_func(d->listener, + d->listen, d, NULL); + } + object_unref(OBJECT(d->ioc)); + d->ioc = NULL; + + net_socket_rs_init(&d->rs, net_stream_data_rs_finalize, false); + d->nc.link_down = true; + + return G_SOURCE_REMOVE; + } + buf = buf1; + + ret = net_fill_rstate(&d->rs, (const uint8_t *)buf, size); + + if (ret == -1) { + goto eoc; + } + + return G_SOURCE_CONTINUE; +} + +void net_stream_data_listen(QIONetListener *listener, + QIOChannelSocket *cioc, + NetStreamData *d) +{ + object_ref(OBJECT(cioc)); + + qio_net_listener_set_client_func(d->listener, NULL, d, NULL); + + d->ioc = QIO_CHANNEL(cioc); + qio_channel_set_name(d->ioc, "stream-server"); + d->nc.link_down = false; + + d->ioc_read_tag = qio_channel_add_watch(d->ioc, G_IO_IN, d->send, d, NULL); +} + +int net_stream_data_client_connected(QIOTask *task, NetStreamData *d) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(d->ioc); + SocketAddress *addr; + Error *err = NULL; + + if (qio_task_propagate_error(task, &err)) { + qemu_set_info_str(&d->nc, "error: %s", error_get_pretty(err)); + error_free(err); + goto error; + } + + addr = qio_channel_socket_get_remote_address(sioc, NULL); + g_assert(addr != NULL); + + if (!qemu_set_blocking(sioc->fd, false, &err)) { + qemu_set_info_str(&d->nc, "error: %s", error_get_pretty(err)); + error_free(err); + qapi_free_SocketAddress(addr); + goto error; + } + qapi_free_SocketAddress(addr); + + net_socket_rs_init(&d->rs, net_stream_data_rs_finalize, false); + + /* Disable Nagle algorithm on TCP sockets to reduce latency */ + qio_channel_set_delay(d->ioc, false); + + d->ioc_read_tag = qio_channel_add_watch(d->ioc, G_IO_IN, d->send, d, NULL); + d->nc.link_down = false; + + return 0; +error: + object_unref(OBJECT(d->ioc)); + d->ioc = NULL; + + return -1; +} diff --git a/net/stream_data.h b/net/stream_data.h new file mode 100644 index 0000000..b868625 --- /dev/null +++ b/net/stream_data.h @@ -0,0 +1,31 @@ +/* + * net stream generic functions + * + * Copyright Red Hat + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +typedef struct NetStreamData { + NetClientState nc; + QIOChannel *ioc; + guint ioc_read_tag; + guint ioc_write_tag; + SocketReadState rs; + unsigned int send_index; /* number of bytes sent*/ + QIOChannelFunc send; + /* server data */ + QIOChannel *listen_ioc; + QIONetListener *listener; + QIONetListenerClientFunc listen; +} NetStreamData; + +ssize_t net_stream_data_receive(NetStreamData *d, const uint8_t *buf, + size_t size); +void net_stream_data_rs_finalize(SocketReadState *rs); +gboolean net_stream_data_send(QIOChannel *ioc, GIOCondition condition, + NetStreamData *d); +int net_stream_data_client_connected(QIOTask *task, NetStreamData *d); +void net_stream_data_listen(QIONetListener *listener, + QIOChannelSocket *cioc, + NetStreamData *d); diff --git a/net/tap-bsd.c b/net/tap-bsd.c index b4c8444..bbf84d1 100644 --- a/net/tap-bsd.c +++ b/net/tap-bsd.c @@ -98,7 +98,12 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, return -1; } } - g_unix_set_fd_nonblocking(fd, true, NULL); + + if (!qemu_set_blocking(fd, false, errp)) { + close(fd); + return -1; + } + return fd; } @@ -189,7 +194,10 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, goto error; } - g_unix_set_fd_nonblocking(fd, true, NULL); + if (!qemu_set_blocking(fd, false, errp)) { + goto error; + } + return fd; error: @@ -217,6 +225,11 @@ int tap_probe_has_uso(int fd) return 0; } +bool tap_probe_has_tunnel(int fd) +{ + return false; +} + void tap_fd_set_vnet_hdr_len(int fd, int len) { } @@ -231,8 +244,7 @@ int tap_fd_set_vnet_be(int fd, int is_be) return -EINVAL; } -void tap_fd_set_offload(int fd, int csum, int tso4, - int tso6, int ecn, int ufo, int uso4, int uso6) +void tap_fd_set_offload(int fd, const NetOffloads *ol) { } diff --git a/net/tap-linux.c b/net/tap-linux.c index 22ec2f4..2a90b58 100644 --- a/net/tap-linux.c +++ b/net/tap-linux.c @@ -124,7 +124,12 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, return -1; } pstrcpy(ifname, ifname_size, ifr.ifr_name); - g_unix_set_fd_nonblocking(fd, true, NULL); + + if (!qemu_set_blocking(fd, false, errp)) { + close(fd); + return -1; + } + return fd; } @@ -196,6 +201,17 @@ int tap_probe_has_uso(int fd) return 1; } +bool tap_probe_has_tunnel(int fd) +{ + unsigned offload; + + offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_UDP_TUNNEL_GSO; + if (ioctl(fd, TUNSETOFFLOAD, offload) < 0) { + return false; + } + return true; +} + void tap_fd_set_vnet_hdr_len(int fd, int len) { if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) { @@ -239,8 +255,7 @@ int tap_fd_set_vnet_be(int fd, int is_be) abort(); } -void tap_fd_set_offload(int fd, int csum, int tso4, - int tso6, int ecn, int ufo, int uso4, int uso6) +void tap_fd_set_offload(int fd, const NetOffloads *ol) { unsigned int offload = 0; @@ -249,22 +264,32 @@ void tap_fd_set_offload(int fd, int csum, int tso4, return; } - if (csum) { + if (ol->csum) { offload |= TUN_F_CSUM; - if (tso4) + if (ol->tso4) { offload |= TUN_F_TSO4; - if (tso6) + } + if (ol->tso6) { offload |= TUN_F_TSO6; - if ((tso4 || tso6) && ecn) + } + if ((ol->tso4 || ol->tso6) && ol->ecn) { offload |= TUN_F_TSO_ECN; - if (ufo) + } + if (ol->ufo) { offload |= TUN_F_UFO; - if (uso4) { + } + if (ol->uso4) { offload |= TUN_F_USO4; } - if (uso6) { + if (ol->uso6) { offload |= TUN_F_USO6; } + if (ol->tnl) { + offload |= TUN_F_UDP_TUNNEL_GSO; + } + if (ol->tnl_csum) { + offload |= TUN_F_UDP_TUNNEL_GSO_CSUM; + } } if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) { diff --git a/net/tap-linux.h b/net/tap-linux.h index 9a58cec..8cd6b58 100644 --- a/net/tap-linux.h +++ b/net/tap-linux.h @@ -53,4 +53,13 @@ #define TUN_F_USO4 0x20 /* I can handle USO for IPv4 packets */ #define TUN_F_USO6 0x40 /* I can handle USO for IPv6 packets */ +/* I can handle TSO/USO for UDP tunneled packets */ +#define TUN_F_UDP_TUNNEL_GSO 0x080 + +/* + * I can handle TSO/USO for UDP tunneled packets requiring csum offload for + * the outer header + */ +#define TUN_F_UDP_TUNNEL_GSO_CSUM 0x100 + #endif /* QEMU_TAP_LINUX_H */ diff --git a/net/tap-solaris.c b/net/tap-solaris.c index 51b7830..75397e6 100644 --- a/net/tap-solaris.c +++ b/net/tap-solaris.c @@ -27,6 +27,7 @@ #include "tap_int.h" #include "qemu/ctype.h" #include "qemu/cutils.h" +#include "net/net.h" #include <sys/ethernet.h> #include <sys/sockio.h> @@ -198,7 +199,12 @@ int tap_open(char *ifname, int ifname_size, int *vnet_hdr, return -1; } } - g_unix_set_fd_nonblocking(fd, true, NULL); + + if (!qemu_set_blocking(fd, false, errp)) { + close(fd); + return -1; + } + return fd; } @@ -221,6 +227,11 @@ int tap_probe_has_uso(int fd) return 0; } +bool tap_probe_has_tunnel(int fd) +{ + return false; +} + void tap_fd_set_vnet_hdr_len(int fd, int len) { } @@ -235,8 +246,7 @@ int tap_fd_set_vnet_be(int fd, int is_be) return -EINVAL; } -void tap_fd_set_offload(int fd, int csum, int tso4, - int tso6, int ecn, int ufo, int uso4, int uso6) +void tap_fd_set_offload(int fd, const NetOffloads *ol) { } diff --git a/net/tap-stub.c b/net/tap-stub.c index 3867343..f7a5e0c 100644 --- a/net/tap-stub.c +++ b/net/tap-stub.c @@ -52,6 +52,11 @@ int tap_probe_has_uso(int fd) return 0; } +bool tap_probe_has_tunnel(int fd) +{ + return false; +} + void tap_fd_set_vnet_hdr_len(int fd, int len) { } @@ -66,8 +71,7 @@ int tap_fd_set_vnet_be(int fd, int is_be) return -EINVAL; } -void tap_fd_set_offload(int fd, int csum, int tso4, - int tso6, int ecn, int ufo, int uso4, int uso6) +void tap_fd_set_offload(int fd, const NetOffloads *ol) { } diff --git a/net/tap-win32.c b/net/tap-win32.c index 671dee9..38baf90 100644 --- a/net/tap-win32.c +++ b/net/tap-win32.c @@ -704,11 +704,6 @@ static void tap_win32_send(void *opaque) } } -struct vhost_net *tap_get_vhost_net(NetClientState *nc) -{ - return NULL; -} - static NetClientInfo net_tap_win32_info = { .type = NET_CLIENT_DRIVER_TAP, .size = sizeof(TAPState), @@ -42,11 +42,31 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/sockets.h" +#include "hw/virtio/vhost.h" #include "net/tap.h" #include "net/vhost_net.h" +static const int kernel_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_NET_F_MRG_RXBUF, + VIRTIO_F_VERSION_1, + VIRTIO_NET_F_MTU, + VIRTIO_F_IOMMU_PLATFORM, + VIRTIO_F_RING_PACKED, + VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, + VIRTIO_F_NOTIFICATION_DATA, + VIRTIO_NET_F_RSC_EXT, + VIRTIO_NET_F_HASH_REPORT, + VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, + VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, + VHOST_INVALID_FEATURE_BIT +}; + typedef struct TAPState { NetClientState nc; int fd; @@ -58,6 +78,7 @@ typedef struct TAPState { bool using_vnet_hdr; bool has_ufo; bool has_uso; + bool has_tunnel; bool enabled; VHostNetState *vhost_net; unsigned host_vnet_hdr_len; @@ -172,6 +193,11 @@ static void tap_send(void *opaque) break; } + if (s->host_vnet_hdr_len && size <= s->host_vnet_hdr_len) { + /* Invalid packet */ + break; + } + if (s->host_vnet_hdr_len && !s->using_vnet_hdr) { buf += s->host_vnet_hdr_len; size -= s->host_vnet_hdr_len; @@ -223,6 +249,14 @@ static bool tap_has_uso(NetClientState *nc) return s->has_uso; } +static bool tap_has_tunnel(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_TAP); + return s->has_tunnel; +} + static bool tap_has_vnet_hdr(NetClientState *nc) { TAPState *s = DO_UPCAST(TAPState, nc, nc); @@ -262,15 +296,14 @@ static int tap_set_vnet_be(NetClientState *nc, bool is_be) return tap_fd_set_vnet_be(s->fd, is_be); } -static void tap_set_offload(NetClientState *nc, int csum, int tso4, - int tso6, int ecn, int ufo, int uso4, int uso6) +static void tap_set_offload(NetClientState *nc, const NetOffloads *ol) { TAPState *s = DO_UPCAST(TAPState, nc, nc); if (s->fd < 0) { return; } - tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo, uso4, uso6); + tap_fd_set_offload(s->fd, ol); } static void tap_exit_notify(Notifier *notifier, void *data) @@ -329,6 +362,18 @@ int tap_get_fd(NetClientState *nc) return s->fd; } +/* + * tap_get_vhost_net() can return NULL if a tap net-device backend is + * created with 'vhost=off' option, 'vhostforce=off' or no vhost or + * vhostforce or vhostfd options at all. Please see net_init_tap_one(). + */ +static VHostNetState *tap_get_vhost_net(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + assert(nc->info->type == NET_CLIENT_DRIVER_TAP); + return s->vhost_net; +} + /* fd support */ static NetClientInfo net_tap_info = { @@ -340,6 +385,7 @@ static NetClientInfo net_tap_info = { .cleanup = tap_cleanup, .has_ufo = tap_has_ufo, .has_uso = tap_has_uso, + .has_tunnel = tap_has_tunnel, .has_vnet_hdr = tap_has_vnet_hdr, .has_vnet_hdr_len = tap_has_vnet_hdr_len, .set_offload = tap_set_offload, @@ -347,6 +393,7 @@ static NetClientInfo net_tap_info = { .set_vnet_le = tap_set_vnet_le, .set_vnet_be = tap_set_vnet_be, .set_steering_ebpf = tap_set_steering_ebpf, + .get_vhost_net = tap_get_vhost_net, }; static TAPState *net_tap_fd_init(NetClientState *peer, @@ -355,6 +402,7 @@ static TAPState *net_tap_fd_init(NetClientState *peer, int fd, int vnet_hdr) { + NetOffloads ol = {}; NetClientState *nc; TAPState *s; @@ -367,8 +415,9 @@ static TAPState *net_tap_fd_init(NetClientState *peer, s->using_vnet_hdr = false; s->has_ufo = tap_probe_has_ufo(s->fd); s->has_uso = tap_probe_has_uso(s->fd); + s->has_tunnel = tap_probe_has_tunnel(s->fd); s->enabled = true; - tap_set_offload(&s->nc, 0, 0, 0, 0, 0, 0, 0); + tap_set_offload(&s->nc, &ol); /* * Make sure host header length is set correctly in tap: * it might have been modified by another instance of qemu. @@ -591,8 +640,7 @@ int net_init_bridge(const Netdev *netdev, const char *name, return -1; } - if (!g_unix_set_fd_nonblocking(fd, true, NULL)) { - error_setg_errno(errp, errno, "Failed to set FD nonblocking"); + if (!qemu_set_blocking(fd, false, errp)) { return -1; } vnet_hdr = tap_probe_vnet_hdr(fd, errp); @@ -693,9 +741,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, error_propagate(errp, err); goto failed; } - if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) { - error_setg_errno(errp, errno, "%s: Can't use file descriptor %d", - name, fd); + if (!qemu_set_blocking(vhostfd, false, errp)) { goto failed; } } else { @@ -705,13 +751,17 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, "tap: open vhost char device failed"); goto failed; } - if (!g_unix_set_fd_nonblocking(vhostfd, true, NULL)) { - error_setg_errno(errp, errno, "Failed to set FD nonblocking"); + if (!qemu_set_blocking(vhostfd, false, errp)) { goto failed; } } options.opaque = (void *)(uintptr_t)vhostfd; options.nvqs = 2; + options.feature_bits = kernel_feature_bits; + options.get_acked_features = NULL; + options.save_acked_features = NULL; + options.max_tx_queue_size = 0; + options.is_vhost_user = false; s->vhost_net = vhost_net_init(&options); if (!s->vhost_net) { @@ -798,9 +848,7 @@ int net_init_tap(const Netdev *netdev, const char *name, return -1; } - if (!g_unix_set_fd_nonblocking(fd, true, NULL)) { - error_setg_errno(errp, errno, "%s: Can't use file descriptor %d", - name, fd); + if (!qemu_set_blocking(fd, false, errp)) { close(fd); return -1; } @@ -854,10 +902,8 @@ int net_init_tap(const Netdev *netdev, const char *name, goto free_fail; } - ret = g_unix_set_fd_nonblocking(fd, true, NULL); - if (!ret) { - error_setg_errno(errp, errno, "%s: Can't use file descriptor %d", - name, fd); + if (!qemu_set_blocking(fd, false, errp)) { + ret = -1; goto free_fail; } @@ -910,8 +956,7 @@ free_fail: return -1; } - if (!g_unix_set_fd_nonblocking(fd, true, NULL)) { - error_setg_errno(errp, errno, "Failed to set FD nonblocking"); + if (!qemu_set_blocking(fd, false, errp)) { return -1; } vnet_hdr = tap_probe_vnet_hdr(fd, errp); @@ -980,13 +1025,6 @@ free_fail: return 0; } -VHostNetState *tap_get_vhost_net(NetClientState *nc) -{ - TAPState *s = DO_UPCAST(TAPState, nc, nc); - assert(nc->info->type == NET_CLIENT_DRIVER_TAP); - return s->vhost_net; -} - int tap_enable(NetClientState *nc) { TAPState *s = DO_UPCAST(TAPState, nc, nc); diff --git a/net/tap_int.h b/net/tap_int.h index 8857ff2..b76a050 100644 --- a/net/tap_int.h +++ b/net/tap_int.h @@ -27,6 +27,7 @@ #define NET_TAP_INT_H #include "qapi/qapi-types-net.h" +#include "net/net.h" int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required, int mq_required, Error **errp); @@ -37,8 +38,8 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp); int tap_probe_vnet_hdr(int fd, Error **errp); int tap_probe_has_ufo(int fd); int tap_probe_has_uso(int fd); -void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo, - int uso4, int uso6); +bool tap_probe_has_tunnel(int fd); +void tap_fd_set_offload(int fd, const NetOffloads *ol); void tap_fd_set_vnet_hdr_len(int fd, int len); int tap_fd_set_vnet_le(int fd, int vnet_is_le); int tap_fd_set_vnet_be(int fd, int vnet_is_be); diff --git a/net/vhost-user-stub.c b/net/vhost-user-stub.c index 52ab4e1..283dee8 100644 --- a/net/vhost-user-stub.c +++ b/net/vhost-user-stub.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include "clients.h" #include "net/vhost_net.h" -#include "net/vhost-user.h" #include "qemu/error-report.h" #include "qapi/error.h" diff --git a/net/vhost-user.c b/net/vhost-user.c index 0b235e5..8b96157 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -11,8 +11,9 @@ #include "qemu/osdep.h" #include "clients.h" #include "net/vhost_net.h" -#include "net/vhost-user.h" +#include "hw/virtio/vhost.h" #include "hw/virtio/vhost-user.h" +#include "standard-headers/linux/virtio_net.h" #include "chardev/char-fe.h" #include "qapi/error.h" #include "qapi/qapi-commands-net.h" @@ -22,6 +23,46 @@ #include "qemu/option.h" #include "trace.h" +static const int user_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_NOTIFICATION_DATA, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + + VIRTIO_F_ANY_LAYOUT, + VIRTIO_F_VERSION_1, + VIRTIO_NET_F_CSUM, + VIRTIO_NET_F_GUEST_CSUM, + VIRTIO_NET_F_GSO, + VIRTIO_NET_F_GUEST_TSO4, + VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, + VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_HOST_TSO4, + VIRTIO_NET_F_HOST_TSO6, + VIRTIO_NET_F_HOST_ECN, + VIRTIO_NET_F_HOST_UFO, + VIRTIO_NET_F_MRG_RXBUF, + VIRTIO_NET_F_MTU, + VIRTIO_F_IOMMU_PLATFORM, + VIRTIO_F_RING_PACKED, + VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, + VIRTIO_NET_F_RSS, + VIRTIO_NET_F_RSC_EXT, + VIRTIO_NET_F_HASH_REPORT, + VIRTIO_NET_F_GUEST_USO4, + VIRTIO_NET_F_GUEST_USO6, + VIRTIO_NET_F_HOST_USO, + + /* This bit implies RARP isn't sent by QEMU out of band */ + VIRTIO_NET_F_GUEST_ANNOUNCE, + + VIRTIO_NET_F_MQ, + + VHOST_INVALID_FEATURE_BIT +}; + typedef struct NetVhostUserState { NetClientState nc; CharBackend chr; /* only queue index 0 */ @@ -32,21 +73,21 @@ typedef struct NetVhostUserState { bool started; } NetVhostUserState; -VHostNetState *vhost_user_get_vhost_net(NetClientState *nc) +static struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc) { NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc); assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER); return s->vhost_net; } -uint64_t vhost_user_get_acked_features(NetClientState *nc) +static uint64_t vhost_user_get_acked_features(NetClientState *nc) { NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc); assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER); return s->acked_features; } -void vhost_user_save_acked_features(NetClientState *nc) +static void vhost_user_save_acked_features(NetClientState *nc) { NetVhostUserState *s; @@ -96,6 +137,12 @@ static int vhost_user_start(int queues, NetClientState *ncs[], options.opaque = be; options.busyloop_timeout = 0; options.nvqs = 2; + options.feature_bits = user_feature_bits; + options.max_tx_queue_size = VIRTQUEUE_MAX_SIZE; + options.get_acked_features = vhost_user_get_acked_features; + options.save_acked_features = vhost_user_save_acked_features; + options.is_vhost_user = true; + net = vhost_net_init(&options); if (!net) { error_report("failed to init vhost_net for queue %d", i); @@ -231,6 +278,7 @@ static NetClientInfo net_vhost_user_info = { .set_vnet_be = vhost_user_set_vnet_endianness, .set_vnet_le = vhost_user_set_vnet_endianness, .check_peer_type = vhost_user_check_peer_type, + .get_vhost_net = vhost_user_get_vhost_net, }; static gboolean net_vhost_user_watch(void *do_not_use, GIOCondition cond, @@ -250,7 +298,6 @@ static void chr_closed_bh(void *opaque) const char *name = opaque; NetClientState *ncs[MAX_QUEUE_NUM]; NetVhostUserState *s; - Error *err = NULL; int queues, i; queues = qemu_find_net_clients_except(name, ncs, @@ -264,14 +311,11 @@ static void chr_closed_bh(void *opaque) vhost_user_save_acked_features(ncs[i]); } - qmp_set_link(name, false, &err); + net_client_set_link(ncs, queues, false); qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, NULL, opaque, NULL, true); - if (err) { - error_report_err(err); - } qapi_event_send_netdev_vhost_user_disconnected(name); } @@ -281,7 +325,6 @@ static void net_vhost_user_event(void *opaque, QEMUChrEvent event) NetClientState *ncs[MAX_QUEUE_NUM]; NetVhostUserState *s; Chardev *chr; - Error *err = NULL; int queues; queues = qemu_find_net_clients_except(name, ncs, @@ -300,7 +343,7 @@ static void net_vhost_user_event(void *opaque, QEMUChrEvent event) } s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, net_vhost_user_watch, s); - qmp_set_link(name, true, &err); + net_client_set_link(ncs, queues, true); s->started = true; qapi_event_send_netdev_vhost_user_connected(name, chr->label); break; @@ -327,10 +370,6 @@ static void net_vhost_user_event(void *opaque, QEMUChrEvent event) /* Ignore */ break; } - - if (err) { - error_report_err(err); - } } static int net_vhost_user_init(NetClientState *peer, const char *device, diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 7ca8b46..74d26a9 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -55,7 +55,7 @@ typedef struct VhostVDPAState { * with the exception of VHOST_INVALID_FEATURE_BIT, * which should always be the last entry. */ -const int vdpa_feature_bits[] = { +static const int vdpa_feature_bits[] = { VIRTIO_F_ANY_LAYOUT, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_NOTIFY_ON_EMPTY, @@ -132,7 +132,7 @@ static const uint64_t vdpa_svq_device_features = #define VHOST_VDPA_NET_CVQ_ASID 1 -VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) +static struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); @@ -201,6 +201,11 @@ static int vhost_vdpa_add(NetClientState *ncs, void *be, options.opaque = be; options.busyloop_timeout = 0; options.nvqs = nvqs; + options.feature_bits = vdpa_feature_bits; + options.get_acked_features = NULL; + options.save_acked_features = NULL; + options.max_tx_queue_size = VIRTQUEUE_MAX_SIZE; + options.is_vhost_user = false; net = vhost_net_init(&options); if (!net) { @@ -235,18 +240,39 @@ static void vhost_vdpa_cleanup(NetClientState *nc) return; } qemu_close(s->vhost_vdpa.shared->device_fd); + g_clear_pointer(&s->vhost_vdpa.shared->iova_tree, vhost_iova_tree_delete); g_free(s->vhost_vdpa.shared); } -/** Dummy SetSteeringEBPF to support RSS for vhost-vdpa backend */ -static bool vhost_vdpa_set_steering_ebpf(NetClientState *nc, int prog_fd) +static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc) { + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + return true; } -static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc) +static bool vhost_vdpa_get_vnet_hash_supported_types(NetClientState *nc, + uint32_t *types) { assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + uint64_t features = s->vhost_vdpa.dev->features; + int fd = s->vhost_vdpa.shared->device_fd; + struct { + struct vhost_vdpa_config hdr; + uint32_t supported_hash_types; + } config; + + if (!virtio_has_feature(features, VIRTIO_NET_F_HASH_REPORT) && + !virtio_has_feature(features, VIRTIO_NET_F_RSS)) { + return false; + } + + config.hdr.off = offsetof(struct virtio_net_config, supported_hash_types); + config.hdr.len = sizeof(config.supported_hash_types); + + assert(!ioctl(fd, VHOST_VDPA_GET_CONFIG, &config)); + *types = le32_to_cpu(config.supported_hash_types); return true; } @@ -362,14 +388,8 @@ static int vdpa_net_migration_state_notifier(NotifierWithReturn *notifier, static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) { - struct vhost_vdpa *v = &s->vhost_vdpa; - migration_add_notifier(&s->migration_state, vdpa_net_migration_state_notifier); - if (v->shadow_vqs_enabled) { - v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, - v->shared->iova_range.last); - } } static int vhost_vdpa_net_data_start(NetClientState *nc) @@ -416,19 +436,12 @@ static int vhost_vdpa_net_data_load(NetClientState *nc) static void vhost_vdpa_net_client_stop(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); - struct vhost_dev *dev; assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); if (s->vhost_vdpa.index == 0) { migration_remove_notifier(&s->migration_state); } - - dev = s->vhost_vdpa.dev; - if (dev->vq_index + dev->nvqs == dev->vq_index_end) { - g_clear_pointer(&s->vhost_vdpa.shared->iova_tree, - vhost_iova_tree_delete); - } } static NetClientInfo net_vhost_vdpa_info = { @@ -440,10 +453,11 @@ static NetClientInfo net_vhost_vdpa_info = { .stop = vhost_vdpa_net_client_stop, .cleanup = vhost_vdpa_cleanup, .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, + .get_vnet_hash_supported_types = vhost_vdpa_get_vnet_hash_supported_types, .has_ufo = vhost_vdpa_has_ufo, .set_vnet_le = vhost_vdpa_set_vnet_le, .check_peer_type = vhost_vdpa_check_peer_type, - .set_steering_ebpf = vhost_vdpa_set_steering_ebpf, + .get_vhost_net = vhost_vdpa_get_vhost_net, }; static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, @@ -600,24 +614,6 @@ out: return 0; } - /* - * If other vhost_vdpa already have an iova_tree, reuse it for simplicity, - * whether CVQ shares ASID with guest or not, because: - * - Memory listener need access to guest's memory addresses allocated in - * the IOVA tree. - * - There should be plenty of IOVA address space for both ASID not to - * worry about collisions between them. Guest's translations are still - * validated with virtio virtqueue_pop so there is no risk for the guest - * to access memory that it shouldn't. - * - * To allocate a iova tree per ASID is doable but it complicates the code - * and it is not worth it for the moment. - */ - if (!v->shared->iova_tree) { - v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, - v->shared->iova_range.last); - } - r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_page_len(), false); if (unlikely(r < 0)) { @@ -868,13 +864,13 @@ static int vhost_vdpa_net_load_rss(VhostVDPAState *s, const VirtIONet *n, * configuration only at live migration. */ if (!n->rss_data.enabled || - n->rss_data.hash_types == VIRTIO_NET_HASH_REPORT_NONE) { + n->rss_data.runtime_hash_types == VIRTIO_NET_HASH_REPORT_NONE) { return 0; } table = g_malloc_n(n->rss_data.indirections_len, sizeof(n->rss_data.indirections_table[0])); - cfg.hash_types = cpu_to_le32(n->rss_data.hash_types); + cfg.hash_types = cpu_to_le32(n->rss_data.runtime_hash_types); if (do_rss) { /* @@ -1314,9 +1310,10 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { .stop = vhost_vdpa_net_cvq_stop, .cleanup = vhost_vdpa_cleanup, .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, + .get_vnet_hash_supported_types = vhost_vdpa_get_vnet_hash_supported_types, .has_ufo = vhost_vdpa_has_ufo, .check_peer_type = vhost_vdpa_check_peer_type, - .set_steering_ebpf = vhost_vdpa_set_steering_ebpf, + .get_vhost_net = vhost_vdpa_get_vhost_net, }; /* @@ -1726,6 +1723,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.shared->device_fd = vdpa_device_fd; s->vhost_vdpa.shared->iova_range = iova_range; s->vhost_vdpa.shared->shadow_data = svq; + s->vhost_vdpa.shared->iova_tree = vhost_iova_tree_new(iova_range.first, + iova_range.last); } else if (!is_datapath) { s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), PROT_READ | PROT_WRITE, @@ -1841,9 +1840,8 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, &has_cvq, errp); - if (queue_pairs < 0) { - qemu_close(vdpa_device_fd); - return queue_pairs; + if (queue_pairs <= 0) { + goto err; } r = vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); |