diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/af-xdp.c | 99 | ||||
-rw-r--r-- | net/clients.h | 4 | ||||
-rw-r--r-- | net/hub.c | 3 | ||||
-rw-r--r-- | net/meson.build | 6 | ||||
-rw-r--r-- | net/net.c | 45 | ||||
-rw-r--r-- | net/passt.c | 753 | ||||
-rw-r--r-- | net/stream.c | 282 | ||||
-rw-r--r-- | net/stream_data.c | 193 | ||||
-rw-r--r-- | net/stream_data.h | 31 | ||||
-rw-r--r-- | net/tap-win32.c | 5 | ||||
-rw-r--r-- | net/tap.c | 43 | ||||
-rw-r--r-- | net/vhost-user-stub.c | 1 | ||||
-rw-r--r-- | net/vhost-user.c | 60 | ||||
-rw-r--r-- | net/vhost-vdpa.c | 45 |
14 files changed, 1292 insertions, 278 deletions
diff --git a/net/af-xdp.c b/net/af-xdp.c index 01c5fb9..14f302e 100644 --- a/net/af-xdp.c +++ b/net/af-xdp.c @@ -49,9 +49,12 @@ typedef struct AFXDPState { char *buffer; struct xsk_umem *umem; - uint32_t n_queues; uint32_t xdp_flags; bool inhibit; + + char *map_path; + int map_fd; + uint32_t map_start_index; } AFXDPState; #define AF_XDP_BATCH_SIZE 64 @@ -261,6 +264,7 @@ static void af_xdp_send(void *opaque) static void af_xdp_cleanup(NetClientState *nc) { AFXDPState *s = DO_UPCAST(AFXDPState, nc, nc); + int idx; qemu_purge_queued_packets(nc); @@ -275,13 +279,17 @@ static void af_xdp_cleanup(NetClientState *nc) qemu_vfree(s->buffer); s->buffer = NULL; - /* Remove the program if it's the last open queue. */ - if (!s->inhibit && nc->queue_index == s->n_queues - 1 && s->xdp_flags - && bpf_xdp_detach(s->ifindex, s->xdp_flags, NULL) != 0) { - fprintf(stderr, - "af-xdp: unable to remove XDP program from '%s', ifindex: %d\n", - s->ifname, s->ifindex); + if (s->map_fd >= 0) { + idx = nc->queue_index + s->map_start_index; + if (bpf_map_delete_elem(s->map_fd, &idx)) { + fprintf(stderr, "af-xdp: unable to remove AF_XDP socket from map" + " %s\n", s->map_path); + } + close(s->map_fd); + s->map_fd = -1; } + g_free(s->map_path); + s->map_path = NULL; } static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp) @@ -323,7 +331,7 @@ static int af_xdp_umem_create(AFXDPState *s, int sock_fd, Error **errp) s->pool = g_new(uint64_t, n_descs); /* Fill the pool in the opposite order, because it's a LIFO queue. */ - for (i = n_descs; i >= 0; i--) { + for (i = n_descs - 1; i >= 0; i--) { s->pool[i] = i * XSK_UMEM__DEFAULT_FRAME_SIZE; } s->n_pool = n_descs; @@ -345,7 +353,6 @@ static int af_xdp_socket_create(AFXDPState *s, }; int queue_id, error = 0; - s->inhibit = opts->has_inhibit && opts->inhibit; if (s->inhibit) { cfg.libxdp_flags |= XSK_LIBXDP_FLAGS__INHIBIT_PROG_LOAD; } @@ -396,6 +403,35 @@ static int af_xdp_socket_create(AFXDPState *s, return 0; } +static int af_xdp_update_xsk_map(AFXDPState *s, Error **errp) +{ + int xsk_fd, idx, error = 0; + + if (!s->map_path) { + return 0; + } + + s->map_fd = bpf_obj_get(s->map_path); + if (s->map_fd < 0) { + error = errno; + } else { + xsk_fd = xsk_socket__fd(s->xsk); + idx = s->nc.queue_index + s->map_start_index; + if (bpf_map_update_elem(s->map_fd, &idx, &xsk_fd, 0)) { + error = errno; + } + } + + if (error) { + error_setg_errno(errp, error, + "failed to insert AF_XDP socket into map %s", + s->map_path); + return -1; + } + + return 0; +} + /* NetClientInfo methods. */ static NetClientInfo net_af_xdp_info = { .type = NET_CLIENT_DRIVER_AF_XDP, @@ -444,12 +480,14 @@ int net_init_af_xdp(const Netdev *netdev, { const NetdevAFXDPOptions *opts = &netdev->u.af_xdp; NetClientState *nc, *nc0 = NULL; + int32_t map_start_index; unsigned int ifindex; uint32_t prog_id = 0; g_autofree int *sock_fds = NULL; int64_t i, queues; Error *err = NULL; AFXDPState *s; + bool inhibit; ifindex = if_nametoindex(opts->ifname); if (!ifindex) { @@ -465,8 +503,28 @@ int net_init_af_xdp(const Netdev *netdev, return -1; } - if ((opts->has_inhibit && opts->inhibit) != !!opts->sock_fds) { - error_setg(errp, "'inhibit=on' requires 'sock-fds' and vice versa"); + inhibit = opts->has_inhibit && opts->inhibit; + if (inhibit && !opts->sock_fds && !opts->map_path) { + error_setg(errp, "'inhibit=on' requires 'sock-fds' or 'map-path'"); + return -1; + } + if (!inhibit && (opts->sock_fds || opts->map_path)) { + error_setg(errp, "'sock-fds' and 'map-path' require 'inhibit=on'"); + return -1; + } + if (opts->sock_fds && opts->map_path) { + error_setg(errp, "'sock-fds' and 'map-path' are mutually exclusive"); + return -1; + } + if (!opts->map_path && opts->has_map_start_index) { + error_setg(errp, "'map-start-index' requires 'map-path'"); + return -1; + } + + map_start_index = opts->has_map_start_index ? opts->map_start_index : 0; + if (map_start_index < 0) { + error_setg(errp, "'map-start-index' cannot be negative (%d)", + map_start_index); return -1; } @@ -490,21 +548,23 @@ int net_init_af_xdp(const Netdev *netdev, pstrcpy(s->ifname, sizeof(s->ifname), opts->ifname); s->ifindex = ifindex; - s->n_queues = queues; + s->inhibit = inhibit; + + s->map_path = g_strdup(opts->map_path); + s->map_start_index = map_start_index; + s->map_fd = -1; - if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, errp) - || af_xdp_socket_create(s, opts, errp)) { - /* Make sure the XDP program will be removed. */ - s->n_queues = i; - error_propagate(errp, err); + if (af_xdp_umem_create(s, sock_fds ? sock_fds[i] : -1, &err) || + af_xdp_socket_create(s, opts, &err) || + af_xdp_update_xsk_map(s, &err)) { goto err; } } - if (nc0) { + if (nc0 && !inhibit) { s = DO_UPCAST(AFXDPState, nc, nc0); if (bpf_xdp_query_id(s->ifindex, s->xdp_flags, &prog_id) || !prog_id) { - error_setg_errno(errp, errno, + error_setg_errno(&err, errno, "no XDP program loaded on '%s', ifindex: %d", s->ifname, s->ifindex); goto err; @@ -518,6 +578,7 @@ int net_init_af_xdp(const Netdev *netdev, err: if (nc0) { qemu_del_net_client(nc0); + error_propagate(errp, err); } return -1; diff --git a/net/clients.h b/net/clients.h index be53794..e786ab4 100644 --- a/net/clients.h +++ b/net/clients.h @@ -29,6 +29,10 @@ int net_init_dump(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); +#ifdef CONFIG_PASST +int net_init_passt(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp); +#endif #ifdef CONFIG_SLIRP int net_init_slirp(const Netdev *netdev, const char *name, NetClientState *peer, Error **errp); @@ -285,6 +285,9 @@ void net_hub_check_clients(void) case NET_CLIENT_DRIVER_NIC: has_nic = 1; break; +#ifdef CONFIG_PASST + case NET_CLIENT_DRIVER_PASST: +#endif case NET_CLIENT_DRIVER_USER: case NET_CLIENT_DRIVER_TAP: case NET_CLIENT_DRIVER_SOCKET: diff --git a/net/meson.build b/net/meson.build index bb97b4d..da6ea63 100644 --- a/net/meson.build +++ b/net/meson.build @@ -1,6 +1,7 @@ system_ss.add(files( 'announce.c', 'checksum.c', + 'dgram.c', 'dump.c', 'eth.c', 'filter-buffer.c', @@ -12,7 +13,7 @@ system_ss.add(files( 'queue.c', 'socket.c', 'stream.c', - 'dgram.c', + 'stream_data.c', 'util.c', )) @@ -33,6 +34,9 @@ system_ss.add(when: 'CONFIG_TCG', if_true: files('filter-replay.c')) if have_l2tpv3 system_ss.add(files('l2tpv3.c')) endif +if enable_passt + system_ss.add(files('passt.c')) +endif system_ss.add(when: slirp, if_true: files('slirp.c')) system_ss.add(when: vde, if_true: files('vde.c')) if have_netmap @@ -573,6 +573,15 @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len) nc->info->set_vnet_hdr_len(nc, len); } +bool qemu_get_vnet_hash_supported_types(NetClientState *nc, uint32_t *types) +{ + if (!nc || !nc->info->get_vnet_hash_supported_types) { + return false; + } + + return nc->info->get_vnet_hash_supported_types(nc, types); +} + int qemu_set_vnet_le(NetClientState *nc, bool is_le) { #if HOST_BIG_ENDIAN @@ -1248,6 +1257,9 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])( const char *name, NetClientState *peer, Error **errp) = { [NET_CLIENT_DRIVER_NIC] = net_init_nic, +#ifdef CONFIG_PASST + [NET_CLIENT_DRIVER_PASST] = net_init_passt, +#endif #ifdef CONFIG_SLIRP [NET_CLIENT_DRIVER_USER] = net_init_slirp, #endif @@ -1353,6 +1365,7 @@ void show_netdevs(void) "dgram", "hubport", "tap", + "passt", #ifdef CONFIG_SLIRP "user", #endif @@ -1601,21 +1614,11 @@ void colo_notify_filters_event(int event, Error **errp) } } -void qmp_set_link(const char *name, bool up, Error **errp) +void net_client_set_link(NetClientState **ncs, int queues, bool up) { - NetClientState *ncs[MAX_QUEUE_NUM]; NetClientState *nc; - int queues, i; - - queues = qemu_find_net_clients_except(name, ncs, - NET_CLIENT_DRIVER__MAX, - MAX_QUEUE_NUM); + int i; - if (queues == 0) { - error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, - "Device '%s' not found", name); - return; - } nc = ncs[0]; for (i = 0; i < queues; i++) { @@ -1646,6 +1649,24 @@ void qmp_set_link(const char *name, bool up, Error **errp) } } +void qmp_set_link(const char *name, bool up, Error **errp) +{ + NetClientState *ncs[MAX_QUEUE_NUM]; + int queues; + + queues = qemu_find_net_clients_except(name, ncs, + NET_CLIENT_DRIVER__MAX, + MAX_QUEUE_NUM); + + if (queues == 0) { + error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, + "Device '%s' not found", name); + return; + } + + net_client_set_link(ncs, queues, up); +} + static void net_vm_change_state_handler(void *opaque, bool running, RunState state) { diff --git a/net/passt.c b/net/passt.c new file mode 100644 index 0000000..6f616ba --- /dev/null +++ b/net/passt.c @@ -0,0 +1,753 @@ +/* + * passt network backend + * + * Copyright Red Hat + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include <glib/gstdio.h> +#include "qemu/error-report.h" +#include <gio/gio.h> +#include "net/net.h" +#include "clients.h" +#include "qapi/error.h" +#include "io/net-listener.h" +#include "chardev/char-fe.h" +#include "net/vhost_net.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" +#include "standard-headers/linux/virtio_net.h" +#include "stream_data.h" + +#ifdef CONFIG_VHOST_USER +static const int user_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_NOTIFICATION_DATA, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + + VIRTIO_F_ANY_LAYOUT, + VIRTIO_F_VERSION_1, + VIRTIO_NET_F_CSUM, + VIRTIO_NET_F_GUEST_CSUM, + VIRTIO_NET_F_GSO, + VIRTIO_NET_F_GUEST_TSO4, + VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, + VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_HOST_TSO4, + VIRTIO_NET_F_HOST_TSO6, + VIRTIO_NET_F_HOST_ECN, + VIRTIO_NET_F_HOST_UFO, + VIRTIO_NET_F_MRG_RXBUF, + VIRTIO_NET_F_MTU, + VIRTIO_F_IOMMU_PLATFORM, + VIRTIO_F_RING_PACKED, + VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, + VIRTIO_NET_F_RSS, + VIRTIO_NET_F_RSC_EXT, + VIRTIO_NET_F_HASH_REPORT, + VIRTIO_NET_F_GUEST_USO4, + VIRTIO_NET_F_GUEST_USO6, + VIRTIO_NET_F_HOST_USO, + + /* This bit implies RARP isn't sent by QEMU out of band */ + VIRTIO_NET_F_GUEST_ANNOUNCE, + + VIRTIO_NET_F_MQ, + + VHOST_INVALID_FEATURE_BIT +}; +#endif + +typedef struct NetPasstState { + NetStreamData data; + GPtrArray *args; + gchar *pidfile; + pid_t pid; +#ifdef CONFIG_VHOST_USER + /* vhost user */ + VhostUserState *vhost_user; + VHostNetState *vhost_net; + CharBackend vhost_chr; + guint vhost_watch; + uint64_t acked_features; + bool started; +#endif +} NetPasstState; + +static int net_passt_stream_start(NetPasstState *s, Error **errp); + +static void net_passt_cleanup(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + +#ifdef CONFIG_VHOST_USER + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); + s->vhost_net = NULL; + } + if (s->vhost_watch) { + g_source_remove(s->vhost_watch); + s->vhost_watch = 0; + } + qemu_chr_fe_deinit(&s->vhost_chr, true); + if (s->vhost_user) { + vhost_user_cleanup(s->vhost_user); + g_free(s->vhost_user); + s->vhost_user = NULL; + } +#endif + + kill(s->pid, SIGTERM); + g_remove(s->pidfile); + g_free(s->pidfile); + g_ptr_array_free(s->args, TRUE); +} + +static ssize_t net_passt_receive(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + NetStreamData *d = DO_UPCAST(NetStreamData, nc, nc); + + return net_stream_data_receive(d, buf, size); +} + +static gboolean net_passt_send(QIOChannel *ioc, GIOCondition condition, + gpointer data) +{ + if (net_stream_data_send(ioc, condition, data) == G_SOURCE_REMOVE) { + NetPasstState *s = DO_UPCAST(NetPasstState, data, data); + Error *error; + + /* we need to restart passt */ + kill(s->pid, SIGTERM); + if (net_passt_stream_start(s, &error) == -1) { + error_report_err(error); + } + + return G_SOURCE_REMOVE; + } + + return G_SOURCE_CONTINUE; +} + +#ifdef CONFIG_VHOST_USER +static int passt_set_vnet_endianness(NetClientState *nc, bool enable) +{ + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + return 0; +} + +static bool passt_has_vnet_hdr(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + return s->vhost_user != NULL; +} + +static bool passt_has_ufo(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + return s->vhost_user != NULL; +} + +static bool passt_check_peer_type(NetClientState *nc, ObjectClass *oc, + Error **errp) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + const char *driver = object_class_get_name(oc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + if (s->vhost_user == NULL) { + return true; + } + + if (!g_str_has_prefix(driver, "virtio-net-")) { + error_setg(errp, "vhost-user requires frontend driver virtio-net-*"); + return false; + } + + return true; +} + +static struct vhost_net *passt_get_vhost_net(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + return s->vhost_net; +} + +static uint64_t passt_get_acked_features(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + return s->acked_features; +} + +static void passt_save_acked_features(NetClientState *nc) +{ + NetPasstState *s = DO_UPCAST(NetPasstState, data.nc, nc); + + assert(nc->info->type == NET_CLIENT_DRIVER_PASST); + + if (s->vhost_net) { + uint64_t features = vhost_net_get_acked_features(s->vhost_net); + if (features) { + s->acked_features = features; + } + } +} +#endif + +static NetClientInfo net_passt_info = { + .type = NET_CLIENT_DRIVER_PASST, + .size = sizeof(NetPasstState), + .receive = net_passt_receive, + .cleanup = net_passt_cleanup, +#ifdef CONFIG_VHOST_USER + .has_vnet_hdr = passt_has_vnet_hdr, + .has_ufo = passt_has_ufo, + .set_vnet_be = passt_set_vnet_endianness, + .set_vnet_le = passt_set_vnet_endianness, + .check_peer_type = passt_check_peer_type, + .get_vhost_net = passt_get_vhost_net, +#endif +}; + +static void net_passt_client_connected(QIOTask *task, gpointer opaque) +{ + NetPasstState *s = opaque; + + if (net_stream_data_client_connected(task, &s->data) == 0) { + qemu_set_info_str(&s->data.nc, "stream,connected to pid %d", s->pid); + } +} + +static int net_passt_start_daemon(NetPasstState *s, int sock, Error **errp) +{ + g_autoptr(GSubprocess) daemon = NULL; + g_autofree gchar *contents = NULL; + g_autoptr(GError) error = NULL; + GSubprocessLauncher *launcher; + + qemu_set_info_str(&s->data.nc, "launching passt"); + + launcher = g_subprocess_launcher_new(G_SUBPROCESS_FLAGS_NONE); + g_subprocess_launcher_take_fd(launcher, sock, 3); + + daemon = g_subprocess_launcher_spawnv(launcher, + (const gchar *const *)s->args->pdata, + &error); + g_object_unref(launcher); + + if (!daemon) { + error_setg(errp, "Error creating daemon: %s", error->message); + return -1; + } + + if (!g_subprocess_wait(daemon, NULL, &error)) { + error_setg(errp, "Error waiting for daemon: %s", error->message); + return -1; + } + + if (g_subprocess_get_if_exited(daemon) && + g_subprocess_get_exit_status(daemon)) { + return -1; + } + + if (!g_file_get_contents(s->pidfile, &contents, NULL, &error)) { + error_setg(errp, "Cannot read passt pid: %s", error->message); + return -1; + } + + s->pid = (pid_t)g_ascii_strtoll(contents, NULL, 10); + if (s->pid <= 0) { + error_setg(errp, "File '%s' did not contain a valid PID.", s->pidfile); + return -1; + } + + return 0; +} + +static int net_passt_stream_start(NetPasstState *s, Error **errp) +{ + QIOChannelSocket *sioc; + SocketAddress *addr; + int sv[2]; + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { + error_setg_errno(errp, errno, "socketpair() failed"); + return -1; + } + + /* connect to passt */ + qemu_set_info_str(&s->data.nc, "connecting to passt"); + + /* create socket channel */ + sioc = qio_channel_socket_new(); + s->data.ioc = QIO_CHANNEL(sioc); + s->data.nc.link_down = true; + s->data.send = net_passt_send; + + addr = g_new0(SocketAddress, 1); + addr->type = SOCKET_ADDRESS_TYPE_FD; + addr->u.fd.str = g_strdup_printf("%d", sv[0]); + + qio_channel_socket_connect_async(sioc, addr, + net_passt_client_connected, s, + NULL, NULL); + + qapi_free_SocketAddress(addr); + + /* start passt */ + if (net_passt_start_daemon(s, sv[1], errp) == -1) { + close(sv[0]); + close(sv[1]); + return -1; + } + close(sv[1]); + + return 0; +} + +#ifdef CONFIG_VHOST_USER +static gboolean passt_vhost_user_watch(void *do_not_use, GIOCondition cond, + void *opaque) +{ + NetPasstState *s = opaque; + + qemu_chr_fe_disconnect(&s->vhost_chr); + + return G_SOURCE_CONTINUE; +} + +static void passt_vhost_user_event(void *opaque, QEMUChrEvent event); + +static void chr_closed_bh(void *opaque) +{ + NetPasstState *s = opaque; + + passt_save_acked_features(&s->data.nc); + + net_client_set_link(&(NetClientState *){ &s->data.nc }, 1, false); + + qemu_chr_fe_set_handlers(&s->vhost_chr, NULL, NULL, passt_vhost_user_event, + NULL, s, NULL, true); +} + +static void passt_vhost_user_stop(NetPasstState *s) +{ + passt_save_acked_features(&s->data.nc); + vhost_net_cleanup(s->vhost_net); +} + +static int passt_vhost_user_start(NetPasstState *s, VhostUserState *be) +{ + struct vhost_net *net = NULL; + VhostNetOptions options; + + options.backend_type = VHOST_BACKEND_TYPE_USER; + options.net_backend = &s->data.nc; + options.opaque = be; + options.busyloop_timeout = 0; + options.nvqs = 2; + options.feature_bits = user_feature_bits; + options.max_tx_queue_size = VIRTQUEUE_MAX_SIZE; + options.get_acked_features = passt_get_acked_features; + options.save_acked_features = passt_save_acked_features; + options.is_vhost_user = true; + + net = vhost_net_init(&options); + if (!net) { + error_report("failed to init passt vhost_net"); + goto err; + } + + if (s->vhost_net) { + vhost_net_cleanup(s->vhost_net); + g_free(s->vhost_net); + } + s->vhost_net = net; + + return 0; +err: + if (net) { + vhost_net_cleanup(net); + g_free(net); + } + passt_vhost_user_stop(s); + return -1; +} + +static void passt_vhost_user_event(void *opaque, QEMUChrEvent event) +{ + NetPasstState *s = opaque; + Error *err = NULL; + + switch (event) { + case CHR_EVENT_OPENED: + if (passt_vhost_user_start(s, s->vhost_user) < 0) { + qemu_chr_fe_disconnect(&s->vhost_chr); + return; + } + s->vhost_watch = qemu_chr_fe_add_watch(&s->vhost_chr, G_IO_HUP, + passt_vhost_user_watch, s); + net_client_set_link(&(NetClientState *){ &s->data.nc }, 1, true); + s->started = true; + break; + case CHR_EVENT_CLOSED: + if (s->vhost_watch) { + AioContext *ctx = qemu_get_current_aio_context(); + + g_source_remove(s->vhost_watch); + s->vhost_watch = 0; + qemu_chr_fe_set_handlers(&s->vhost_chr, NULL, NULL, NULL, NULL, + NULL, NULL, false); + + aio_bh_schedule_oneshot(ctx, chr_closed_bh, s); + } + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } + + if (err) { + error_report_err(err); + } +} + +static int net_passt_vhost_user_init(NetPasstState *s, Error **errp) +{ + Chardev *chr; + int sv[2]; + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { + error_setg_errno(errp, errno, "socketpair() failed"); + return -1; + } + + /* connect to passt */ + qemu_set_info_str(&s->data.nc, "connecting to passt"); + + /* create chardev */ + + chr = CHARDEV(object_new(TYPE_CHARDEV_SOCKET)); + if (!chr || qemu_chr_add_client(chr, sv[0]) == -1) { + object_unref(OBJECT(chr)); + error_setg(errp, "Failed to make socket chardev"); + goto err; + } + + s->vhost_user = g_new0(struct VhostUserState, 1); + if (!qemu_chr_fe_init(&s->vhost_chr, chr, errp) || + !vhost_user_init(s->vhost_user, &s->vhost_chr, errp)) { + goto err; + } + + /* start passt */ + if (net_passt_start_daemon(s, sv[1], errp) == -1) { + goto err; + } + + do { + if (qemu_chr_fe_wait_connected(&s->vhost_chr, errp) < 0) { + goto err; + } + + qemu_chr_fe_set_handlers(&s->vhost_chr, NULL, NULL, + passt_vhost_user_event, NULL, s, NULL, + true); + } while (!s->started); + + qemu_set_info_str(&s->data.nc, "vhost-user,connected to pid %d", s->pid); + + close(sv[1]); + return 0; +err: + close(sv[0]); + close(sv[1]); + + return -1; +} +#else +static int net_passt_vhost_user_init(NetPasstState *s, Error **errp) +{ + error_setg(errp, "vhost-user support has not been built"); + + return -1; +} +#endif + +static GPtrArray *net_passt_decode_args(const NetDevPasstOptions *passt, + gchar *pidfile, Error **errp) +{ + GPtrArray *args = g_ptr_array_new_with_free_func(g_free); + + if (passt->path) { + g_ptr_array_add(args, g_strdup(passt->path)); + } else { + g_ptr_array_add(args, g_strdup("passt")); + } + + if (passt->has_vhost_user && passt->vhost_user) { + g_ptr_array_add(args, g_strdup("--vhost-user")); + } + + /* by default, be quiet */ + if (!passt->has_quiet || passt->quiet) { + g_ptr_array_add(args, g_strdup("--quiet")); + } + + if (passt->has_mtu) { + g_ptr_array_add(args, g_strdup("--mtu")); + g_ptr_array_add(args, g_strdup_printf("%"PRId64, passt->mtu)); + } + + if (passt->address) { + g_ptr_array_add(args, g_strdup("--address")); + g_ptr_array_add(args, g_strdup(passt->address)); + } + + if (passt->netmask) { + g_ptr_array_add(args, g_strdup("--netmask")); + g_ptr_array_add(args, g_strdup(passt->netmask)); + } + + if (passt->mac) { + g_ptr_array_add(args, g_strdup("--mac-addr")); + g_ptr_array_add(args, g_strdup(passt->mac)); + } + + if (passt->gateway) { + g_ptr_array_add(args, g_strdup("--gateway")); + g_ptr_array_add(args, g_strdup(passt->gateway)); + } + + if (passt->interface) { + g_ptr_array_add(args, g_strdup("--interface")); + g_ptr_array_add(args, g_strdup(passt->interface)); + } + + if (passt->outbound) { + g_ptr_array_add(args, g_strdup("--outbound")); + g_ptr_array_add(args, g_strdup(passt->outbound)); + } + + if (passt->outbound_if4) { + g_ptr_array_add(args, g_strdup("--outbound-if4")); + g_ptr_array_add(args, g_strdup(passt->outbound_if4)); + } + + if (passt->outbound_if6) { + g_ptr_array_add(args, g_strdup("--outbound-if6")); + g_ptr_array_add(args, g_strdup(passt->outbound_if6)); + } + + if (passt->dns) { + g_ptr_array_add(args, g_strdup("--dns")); + g_ptr_array_add(args, g_strdup(passt->dns)); + } + if (passt->fqdn) { + g_ptr_array_add(args, g_strdup("--fqdn")); + g_ptr_array_add(args, g_strdup(passt->fqdn)); + } + + if (passt->has_dhcp_dns && !passt->dhcp_dns) { + g_ptr_array_add(args, g_strdup("--no-dhcp-dns")); + } + + if (passt->has_dhcp_search && !passt->dhcp_search) { + g_ptr_array_add(args, g_strdup("--no-dhcp-search")); + } + + if (passt->map_host_loopback) { + g_ptr_array_add(args, g_strdup("--map-host-loopback")); + g_ptr_array_add(args, g_strdup(passt->map_host_loopback)); + } + + if (passt->map_guest_addr) { + g_ptr_array_add(args, g_strdup("--map-guest-addr")); + g_ptr_array_add(args, g_strdup(passt->map_guest_addr)); + } + + if (passt->dns_forward) { + g_ptr_array_add(args, g_strdup("--dns-forward")); + g_ptr_array_add(args, g_strdup(passt->dns_forward)); + } + + if (passt->dns_host) { + g_ptr_array_add(args, g_strdup("--dns-host")); + g_ptr_array_add(args, g_strdup(passt->dns_host)); + } + + if (passt->has_tcp && !passt->tcp) { + g_ptr_array_add(args, g_strdup("--no-tcp")); + } + + if (passt->has_udp && !passt->udp) { + g_ptr_array_add(args, g_strdup("--no-udp")); + } + + if (passt->has_icmp && !passt->icmp) { + g_ptr_array_add(args, g_strdup("--no-icmp")); + } + + if (passt->has_dhcp && !passt->dhcp) { + g_ptr_array_add(args, g_strdup("--no-dhcp")); + } + + if (passt->has_ndp && !passt->ndp) { + g_ptr_array_add(args, g_strdup("--no-ndp")); + } + if (passt->has_dhcpv6 && !passt->dhcpv6) { + g_ptr_array_add(args, g_strdup("--no-dhcpv6")); + } + + if (passt->has_ra && !passt->ra) { + g_ptr_array_add(args, g_strdup("--no-ra")); + } + + if (passt->has_freebind && passt->freebind) { + g_ptr_array_add(args, g_strdup("--freebind")); + } + + if (passt->has_ipv4 && !passt->ipv4) { + g_ptr_array_add(args, g_strdup("--ipv6-only")); + } + + if (passt->has_ipv6 && !passt->ipv6) { + g_ptr_array_add(args, g_strdup("--ipv4-only")); + } + + if (passt->has_search && passt->search) { + const StringList *list = passt->search; + GString *domains = g_string_new(list->value->str); + + list = list->next; + while (list) { + g_string_append(domains, " "); + g_string_append(domains, list->value->str); + list = list->next; + } + + g_ptr_array_add(args, g_strdup("--search")); + g_ptr_array_add(args, g_string_free(domains, FALSE)); + } + + if (passt->has_tcp_ports && passt->tcp_ports) { + const StringList *list = passt->tcp_ports; + GString *tcp_ports = g_string_new(list->value->str); + + list = list->next; + while (list) { + g_string_append(tcp_ports, ","); + g_string_append(tcp_ports, list->value->str); + list = list->next; + } + + g_ptr_array_add(args, g_strdup("--tcp-ports")); + g_ptr_array_add(args, g_string_free(tcp_ports, FALSE)); + } + + if (passt->has_udp_ports && passt->udp_ports) { + const StringList *list = passt->udp_ports; + GString *udp_ports = g_string_new(list->value->str); + + list = list->next; + while (list) { + g_string_append(udp_ports, ","); + g_string_append(udp_ports, list->value->str); + list = list->next; + } + + g_ptr_array_add(args, g_strdup("--udp-ports")); + g_ptr_array_add(args, g_string_free(udp_ports, FALSE)); + } + + if (passt->has_param && passt->param) { + const StringList *list = passt->param; + + while (list) { + g_ptr_array_add(args, g_strdup(list->value->str)); + list = list->next; + } + } + + /* provide a pid file to be able to kil passt on exit */ + g_ptr_array_add(args, g_strdup("--pid")); + g_ptr_array_add(args, g_strdup(pidfile)); + + /* g_subprocess_launcher_take_fd() will set the socket on fd 3 */ + g_ptr_array_add(args, g_strdup("--fd")); + g_ptr_array_add(args, g_strdup("3")); + + g_ptr_array_add(args, NULL); + + return args; +} + +int net_init_passt(const Netdev *netdev, const char *name, + NetClientState *peer, Error **errp) +{ + g_autoptr(GError) error = NULL; + NetClientState *nc; + NetPasstState *s; + GPtrArray *args; + gchar *pidfile; + int pidfd; + + assert(netdev->type == NET_CLIENT_DRIVER_PASST); + + pidfd = g_file_open_tmp("passt-XXXXXX.pid", &pidfile, &error); + if (pidfd == -1) { + error_setg(errp, "Failed to create temporary file: %s", error->message); + return -1; + } + close(pidfd); + + args = net_passt_decode_args(&netdev->u.passt, pidfile, errp); + if (args == NULL) { + g_free(pidfile); + return -1; + } + + nc = qemu_new_net_client(&net_passt_info, peer, "passt", name); + s = DO_UPCAST(NetPasstState, data.nc, nc); + + s->args = args; + s->pidfile = pidfile; + + if (netdev->u.passt.has_vhost_user && netdev->u.passt.vhost_user) { + if (net_passt_vhost_user_init(s, errp) == -1) { + qemu_del_net_client(nc); + return -1; + } + + return 0; + } + + if (net_passt_stream_start(s, errp) == -1) { + qemu_del_net_client(nc); + return -1; + } + + return 0; +} diff --git a/net/stream.c b/net/stream.c index 6152d2a..d893f02 100644 --- a/net/stream.c +++ b/net/stream.c @@ -27,173 +27,50 @@ #include "net/net.h" #include "clients.h" -#include "monitor/monitor.h" #include "qapi/error.h" -#include "qemu/error-report.h" -#include "qemu/option.h" -#include "qemu/sockets.h" -#include "qemu/iov.h" -#include "qemu/main-loop.h" -#include "qemu/cutils.h" -#include "io/channel.h" -#include "io/channel-socket.h" #include "io/net-listener.h" #include "qapi/qapi-events-net.h" #include "qapi/qapi-visit-sockets.h" #include "qapi/clone-visitor.h" +#include "stream_data.h" + typedef struct NetStreamState { - NetClientState nc; - QIOChannel *listen_ioc; - QIONetListener *listener; - QIOChannel *ioc; - guint ioc_read_tag; - guint ioc_write_tag; - SocketReadState rs; - unsigned int send_index; /* number of bytes sent*/ + NetStreamData data; uint32_t reconnect_ms; guint timer_tag; SocketAddress *addr; } NetStreamState; -static void net_stream_listen(QIONetListener *listener, - QIOChannelSocket *cioc, - void *opaque); static void net_stream_arm_reconnect(NetStreamState *s); -static gboolean net_stream_writable(QIOChannel *ioc, - GIOCondition condition, - gpointer data) -{ - NetStreamState *s = data; - - s->ioc_write_tag = 0; - - qemu_flush_queued_packets(&s->nc); - - return G_SOURCE_REMOVE; -} - static ssize_t net_stream_receive(NetClientState *nc, const uint8_t *buf, size_t size) { - NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); - uint32_t len = htonl(size); - struct iovec iov[] = { - { - .iov_base = &len, - .iov_len = sizeof(len), - }, { - .iov_base = (void *)buf, - .iov_len = size, - }, - }; - struct iovec local_iov[2]; - unsigned int nlocal_iov; - size_t remaining; - ssize_t ret; - - remaining = iov_size(iov, 2) - s->send_index; - nlocal_iov = iov_copy(local_iov, 2, iov, 2, s->send_index, remaining); - ret = qio_channel_writev(s->ioc, local_iov, nlocal_iov, NULL); - if (ret == QIO_CHANNEL_ERR_BLOCK) { - ret = 0; /* handled further down */ - } - if (ret == -1) { - s->send_index = 0; - return -errno; - } - if (ret < (ssize_t)remaining) { - s->send_index += ret; - s->ioc_write_tag = qio_channel_add_watch(s->ioc, G_IO_OUT, - net_stream_writable, s, NULL); - return 0; - } - s->send_index = 0; - return size; -} - -static gboolean net_stream_send(QIOChannel *ioc, - GIOCondition condition, - gpointer data); - -static void net_stream_send_completed(NetClientState *nc, ssize_t len) -{ - NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); - - if (!s->ioc_read_tag) { - s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, - net_stream_send, s, NULL); - } -} + NetStreamData *d = DO_UPCAST(NetStreamData, nc, nc); -static void net_stream_rs_finalize(SocketReadState *rs) -{ - NetStreamState *s = container_of(rs, NetStreamState, rs); - - if (qemu_send_packet_async(&s->nc, rs->buf, - rs->packet_len, - net_stream_send_completed) == 0) { - if (s->ioc_read_tag) { - g_source_remove(s->ioc_read_tag); - s->ioc_read_tag = 0; - } - } + return net_stream_data_receive(d, buf, size); } static gboolean net_stream_send(QIOChannel *ioc, GIOCondition condition, gpointer data) { - NetStreamState *s = data; - int size; - int ret; - QEMU_UNINITIALIZED char buf1[NET_BUFSIZE]; - const char *buf; - - size = qio_channel_read(s->ioc, buf1, sizeof(buf1), NULL); - if (size < 0) { - if (errno != EWOULDBLOCK) { - goto eoc; - } - } else if (size == 0) { - /* end of connection */ - eoc: - s->ioc_read_tag = 0; - if (s->ioc_write_tag) { - g_source_remove(s->ioc_write_tag); - s->ioc_write_tag = 0; - } - if (s->listener) { - qemu_set_info_str(&s->nc, "listening"); - qio_net_listener_set_client_func(s->listener, net_stream_listen, - s, NULL); - } - object_unref(OBJECT(s->ioc)); - s->ioc = NULL; - - net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); - s->nc.link_down = true; + if (net_stream_data_send(ioc, condition, data) == G_SOURCE_REMOVE) { + NetStreamState *s = DO_UPCAST(NetStreamState, data, data); - qapi_event_send_netdev_stream_disconnected(s->nc.name); + qapi_event_send_netdev_stream_disconnected(s->data.nc.name); net_stream_arm_reconnect(s); return G_SOURCE_REMOVE; } - buf = buf1; - - ret = net_fill_rstate(&s->rs, (const uint8_t *)buf, size); - - if (ret == -1) { - goto eoc; - } return G_SOURCE_CONTINUE; } static void net_stream_cleanup(NetClientState *nc) { - NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); + NetStreamState *s = DO_UPCAST(NetStreamState, data.nc, nc); if (s->timer_tag) { g_source_remove(s->timer_tag); s->timer_tag = 0; @@ -202,28 +79,28 @@ static void net_stream_cleanup(NetClientState *nc) qapi_free_SocketAddress(s->addr); s->addr = NULL; } - if (s->ioc) { - if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) { - if (s->ioc_read_tag) { - g_source_remove(s->ioc_read_tag); - s->ioc_read_tag = 0; + if (s->data.ioc) { + if (QIO_CHANNEL_SOCKET(s->data.ioc)->fd != -1) { + if (s->data.ioc_read_tag) { + g_source_remove(s->data.ioc_read_tag); + s->data.ioc_read_tag = 0; } - if (s->ioc_write_tag) { - g_source_remove(s->ioc_write_tag); - s->ioc_write_tag = 0; + if (s->data.ioc_write_tag) { + g_source_remove(s->data.ioc_write_tag); + s->data.ioc_write_tag = 0; } } - object_unref(OBJECT(s->ioc)); - s->ioc = NULL; + object_unref(OBJECT(s->data.ioc)); + s->data.ioc = NULL; } - if (s->listen_ioc) { - if (s->listener) { - qio_net_listener_disconnect(s->listener); - object_unref(OBJECT(s->listener)); - s->listener = NULL; + if (s->data.listen_ioc) { + if (s->data.listener) { + qio_net_listener_disconnect(s->data.listener); + object_unref(OBJECT(s->data.listener)); + s->data.listener = NULL; } - object_unref(OBJECT(s->listen_ioc)); - s->listen_ioc = NULL; + object_unref(OBJECT(s->data.listen_ioc)); + s->data.listen_ioc = NULL; } } @@ -235,23 +112,13 @@ static NetClientInfo net_stream_info = { }; static void net_stream_listen(QIONetListener *listener, - QIOChannelSocket *cioc, - void *opaque) + QIOChannelSocket *cioc, gpointer data) { - NetStreamState *s = opaque; + NetStreamData *d = data; SocketAddress *addr; char *uri; - object_ref(OBJECT(cioc)); - - qio_net_listener_set_client_func(s->listener, NULL, s, NULL); - - s->ioc = QIO_CHANNEL(cioc); - qio_channel_set_name(s->ioc, "stream-server"); - s->nc.link_down = false; - - s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send, - s, NULL); + net_stream_data_listen(listener, cioc, data); if (cioc->localAddr.ss_family == AF_UNIX) { addr = qio_channel_socket_get_local_address(cioc, NULL); @@ -260,22 +127,22 @@ static void net_stream_listen(QIONetListener *listener, } g_assert(addr != NULL); uri = socket_uri(addr); - qemu_set_info_str(&s->nc, "%s", uri); + qemu_set_info_str(&d->nc, "%s", uri); g_free(uri); - qapi_event_send_netdev_stream_connected(s->nc.name, addr); + qapi_event_send_netdev_stream_connected(d->nc.name, addr); qapi_free_SocketAddress(addr); } static void net_stream_server_listening(QIOTask *task, gpointer opaque) { - NetStreamState *s = opaque; - QIOChannelSocket *listen_sioc = QIO_CHANNEL_SOCKET(s->listen_ioc); + NetStreamData *d = opaque; + QIOChannelSocket *listen_sioc = QIO_CHANNEL_SOCKET(d->listen_ioc); SocketAddress *addr; int ret; Error *err = NULL; if (qio_task_propagate_error(task, &err)) { - qemu_set_info_str(&s->nc, "error: %s", error_get_pretty(err)); + qemu_set_info_str(&d->nc, "error: %s", error_get_pretty(err)); error_free(err); return; } @@ -284,20 +151,21 @@ static void net_stream_server_listening(QIOTask *task, gpointer opaque) g_assert(addr != NULL); ret = qemu_socket_try_set_nonblock(listen_sioc->fd); if (addr->type == SOCKET_ADDRESS_TYPE_FD && ret < 0) { - qemu_set_info_str(&s->nc, "can't use file descriptor %s (errno %d)", + qemu_set_info_str(&d->nc, "can't use file descriptor %s (errno %d)", addr->u.fd.str, -ret); return; } g_assert(ret == 0); qapi_free_SocketAddress(addr); - s->nc.link_down = true; - s->listener = qio_net_listener_new(); + d->nc.link_down = true; + d->listener = qio_net_listener_new(); - qemu_set_info_str(&s->nc, "listening"); - net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); - qio_net_listener_set_client_func(s->listener, net_stream_listen, s, NULL); - qio_net_listener_add(s->listener, listen_sioc); + qemu_set_info_str(&d->nc, "listening"); + net_socket_rs_init(&d->rs, net_stream_data_rs_finalize, false); + qio_net_listener_set_client_func(d->listener, d->listen, d, + NULL); + qio_net_listener_add(d->listener, listen_sioc); } static int net_stream_server_init(NetClientState *peer, @@ -307,16 +175,18 @@ static int net_stream_server_init(NetClientState *peer, Error **errp) { NetClientState *nc; - NetStreamState *s; + NetStreamData *d; QIOChannelSocket *listen_sioc = qio_channel_socket_new(); nc = qemu_new_net_client(&net_stream_info, peer, model, name); - s = DO_UPCAST(NetStreamState, nc, nc); - qemu_set_info_str(&s->nc, "initializing"); + d = DO_UPCAST(NetStreamData, nc, nc); + d->send = net_stream_send; + d->listen = net_stream_listen; + qemu_set_info_str(&d->nc, "initializing"); - s->listen_ioc = QIO_CHANNEL(listen_sioc); + d->listen_ioc = QIO_CHANNEL(listen_sioc); qio_channel_socket_listen_async(listen_sioc, addr, 0, - net_stream_server_listening, s, + net_stream_server_listening, d, NULL, NULL); return 0; @@ -325,49 +195,23 @@ static int net_stream_server_init(NetClientState *peer, static void net_stream_client_connected(QIOTask *task, gpointer opaque) { NetStreamState *s = opaque; - QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(s->ioc); + NetStreamData *d = &s->data; + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(d->ioc); SocketAddress *addr; gchar *uri; - int ret; - Error *err = NULL; - if (qio_task_propagate_error(task, &err)) { - qemu_set_info_str(&s->nc, "error: %s", error_get_pretty(err)); - error_free(err); - goto error; + if (net_stream_data_client_connected(task, d) == -1) { + net_stream_arm_reconnect(s); + return; } addr = qio_channel_socket_get_remote_address(sioc, NULL); g_assert(addr != NULL); uri = socket_uri(addr); - qemu_set_info_str(&s->nc, "%s", uri); + qemu_set_info_str(&d->nc, "%s", uri); g_free(uri); - - ret = qemu_socket_try_set_nonblock(sioc->fd); - if (addr->type == SOCKET_ADDRESS_TYPE_FD && ret < 0) { - qemu_set_info_str(&s->nc, "can't use file descriptor %s (errno %d)", - addr->u.fd.str, -ret); - qapi_free_SocketAddress(addr); - goto error; - } - g_assert(ret == 0); - - net_socket_rs_init(&s->rs, net_stream_rs_finalize, false); - - /* Disable Nagle algorithm on TCP sockets to reduce latency */ - qio_channel_set_delay(s->ioc, false); - - s->ioc_read_tag = qio_channel_add_watch(s->ioc, G_IO_IN, net_stream_send, - s, NULL); - s->nc.link_down = false; - qapi_event_send_netdev_stream_connected(s->nc.name, addr); + qapi_event_send_netdev_stream_connected(d->nc.name, addr); qapi_free_SocketAddress(addr); - - return; -error: - object_unref(OBJECT(s->ioc)); - s->ioc = NULL; - net_stream_arm_reconnect(s); } static gboolean net_stream_reconnect(gpointer data) @@ -378,7 +222,7 @@ static gboolean net_stream_reconnect(gpointer data) s->timer_tag = 0; sioc = qio_channel_socket_new(); - s->ioc = QIO_CHANNEL(sioc); + s->data.ioc = QIO_CHANNEL(sioc); qio_channel_socket_connect_async(sioc, s->addr, net_stream_client_connected, s, NULL, NULL); @@ -388,7 +232,7 @@ static gboolean net_stream_reconnect(gpointer data) static void net_stream_arm_reconnect(NetStreamState *s) { if (s->reconnect_ms && s->timer_tag == 0) { - qemu_set_info_str(&s->nc, "connecting"); + qemu_set_info_str(&s->data.nc, "connecting"); s->timer_tag = g_timeout_add(s->reconnect_ms, net_stream_reconnect, s); } } @@ -405,11 +249,13 @@ static int net_stream_client_init(NetClientState *peer, QIOChannelSocket *sioc = qio_channel_socket_new(); nc = qemu_new_net_client(&net_stream_info, peer, model, name); - s = DO_UPCAST(NetStreamState, nc, nc); - qemu_set_info_str(&s->nc, "connecting"); + s = DO_UPCAST(NetStreamState, data.nc, nc); + qemu_set_info_str(&s->data.nc, "connecting"); - s->ioc = QIO_CHANNEL(sioc); - s->nc.link_down = true; + s->data.ioc = QIO_CHANNEL(sioc); + s->data.nc.link_down = true; + s->data.send = net_stream_send; + s->data.listen = net_stream_listen; s->reconnect_ms = reconnect_ms; if (reconnect_ms) { diff --git a/net/stream_data.c b/net/stream_data.c new file mode 100644 index 0000000..5af27e0 --- /dev/null +++ b/net/stream_data.c @@ -0,0 +1,193 @@ +/* + * net stream generic functions + * + * Copyright Red Hat + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "qemu/iov.h" +#include "qapi/error.h" +#include "net/net.h" +#include "io/channel.h" +#include "io/net-listener.h" + +#include "stream_data.h" + +static gboolean net_stream_data_writable(QIOChannel *ioc, + GIOCondition condition, gpointer data) +{ + NetStreamData *d = data; + + d->ioc_write_tag = 0; + + qemu_flush_queued_packets(&d->nc); + + return G_SOURCE_REMOVE; +} + +ssize_t net_stream_data_receive(NetStreamData *d, const uint8_t *buf, + size_t size) +{ + uint32_t len = htonl(size); + struct iovec iov[] = { + { + .iov_base = &len, + .iov_len = sizeof(len), + }, { + .iov_base = (void *)buf, + .iov_len = size, + }, + }; + struct iovec local_iov[2]; + unsigned int nlocal_iov; + size_t remaining; + ssize_t ret; + + remaining = iov_size(iov, 2) - d->send_index; + nlocal_iov = iov_copy(local_iov, 2, iov, 2, d->send_index, remaining); + ret = qio_channel_writev(d->ioc, local_iov, nlocal_iov, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + ret = 0; /* handled further down */ + } + if (ret == -1) { + d->send_index = 0; + return -errno; + } + if (ret < (ssize_t)remaining) { + d->send_index += ret; + d->ioc_write_tag = qio_channel_add_watch(d->ioc, G_IO_OUT, + net_stream_data_writable, d, + NULL); + return 0; + } + d->send_index = 0; + return size; +} + +static void net_stream_data_send_completed(NetClientState *nc, ssize_t len) +{ + NetStreamData *d = DO_UPCAST(NetStreamData, nc, nc); + + if (!d->ioc_read_tag) { + d->ioc_read_tag = qio_channel_add_watch(d->ioc, G_IO_IN, d->send, d, + NULL); + } +} + +void net_stream_data_rs_finalize(SocketReadState *rs) +{ + NetStreamData *d = container_of(rs, NetStreamData, rs); + + if (qemu_send_packet_async(&d->nc, rs->buf, + rs->packet_len, + net_stream_data_send_completed) == 0) { + if (d->ioc_read_tag) { + g_source_remove(d->ioc_read_tag); + d->ioc_read_tag = 0; + } + } +} + +gboolean net_stream_data_send(QIOChannel *ioc, GIOCondition condition, + NetStreamData *d) +{ + int size; + int ret; + QEMU_UNINITIALIZED char buf1[NET_BUFSIZE]; + const char *buf; + + size = qio_channel_read(d->ioc, buf1, sizeof(buf1), NULL); + if (size < 0) { + if (errno != EWOULDBLOCK) { + goto eoc; + } + } else if (size == 0) { + /* end of connection */ + eoc: + d->ioc_read_tag = 0; + if (d->ioc_write_tag) { + g_source_remove(d->ioc_write_tag); + d->ioc_write_tag = 0; + } + if (d->listener) { + qemu_set_info_str(&d->nc, "listening"); + qio_net_listener_set_client_func(d->listener, + d->listen, d, NULL); + } + object_unref(OBJECT(d->ioc)); + d->ioc = NULL; + + net_socket_rs_init(&d->rs, net_stream_data_rs_finalize, false); + d->nc.link_down = true; + + return G_SOURCE_REMOVE; + } + buf = buf1; + + ret = net_fill_rstate(&d->rs, (const uint8_t *)buf, size); + + if (ret == -1) { + goto eoc; + } + + return G_SOURCE_CONTINUE; +} + +void net_stream_data_listen(QIONetListener *listener, + QIOChannelSocket *cioc, + NetStreamData *d) +{ + object_ref(OBJECT(cioc)); + + qio_net_listener_set_client_func(d->listener, NULL, d, NULL); + + d->ioc = QIO_CHANNEL(cioc); + qio_channel_set_name(d->ioc, "stream-server"); + d->nc.link_down = false; + + d->ioc_read_tag = qio_channel_add_watch(d->ioc, G_IO_IN, d->send, d, NULL); +} + +int net_stream_data_client_connected(QIOTask *task, NetStreamData *d) +{ + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(d->ioc); + SocketAddress *addr; + int ret; + Error *err = NULL; + + if (qio_task_propagate_error(task, &err)) { + qemu_set_info_str(&d->nc, "error: %s", error_get_pretty(err)); + error_free(err); + goto error; + } + + addr = qio_channel_socket_get_remote_address(sioc, NULL); + g_assert(addr != NULL); + + ret = qemu_socket_try_set_nonblock(sioc->fd); + if (addr->type == SOCKET_ADDRESS_TYPE_FD && ret < 0) { + qemu_set_info_str(&d->nc, "can't use file descriptor %s (errno %d)", + addr->u.fd.str, -ret); + qapi_free_SocketAddress(addr); + goto error; + } + g_assert(ret == 0); + qapi_free_SocketAddress(addr); + + net_socket_rs_init(&d->rs, net_stream_data_rs_finalize, false); + + /* Disable Nagle algorithm on TCP sockets to reduce latency */ + qio_channel_set_delay(d->ioc, false); + + d->ioc_read_tag = qio_channel_add_watch(d->ioc, G_IO_IN, d->send, d, NULL); + d->nc.link_down = false; + + return 0; +error: + object_unref(OBJECT(d->ioc)); + d->ioc = NULL; + + return -1; +} diff --git a/net/stream_data.h b/net/stream_data.h new file mode 100644 index 0000000..b868625 --- /dev/null +++ b/net/stream_data.h @@ -0,0 +1,31 @@ +/* + * net stream generic functions + * + * Copyright Red Hat + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +typedef struct NetStreamData { + NetClientState nc; + QIOChannel *ioc; + guint ioc_read_tag; + guint ioc_write_tag; + SocketReadState rs; + unsigned int send_index; /* number of bytes sent*/ + QIOChannelFunc send; + /* server data */ + QIOChannel *listen_ioc; + QIONetListener *listener; + QIONetListenerClientFunc listen; +} NetStreamData; + +ssize_t net_stream_data_receive(NetStreamData *d, const uint8_t *buf, + size_t size); +void net_stream_data_rs_finalize(SocketReadState *rs); +gboolean net_stream_data_send(QIOChannel *ioc, GIOCondition condition, + NetStreamData *d); +int net_stream_data_client_connected(QIOTask *task, NetStreamData *d); +void net_stream_data_listen(QIONetListener *listener, + QIOChannelSocket *cioc, + NetStreamData *d); diff --git a/net/tap-win32.c b/net/tap-win32.c index 671dee9..38baf90 100644 --- a/net/tap-win32.c +++ b/net/tap-win32.c @@ -704,11 +704,6 @@ static void tap_win32_send(void *opaque) } } -struct vhost_net *tap_get_vhost_net(NetClientState *nc) -{ - return NULL; -} - static NetClientInfo net_tap_win32_info = { .type = NET_CLIENT_DRIVER_TAP, .size = sizeof(TAPState), @@ -42,11 +42,29 @@ #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/sockets.h" +#include "hw/virtio/vhost.h" #include "net/tap.h" #include "net/vhost_net.h" +static const int kernel_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_NET_F_MRG_RXBUF, + VIRTIO_F_VERSION_1, + VIRTIO_NET_F_MTU, + VIRTIO_F_IOMMU_PLATFORM, + VIRTIO_F_RING_PACKED, + VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, + VIRTIO_F_NOTIFICATION_DATA, + VIRTIO_NET_F_RSC_EXT, + VIRTIO_NET_F_HASH_REPORT, + VHOST_INVALID_FEATURE_BIT +}; + typedef struct TAPState { NetClientState nc; int fd; @@ -329,6 +347,18 @@ int tap_get_fd(NetClientState *nc) return s->fd; } +/* + * tap_get_vhost_net() can return NULL if a tap net-device backend is + * created with 'vhost=off' option, 'vhostforce=off' or no vhost or + * vhostforce or vhostfd options at all. Please see net_init_tap_one(). + */ +static VHostNetState *tap_get_vhost_net(NetClientState *nc) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + assert(nc->info->type == NET_CLIENT_DRIVER_TAP); + return s->vhost_net; +} + /* fd support */ static NetClientInfo net_tap_info = { @@ -347,6 +377,7 @@ static NetClientInfo net_tap_info = { .set_vnet_le = tap_set_vnet_le, .set_vnet_be = tap_set_vnet_be, .set_steering_ebpf = tap_set_steering_ebpf, + .get_vhost_net = tap_get_vhost_net, }; static TAPState *net_tap_fd_init(NetClientState *peer, @@ -712,6 +743,11 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer, } options.opaque = (void *)(uintptr_t)vhostfd; options.nvqs = 2; + options.feature_bits = kernel_feature_bits; + options.get_acked_features = NULL; + options.save_acked_features = NULL; + options.max_tx_queue_size = 0; + options.is_vhost_user = false; s->vhost_net = vhost_net_init(&options); if (!s->vhost_net) { @@ -980,13 +1016,6 @@ free_fail: return 0; } -VHostNetState *tap_get_vhost_net(NetClientState *nc) -{ - TAPState *s = DO_UPCAST(TAPState, nc, nc); - assert(nc->info->type == NET_CLIENT_DRIVER_TAP); - return s->vhost_net; -} - int tap_enable(NetClientState *nc) { TAPState *s = DO_UPCAST(TAPState, nc, nc); diff --git a/net/vhost-user-stub.c b/net/vhost-user-stub.c index 52ab4e1..283dee8 100644 --- a/net/vhost-user-stub.c +++ b/net/vhost-user-stub.c @@ -11,7 +11,6 @@ #include "qemu/osdep.h" #include "clients.h" #include "net/vhost_net.h" -#include "net/vhost-user.h" #include "qemu/error-report.h" #include "qapi/error.h" diff --git a/net/vhost-user.c b/net/vhost-user.c index 0b235e5..1c3b8b3 100644 --- a/net/vhost-user.c +++ b/net/vhost-user.c @@ -11,8 +11,9 @@ #include "qemu/osdep.h" #include "clients.h" #include "net/vhost_net.h" -#include "net/vhost-user.h" +#include "hw/virtio/vhost.h" #include "hw/virtio/vhost-user.h" +#include "standard-headers/linux/virtio_net.h" #include "chardev/char-fe.h" #include "qapi/error.h" #include "qapi/qapi-commands-net.h" @@ -22,6 +23,46 @@ #include "qemu/option.h" #include "trace.h" +static const int user_feature_bits[] = { + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_NOTIFICATION_DATA, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + + VIRTIO_F_ANY_LAYOUT, + VIRTIO_F_VERSION_1, + VIRTIO_NET_F_CSUM, + VIRTIO_NET_F_GUEST_CSUM, + VIRTIO_NET_F_GSO, + VIRTIO_NET_F_GUEST_TSO4, + VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, + VIRTIO_NET_F_GUEST_UFO, + VIRTIO_NET_F_HOST_TSO4, + VIRTIO_NET_F_HOST_TSO6, + VIRTIO_NET_F_HOST_ECN, + VIRTIO_NET_F_HOST_UFO, + VIRTIO_NET_F_MRG_RXBUF, + VIRTIO_NET_F_MTU, + VIRTIO_F_IOMMU_PLATFORM, + VIRTIO_F_RING_PACKED, + VIRTIO_F_RING_RESET, + VIRTIO_F_IN_ORDER, + VIRTIO_NET_F_RSS, + VIRTIO_NET_F_RSC_EXT, + VIRTIO_NET_F_HASH_REPORT, + VIRTIO_NET_F_GUEST_USO4, + VIRTIO_NET_F_GUEST_USO6, + VIRTIO_NET_F_HOST_USO, + + /* This bit implies RARP isn't sent by QEMU out of band */ + VIRTIO_NET_F_GUEST_ANNOUNCE, + + VIRTIO_NET_F_MQ, + + VHOST_INVALID_FEATURE_BIT +}; + typedef struct NetVhostUserState { NetClientState nc; CharBackend chr; /* only queue index 0 */ @@ -32,21 +73,21 @@ typedef struct NetVhostUserState { bool started; } NetVhostUserState; -VHostNetState *vhost_user_get_vhost_net(NetClientState *nc) +static struct vhost_net *vhost_user_get_vhost_net(NetClientState *nc) { NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc); assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER); return s->vhost_net; } -uint64_t vhost_user_get_acked_features(NetClientState *nc) +static uint64_t vhost_user_get_acked_features(NetClientState *nc) { NetVhostUserState *s = DO_UPCAST(NetVhostUserState, nc, nc); assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_USER); return s->acked_features; } -void vhost_user_save_acked_features(NetClientState *nc) +static void vhost_user_save_acked_features(NetClientState *nc) { NetVhostUserState *s; @@ -96,6 +137,12 @@ static int vhost_user_start(int queues, NetClientState *ncs[], options.opaque = be; options.busyloop_timeout = 0; options.nvqs = 2; + options.feature_bits = user_feature_bits; + options.max_tx_queue_size = VIRTQUEUE_MAX_SIZE; + options.get_acked_features = vhost_user_get_acked_features; + options.save_acked_features = vhost_user_save_acked_features; + options.is_vhost_user = true; + net = vhost_net_init(&options); if (!net) { error_report("failed to init vhost_net for queue %d", i); @@ -231,6 +278,7 @@ static NetClientInfo net_vhost_user_info = { .set_vnet_be = vhost_user_set_vnet_endianness, .set_vnet_le = vhost_user_set_vnet_endianness, .check_peer_type = vhost_user_check_peer_type, + .get_vhost_net = vhost_user_get_vhost_net, }; static gboolean net_vhost_user_watch(void *do_not_use, GIOCondition cond, @@ -264,7 +312,7 @@ static void chr_closed_bh(void *opaque) vhost_user_save_acked_features(ncs[i]); } - qmp_set_link(name, false, &err); + net_client_set_link(ncs, queues, false); qemu_chr_fe_set_handlers(&s->chr, NULL, NULL, net_vhost_user_event, NULL, opaque, NULL, true); @@ -300,7 +348,7 @@ static void net_vhost_user_event(void *opaque, QEMUChrEvent event) } s->watch = qemu_chr_fe_add_watch(&s->chr, G_IO_HUP, net_vhost_user_watch, s); - qmp_set_link(name, true, &err); + net_client_set_link(ncs, queues, true); s->started = true; qapi_event_send_netdev_vhost_user_connected(name, chr->label); break; diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 58d7389..6a30a44 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -55,7 +55,7 @@ typedef struct VhostVDPAState { * with the exception of VHOST_INVALID_FEATURE_BIT, * which should always be the last entry. */ -const int vdpa_feature_bits[] = { +static const int vdpa_feature_bits[] = { VIRTIO_F_ANY_LAYOUT, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_NOTIFY_ON_EMPTY, @@ -132,7 +132,7 @@ static const uint64_t vdpa_svq_device_features = #define VHOST_VDPA_NET_CVQ_ASID 1 -VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) +static struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); @@ -201,6 +201,11 @@ static int vhost_vdpa_add(NetClientState *ncs, void *be, options.opaque = be; options.busyloop_timeout = 0; options.nvqs = nvqs; + options.feature_bits = vdpa_feature_bits; + options.get_acked_features = NULL; + options.save_acked_features = NULL; + options.max_tx_queue_size = VIRTQUEUE_MAX_SIZE; + options.is_vhost_user = false; net = vhost_net_init(&options); if (!net) { @@ -239,15 +244,35 @@ static void vhost_vdpa_cleanup(NetClientState *nc) g_free(s->vhost_vdpa.shared); } -/** Dummy SetSteeringEBPF to support RSS for vhost-vdpa backend */ -static bool vhost_vdpa_set_steering_ebpf(NetClientState *nc, int prog_fd) +static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc) { + assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + return true; } -static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc) +static bool vhost_vdpa_get_vnet_hash_supported_types(NetClientState *nc, + uint32_t *types) { assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA); + VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); + uint64_t features = s->vhost_vdpa.dev->features; + int fd = s->vhost_vdpa.shared->device_fd; + struct { + struct vhost_vdpa_config hdr; + uint32_t supported_hash_types; + } config; + + if (!virtio_has_feature(features, VIRTIO_NET_F_HASH_REPORT) && + !virtio_has_feature(features, VIRTIO_NET_F_RSS)) { + return false; + } + + config.hdr.off = offsetof(struct virtio_net_config, supported_hash_types); + config.hdr.len = sizeof(config.supported_hash_types); + + assert(!ioctl(fd, VHOST_VDPA_GET_CONFIG, &config)); + *types = le32_to_cpu(config.supported_hash_types); return true; } @@ -428,10 +453,11 @@ static NetClientInfo net_vhost_vdpa_info = { .stop = vhost_vdpa_net_client_stop, .cleanup = vhost_vdpa_cleanup, .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, + .get_vnet_hash_supported_types = vhost_vdpa_get_vnet_hash_supported_types, .has_ufo = vhost_vdpa_has_ufo, .set_vnet_le = vhost_vdpa_set_vnet_le, .check_peer_type = vhost_vdpa_check_peer_type, - .set_steering_ebpf = vhost_vdpa_set_steering_ebpf, + .get_vhost_net = vhost_vdpa_get_vhost_net, }; static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index, @@ -838,13 +864,13 @@ static int vhost_vdpa_net_load_rss(VhostVDPAState *s, const VirtIONet *n, * configuration only at live migration. */ if (!n->rss_data.enabled || - n->rss_data.hash_types == VIRTIO_NET_HASH_REPORT_NONE) { + n->rss_data.runtime_hash_types == VIRTIO_NET_HASH_REPORT_NONE) { return 0; } table = g_malloc_n(n->rss_data.indirections_len, sizeof(n->rss_data.indirections_table[0])); - cfg.hash_types = cpu_to_le32(n->rss_data.hash_types); + cfg.hash_types = cpu_to_le32(n->rss_data.runtime_hash_types); if (do_rss) { /* @@ -1284,9 +1310,10 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { .stop = vhost_vdpa_net_cvq_stop, .cleanup = vhost_vdpa_cleanup, .has_vnet_hdr = vhost_vdpa_has_vnet_hdr, + .get_vnet_hash_supported_types = vhost_vdpa_get_vnet_hash_supported_types, .has_ufo = vhost_vdpa_has_ufo, .check_peer_type = vhost_vdpa_check_peer_type, - .set_steering_ebpf = vhost_vdpa_set_steering_ebpf, + .get_vhost_net = vhost_vdpa_get_vhost_net, }; /* |